npm - @aiscene/core - Versions diffs - 1.6.2 - Mend

@aiscene/core 1.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (279)

package/LICENSE +21 -0
package/README.md +9 -0
package/dist/es/agent/agent.mjs +749 -0
package/dist/es/agent/agent.mjs.map +1 -0
package/dist/es/agent/common.mjs +0 -0
package/dist/es/agent/execution-session.mjs +41 -0
package/dist/es/agent/execution-session.mjs.map +1 -0
package/dist/es/agent/index.mjs +6 -0
package/dist/es/agent/task-builder.mjs +332 -0
package/dist/es/agent/task-builder.mjs.map +1 -0
package/dist/es/agent/task-cache.mjs +214 -0
package/dist/es/agent/task-cache.mjs.map +1 -0
package/dist/es/agent/tasks.mjs +424 -0
package/dist/es/agent/tasks.mjs.map +1 -0
package/dist/es/agent/ui-utils.mjs +91 -0
package/dist/es/agent/ui-utils.mjs.map +1 -0
package/dist/es/agent/utils.mjs +198 -0
package/dist/es/agent/utils.mjs.map +1 -0
package/dist/es/ai-model/auto-glm/actions.mjs +237 -0
package/dist/es/ai-model/auto-glm/actions.mjs.map +1 -0
package/dist/es/ai-model/auto-glm/index.mjs +6 -0
package/dist/es/ai-model/auto-glm/parser.mjs +239 -0
package/dist/es/ai-model/auto-glm/parser.mjs.map +1 -0
package/dist/es/ai-model/auto-glm/planning.mjs +71 -0
package/dist/es/ai-model/auto-glm/planning.mjs.map +1 -0
package/dist/es/ai-model/auto-glm/prompt.mjs +222 -0
package/dist/es/ai-model/auto-glm/prompt.mjs.map +1 -0
package/dist/es/ai-model/auto-glm/util.mjs +9 -0
package/dist/es/ai-model/auto-glm/util.mjs.map +1 -0
package/dist/es/ai-model/conversation-history.mjs +195 -0
package/dist/es/ai-model/conversation-history.mjs.map +1 -0
package/dist/es/ai-model/index.mjs +11 -0
package/dist/es/ai-model/inspect.mjs +394 -0
package/dist/es/ai-model/inspect.mjs.map +1 -0
package/dist/es/ai-model/llm-planning.mjs +233 -0
package/dist/es/ai-model/llm-planning.mjs.map +1 -0
package/dist/es/ai-model/prompt/common.mjs +7 -0
package/dist/es/ai-model/prompt/common.mjs.map +1 -0
package/dist/es/ai-model/prompt/describe.mjs +66 -0
package/dist/es/ai-model/prompt/describe.mjs.map +1 -0
package/dist/es/ai-model/prompt/extraction.mjs +169 -0
package/dist/es/ai-model/prompt/extraction.mjs.map +1 -0
package/dist/es/ai-model/prompt/llm-locator.mjs +51 -0
package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -0
package/dist/es/ai-model/prompt/llm-planning.mjs +568 -0
package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -0
package/dist/es/ai-model/prompt/llm-section-locator.mjs +44 -0
package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +1 -0
package/dist/es/ai-model/prompt/order-sensitive-judge.mjs +35 -0
package/dist/es/ai-model/prompt/order-sensitive-judge.mjs.map +1 -0
package/dist/es/ai-model/prompt/playwright-generator.mjs +117 -0
package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -0
package/dist/es/ai-model/prompt/ui-tars-planning.mjs +36 -0
package/dist/es/ai-model/prompt/ui-tars-planning.mjs.map +1 -0
package/dist/es/ai-model/prompt/util.mjs +59 -0
package/dist/es/ai-model/prompt/util.mjs.map +1 -0
package/dist/es/ai-model/prompt/yaml-generator.mjs +219 -0
package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -0
package/dist/es/ai-model/service-caller/codex-app-server.mjs +575 -0
package/dist/es/ai-model/service-caller/codex-app-server.mjs.map +1 -0
package/dist/es/ai-model/service-caller/image-detail.mjs +6 -0
package/dist/es/ai-model/service-caller/image-detail.mjs.map +1 -0
package/dist/es/ai-model/service-caller/index.mjs +473 -0
package/dist/es/ai-model/service-caller/index.mjs.map +1 -0
package/dist/es/ai-model/ui-tars-planning.mjs +249 -0
package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -0
package/dist/es/common.mjs +371 -0
package/dist/es/common.mjs.map +1 -0
package/dist/es/device/device-options.mjs +0 -0
package/dist/es/device/index.mjs +341 -0
package/dist/es/device/index.mjs.map +1 -0
package/dist/es/dump/html-utils.mjs +284 -0
package/dist/es/dump/html-utils.mjs.map +1 -0
package/dist/es/dump/image-restoration.mjs +43 -0
package/dist/es/dump/image-restoration.mjs.map +1 -0
package/dist/es/dump/index.mjs +3 -0
package/dist/es/index.mjs +15 -0
package/dist/es/index.mjs.map +1 -0
package/dist/es/report-generator.mjs +162 -0
package/dist/es/report-generator.mjs.map +1 -0
package/dist/es/report.mjs +137 -0
package/dist/es/report.mjs.map +1 -0
package/dist/es/screenshot-item.mjs +105 -0
package/dist/es/screenshot-item.mjs.map +1 -0
package/dist/es/service/index.mjs +274 -0
package/dist/es/service/index.mjs.map +1 -0
package/dist/es/service/utils.mjs +15 -0
package/dist/es/service/utils.mjs.map +1 -0
package/dist/es/skill/index.mjs +38 -0
package/dist/es/skill/index.mjs.map +1 -0
package/dist/es/task-runner.mjs +263 -0
package/dist/es/task-runner.mjs.map +1 -0
package/dist/es/task-timing.mjs +12 -0
package/dist/es/task-timing.mjs.map +1 -0
package/dist/es/tree.mjs +13 -0
package/dist/es/tree.mjs.map +1 -0
package/dist/es/types.mjs +199 -0
package/dist/es/types.mjs.map +1 -0
package/dist/es/utils.mjs +229 -0
package/dist/es/utils.mjs.map +1 -0
package/dist/es/yaml/builder.mjs +13 -0
package/dist/es/yaml/builder.mjs.map +1 -0
package/dist/es/yaml/index.mjs +4 -0
package/dist/es/yaml/player.mjs +434 -0
package/dist/es/yaml/player.mjs.map +1 -0
package/dist/es/yaml/utils.mjs +102 -0
package/dist/es/yaml/utils.mjs.map +1 -0
package/dist/es/yaml.mjs +0 -0
package/dist/lib/agent/agent.js +797 -0
package/dist/lib/agent/agent.js.map +1 -0
package/dist/lib/agent/common.js +5 -0
package/dist/lib/agent/execution-session.js +75 -0
package/dist/lib/agent/execution-session.js.map +1 -0
package/dist/lib/agent/index.js +81 -0
package/dist/lib/agent/index.js.map +1 -0
package/dist/lib/agent/task-builder.js +369 -0
package/dist/lib/agent/task-builder.js.map +1 -0
package/dist/lib/agent/task-cache.js +266 -0
package/dist/lib/agent/task-cache.js.map +1 -0
package/dist/lib/agent/tasks.js +467 -0
package/dist/lib/agent/tasks.js.map +1 -0
package/dist/lib/agent/ui-utils.js +143 -0
package/dist/lib/agent/ui-utils.js.map +1 -0
package/dist/lib/agent/utils.js +275 -0
package/dist/lib/agent/utils.js.map +1 -0
package/dist/lib/ai-model/auto-glm/actions.js +271 -0
package/dist/lib/ai-model/auto-glm/actions.js.map +1 -0
package/dist/lib/ai-model/auto-glm/index.js +66 -0
package/dist/lib/ai-model/auto-glm/index.js.map +1 -0
package/dist/lib/ai-model/auto-glm/parser.js +282 -0
package/dist/lib/ai-model/auto-glm/parser.js.map +1 -0
package/dist/lib/ai-model/auto-glm/planning.js +105 -0
package/dist/lib/ai-model/auto-glm/planning.js.map +1 -0
package/dist/lib/ai-model/auto-glm/prompt.js +259 -0
package/dist/lib/ai-model/auto-glm/prompt.js.map +1 -0
package/dist/lib/ai-model/auto-glm/util.js +46 -0
package/dist/lib/ai-model/auto-glm/util.js.map +1 -0
package/dist/lib/ai-model/conversation-history.js +229 -0
package/dist/lib/ai-model/conversation-history.js.map +1 -0
package/dist/lib/ai-model/index.js +125 -0
package/dist/lib/ai-model/index.js.map +1 -0
package/dist/lib/ai-model/inspect.js +440 -0
package/dist/lib/ai-model/inspect.js.map +1 -0
package/dist/lib/ai-model/llm-planning.js +270 -0
package/dist/lib/ai-model/llm-planning.js.map +1 -0
package/dist/lib/ai-model/prompt/common.js +41 -0
package/dist/lib/ai-model/prompt/common.js.map +1 -0
package/dist/lib/ai-model/prompt/describe.js +100 -0
package/dist/lib/ai-model/prompt/describe.js.map +1 -0
package/dist/lib/ai-model/prompt/extraction.js +209 -0
package/dist/lib/ai-model/prompt/extraction.js.map +1 -0
package/dist/lib/ai-model/prompt/llm-locator.js +88 -0
package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -0
package/dist/lib/ai-model/prompt/llm-planning.js +605 -0
package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -0
package/dist/lib/ai-model/prompt/llm-section-locator.js +81 -0
package/dist/lib/ai-model/prompt/llm-section-locator.js.map +1 -0
package/dist/lib/ai-model/prompt/order-sensitive-judge.js +72 -0
package/dist/lib/ai-model/prompt/order-sensitive-judge.js.map +1 -0
package/dist/lib/ai-model/prompt/playwright-generator.js +178 -0
package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -0
package/dist/lib/ai-model/prompt/ui-tars-planning.js +73 -0
package/dist/lib/ai-model/prompt/ui-tars-planning.js.map +1 -0
package/dist/lib/ai-model/prompt/util.js +105 -0
package/dist/lib/ai-model/prompt/util.js.map +1 -0
package/dist/lib/ai-model/prompt/yaml-generator.js +280 -0
package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -0
package/dist/lib/ai-model/service-caller/codex-app-server.js +624 -0
package/dist/lib/ai-model/service-caller/codex-app-server.js.map +1 -0
package/dist/lib/ai-model/service-caller/image-detail.js +40 -0
package/dist/lib/ai-model/service-caller/image-detail.js.map +1 -0
package/dist/lib/ai-model/service-caller/index.js +538 -0
package/dist/lib/ai-model/service-caller/index.js.map +1 -0
package/dist/lib/ai-model/ui-tars-planning.js +283 -0
package/dist/lib/ai-model/ui-tars-planning.js.map +1 -0
package/dist/lib/common.js +480 -0
package/dist/lib/common.js.map +1 -0
package/dist/lib/device/device-options.js +20 -0
package/dist/lib/device/device-options.js.map +1 -0
package/dist/lib/device/index.js +468 -0
package/dist/lib/device/index.js.map +1 -0
package/dist/lib/dump/html-utils.js +357 -0
package/dist/lib/dump/html-utils.js.map +1 -0
package/dist/lib/dump/image-restoration.js +77 -0
package/dist/lib/dump/image-restoration.js.map +1 -0
package/dist/lib/dump/index.js +60 -0
package/dist/lib/dump/index.js.map +1 -0
package/dist/lib/index.js +146 -0
package/dist/lib/index.js.map +1 -0
package/dist/lib/report-generator.js +200 -0
package/dist/lib/report-generator.js.map +1 -0
package/dist/lib/report.js +171 -0
package/dist/lib/report.js.map +1 -0
package/dist/lib/screenshot-item.js +139 -0
package/dist/lib/screenshot-item.js.map +1 -0
package/dist/lib/service/index.js +308 -0
package/dist/lib/service/index.js.map +1 -0
package/dist/lib/service/utils.js +49 -0
package/dist/lib/service/utils.js.map +1 -0
package/dist/lib/skill/index.js +72 -0
package/dist/lib/skill/index.js.map +1 -0
package/dist/lib/task-runner.js +300 -0
package/dist/lib/task-runner.js.map +1 -0
package/dist/lib/task-timing.js +46 -0
package/dist/lib/task-timing.js.map +1 -0
package/dist/lib/tree.js +53 -0
package/dist/lib/tree.js.map +1 -0
package/dist/lib/types.js +288 -0
package/dist/lib/types.js.map +1 -0
package/dist/lib/utils.js +308 -0
package/dist/lib/utils.js.map +1 -0
package/dist/lib/yaml/builder.js +57 -0
package/dist/lib/yaml/builder.js.map +1 -0
package/dist/lib/yaml/index.js +81 -0
package/dist/lib/yaml/index.js.map +1 -0
package/dist/lib/yaml/player.js +468 -0
package/dist/lib/yaml/player.js.map +1 -0
package/dist/lib/yaml/utils.js +155 -0
package/dist/lib/yaml/utils.js.map +1 -0
package/dist/lib/yaml.js +20 -0
package/dist/lib/yaml.js.map +1 -0
package/dist/types/agent/agent.d.ts +205 -0
package/dist/types/agent/common.d.ts +0 -0
package/dist/types/agent/execution-session.d.ts +36 -0
package/dist/types/agent/index.d.ts +10 -0
package/dist/types/agent/task-builder.d.ts +34 -0
package/dist/types/agent/task-cache.d.ts +49 -0
package/dist/types/agent/tasks.d.ts +69 -0
package/dist/types/agent/ui-utils.d.ts +14 -0
package/dist/types/agent/utils.d.ts +31 -0
package/dist/types/ai-model/auto-glm/actions.d.ts +78 -0
package/dist/types/ai-model/auto-glm/index.d.ts +6 -0
package/dist/types/ai-model/auto-glm/parser.d.ts +18 -0
package/dist/types/ai-model/auto-glm/planning.d.ts +12 -0
package/dist/types/ai-model/auto-glm/prompt.d.ts +27 -0
package/dist/types/ai-model/auto-glm/util.d.ts +13 -0
package/dist/types/ai-model/conversation-history.d.ts +105 -0
package/dist/types/ai-model/index.d.ts +14 -0
package/dist/types/ai-model/inspect.d.ts +67 -0
package/dist/types/ai-model/llm-planning.d.ts +19 -0
package/dist/types/ai-model/prompt/common.d.ts +2 -0
package/dist/types/ai-model/prompt/describe.d.ts +1 -0
package/dist/types/ai-model/prompt/extraction.d.ts +7 -0
package/dist/types/ai-model/prompt/llm-locator.d.ts +3 -0
package/dist/types/ai-model/prompt/llm-planning.d.ts +10 -0
package/dist/types/ai-model/prompt/llm-section-locator.d.ts +3 -0
package/dist/types/ai-model/prompt/order-sensitive-judge.d.ts +2 -0
package/dist/types/ai-model/prompt/playwright-generator.d.ts +26 -0
package/dist/types/ai-model/prompt/ui-tars-planning.d.ts +2 -0
package/dist/types/ai-model/prompt/util.d.ts +33 -0
package/dist/types/ai-model/prompt/yaml-generator.d.ts +100 -0
package/dist/types/ai-model/service-caller/codex-app-server.d.ts +42 -0
package/dist/types/ai-model/service-caller/image-detail.d.ts +2 -0
package/dist/types/ai-model/service-caller/index.d.ts +49 -0
package/dist/types/ai-model/ui-tars-planning.d.ts +72 -0
package/dist/types/common.d.ts +288 -0
package/dist/types/device/device-options.d.ts +142 -0
package/dist/types/device/index.d.ts +2528 -0
package/dist/types/dump/html-utils.d.ts +63 -0
package/dist/types/dump/image-restoration.d.ts +6 -0
package/dist/types/dump/index.d.ts +5 -0
package/dist/types/index.d.ts +17 -0
package/dist/types/report-generator.d.ts +66 -0
package/dist/types/report.d.ts +22 -0
package/dist/types/screenshot-item.d.ts +66 -0
package/dist/types/service/index.d.ts +24 -0
package/dist/types/service/utils.d.ts +2 -0
package/dist/types/skill/index.d.ts +25 -0
package/dist/types/task-runner.d.ts +50 -0
package/dist/types/task-timing.d.ts +8 -0
package/dist/types/tree.d.ts +4 -0
package/dist/types/types.d.ts +669 -0
package/dist/types/utils.d.ts +40 -0
package/dist/types/yaml/builder.d.ts +2 -0
package/dist/types/yaml/index.d.ts +4 -0
package/dist/types/yaml/player.d.ts +34 -0
package/dist/types/yaml/utils.d.ts +9 -0
package/dist/types/yaml.d.ts +215 -0
package/package.json +111 -0

package/dist/types/agent/utils.d.ts ADDED Viewed

@@ -0,0 +1,31 @@
+import type { TMultimodalPrompt, TUserPrompt } from '../common';
+import type { AbstractInterface } from '../device';
+import type { ElementCacheFeature, LocateResultElement, PlanningLocateParam, Rect, UIContext } from '../types';
+import type { TModelFamily } from '@midscene/shared/env';
+import type { TaskCache } from './task-cache';
+export declare function commonContextParser(interfaceInstance: AbstractInterface, _opt: {
+    uploadServerUrl?: string;
+    screenshotShrinkFactor?: number;
+    modelFamily?: TModelFamily;
+}): Promise<UIContext>;
+export declare function getReportFileName(tag?: string): string;
+export declare function printReportMsg(filepath: string): void;
+/**
+ * Get the current execution file name
+ * @returns The name of the current execution file
+ */
+export declare function getCurrentExecutionFile(trace?: string): string | false;
+export declare function generateCacheId(fileName?: string): string;
+export declare function ifPlanLocateParamIsBbox(planLocateParam: PlanningLocateParam): boolean;
+export declare function matchElementFromPlan(planLocateParam: PlanningLocateParam): LocateResultElement | undefined;
+export declare function matchElementFromCache(context: {
+    taskCache?: TaskCache;
+    interfaceInstance: AbstractInterface;
+}, cacheEntry: ElementCacheFeature | undefined, cachePrompt: TUserPrompt, cacheable: boolean | undefined): Promise<LocateResultElement | undefined>;
+export declare const getMidsceneVersion: () => string;
+export declare const parsePrompt: (prompt: TUserPrompt) => {
+    textPrompt: string;
+    multimodalPrompt?: TMultimodalPrompt;
+};
+export declare const transformLogicalElementToScreenshot: (element: LocateResultElement, shrunkShotToLogicalRatio: number) => LocateResultElement;
+export declare const transformLogicalRectToScreenshotRect: (rect: Rect, shrunkShotToLogicalRatio: number) => Rect;

package/dist/types/ai-model/auto-glm/actions.d.ts ADDED Viewed

@@ -0,0 +1,78 @@
+import type { DeviceAction } from '../../device';
+import type { PlanningAction } from '../../types';
+export interface BaseAction {
+    _metadata: string;
+    think?: string;
+}
+export interface TapAction extends BaseAction {
+    _metadata: 'do';
+    action: 'Tap';
+    element: [number, number];
+}
+export interface DoubleTapAction extends BaseAction {
+    _metadata: 'do';
+    action: 'Double Tap';
+    element: [number, number];
+}
+export interface TypeAction extends BaseAction {
+    _metadata: 'do';
+    action: 'Type';
+    text: string;
+}
+export interface SwipeAction extends BaseAction {
+    _metadata: 'do';
+    action: 'Swipe';
+    start: [number, number];
+    end: [number, number];
+}
+export interface LongPressAction extends BaseAction {
+    _metadata: 'do';
+    action: 'Long Press';
+    element: [number, number];
+}
+export interface LaunchAction extends BaseAction {
+    _metadata: 'do';
+    action: 'Launch';
+    app: string;
+}
+export interface BackAction extends BaseAction {
+    _metadata: 'do';
+    action: 'Back';
+}
+export interface HomeAction extends BaseAction {
+    _metadata: 'do';
+    action: 'Home';
+}
+export interface WaitAction extends BaseAction {
+    _metadata: 'do';
+    action: 'Wait';
+    durationMs: number;
+}
+export interface InteractAction extends BaseAction {
+    _metadata: 'do';
+    action: 'Interact';
+}
+export interface CallAPIAction extends BaseAction {
+    _metadata: 'do';
+    action: 'Call_API';
+    instruction: string;
+}
+export interface TakeoverAction extends BaseAction {
+    _metadata: 'do';
+    action: 'Take_over';
+    message: string;
+}
+export interface NoteAction extends BaseAction {
+    _metadata: 'do';
+    action: 'Note';
+    message: string;
+}
+export interface FinishAction extends BaseAction {
+    _metadata: 'finish';
+    message: string;
+}
+export type ParsedAction = TapAction | DoubleTapAction | TypeAction | SwipeAction | LongPressAction | LaunchAction | BackAction | HomeAction | WaitAction | InteractAction | CallAPIAction | TakeoverAction | NoteAction | FinishAction;
+export declare function transformAutoGLMAction(action: ParsedAction, size: {
+    width: number;
+    height: number;
+}, actionSpace?: DeviceAction[]): PlanningAction[];

package/dist/types/ai-model/auto-glm/index.d.ts ADDED Viewed

@@ -0,0 +1,6 @@
+export { getAutoGLMLocatePrompt, getAutoGLMPlanPrompt } from './prompt';
+export { parseAutoGLMLocateResponse, parseAutoGLMResponse, parseAction, } from './parser';
+export { autoGLMPlanning } from './planning';
+export { transformAutoGLMAction } from './actions';
+export { isAutoGLM, isUITars } from './util';
+export type { ParsedAction } from './actions';

package/dist/types/ai-model/auto-glm/parser.d.ts ADDED Viewed

@@ -0,0 +1,18 @@
+import type { ParsedAction } from './actions';
+export declare const extractValueAfter: (src: string, key: string) => string;
+export declare function parseAction(response: {
+    think: string;
+    content: string;
+}): ParsedAction;
+export declare function parseAutoGLMResponse(content: string): {
+    think: string;
+    content: string;
+};
+export declare function parseAutoGLMLocateResponse(rawResponse: string): {
+    think: string;
+    coordinates: {
+        x: number;
+        y: number;
+    } | null;
+    error?: string;
+};

package/dist/types/ai-model/auto-glm/planning.d.ts ADDED Viewed

@@ -0,0 +1,12 @@
+import type { DeviceAction } from '../../device';
+import type { PlanningAIResponse, UIContext } from '../../types';
+import type { IModelConfig } from '@midscene/shared/env';
+import type { ConversationHistory } from '../conversation-history';
+export declare function autoGLMPlanning(userInstruction: string, options: {
+    conversationHistory: ConversationHistory;
+    context: UIContext;
+    modelConfig: IModelConfig;
+    actionContext?: string;
+    actionSpace?: DeviceAction[];
+    abortSignal?: AbortSignal;
+}): Promise<PlanningAIResponse>;

package/dist/types/ai-model/auto-glm/prompt.d.ts ADDED Viewed

@@ -0,0 +1,27 @@
+/**
+ * Auto-GLM Prompt Templates
+ *
+ * Portions of this file are derived from Open-AutoGLM
+ * Copyright (c) 2024 zai-org
+ * Licensed under the Apache License, Version 2.0
+ *
+ * Source: https://github.com/zai-org/Open-AutoGLM
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Modifications:
+ * - Adapted prompts for Midscene.js integration
+ */
+import type { TModelFamily } from '@midscene/shared/env';
+export declare const getAutoGLMPlanPrompt: (modelFamily: TModelFamily | undefined) => string;
+export declare const getAutoGLMLocatePrompt: (modelFamily: TModelFamily | undefined) => string;

package/dist/types/ai-model/auto-glm/util.d.ts ADDED Viewed

@@ -0,0 +1,13 @@
+import type { TModelFamily } from '@midscene/shared/env';
+/**
+ * Check if the modelFamily is auto-glm or auto-glm-multilingual
+ * @param modelFamily The model family to check
+ * @returns true if modelFamily is auto-glm or auto-glm-multilingual
+ */
+export declare function isAutoGLM(modelFamily: TModelFamily | undefined): boolean;
+/**
+ * Check if the modelFamily is a UI-TARS variant
+ * @param modelFamily The model family to check
+ * @returns true if modelFamily is any UI-TARS variant
+ */
+export declare function isUITars(modelFamily: TModelFamily | undefined): boolean;

package/dist/types/ai-model/conversation-history.d.ts ADDED Viewed

@@ -0,0 +1,105 @@
+import type { SubGoal } from '../types';
+import type { ChatCompletionMessageParam } from 'openai/resources/index';
+export interface ConversationHistoryOptions {
+    initialMessages?: ChatCompletionMessageParam[];
+}
+export declare class ConversationHistory {
+    private readonly messages;
+    private subGoals;
+    private memories;
+    private historicalLogs;
+    pendingFeedbackMessage: string;
+    constructor(options?: ConversationHistoryOptions);
+    resetPendingFeedbackMessageIfExists(): void;
+    append(message: ChatCompletionMessageParam): void;
+    seed(messages: ChatCompletionMessageParam[]): void;
+    reset(): void;
+    /**
+     * Snapshot the conversation history, and replace the images with text if the number of images exceeds the limit.
+     * @param maxImages - The maximum number of images to include in the snapshot. Undefined means no limit.
+     * @returns The snapshot of the conversation history.
+     */
+    snapshot(maxImages?: number): ChatCompletionMessageParam[];
+    get length(): number;
+    [Symbol.iterator](): IterableIterator<ChatCompletionMessageParam>;
+    toJSON(): ChatCompletionMessageParam[];
+    /**
+     * Set all sub-goals, replacing any existing ones.
+     * Automatically marks the first pending goal as running.
+     */
+    setSubGoals(subGoals: SubGoal[]): void;
+    /**
+     * Merge sub-goals from update-plan-content.
+     * Preserves existing descriptions when incoming description is empty.
+     *
+     * This handles compact XML updates like:
+     * <sub-goal index="1" status="finished" />
+     */
+    mergeSubGoals(subGoals: SubGoal[]): void;
+    /**
+     * Update a single sub-goal by index.
+     * Clears logs if status or description actually changes.
+     * @returns true if the sub-goal was found and updated, false otherwise
+     */
+    updateSubGoal(index: number, updates: Partial<Omit<SubGoal, 'index'>>): boolean;
+    /**
+     * Mark the first pending sub-goal as running.
+     * Clears logs since status changes.
+     */
+    markFirstPendingAsRunning(): void;
+    /**
+     * Mark a sub-goal as finished.
+     * Automatically marks the next pending goal as running.
+     * @returns true if the sub-goal was found and updated, false otherwise
+     */
+    markSubGoalFinished(index: number): boolean;
+    /**
+     * Mark all sub-goals as finished.
+     * Clears logs for any goal whose status actually changes.
+     */
+    markAllSubGoalsFinished(): void;
+    /**
+     * Append a log entry to the currently running sub-goal.
+     * The log describes an action performed while working on the sub-goal.
+     */
+    appendSubGoalLog(log: string): void;
+    /**
+     * Convert sub-goals to text representation.
+     * Includes actions performed (logs) for the current sub-goal.
+     */
+    subGoalsToText(): string;
+    /**
+     * Append a log entry to the historical logs list.
+     * Used in non-deepThink mode to track executed steps across planning rounds.
+     */
+    appendHistoricalLog(log: string): void;
+    /**
+     * Convert historical logs to text representation.
+     * Provides context about previously executed steps to the model.
+     */
+    historicalLogsToText(): string;
+    /**
+     * Append a memory to the memories list
+     */
+    appendMemory(memory: string): void;
+    /**
+     * Get all memories
+     */
+    getMemories(): string[];
+    /**
+     * Convert memories to text representation
+     */
+    memoriesToText(): string;
+    /**
+     * Clear all memories
+     */
+    clearMemories(): void;
+    /**
+     * Compress the conversation history if it exceeds the threshold.
+     * Removes the oldest messages and replaces them with a single placeholder message.
+     * @param threshold - The number of messages that triggers compression.
+     * @param keepCount - The number of recent messages to keep after compression.
+     * @returns true if compression was performed, false otherwise.
+     */
+    compressHistory(threshold: number, keepCount: number): boolean;
+}

package/dist/types/ai-model/index.d.ts ADDED Viewed

@@ -0,0 +1,14 @@
+export { AIResponseParseError, callAIWithStringResponse, callAIWithObjectResponse, callAI, } from './service-caller/index';
+export { systemPromptToLocateElement } from './prompt/llm-locator';
+export { generatePlaywrightTest, generatePlaywrightTestStream, } from './prompt/playwright-generator';
+export { generateYamlTest, generateYamlTestStream, } from './prompt/yaml-generator';
+export type { ChatCompletionMessageParam } from 'openai/resources/index';
+export { AiLocateElement, AiExtractElementInfo, AiLocateSection, AiJudgeOrderSensitive, } from './inspect';
+export { plan } from './llm-planning';
+export { autoGLMPlanning } from './auto-glm/planning';
+export { adaptBboxToRect } from '../common';
+export { uiTarsPlanning } from './ui-tars-planning';
+export { ConversationHistory, type ConversationHistoryOptions, } from './conversation-history';
+export type { SubGoal, SubGoalStatus } from '../types';
+export type { AIArgs } from '../common';
+export { getMidsceneLocationSchema, PointSchema, SizeSchema, RectSchema, TMultimodalPromptSchema, TUserPromptSchema, type TMultimodalPrompt, type TUserPrompt, findAllMidsceneLocatorField, dumpActionParam, parseActionParam, } from '../common';

package/dist/types/ai-model/inspect.d.ts ADDED Viewed

@@ -0,0 +1,67 @@
+import type { AIDataExtractionResponse, AIUsageInfo, Rect, ServiceExtractOption, UIContext } from '../types';
+import type { IModelConfig } from '@midscene/shared/env';
+import type { LocateResultElement } from '@midscene/shared/types';
+import type { ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam } from 'openai/resources/index';
+import type { TMultimodalPrompt, TUserPrompt } from '../common';
+import { callAIWithObjectResponse } from './service-caller/index';
+export type AIArgs = [
+    ChatCompletionSystemMessageParam,
+    ...ChatCompletionUserMessageParam[]
+];
+export declare function buildSearchAreaConfig(options: {
+    context: UIContext;
+    baseRect: Rect;
+    modelFamily: IModelConfig['modelFamily'];
+}): Promise<{
+    rect: Rect;
+    imageBase64: string;
+    scale: number;
+}>;
+export declare function AiLocateElement(options: {
+    context: UIContext;
+    targetElementDescription: TUserPrompt;
+    searchConfig?: Awaited<ReturnType<typeof AiLocateSection>>;
+    modelConfig: IModelConfig;
+    abortSignal?: AbortSignal;
+}): Promise<{
+    parseResult: {
+        elements: LocateResultElement[];
+        errors?: string[];
+    };
+    rect?: Rect;
+    rawResponse: string;
+    usage?: AIUsageInfo;
+    reasoning_content?: string;
+}>;
+export declare function AiLocateSection(options: {
+    context: UIContext;
+    sectionDescription: TUserPrompt;
+    modelConfig: IModelConfig;
+    abortSignal?: AbortSignal;
+}): Promise<{
+    rect?: Rect;
+    imageBase64?: string;
+    scale?: number;
+    error?: string;
+    rawResponse: string;
+    usage?: AIUsageInfo;
+}>;
+export declare function AiExtractElementInfo<T>(options: {
+    dataQuery: string | Record<string, string>;
+    multimodalPrompt?: TMultimodalPrompt;
+    context: UIContext;
+    pageDescription?: string;
+    extractOption?: ServiceExtractOption;
+    modelConfig: IModelConfig;
+}): Promise<{
+    parseResult: AIDataExtractionResponse<T>;
+    rawResponse: string;
+    usage: AIUsageInfo | undefined;
+    reasoning_content: string | undefined;
+}>;
+export declare function AiJudgeOrderSensitive(description: string, callAIFn: typeof callAIWithObjectResponse<{
+    isOrderSensitive: boolean;
+}>, modelConfig: IModelConfig): Promise<{
+    isOrderSensitive: boolean;
+    usage?: AIUsageInfo;
+}>;

package/dist/types/ai-model/llm-planning.d.ts ADDED Viewed

@@ -0,0 +1,19 @@
+import type { DeepThinkOption, DeviceAction, InterfaceType, PlanningAIResponse, RawResponsePlanningAIResponse, UIContext } from '../types';
+import type { IModelConfig, TModelFamily } from '@midscene/shared/env';
+import type { ConversationHistory } from './conversation-history';
+/**
+ * Parse XML response from LLM and convert to RawResponsePlanningAIResponse
+ */
+export declare function parseXMLPlanningResponse(xmlString: string, modelFamily: TModelFamily | undefined): RawResponsePlanningAIResponse;
+export declare function plan(userInstruction: string, opts: {
+    context: UIContext;
+    interfaceType: InterfaceType;
+    actionSpace: DeviceAction<any>[];
+    actionContext?: string;
+    modelConfig: IModelConfig;
+    conversationHistory: ConversationHistory;
+    includeBbox: boolean;
+    imagesIncludeCount?: number;
+    deepThink?: DeepThinkOption;
+    abortSignal?: AbortSignal;
+}): Promise<PlanningAIResponse>;

package/dist/types/ai-model/prompt/common.d.ts ADDED Viewed

@@ -0,0 +1,2 @@
+import type { TModelFamily } from '@midscene/shared/env';
+export declare function bboxDescription(modelFamily: TModelFamily | undefined): "box_2d bounding box for the target element, should be [ymin, xmin, ymax, xmax] normalized to 0-1000." | "2d bounding box as [xmin, ymin, xmax, ymax]";

package/dist/types/ai-model/prompt/describe.d.ts ADDED Viewed

@@ -0,0 +1 @@
+export declare const elementDescriberInstruction: () => string;

package/dist/types/ai-model/prompt/extraction.d.ts ADDED Viewed

@@ -0,0 +1,7 @@
+import type { AIDataExtractionResponse } from '../../types';
+/**
+ * Parse XML response from LLM and convert to AIDataExtractionResponse
+ */
+export declare function parseXMLExtractionResponse<T>(xmlString: string): AIDataExtractionResponse<T>;
+export declare function systemPromptToExtract(): string;
+export declare const extractDataQueryPrompt: (pageDescription: string, dataQuery: string | Record<string, string>) => string;

package/dist/types/ai-model/prompt/llm-locator.d.ts ADDED Viewed

@@ -0,0 +1,3 @@
+import type { TModelFamily } from '@midscene/shared/env';
+export declare function systemPromptToLocateElement(modelFamily: TModelFamily | undefined): string;
+export declare const findElementPrompt: (targetElementDescription: string) => string;

package/dist/types/ai-model/prompt/llm-planning.d.ts ADDED Viewed

@@ -0,0 +1,10 @@
+import type { DeviceAction } from '../../types';
+import type { TModelFamily } from '@midscene/shared/env';
+export declare const descriptionForAction: (action: DeviceAction<any>, locatorSchemaTypeDescription: string, includeBbox?: boolean) => string; // Renders one device action as a description string; includeBbox is optional here.
+export declare function systemPromptToTaskPlanning({ actionSpace, modelFamily, includeBbox, includeThought, includeSubGoals, }: { // Asynchronously builds the task-planning system prompt from a single options object.
+    actionSpace: DeviceAction<any>[]; // actions available to the prompt builder (presumably rendered via descriptionForAction — confirm)
+    modelFamily: TModelFamily | undefined; // key is required, value may be undefined
+    includeBbox: boolean; // required here, unlike the optional flag on descriptionForAction
+    includeThought?: boolean; // optional; default is not visible in this declaration
+    includeSubGoals?: boolean; // optional; default is not visible in this declaration
+}): Promise<string>;

package/dist/types/ai-model/prompt/llm-section-locator.d.ts ADDED Viewed

@@ -0,0 +1,3 @@
+import type { TModelFamily } from '@midscene/shared/env';
+export declare function systemPromptToLocateSection(modelFamily: TModelFamily | undefined): string; // Returns the section-location system prompt; modelFamily must be passed but may be undefined.
+export declare const sectionLocatorInstruction: (sectionDescription: string) => string; // Builds an instruction string from the section's description.

package/dist/types/ai-model/prompt/order-sensitive-judge.d.ts ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ export declare function systemPromptToJudgeOrderSensitive(): string; // Returns the system prompt for the order-sensitivity judge; takes no arguments.
2	+ export declare const orderSensitiveJudgePrompt: (description: string) => string; // Builds the judge's prompt string from a description.

package/dist/types/ai-model/prompt/playwright-generator.d.ts ADDED Viewed

@@ -0,0 +1,26 @@
+import type { StreamingAIResponse, StreamingCodeGenerationOptions } from '../../types';
+import type { IModelConfig } from '@midscene/shared/env';
+import { type ChromeRecordedEvent, type EventCounts, type EventSummary, type InputDescription, type ProcessedEvent, createEventCounts, createMessageContent, extractInputDescriptions, filterEventsByType, getScreenshotsForLLM, prepareEventSummary, processEventsForLLM, validateEvents } from './yaml-generator';
+export interface PlaywrightGenerationOptions { // Options taken by generatePlaywrightTest / generatePlaywrightTestStream; every field is optional.
+    testName?: string;
+    includeScreenshots?: boolean;
+    includeTimestamps?: boolean;
+    maxScreenshots?: number; // presumably an upper bound on screenshots sent to the model — confirm
+    description?: string;
+    viewportSize?: { // when provided, both width and height are required
+        width: number;
+        height: number;
+    };
+    waitForNetworkIdle?: boolean;
+    waitForNetworkIdleTimeout?: number; // NOTE(review): unit is not stated here (likely milliseconds) — confirm
+}
+export type { ChromeRecordedEvent, EventCounts, InputDescription, ProcessedEvent, EventSummary, }; // Type-only re-exports of the types imported from './yaml-generator'.
+export { getScreenshotsForLLM, filterEventsByType, createEventCounts, extractInputDescriptions, processEventsForLLM, prepareEventSummary, createMessageContent, validateEvents, }; // Value re-exports of the helpers imported from './yaml-generator'.
+/**
+ * Generates Playwright test code from recorded events; the promise resolves to the generated code as a string.
+ */
+export declare const generatePlaywrightTest: (events: ChromeRecordedEvent[], options: PlaywrightGenerationOptions, modelConfig: IModelConfig) => Promise<string>;
+/**
+ * Generates Playwright test code from recorded events with streaming support; options additionally carry StreamingCodeGenerationOptions and the promise resolves to a StreamingAIResponse.
+ */
+export declare const generatePlaywrightTestStream: (events: ChromeRecordedEvent[], options: PlaywrightGenerationOptions & StreamingCodeGenerationOptions, modelConfig: IModelConfig) => Promise<StreamingAIResponse>;

package/dist/types/ai-model/prompt/ui-tars-planning.d.ts ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ export declare function getUiTarsPlanningPrompt(): string; // Returns the UI-TARS planning prompt as a string; takes no arguments.
2	+ export declare const getSummary: (prediction: string) => string; // Derives a summary string from a prediction string. NOTE(review): extraction rule is not visible here — confirm against implementation.

package/dist/types/ai-model/prompt/util.d.ts ADDED Viewed

@@ -0,0 +1,33 @@
+import type { SubGoal } from '../../types';
+/**
+ * Extract the content of the last complete pair of the given XML tag in a string, searching from the end.
+ * This approach handles cases where models prepend thinking content (like <think>...</think>)
+ * before the actual response tags, or when there are incomplete/nested tags.
+ *
+ * Strategy: Find the LAST closing tag, then search backwards for the nearest opening tag.
+ * This ensures we get the last complete tag pair, even if there are incomplete tags before it.
+ *
+ * @param xmlString - The XML string to parse
+ * @param tagName - The name of the tag to extract (case-insensitive)
+ * @returns The trimmed content of the tag, or undefined if not found
+ */
+export declare function extractXMLTag(xmlString: string, tagName: string): string | undefined;
+/**
+ * Parse sub-goals from XML content into an array of SubGoal objects.
+ * Handles both formats:
+ * - <sub-goal index="1" status="pending">description</sub-goal>
+ * - <sub-goal index="1" status="finished" />
+ */
+export declare function parseSubGoalsFromXML(xmlContent: string): SubGoal[];
+/**
+ * Extract indexes of sub-goals marked as finished from <mark-sub-goal-done> content, returned as an array of numbers.
+ */
+export declare function parseMarkFinishedIndexes(xmlContent: string): number[];
+export declare const distanceThreshold = 16; // NOTE(review): unit and usage are not visible here (presumably pixels, compared against distance()) — confirm
+export declare function distance(point1: { // Returns a numeric distance between two {x, y} points; the metric is not visible here (presumably Euclidean) — confirm
+    x: number;
+    y: number;
+}, point2: {
+    x: number;
+    y: number;
+}): number;

package/dist/types/ai-model/prompt/yaml-generator.d.ts ADDED Viewed

@@ -0,0 +1,100 @@
+import type { StreamingAIResponse, StreamingCodeGenerationOptions } from '../../types';
+import type { IModelConfig } from '@midscene/shared/env';
+export interface EventCounts { // Numeric tallies per event category plus an overall total; produced by createEventCounts.
+    navigation: number;
+    click: number;
+    input: number;
+    scroll: number;
+    total: number; // presumably the totalEvents argument of createEventCounts — confirm
+}
+export interface InputDescription { // One entry returned by extractInputDescriptions: a description paired with a value.
+    description: string;
+    value: string;
+}
+export interface ProcessedEvent { // Element type returned by processEventsForLLM; same fields as ChromeRecordedEvent minus its three screenshot fields.
+    type: string;
+    timestamp: number; // NOTE(review): epoch and unit are not stated here — confirm
+    url?: string;
+    title?: string;
+    elementDescription?: string;
+    value?: string;
+    pageInfo?: any; // untyped; shape is not visible in this declaration
+    elementRect?: any; // untyped; shape is not visible in this declaration
+}
+export interface EventSummary { // Aggregate returned by prepareEventSummary; all fields are required.
+    testName: string;
+    startUrl: string;
+    eventCounts: EventCounts;
+    urls: string[];
+    clickDescriptions: string[];
+    inputDescriptions: InputDescription[];
+    events: ProcessedEvent[];
+}
+export interface ChromeRecordedEvent { // Recorded event consumed by the YAML and Playwright generators; only type and timestamp are required.
+    type: string;
+    timestamp: number; // NOTE(review): epoch and unit are not stated here — confirm
+    url?: string;
+    title?: string;
+    elementDescription?: string;
+    value?: string;
+    pageInfo?: any; // untyped; shape is not visible in this declaration
+    elementRect?: any; // untyped; shape is not visible in this declaration
+    screenshotBefore?: string; // string-encoded screenshot (presumably base64 or a URL — confirm)
+    screenshotAfter?: string;
+    screenshotWithBox?: string;
+}
+export interface YamlGenerationOptions { // Options taken by generateYamlTest / generateYamlTestStream; every field is optional.
+    testName?: string;
+    includeTimestamps?: boolean;
+    maxScreenshots?: number; // presumably an upper bound on screenshots sent to the model — confirm
+    description?: string;
+}
+export interface FilteredEvents { // Result of filterEventsByType: one event list per category.
+    navigationEvents: ChromeRecordedEvent[];
+    clickEvents: ChromeRecordedEvent[];
+    inputEvents: ChromeRecordedEvent[];
+    scrollEvents: ChromeRecordedEvent[];
+}
+/**
+ * Get screenshots (as strings) from events for LLM context; maxScreenshots is optional (NOTE(review): presumably caps the result length — confirm).
+ */
+export declare const getScreenshotsForLLM: (events: ChromeRecordedEvent[], maxScreenshots?: number) => string[];
+/**
+ * Filter events by type for easier processing; returns a FilteredEvents object with navigation, click, input and scroll lists.
+ */
+export declare const filterEventsByType: (events: ChromeRecordedEvent[]) => FilteredEvents;
+/**
+ * Create an EventCounts summary from already-filtered events and the total event count.
+ */
+export declare const createEventCounts: (filteredEvents: FilteredEvents, totalEvents: number) => EventCounts;
+/**
+ * Extract InputDescription entries (description plus value) from input events.
+ */
+export declare const extractInputDescriptions: (inputEvents: ChromeRecordedEvent[]) => InputDescription[];
+/**
+ * Process recorded events into ProcessedEvent objects for LLM consumption.
+ */
+export declare const processEventsForLLM: (events: ChromeRecordedEvent[]) => ProcessedEvent[];
+/**
+ * Prepare a comprehensive EventSummary for the LLM; the options object and both of its fields are optional.
+ */
+export declare const prepareEventSummary: (events: ChromeRecordedEvent[], options?: {
+    testName?: string;
+    maxScreenshots?: number;
+}) => EventSummary;
+/**
+ * Create the message content array for the LLM from prompt text, with optional screenshots; the element type is untyped (any[]).
+ */
+export declare const createMessageContent: (promptText: string, screenshots?: string[], includeScreenshots?: boolean) => any[];
+/**
+ * Validate events before processing; returns nothing (NOTE(review): presumably throws on invalid input — confirm against implementation).
+ */
+export declare const validateEvents: (events: ChromeRecordedEvent[]) => void;
+/**
+ * Generates YAML test configuration from recorded events using AI; the promise resolves to the generated YAML as a string.
+ */
+export declare const generateYamlTest: (events: ChromeRecordedEvent[], options: YamlGenerationOptions, modelConfig: IModelConfig) => Promise<string>;
+/**
+ * Generates YAML test configuration from recorded events using AI with streaming support; options additionally carry StreamingCodeGenerationOptions and the promise resolves to a StreamingAIResponse.
+ */
+export declare const generateYamlTestStream: (events: ChromeRecordedEvent[], options: YamlGenerationOptions & StreamingCodeGenerationOptions, modelConfig: IModelConfig) => Promise<StreamingAIResponse>;

package/dist/types/ai-model/service-caller/codex-app-server.d.ts ADDED Viewed

@@ -0,0 +1,42 @@
+import type { AIUsageInfo, DeepThinkOption, StreamingCallback } from '../../types';
+import type { IModelConfig } from '@midscene/shared/env';
+import type { ChatCompletionMessageParam } from 'openai/resources/index';
+type CodexReasoningEffort = 'low' | 'medium' | 'high' | 'xhigh'; // Reasoning-effort levels; resolveCodexReasoningEffort returns one of these or undefined.
+type CodexTextInput = { // Text item of a turn's input, tagged with type: 'text'.
+    type: 'text';
+    text: string;
+    text_elements: any[]; // untyped; element shape is not visible in this declaration
+};
+type CodexImageInput = { // Image item referenced by `url`.
+    type: 'image';
+    url: string;
+};
+type CodexLocalImageInput = { // Image item referenced by `path`.
+    type: 'localImage';
+    path: string;
+};
+type CodexTurnInput = CodexTextInput | CodexImageInput | CodexLocalImageInput; // Union of the three input items, distinguished by their `type` literal.
+type CodexTurnResult = { // Value the promise from callAIWithCodexAppServer resolves to.
+    content: string;
+    reasoning_content?: string; // optional
+    usage?: AIUsageInfo; // optional
+    isStreamed: boolean; // NOTE(review): presumably true when the reply was delivered via streaming — confirm
+};
+export declare const isCodexAppServerProvider: (baseURL?: string) => boolean; // Predicate over an optional baseURL. NOTE(review): the matching rule is not visible here — confirm against implementation.
+export declare const normalizeCodexLocalImagePath: (imageUrl: string, platform?: NodeJS.Platform) => string; // Maps an image URL string to a path string; platform is optional (presumably defaults to the current platform — confirm).
+export declare const resolveCodexReasoningEffort: ({ deepThink, modelConfig, }: { // Picks a reasoning-effort level from the given options; may return undefined.
+    deepThink?: DeepThinkOption; // optional
+    modelConfig: IModelConfig; // required
+}) => CodexReasoningEffort | undefined;
+export declare const buildCodexTurnPayloadFromMessages: (messages: ChatCompletionMessageParam[]) => { // Converts OpenAI chat-completion message params into a turn payload.
+    developerInstructions?: string; // optional; presumably derived from system/developer messages — confirm
+    input: CodexTurnInput[]; // always present
+};
+export declare function callAIWithCodexAppServer(messages: ChatCompletionMessageParam[], modelConfig: IModelConfig, options?: { // Sends the messages via the Codex app server path and resolves to a CodexTurnResult; options is optional.
+    stream?: boolean;
+    onChunk?: StreamingCallback; // presumably invoked per streamed chunk when stream is set — confirm
+    deepThink?: DeepThinkOption;
+    abortSignal?: AbortSignal; // presumably cancels the in-flight call — confirm
+}): Promise<CodexTurnResult>;
+export declare function __shutdownCodexAppServerForTests(): Promise<void>; // Async shutdown hook; the name marks it as intended for tests only.
+export {}; // Empty export: keeps this file a module even though its type aliases are not exported.

package/dist/types/ai-model/service-caller/image-detail.d.ts ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ import type { IModelConfig } from '@midscene/shared/env';
2	+ export declare function shouldForceOriginalImageDetail(modelConfig: Pick<IModelConfig, 'intent' | 'modelFamily'>): boolean;