npm - @midscene/core - Versions diffs - 1.2.2-beta-20260116064919.0 → 1.2.2-beta-20260116071350.0 - Mend

@midscene/core 1.2.2-beta-20260116064919.0 → 1.2.2-beta-20260116071350.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (66)

package/dist/es/agent/agent.mjs +5 -2
package/dist/es/agent/agent.mjs.map +1 -1
package/dist/es/agent/tasks.mjs +4 -2
package/dist/es/agent/tasks.mjs.map +1 -1
package/dist/es/agent/utils.mjs +1 -1
package/dist/es/ai-model/auto-glm/actions.mjs +224 -0
package/dist/es/ai-model/auto-glm/actions.mjs.map +1 -0
package/dist/es/ai-model/auto-glm/index.mjs +6 -0
package/dist/es/ai-model/auto-glm/parser.mjs +239 -0
package/dist/es/ai-model/auto-glm/parser.mjs.map +1 -0
package/dist/es/ai-model/auto-glm/planning.mjs +63 -0
package/dist/es/ai-model/auto-glm/planning.mjs.map +1 -0
package/dist/es/ai-model/auto-glm/prompt.mjs +222 -0
package/dist/es/ai-model/auto-glm/prompt.mjs.map +1 -0
package/dist/es/ai-model/auto-glm/util.mjs +6 -0
package/dist/es/ai-model/auto-glm/util.mjs.map +1 -0
package/dist/es/ai-model/index.mjs +2 -1
package/dist/es/ai-model/inspect.mjs +68 -3
package/dist/es/ai-model/inspect.mjs.map +1 -1
package/dist/es/ai-model/service-caller/index.mjs +5 -0
package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
package/dist/es/ai-model/ui-tars-planning.mjs +24 -21
package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -1
package/dist/es/common.mjs +15 -2
package/dist/es/common.mjs.map +1 -1
package/dist/es/service/index.mjs +5 -0
package/dist/es/service/index.mjs.map +1 -1
package/dist/es/utils.mjs +2 -2
package/dist/lib/agent/agent.js +5 -2
package/dist/lib/agent/agent.js.map +1 -1
package/dist/lib/agent/tasks.js +3 -1
package/dist/lib/agent/tasks.js.map +1 -1
package/dist/lib/agent/utils.js +1 -1
package/dist/lib/ai-model/auto-glm/actions.js +258 -0
package/dist/lib/ai-model/auto-glm/actions.js.map +1 -0
package/dist/lib/ai-model/auto-glm/index.js +63 -0
package/dist/lib/ai-model/auto-glm/index.js.map +1 -0
package/dist/lib/ai-model/auto-glm/parser.js +282 -0
package/dist/lib/ai-model/auto-glm/parser.js.map +1 -0
package/dist/lib/ai-model/auto-glm/planning.js +97 -0
package/dist/lib/ai-model/auto-glm/planning.js.map +1 -0
package/dist/lib/ai-model/auto-glm/prompt.js +259 -0
package/dist/lib/ai-model/auto-glm/prompt.js.map +1 -0
package/dist/lib/ai-model/auto-glm/util.js +40 -0
package/dist/lib/ai-model/auto-glm/util.js.map +1 -0
package/dist/lib/ai-model/index.js +15 -11
package/dist/lib/ai-model/inspect.js +67 -2
package/dist/lib/ai-model/inspect.js.map +1 -1
package/dist/lib/ai-model/service-caller/index.js +5 -0
package/dist/lib/ai-model/service-caller/index.js.map +1 -1
package/dist/lib/ai-model/ui-tars-planning.js +24 -21
package/dist/lib/ai-model/ui-tars-planning.js.map +1 -1
package/dist/lib/common.js +18 -2
package/dist/lib/common.js.map +1 -1
package/dist/lib/service/index.js +5 -0
package/dist/lib/service/index.js.map +1 -1
package/dist/lib/utils.js +2 -2
package/dist/types/ai-model/auto-glm/actions.d.ts +77 -0
package/dist/types/ai-model/auto-glm/index.d.ts +6 -0
package/dist/types/ai-model/auto-glm/parser.d.ts +18 -0
package/dist/types/ai-model/auto-glm/planning.d.ts +9 -0
package/dist/types/ai-model/auto-glm/prompt.d.ts +27 -0
package/dist/types/ai-model/auto-glm/util.d.ts +7 -0
package/dist/types/ai-model/index.d.ts +1 -0
package/dist/types/common.d.ts +10 -0
package/package.json +2 -2

package/dist/types/ai-model/auto-glm/actions.d.ts ADDED

@@ -0,0 +1,77 @@
+import type { PlanningAction } from '../../types';
+export interface BaseAction {
+    _metadata: string;
+    think?: string;
+}
+export interface TapAction extends BaseAction {
+    _metadata: 'do';
+    action: 'Tap';
+    element: [number, number];
+}
+export interface DoubleTapAction extends BaseAction {
+    _metadata: 'do';
+    action: 'Double Tap';
+    element: [number, number];
+}
+export interface TypeAction extends BaseAction {
+    _metadata: 'do';
+    action: 'Type';
+    text: string;
+}
+export interface SwipeAction extends BaseAction {
+    _metadata: 'do';
+    action: 'Swipe';
+    start: [number, number];
+    end: [number, number];
+}
+export interface LongPressAction extends BaseAction {
+    _metadata: 'do';
+    action: 'Long Press';
+    element: [number, number];
+}
+export interface LaunchAction extends BaseAction {
+    _metadata: 'do';
+    action: 'Launch';
+    app: string;
+}
+export interface BackAction extends BaseAction {
+    _metadata: 'do';
+    action: 'Back';
+}
+export interface HomeAction extends BaseAction {
+    _metadata: 'do';
+    action: 'Home';
+}
+export interface WaitAction extends BaseAction {
+    _metadata: 'do';
+    action: 'Wait';
+    durationMs: number;
+}
+export interface InteractAction extends BaseAction {
+    _metadata: 'do';
+    action: 'Interact';
+}
+export interface CallAPIAction extends BaseAction {
+    _metadata: 'do';
+    action: 'Call_API';
+    instruction: string;
+}
+export interface TakeoverAction extends BaseAction {
+    _metadata: 'do';
+    action: 'Take_over';
+    message: string;
+}
+export interface NoteAction extends BaseAction {
+    _metadata: 'do';
+    action: 'Note';
+    message: string;
+}
+export interface FinishAction extends BaseAction {
+    _metadata: 'finish';
+    message: string;
+}
+export type ParsedAction = TapAction | DoubleTapAction | TypeAction | SwipeAction | LongPressAction | LaunchAction | BackAction | HomeAction | WaitAction | InteractAction | CallAPIAction | TakeoverAction | NoteAction | FinishAction;
+export declare function transformAutoGLMAction(action: ParsedAction, size: {
+    width: number;
+    height: number;
+}): PlanningAction[];

package/dist/types/ai-model/auto-glm/index.d.ts ADDED

@@ -0,0 +1,6 @@
+export { getAutoGLMLocatePrompt, getAutoGLMPlanPrompt } from './prompt';
+export { parseAutoGLMLocateResponse, parseAutoGLMResponse, parseAction, } from './parser';
+export { autoGLMPlanning } from './planning';
+export { transformAutoGLMAction } from './actions';
+export { isAutoGLM } from './util';
+export type { ParsedAction } from './actions';

package/dist/types/ai-model/auto-glm/parser.d.ts ADDED

@@ -0,0 +1,18 @@
+import type { ParsedAction } from './actions';
+export declare const extractValueAfter: (src: string, key: string) => string;
+export declare function parseAction(response: {
+    think: string;
+    content: string;
+}): ParsedAction;
+export declare function parseAutoGLMResponse(content: string): {
+    think: string;
+    content: string;
+};
+export declare function parseAutoGLMLocateResponse(rawResponse: string): {
+    think: string;
+    coordinates: {
+        x: number;
+        y: number;
+    } | null;
+    error?: string;
+};

package/dist/types/ai-model/auto-glm/planning.d.ts ADDED

@@ -0,0 +1,9 @@
+import type { PlanningAIResponse, UIContext } from '../../types';
+import type { IModelConfig } from '@midscene/shared/env';
+import type { ConversationHistory } from '../conversation-history';
+export declare function autoGLMPlanning(userInstruction: string, options: {
+    conversationHistory: ConversationHistory;
+    context: UIContext;
+    modelConfig: IModelConfig;
+    actionContext?: string;
+}): Promise<PlanningAIResponse>;

package/dist/types/ai-model/auto-glm/prompt.d.ts ADDED

@@ -0,0 +1,27 @@
+/**
+ * Auto-GLM Prompt Templates
+ *
+ * Portions of this file are derived from Open-AutoGLM
+ * Copyright (c) 2024 zai-org
+ * Licensed under the Apache License, Version 2.0
+ *
+ * Source: https://github.com/zai-org/Open-AutoGLM
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Modifications:
+ * - Adapted prompts for Midscene.js integration
+ */
+import type { TVlModeTypes } from '@midscene/shared/env';
+export declare const getAutoGLMPlanPrompt: (vlMode: TVlModeTypes | undefined) => string;
+export declare const getAutoGLMLocatePrompt: (vlMode: TVlModeTypes | undefined) => string;

package/dist/types/ai-model/auto-glm/util.d.ts ADDED

@@ -0,0 +1,7 @@
+import type { TVlModeTypes } from '@midscene/shared/env';
+/**
+ * Check if the vlMode is auto-glm or auto-glm-multilingual
+ * @param vlMode The VL mode to check
+ * @returns true if vlMode is auto-glm or auto-glm-multilingual
+ */
+export declare function isAutoGLM(vlMode: TVlModeTypes | undefined): boolean;

package/dist/types/ai-model/index.d.ts CHANGED

@@ -6,6 +6,7 @@ export { generateYamlTest, generateYamlTestStream, } from './prompt/yaml-generat
 export type { ChatCompletionMessageParam } from 'openai/resources/index';
 export { AiLocateElement, AiExtractElementInfo, AiLocateSection, AiJudgeOrderSensitive, } from './inspect';
 export { plan } from './llm-planning';
+export { autoGLMPlanning } from './auto-glm/planning';
 export { adaptBboxToRect } from '../common';
 export { uiTarsPlanning, resizeImageForUiTars } from './ui-tars-planning';
 export { ConversationHistory, type ConversationHistoryOptions, } from './conversation-history';

package/dist/types/common.d.ts CHANGED

@@ -5,6 +5,16 @@ import type { TVlModeTypes } from '@midscene/shared/env';
 import { z } from 'zod';
 export type AIArgs = ChatCompletionMessageParam[];
 type AdaptBboxInput = number[] | string[] | string | (number[] | string[])[];
+/**
+ * Convert a point coordinate [0, 1000] to a small bbox [0, 1000]
+ * Creates a small bbox around the center point in the same coordinate space
+ *
+ * @param x - X coordinate in [0, 1000] range
+ * @param y - Y coordinate in [0, 1000] range
+ * @param bboxSize - Size of the bbox to create (default: 20)
+ * @returns [x1, y1, x2, y2] bbox in [0, 1000] coordinate space
+ */
+export declare function pointToBbox(x: number, y: number, bboxSize?: number): [number, number, number, number];
 export declare function fillBboxParam(locate: PlanningLocateParam, width: number, height: number, rightLimit: number, bottomLimit: number, vlMode: TVlModeTypes | undefined): PlanningLocateParam;
 export declare function adaptQwen2_5Bbox(bbox: number[]): [number, number, number, number];
 export declare function adaptDoubaoBbox(bbox: string[] | number[] | string, width: number, height: number): [number, number, number, number];

package/package.json CHANGED

@@ -1,7 +1,7 @@
 {
   "name": "@midscene/core",
   "description": "Automate browser actions, extract data, and perform assertions using AI. It offers JavaScript SDK, Chrome extension, and support for scripting in YAML. See https://midscenejs.com/ for details.",
-  "version": "1.2.2-beta-20260116064919.0",
+  "version": "1.2.2-beta-20260116071350.0",
   "repository": "https://github.com/web-infra-dev/midscene",
   "homepage": "https://midscenejs.com/",
   "main": "./dist/lib/index.js",
@@ -89,7 +89,7 @@
     "semver": "7.5.2",
     "undici": "^6.0.0",
     "zod": "3.24.3",
-    "@midscene/shared": "1.2.2-beta-20260116064919.0"
+    "@midscene/shared": "1.2.2-beta-20260116071350.0"
   },
   "devDependencies": {
     "@rslib/core": "^0.18.3",