@midscene/core 1.2.2-beta-20260115034338.0 → 1.2.2-beta-20260115090041.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/agent/agent.mjs +5 -2
- package/dist/es/agent/agent.mjs.map +1 -1
- package/dist/es/agent/tasks.mjs +4 -2
- package/dist/es/agent/tasks.mjs.map +1 -1
- package/dist/es/agent/utils.mjs +1 -1
- package/dist/es/ai-model/auto-glm/actions.mjs +227 -0
- package/dist/es/ai-model/auto-glm/actions.mjs.map +1 -0
- package/dist/es/ai-model/auto-glm/index.mjs +6 -0
- package/dist/es/ai-model/auto-glm/parser.mjs +239 -0
- package/dist/es/ai-model/auto-glm/parser.mjs.map +1 -0
- package/dist/es/ai-model/auto-glm/planning.mjs +63 -0
- package/dist/es/ai-model/auto-glm/planning.mjs.map +1 -0
- package/dist/es/ai-model/auto-glm/prompt.mjs +222 -0
- package/dist/es/ai-model/auto-glm/prompt.mjs.map +1 -0
- package/dist/es/ai-model/auto-glm/util.mjs +22 -0
- package/dist/es/ai-model/auto-glm/util.mjs.map +1 -0
- package/dist/es/ai-model/index.mjs +2 -1
- package/dist/es/ai-model/inspect.mjs +68 -3
- package/dist/es/ai-model/inspect.mjs.map +1 -1
- package/dist/es/ai-model/latest-locate-recorder.mjs +29 -0
- package/dist/es/ai-model/latest-locate-recorder.mjs.map +1 -0
- package/dist/es/ai-model/service-caller/index.mjs +5 -0
- package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
- package/dist/es/ai-model/ui-tars-planning.mjs +41 -29
- package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -1
- package/dist/es/service/index.mjs +5 -0
- package/dist/es/service/index.mjs.map +1 -1
- package/dist/es/utils.mjs +2 -2
- package/dist/lib/agent/agent.js +5 -2
- package/dist/lib/agent/agent.js.map +1 -1
- package/dist/lib/agent/tasks.js +3 -1
- package/dist/lib/agent/tasks.js.map +1 -1
- package/dist/lib/agent/utils.js +1 -1
- package/dist/lib/ai-model/auto-glm/actions.js +261 -0
- package/dist/lib/ai-model/auto-glm/actions.js.map +1 -0
- package/dist/lib/ai-model/auto-glm/index.js +66 -0
- package/dist/lib/ai-model/auto-glm/index.js.map +1 -0
- package/dist/lib/ai-model/auto-glm/parser.js +282 -0
- package/dist/lib/ai-model/auto-glm/parser.js.map +1 -0
- package/dist/lib/ai-model/auto-glm/planning.js +97 -0
- package/dist/lib/ai-model/auto-glm/planning.js.map +1 -0
- package/dist/lib/ai-model/auto-glm/prompt.js +259 -0
- package/dist/lib/ai-model/auto-glm/prompt.js.map +1 -0
- package/dist/lib/ai-model/auto-glm/util.js +62 -0
- package/dist/lib/ai-model/auto-glm/util.js.map +1 -0
- package/dist/lib/ai-model/index.js +15 -11
- package/dist/lib/ai-model/inspect.js +67 -2
- package/dist/lib/ai-model/inspect.js.map +1 -1
- package/dist/lib/ai-model/latest-locate-recorder.js +63 -0
- package/dist/lib/ai-model/latest-locate-recorder.js.map +1 -0
- package/dist/lib/ai-model/service-caller/index.js +5 -0
- package/dist/lib/ai-model/service-caller/index.js.map +1 -1
- package/dist/lib/ai-model/ui-tars-planning.js +41 -29
- package/dist/lib/ai-model/ui-tars-planning.js.map +1 -1
- package/dist/lib/service/index.js +5 -0
- package/dist/lib/service/index.js.map +1 -1
- package/dist/lib/utils.js +2 -2
- package/dist/types/ai-model/auto-glm/actions.d.ts +77 -0
- package/dist/types/ai-model/auto-glm/index.d.ts +6 -0
- package/dist/types/ai-model/auto-glm/parser.d.ts +18 -0
- package/dist/types/ai-model/auto-glm/planning.d.ts +9 -0
- package/dist/types/ai-model/auto-glm/prompt.d.ts +27 -0
- package/dist/types/ai-model/auto-glm/util.d.ts +16 -0
- package/dist/types/ai-model/index.d.ts +1 -0
- package/dist/types/ai-model/latest-locate-recorder.d.ts +14 -0
- package/package.json +2 -2
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
import type { PlanningAction } from '../../types';
|
|
2
|
+
export interface BaseAction {
|
|
3
|
+
_metadata: string;
|
|
4
|
+
think?: string;
|
|
5
|
+
}
|
|
6
|
+
export interface TapAction extends BaseAction {
|
|
7
|
+
_metadata: 'do';
|
|
8
|
+
action: 'Tap';
|
|
9
|
+
element: [number, number];
|
|
10
|
+
}
|
|
11
|
+
export interface DoubleTapAction extends BaseAction {
|
|
12
|
+
_metadata: 'do';
|
|
13
|
+
action: 'Double Tap';
|
|
14
|
+
element: [number, number];
|
|
15
|
+
}
|
|
16
|
+
export interface TypeAction extends BaseAction {
|
|
17
|
+
_metadata: 'do';
|
|
18
|
+
action: 'Type';
|
|
19
|
+
text: string;
|
|
20
|
+
}
|
|
21
|
+
export interface SwipeAction extends BaseAction {
|
|
22
|
+
_metadata: 'do';
|
|
23
|
+
action: 'Swipe';
|
|
24
|
+
start: [number, number];
|
|
25
|
+
end: [number, number];
|
|
26
|
+
}
|
|
27
|
+
export interface LongPressAction extends BaseAction {
|
|
28
|
+
_metadata: 'do';
|
|
29
|
+
action: 'Long Press';
|
|
30
|
+
element: [number, number];
|
|
31
|
+
}
|
|
32
|
+
export interface LaunchAction extends BaseAction {
|
|
33
|
+
_metadata: 'do';
|
|
34
|
+
action: 'Launch';
|
|
35
|
+
app: string;
|
|
36
|
+
}
|
|
37
|
+
export interface BackAction extends BaseAction {
|
|
38
|
+
_metadata: 'do';
|
|
39
|
+
action: 'Back';
|
|
40
|
+
}
|
|
41
|
+
export interface HomeAction extends BaseAction {
|
|
42
|
+
_metadata: 'do';
|
|
43
|
+
action: 'Home';
|
|
44
|
+
}
|
|
45
|
+
export interface WaitAction extends BaseAction {
|
|
46
|
+
_metadata: 'do';
|
|
47
|
+
action: 'Wait';
|
|
48
|
+
durationMs: number;
|
|
49
|
+
}
|
|
50
|
+
export interface InteractAction extends BaseAction {
|
|
51
|
+
_metadata: 'do';
|
|
52
|
+
action: 'Interact';
|
|
53
|
+
}
|
|
54
|
+
export interface CallAPIAction extends BaseAction {
|
|
55
|
+
_metadata: 'do';
|
|
56
|
+
action: 'Call_API';
|
|
57
|
+
instruction: string;
|
|
58
|
+
}
|
|
59
|
+
export interface TakeoverAction extends BaseAction {
|
|
60
|
+
_metadata: 'do';
|
|
61
|
+
action: 'Take_over';
|
|
62
|
+
message: string;
|
|
63
|
+
}
|
|
64
|
+
export interface NoteAction extends BaseAction {
|
|
65
|
+
_metadata: 'do';
|
|
66
|
+
action: 'Note';
|
|
67
|
+
message: string;
|
|
68
|
+
}
|
|
69
|
+
export interface FinishAction extends BaseAction {
|
|
70
|
+
_metadata: 'finish';
|
|
71
|
+
message: string;
|
|
72
|
+
}
|
|
73
|
+
export type ParsedAction = TapAction | DoubleTapAction | TypeAction | SwipeAction | LongPressAction | LaunchAction | BackAction | HomeAction | WaitAction | InteractAction | CallAPIAction | TakeoverAction | NoteAction | FinishAction;
|
|
74
|
+
export declare function transformAutoGLMAction(action: ParsedAction, size: {
|
|
75
|
+
width: number;
|
|
76
|
+
height: number;
|
|
77
|
+
}): PlanningAction[];
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
export { getAutoGLMLocatePrompt, getAutoGLMPlanPrompt } from './prompt';
|
|
2
|
+
export { parseAutoGLMLocateResponse, parseAutoGLMResponse, parseAction, } from './parser';
|
|
3
|
+
export { autoGLMPlanning } from './planning';
|
|
4
|
+
export { transformAutoGLMAction } from './actions';
|
|
5
|
+
export { autoGLMCoordinateToBbox, isAutoGLM } from './util';
|
|
6
|
+
export type { ParsedAction } from './actions';
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import type { ParsedAction } from './actions';
|
|
2
|
+
export declare const extractValueAfter: (src: string, key: string) => string;
|
|
3
|
+
export declare function parseAction(response: {
|
|
4
|
+
think: string;
|
|
5
|
+
content: string;
|
|
6
|
+
}): ParsedAction;
|
|
7
|
+
export declare function parseAutoGLMResponse(content: string): {
|
|
8
|
+
think: string;
|
|
9
|
+
content: string;
|
|
10
|
+
};
|
|
11
|
+
export declare function parseAutoGLMLocateResponse(rawResponse: string): {
|
|
12
|
+
think: string;
|
|
13
|
+
coordinates: {
|
|
14
|
+
x: number;
|
|
15
|
+
y: number;
|
|
16
|
+
} | null;
|
|
17
|
+
error?: string;
|
|
18
|
+
};
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { PlanningAIResponse, UIContext } from '../../types';
|
|
2
|
+
import type { IModelConfig } from '@midscene/shared/env';
|
|
3
|
+
import type { ConversationHistory } from '../conversation-history';
|
|
4
|
+
export declare function autoGLMPlanning(userInstruction: string, options: {
|
|
5
|
+
conversationHistory: ConversationHistory;
|
|
6
|
+
context: UIContext;
|
|
7
|
+
modelConfig: IModelConfig;
|
|
8
|
+
actionContext?: string;
|
|
9
|
+
}): Promise<PlanningAIResponse>;
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Auto-GLM Prompt Templates
|
|
3
|
+
*
|
|
4
|
+
* Portions of this file are derived from Open-AutoGLM
|
|
5
|
+
* Copyright (c) 2024 zai-org
|
|
6
|
+
* Licensed under the Apache License, Version 2.0
|
|
7
|
+
*
|
|
8
|
+
* Source: https://github.com/zai-org/Open-AutoGLM
|
|
9
|
+
*
|
|
10
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
11
|
+
* you may not use this file except in compliance with the License.
|
|
12
|
+
* You may obtain a copy of the License at
|
|
13
|
+
*
|
|
14
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
15
|
+
*
|
|
16
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
17
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
18
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
19
|
+
* See the License for the specific language governing permissions and
|
|
20
|
+
* limitations under the License.
|
|
21
|
+
*
|
|
22
|
+
* Modifications:
|
|
23
|
+
* - Adapted prompts for Midscene.js integration
|
|
24
|
+
*/
|
|
25
|
+
import type { TVlModeTypes } from '@midscene/shared/env';
|
|
26
|
+
export declare const getAutoGLMPlanPrompt: (vlMode: TVlModeTypes | undefined) => string;
|
|
27
|
+
export declare const getAutoGLMLocatePrompt: (vlMode: TVlModeTypes | undefined) => string;
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import type { TVlModeTypes } from '@midscene/shared/env';
|
|
2
|
+
/**
|
|
3
|
+
* Auto-GLM coordinate system range: [0, AUTO_GLM_COORDINATE_MAX]
|
|
4
|
+
*/
|
|
5
|
+
export declare const AUTO_GLM_COORDINATE_MAX = 1000;
|
|
6
|
+
/**
|
|
7
|
+
* Check if the vlMode is auto-glm or auto-glm-multilingual
|
|
8
|
+
* @param vlMode The VL mode to check
|
|
9
|
+
* @returns true if vlMode is auto-glm or auto-glm-multilingual
|
|
10
|
+
*/
|
|
11
|
+
export declare function isAutoGLM(vlMode: TVlModeTypes | undefined): boolean;
|
|
12
|
+
/**
|
|
13
|
+
* Convert auto-glm coordinate [0,999] to bbox
|
|
14
|
+
* Auto-glm uses [0,999] coordinate system, maps to image size, and creates a 10x10 bbox around the point
|
|
15
|
+
*/
|
|
16
|
+
export declare function autoGLMCoordinateToBbox(x: number, y: number, width: number, height: number): [number, number, number, number];
|
|
@@ -6,6 +6,7 @@ export { generateYamlTest, generateYamlTestStream, } from './prompt/yaml-generat
|
|
|
6
6
|
export type { ChatCompletionMessageParam } from 'openai/resources/index';
|
|
7
7
|
export { AiLocateElement, AiExtractElementInfo, AiLocateSection, AiJudgeOrderSensitive, } from './inspect';
|
|
8
8
|
export { plan } from './llm-planning';
|
|
9
|
+
export { autoGLMPlanning } from './auto-glm/planning';
|
|
9
10
|
export { adaptBboxToRect } from '../common';
|
|
10
11
|
export { uiTarsPlanning, resizeImageForUiTars } from './ui-tars-planning';
|
|
11
12
|
export { ConversationHistory, type ConversationHistoryOptions, } from './conversation-history';
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
interface ILocate {
|
|
2
|
+
prompt: string;
|
|
3
|
+
bbox: [number, number, number, number];
|
|
4
|
+
}
|
|
5
|
+
export declare class LatestLocateRecorder {
|
|
6
|
+
latestLocate: ILocate | undefined;
|
|
7
|
+
source: string;
|
|
8
|
+
recordLocate(locate: ILocate, source: string): void;
|
|
9
|
+
getLatestLocate(): {
|
|
10
|
+
locate: ILocate | undefined;
|
|
11
|
+
source: string;
|
|
12
|
+
};
|
|
13
|
+
}
|
|
14
|
+
export {};
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@midscene/core",
|
|
3
3
|
"description": "Automate browser actions, extract data, and perform assertions using AI. It offers JavaScript SDK, Chrome extension, and support for scripting in YAML. See https://midscenejs.com/ for details.",
|
|
4
|
-
"version": "1.2.2-beta-20260115034338.0",
|
|
4
|
+
"version": "1.2.2-beta-20260115090041.0",
|
|
5
5
|
"repository": "https://github.com/web-infra-dev/midscene",
|
|
6
6
|
"homepage": "https://midscenejs.com/",
|
|
7
7
|
"main": "./dist/lib/index.js",
|
|
@@ -89,7 +89,7 @@
|
|
|
89
89
|
"semver": "7.5.2",
|
|
90
90
|
"undici": "^6.0.0",
|
|
91
91
|
"zod": "3.24.3",
|
|
92
|
-
"@midscene/shared": "1.2.2-beta-20260115034338.0"
|
|
92
|
+
"@midscene/shared": "1.2.2-beta-20260115090041.0"
|
|
93
93
|
},
|
|
94
94
|
"devDependencies": {
|
|
95
95
|
"@rslib/core": "^0.18.3",
|