@midscene/core 1.2.2-beta-20260115034338.0 → 1.2.2-beta-20260115090041.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/dist/es/agent/agent.mjs +5 -2
  2. package/dist/es/agent/agent.mjs.map +1 -1
  3. package/dist/es/agent/tasks.mjs +4 -2
  4. package/dist/es/agent/tasks.mjs.map +1 -1
  5. package/dist/es/agent/utils.mjs +1 -1
  6. package/dist/es/ai-model/auto-glm/actions.mjs +227 -0
  7. package/dist/es/ai-model/auto-glm/actions.mjs.map +1 -0
  8. package/dist/es/ai-model/auto-glm/index.mjs +6 -0
  9. package/dist/es/ai-model/auto-glm/parser.mjs +239 -0
  10. package/dist/es/ai-model/auto-glm/parser.mjs.map +1 -0
  11. package/dist/es/ai-model/auto-glm/planning.mjs +63 -0
  12. package/dist/es/ai-model/auto-glm/planning.mjs.map +1 -0
  13. package/dist/es/ai-model/auto-glm/prompt.mjs +222 -0
  14. package/dist/es/ai-model/auto-glm/prompt.mjs.map +1 -0
  15. package/dist/es/ai-model/auto-glm/util.mjs +22 -0
  16. package/dist/es/ai-model/auto-glm/util.mjs.map +1 -0
  17. package/dist/es/ai-model/index.mjs +2 -1
  18. package/dist/es/ai-model/inspect.mjs +68 -3
  19. package/dist/es/ai-model/inspect.mjs.map +1 -1
  20. package/dist/es/ai-model/latest-locate-recorder.mjs +29 -0
  21. package/dist/es/ai-model/latest-locate-recorder.mjs.map +1 -0
  22. package/dist/es/ai-model/service-caller/index.mjs +5 -0
  23. package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
  24. package/dist/es/ai-model/ui-tars-planning.mjs +41 -29
  25. package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -1
  26. package/dist/es/service/index.mjs +5 -0
  27. package/dist/es/service/index.mjs.map +1 -1
  28. package/dist/es/utils.mjs +2 -2
  29. package/dist/lib/agent/agent.js +5 -2
  30. package/dist/lib/agent/agent.js.map +1 -1
  31. package/dist/lib/agent/tasks.js +3 -1
  32. package/dist/lib/agent/tasks.js.map +1 -1
  33. package/dist/lib/agent/utils.js +1 -1
  34. package/dist/lib/ai-model/auto-glm/actions.js +261 -0
  35. package/dist/lib/ai-model/auto-glm/actions.js.map +1 -0
  36. package/dist/lib/ai-model/auto-glm/index.js +66 -0
  37. package/dist/lib/ai-model/auto-glm/index.js.map +1 -0
  38. package/dist/lib/ai-model/auto-glm/parser.js +282 -0
  39. package/dist/lib/ai-model/auto-glm/parser.js.map +1 -0
  40. package/dist/lib/ai-model/auto-glm/planning.js +97 -0
  41. package/dist/lib/ai-model/auto-glm/planning.js.map +1 -0
  42. package/dist/lib/ai-model/auto-glm/prompt.js +259 -0
  43. package/dist/lib/ai-model/auto-glm/prompt.js.map +1 -0
  44. package/dist/lib/ai-model/auto-glm/util.js +62 -0
  45. package/dist/lib/ai-model/auto-glm/util.js.map +1 -0
  46. package/dist/lib/ai-model/index.js +15 -11
  47. package/dist/lib/ai-model/inspect.js +67 -2
  48. package/dist/lib/ai-model/inspect.js.map +1 -1
  49. package/dist/lib/ai-model/latest-locate-recorder.js +63 -0
  50. package/dist/lib/ai-model/latest-locate-recorder.js.map +1 -0
  51. package/dist/lib/ai-model/service-caller/index.js +5 -0
  52. package/dist/lib/ai-model/service-caller/index.js.map +1 -1
  53. package/dist/lib/ai-model/ui-tars-planning.js +41 -29
  54. package/dist/lib/ai-model/ui-tars-planning.js.map +1 -1
  55. package/dist/lib/service/index.js +5 -0
  56. package/dist/lib/service/index.js.map +1 -1
  57. package/dist/lib/utils.js +2 -2
  58. package/dist/types/ai-model/auto-glm/actions.d.ts +77 -0
  59. package/dist/types/ai-model/auto-glm/index.d.ts +6 -0
  60. package/dist/types/ai-model/auto-glm/parser.d.ts +18 -0
  61. package/dist/types/ai-model/auto-glm/planning.d.ts +9 -0
  62. package/dist/types/ai-model/auto-glm/prompt.d.ts +27 -0
  63. package/dist/types/ai-model/auto-glm/util.d.ts +16 -0
  64. package/dist/types/ai-model/index.d.ts +1 -0
  65. package/dist/types/ai-model/latest-locate-recorder.d.ts +14 -0
  66. package/package.json +2 -2
@@ -0,0 +1,77 @@
1
+ import type { PlanningAction } from '../../types';
2
+ export interface BaseAction {
3
+ _metadata: string;
4
+ think?: string;
5
+ }
6
+ export interface TapAction extends BaseAction {
7
+ _metadata: 'do';
8
+ action: 'Tap';
9
+ element: [number, number];
10
+ }
11
+ export interface DoubleTapAction extends BaseAction {
12
+ _metadata: 'do';
13
+ action: 'Double Tap';
14
+ element: [number, number];
15
+ }
16
+ export interface TypeAction extends BaseAction {
17
+ _metadata: 'do';
18
+ action: 'Type';
19
+ text: string;
20
+ }
21
+ export interface SwipeAction extends BaseAction {
22
+ _metadata: 'do';
23
+ action: 'Swipe';
24
+ start: [number, number];
25
+ end: [number, number];
26
+ }
27
+ export interface LongPressAction extends BaseAction {
28
+ _metadata: 'do';
29
+ action: 'Long Press';
30
+ element: [number, number];
31
+ }
32
+ export interface LaunchAction extends BaseAction {
33
+ _metadata: 'do';
34
+ action: 'Launch';
35
+ app: string;
36
+ }
37
+ export interface BackAction extends BaseAction {
38
+ _metadata: 'do';
39
+ action: 'Back';
40
+ }
41
+ export interface HomeAction extends BaseAction {
42
+ _metadata: 'do';
43
+ action: 'Home';
44
+ }
45
+ export interface WaitAction extends BaseAction {
46
+ _metadata: 'do';
47
+ action: 'Wait';
48
+ durationMs: number;
49
+ }
50
+ export interface InteractAction extends BaseAction {
51
+ _metadata: 'do';
52
+ action: 'Interact';
53
+ }
54
+ export interface CallAPIAction extends BaseAction {
55
+ _metadata: 'do';
56
+ action: 'Call_API';
57
+ instruction: string;
58
+ }
59
+ export interface TakeoverAction extends BaseAction {
60
+ _metadata: 'do';
61
+ action: 'Take_over';
62
+ message: string;
63
+ }
64
+ export interface NoteAction extends BaseAction {
65
+ _metadata: 'do';
66
+ action: 'Note';
67
+ message: string;
68
+ }
69
+ export interface FinishAction extends BaseAction {
70
+ _metadata: 'finish';
71
+ message: string;
72
+ }
73
+ export type ParsedAction = TapAction | DoubleTapAction | TypeAction | SwipeAction | LongPressAction | LaunchAction | BackAction | HomeAction | WaitAction | InteractAction | CallAPIAction | TakeoverAction | NoteAction | FinishAction;
74
+ export declare function transformAutoGLMAction(action: ParsedAction, size: {
75
+ width: number;
76
+ height: number;
77
+ }): PlanningAction[];
@@ -0,0 +1,6 @@
1
+ export { getAutoGLMLocatePrompt, getAutoGLMPlanPrompt } from './prompt';
2
+ export { parseAutoGLMLocateResponse, parseAutoGLMResponse, parseAction, } from './parser';
3
+ export { autoGLMPlanning } from './planning';
4
+ export { transformAutoGLMAction } from './actions';
5
+ export { autoGLMCoordinateToBbox, isAutoGLM } from './util';
6
+ export type { ParsedAction } from './actions';
@@ -0,0 +1,18 @@
1
+ import type { ParsedAction } from './actions';
2
+ export declare const extractValueAfter: (src: string, key: string) => string;
3
+ export declare function parseAction(response: {
4
+ think: string;
5
+ content: string;
6
+ }): ParsedAction;
7
+ export declare function parseAutoGLMResponse(content: string): {
8
+ think: string;
9
+ content: string;
10
+ };
11
+ export declare function parseAutoGLMLocateResponse(rawResponse: string): {
12
+ think: string;
13
+ coordinates: {
14
+ x: number;
15
+ y: number;
16
+ } | null;
17
+ error?: string;
18
+ };
@@ -0,0 +1,9 @@
1
+ import type { PlanningAIResponse, UIContext } from '../../types';
2
+ import type { IModelConfig } from '@midscene/shared/env';
3
+ import type { ConversationHistory } from '../conversation-history';
4
+ export declare function autoGLMPlanning(userInstruction: string, options: {
5
+ conversationHistory: ConversationHistory;
6
+ context: UIContext;
7
+ modelConfig: IModelConfig;
8
+ actionContext?: string;
9
+ }): Promise<PlanningAIResponse>;
@@ -0,0 +1,27 @@
1
+ /**
2
+ * Auto-GLM Prompt Templates
3
+ *
4
+ * Portions of this file are derived from Open-AutoGLM
5
+ * Copyright (c) 2024 zai-org
6
+ * Licensed under the Apache License, Version 2.0
7
+ *
8
+ * Source: https://github.com/zai-org/Open-AutoGLM
9
+ *
10
+ * Licensed under the Apache License, Version 2.0 (the "License");
11
+ * you may not use this file except in compliance with the License.
12
+ * You may obtain a copy of the License at
13
+ *
14
+ * http://www.apache.org/licenses/LICENSE-2.0
15
+ *
16
+ * Unless required by applicable law or agreed to in writing, software
17
+ * distributed under the License is distributed on an "AS IS" BASIS,
18
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19
+ * See the License for the specific language governing permissions and
20
+ * limitations under the License.
21
+ *
22
+ * Modifications:
23
+ * - Adapted prompts for Midscene.js integration
24
+ */
25
+ import type { TVlModeTypes } from '@midscene/shared/env';
26
+ export declare const getAutoGLMPlanPrompt: (vlMode: TVlModeTypes | undefined) => string;
27
+ export declare const getAutoGLMLocatePrompt: (vlMode: TVlModeTypes | undefined) => string;
@@ -0,0 +1,16 @@
1
+ import type { TVlModeTypes } from '@midscene/shared/env';
2
+ /**
3
+ * Auto-GLM coordinate system range: [0, AUTO_GLM_COORDINATE_MAX]
4
+ */
5
+ export declare const AUTO_GLM_COORDINATE_MAX = 1000;
6
+ /**
7
+ * Check if the vlMode is auto-glm or auto-glm-multilingual
8
+ * @param vlMode The VL mode to check
9
+ * @returns true if vlMode is auto-glm or auto-glm-multilingual
10
+ */
11
+ export declare function isAutoGLM(vlMode: TVlModeTypes | undefined): boolean;
12
+ /**
13
+ * Convert auto-glm coordinate [0,999] to bbox
14
+ * Auto-glm uses [0,999] coordinate system, maps to image size, and creates a 10x10 bbox around the point
15
+ */
16
+ export declare function autoGLMCoordinateToBbox(x: number, y: number, width: number, height: number): [number, number, number, number];
@@ -6,6 +6,7 @@ export { generateYamlTest, generateYamlTestStream, } from './prompt/yaml-generat
6
6
  export type { ChatCompletionMessageParam } from 'openai/resources/index';
7
7
  export { AiLocateElement, AiExtractElementInfo, AiLocateSection, AiJudgeOrderSensitive, } from './inspect';
8
8
  export { plan } from './llm-planning';
9
+ export { autoGLMPlanning } from './auto-glm/planning';
9
10
  export { adaptBboxToRect } from '../common';
10
11
  export { uiTarsPlanning, resizeImageForUiTars } from './ui-tars-planning';
11
12
  export { ConversationHistory, type ConversationHistoryOptions, } from './conversation-history';
@@ -0,0 +1,14 @@
1
+ interface ILocate {
2
+ prompt: string;
3
+ bbox: [number, number, number, number];
4
+ }
5
+ export declare class LatestLocateRecorder {
6
+ latestLocate: ILocate | undefined;
7
+ source: string;
8
+ recordLocate(locate: ILocate, source: string): void;
9
+ getLatestLocate(): {
10
+ locate: ILocate | undefined;
11
+ source: string;
12
+ };
13
+ }
14
+ export {};
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@midscene/core",
3
3
  "description": "Automate browser actions, extract data, and perform assertions using AI. It offers JavaScript SDK, Chrome extension, and support for scripting in YAML. See https://midscenejs.com/ for details.",
4
- "version": "1.2.2-beta-20260115034338.0",
4
+ "version": "1.2.2-beta-20260115090041.0",
5
5
  "repository": "https://github.com/web-infra-dev/midscene",
6
6
  "homepage": "https://midscenejs.com/",
7
7
  "main": "./dist/lib/index.js",
@@ -89,7 +89,7 @@
89
89
  "semver": "7.5.2",
90
90
  "undici": "^6.0.0",
91
91
  "zod": "3.24.3",
92
- "@midscene/shared": "1.2.2-beta-20260115034338.0"
92
+ "@midscene/shared": "1.2.2-beta-20260115090041.0"
93
93
  },
94
94
  "devDependencies": {
95
95
  "@rslib/core": "^0.18.3",