@rpascene/shared 0.30.11 → 0.30.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -31,6 +31,15 @@ interface IModelConfigKeys {
31
31
  * Extra
32
32
  */
33
33
  vlMode: string;
34
+ domain: string;
35
+ yht_access_token: string;
36
+ model: string;
37
+ baseURL: string;
38
+ chatType: string;
39
+ modelCategory: string;
40
+ stream: string;
41
+ temperature: string;
42
+ top_p: string;
34
43
  }
35
44
  export declare const VQA_MODEL_CONFIG_KEYS: IModelConfigKeys;
36
45
  export declare const GROUNDING_MODEL_CONFIG_KEYS: IModelConfigKeys;
@@ -123,14 +123,15 @@ export declare const STRING_ENV_KEYS: readonly ["OPENAI_MAX_TOKENS", "MIDSCENE_A
123
123
  * Can be access at any time
124
124
  */
125
125
  export declare const GLOBAL_ENV_KEYS: readonly ["MIDSCENE_CACHE", "MIDSCENE_LANGSMITH_DEBUG", "MIDSCENE_FORCE_DEEP_THINK", "MIDSCENE_MCP_USE_PUPPETEER_MODE", "MIDSCENE_MCP_ANDROID_MODE", "MIDSCENE_CACHE_MAX_FILENAME_LENGTH", "MIDSCENE_REPLANNING_CYCLE_LIMIT", "OPENAI_MAX_TOKENS", "MIDSCENE_ADB_PATH", "MIDSCENE_ADB_REMOTE_HOST", "MIDSCENE_ADB_REMOTE_PORT", "MIDSCENE_ANDROID_IME_STRATEGY", "MIDSCENE_IOS_DEVICE_UDID", "MIDSCENE_IOS_SIMULATOR_UDID", "MIDSCENE_REPORT_TAG_NAME", "MIDSCENE_PREFERRED_LANGUAGE", "MATCH_BY_POSITION", "MIDSCENE_MCP_CHROME_PATH", "DOCKER_CONTAINER"];
126
+ export declare const YHT_STRING_ENV_KEYS: readonly ["domain", "yht_access_token", "model", "baseURL"];
127
+ export declare const YHT_NUMBER_ENV_KEYS: readonly ["chatType", "modelCategory", "stream", "temperature", "top_p"];
126
128
  /**
127
129
  * Model related eve keys, used for declare which model to use.
128
130
  * Can be override by both agent.modelConfig and overrideAIConfig
129
131
  * Can only be access after agent.constructor
130
132
  */
131
- export declare const MODEL_ENV_KEYS: readonly ["MIDSCENE_MODEL_NAME", "MIDSCENE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_OPENAI_API_KEY", "MIDSCENE_OPENAI_BASE_URL", "MIDSCENE_OPENAI_USE_AZURE", "MIDSCENE_OPENAI_SOCKS_PROXY", "MIDSCENE_OPENAI_HTTP_PROXY", "MIDSCENE_USE_AZURE_OPENAI", "MIDSCENE_AZURE_OPENAI_SCOPE", "MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_USE_ANTHROPIC_SDK", "MIDSCENE_USE_VLM_UI_TARS", "MIDSCENE_USE_QWEN_VL", "MIDSCENE_USE_QWEN3_VL", "MIDSCENE_USE_DOUBAO_VISION", "MIDSCENE_USE_GEMINI", "MIDSCENE_USE_VL_MODEL", "ANTHROPIC_API_KEY", "MIDSCENE_AZURE_OPENAI_ENDPOINT", "MIDSCENE_AZURE_OPENAI_KEY", "MIDSCENE_AZURE_OPENAI_API_VERSION", "MIDSCENE_AZURE_OPENAI_DEPLOYMENT", "MIDSCENE_VL_MODE", "OPENAI_API_KEY", "OPENAI_BASE_URL", "OPENAI_USE_AZURE", "ANTHROPIC_API_KEY", "AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_KEY", "AZURE_OPENAI_API_VERSION", "AZURE_OPENAI_DEPLOYMENT", "MIDSCENE_VQA_MODEL_NAME", "MIDSCENE_VQA_OPENAI_SOCKS_PROXY", "MIDSCENE_VQA_OPENAI_HTTP_PROXY", "MIDSCENE_VQA_OPENAI_BASE_URL", "MIDSCENE_VQA_OPENAI_API_KEY", "MIDSCENE_VQA_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_VQA_OPENAI_USE_AZURE", "MIDSCENE_VQA_USE_AZURE_OPENAI", "MIDSCENE_VQA_AZURE_OPENAI_SCOPE", "MIDSCENE_VQA_AZURE_OPENAI_KEY", "MIDSCENE_VQA_AZURE_OPENAI_ENDPOINT", "MIDSCENE_VQA_AZURE_OPENAI_API_VERSION", "MIDSCENE_VQA_AZURE_OPENAI_DEPLOYMENT", "MIDSCENE_VQA_AZURE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_VQA_USE_ANTHROPIC_SDK", "MIDSCENE_VQA_ANTHROPIC_API_KEY", "MIDSCENE_VQA_VL_MODE", "MIDSCENE_PLANNING_MODEL_NAME", "MIDSCENE_PLANNING_OPENAI_SOCKS_PROXY", "MIDSCENE_PLANNING_OPENAI_HTTP_PROXY", "MIDSCENE_PLANNING_OPENAI_BASE_URL", "MIDSCENE_PLANNING_OPENAI_API_KEY", "MIDSCENE_PLANNING_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_OPENAI_USE_AZURE", "MIDSCENE_PLANNING_USE_AZURE_OPENAI", "MIDSCENE_PLANNING_AZURE_OPENAI_SCOPE", "MIDSCENE_PLANNING_AZURE_OPENAI_KEY", "MIDSCENE_PLANNING_AZURE_OPENAI_ENDPOINT", "MIDSCENE_PLANNING_AZURE_OPENAI_API_VERSION", "MIDSCENE_PLANNING_AZURE_OPENAI_DEPLOYMENT", "MIDSCENE_PLANNING_AZURE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_USE_ANTHROPIC_SDK", "MIDSCENE_PLANNING_ANTHROPIC_API_KEY", "MIDSCENE_PLANNING_VL_MODE", "MIDSCENE_GROUNDING_MODEL_NAME", "MIDSCENE_GROUNDING_OPENAI_SOCKS_PROXY", "MIDSCENE_GROUNDING_OPENAI_HTTP_PROXY", "MIDSCENE_GROUNDING_OPENAI_BASE_URL", "MIDSCENE_GROUNDING_OPENAI_API_KEY", "MIDSCENE_GROUNDING_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_GROUNDING_OPENAI_USE_AZURE", "MIDSCENE_GROUNDING_USE_AZURE_OPENAI", "MIDSCENE_GROUNDING_AZURE_OPENAI_SCOPE", "MIDSCENE_GROUNDING_AZURE_OPENAI_KEY", "MIDSCENE_GROUNDING_AZURE_OPENAI_ENDPOINT", "MIDSCENE_GROUNDING_AZURE_OPENAI_API_VERSION", "MIDSCENE_GROUNDING_AZURE_OPENAI_DEPLOYMENT", "MIDSCENE_GROUNDING_AZURE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_GROUNDING_USE_ANTHROPIC_SDK", "MIDSCENE_GROUNDING_ANTHROPIC_API_KEY", "MIDSCENE_GROUNDING_VL_MODE"];
132
- export declare const Yht_ENV_KEYS: string[];
133
- export declare const ALL_ENV_KEYS: readonly string[];
133
+ export declare const MODEL_ENV_KEYS: readonly ["MIDSCENE_MODEL_NAME", "MIDSCENE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_OPENAI_API_KEY", "MIDSCENE_OPENAI_BASE_URL", "MIDSCENE_OPENAI_USE_AZURE", "MIDSCENE_OPENAI_SOCKS_PROXY", "MIDSCENE_OPENAI_HTTP_PROXY", "MIDSCENE_USE_AZURE_OPENAI", "MIDSCENE_AZURE_OPENAI_SCOPE", "MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_USE_ANTHROPIC_SDK", "MIDSCENE_USE_VLM_UI_TARS", "MIDSCENE_USE_QWEN_VL", "MIDSCENE_USE_QWEN3_VL", "MIDSCENE_USE_DOUBAO_VISION", "MIDSCENE_USE_GEMINI", "MIDSCENE_USE_VL_MODEL", "ANTHROPIC_API_KEY", "MIDSCENE_AZURE_OPENAI_ENDPOINT", "MIDSCENE_AZURE_OPENAI_KEY", "MIDSCENE_AZURE_OPENAI_API_VERSION", "MIDSCENE_AZURE_OPENAI_DEPLOYMENT", "MIDSCENE_VL_MODE", "OPENAI_API_KEY", "OPENAI_BASE_URL", "OPENAI_USE_AZURE", "ANTHROPIC_API_KEY", "AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_KEY", "AZURE_OPENAI_API_VERSION", "AZURE_OPENAI_DEPLOYMENT", "MIDSCENE_VQA_MODEL_NAME", "MIDSCENE_VQA_OPENAI_SOCKS_PROXY", "MIDSCENE_VQA_OPENAI_HTTP_PROXY", "MIDSCENE_VQA_OPENAI_BASE_URL", "MIDSCENE_VQA_OPENAI_API_KEY", "MIDSCENE_VQA_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_VQA_OPENAI_USE_AZURE", "MIDSCENE_VQA_USE_AZURE_OPENAI", "MIDSCENE_VQA_AZURE_OPENAI_SCOPE", "MIDSCENE_VQA_AZURE_OPENAI_KEY", "MIDSCENE_VQA_AZURE_OPENAI_ENDPOINT", "MIDSCENE_VQA_AZURE_OPENAI_API_VERSION", "MIDSCENE_VQA_AZURE_OPENAI_DEPLOYMENT", "MIDSCENE_VQA_AZURE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_VQA_USE_ANTHROPIC_SDK", "MIDSCENE_VQA_ANTHROPIC_API_KEY", "MIDSCENE_VQA_VL_MODE", "MIDSCENE_PLANNING_MODEL_NAME", "MIDSCENE_PLANNING_OPENAI_SOCKS_PROXY", "MIDSCENE_PLANNING_OPENAI_HTTP_PROXY", "MIDSCENE_PLANNING_OPENAI_BASE_URL", "MIDSCENE_PLANNING_OPENAI_API_KEY", "MIDSCENE_PLANNING_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_OPENAI_USE_AZURE", "MIDSCENE_PLANNING_USE_AZURE_OPENAI", "MIDSCENE_PLANNING_AZURE_OPENAI_SCOPE", "MIDSCENE_PLANNING_AZURE_OPENAI_KEY", "MIDSCENE_PLANNING_AZURE_OPENAI_ENDPOINT", "MIDSCENE_PLANNING_AZURE_OPENAI_API_VERSION", "MIDSCENE_PLANNING_AZURE_OPENAI_DEPLOYMENT", "MIDSCENE_PLANNING_AZURE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_USE_ANTHROPIC_SDK", "MIDSCENE_PLANNING_ANTHROPIC_API_KEY", "MIDSCENE_PLANNING_VL_MODE", "MIDSCENE_GROUNDING_MODEL_NAME", "MIDSCENE_GROUNDING_OPENAI_SOCKS_PROXY", "MIDSCENE_GROUNDING_OPENAI_HTTP_PROXY", "MIDSCENE_GROUNDING_OPENAI_BASE_URL", "MIDSCENE_GROUNDING_OPENAI_API_KEY", "MIDSCENE_GROUNDING_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_GROUNDING_OPENAI_USE_AZURE", "MIDSCENE_GROUNDING_USE_AZURE_OPENAI", "MIDSCENE_GROUNDING_AZURE_OPENAI_SCOPE", "MIDSCENE_GROUNDING_AZURE_OPENAI_KEY", "MIDSCENE_GROUNDING_AZURE_OPENAI_ENDPOINT", "MIDSCENE_GROUNDING_AZURE_OPENAI_API_VERSION", "MIDSCENE_GROUNDING_AZURE_OPENAI_DEPLOYMENT", "MIDSCENE_GROUNDING_AZURE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_GROUNDING_USE_ANTHROPIC_SDK", "MIDSCENE_GROUNDING_ANTHROPIC_API_KEY", "MIDSCENE_GROUNDING_VL_MODE", "domain", "yht_access_token", "model", "baseURL", "chatType", "modelCategory", "stream", "temperature", "top_p"];
134
+ export declare const ALL_ENV_KEYS: readonly [...string[], "MIDSCENE_DEBUG_MODE", "MIDSCENE_DEBUG_AI_PROFILE", "MIDSCENE_DEBUG_AI_RESPONSE", "MIDSCENE_RUN_DIR", "MIDSCENE_CACHE", "MIDSCENE_LANGSMITH_DEBUG", "MIDSCENE_FORCE_DEEP_THINK", "MIDSCENE_MCP_USE_PUPPETEER_MODE", "MIDSCENE_MCP_ANDROID_MODE", "MIDSCENE_CACHE_MAX_FILENAME_LENGTH", "MIDSCENE_REPLANNING_CYCLE_LIMIT", "OPENAI_MAX_TOKENS", "MIDSCENE_ADB_PATH", "MIDSCENE_ADB_REMOTE_HOST", "MIDSCENE_ADB_REMOTE_PORT", "MIDSCENE_ANDROID_IME_STRATEGY", "MIDSCENE_IOS_DEVICE_UDID", "MIDSCENE_IOS_SIMULATOR_UDID", "MIDSCENE_REPORT_TAG_NAME", "MIDSCENE_PREFERRED_LANGUAGE", "MATCH_BY_POSITION", "MIDSCENE_MCP_CHROME_PATH", "DOCKER_CONTAINER", "MIDSCENE_MODEL_NAME", "MIDSCENE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_OPENAI_API_KEY", "MIDSCENE_OPENAI_BASE_URL", "MIDSCENE_OPENAI_USE_AZURE", "MIDSCENE_OPENAI_SOCKS_PROXY", "MIDSCENE_OPENAI_HTTP_PROXY", "MIDSCENE_USE_AZURE_OPENAI", "MIDSCENE_AZURE_OPENAI_SCOPE", "MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_USE_ANTHROPIC_SDK", "MIDSCENE_USE_VLM_UI_TARS", "MIDSCENE_USE_QWEN_VL", "MIDSCENE_USE_QWEN3_VL", "MIDSCENE_USE_DOUBAO_VISION", "MIDSCENE_USE_GEMINI", "MIDSCENE_USE_VL_MODEL", "ANTHROPIC_API_KEY", "MIDSCENE_AZURE_OPENAI_ENDPOINT", "MIDSCENE_AZURE_OPENAI_KEY", "MIDSCENE_AZURE_OPENAI_API_VERSION", "MIDSCENE_AZURE_OPENAI_DEPLOYMENT", "MIDSCENE_VL_MODE", "OPENAI_API_KEY", "OPENAI_BASE_URL", "OPENAI_USE_AZURE", "ANTHROPIC_API_KEY", "AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_KEY", "AZURE_OPENAI_API_VERSION", "AZURE_OPENAI_DEPLOYMENT", "MIDSCENE_VQA_MODEL_NAME", "MIDSCENE_VQA_OPENAI_SOCKS_PROXY", "MIDSCENE_VQA_OPENAI_HTTP_PROXY", "MIDSCENE_VQA_OPENAI_BASE_URL", "MIDSCENE_VQA_OPENAI_API_KEY", "MIDSCENE_VQA_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_VQA_OPENAI_USE_AZURE", "MIDSCENE_VQA_USE_AZURE_OPENAI", "MIDSCENE_VQA_AZURE_OPENAI_SCOPE", "MIDSCENE_VQA_AZURE_OPENAI_KEY", "MIDSCENE_VQA_AZURE_OPENAI_ENDPOINT", "MIDSCENE_VQA_AZURE_OPENAI_API_VERSION", "MIDSCENE_VQA_AZURE_OPENAI_DEPLOYMENT", "MIDSCENE_VQA_AZURE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_VQA_USE_ANTHROPIC_SDK", "MIDSCENE_VQA_ANTHROPIC_API_KEY", "MIDSCENE_VQA_VL_MODE", "MIDSCENE_PLANNING_MODEL_NAME", "MIDSCENE_PLANNING_OPENAI_SOCKS_PROXY", "MIDSCENE_PLANNING_OPENAI_HTTP_PROXY", "MIDSCENE_PLANNING_OPENAI_BASE_URL", "MIDSCENE_PLANNING_OPENAI_API_KEY", "MIDSCENE_PLANNING_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_OPENAI_USE_AZURE", "MIDSCENE_PLANNING_USE_AZURE_OPENAI", "MIDSCENE_PLANNING_AZURE_OPENAI_SCOPE", "MIDSCENE_PLANNING_AZURE_OPENAI_KEY", "MIDSCENE_PLANNING_AZURE_OPENAI_ENDPOINT", "MIDSCENE_PLANNING_AZURE_OPENAI_API_VERSION", "MIDSCENE_PLANNING_AZURE_OPENAI_DEPLOYMENT", "MIDSCENE_PLANNING_AZURE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_USE_ANTHROPIC_SDK", "MIDSCENE_PLANNING_ANTHROPIC_API_KEY", "MIDSCENE_PLANNING_VL_MODE", "MIDSCENE_GROUNDING_MODEL_NAME", "MIDSCENE_GROUNDING_OPENAI_SOCKS_PROXY", "MIDSCENE_GROUNDING_OPENAI_HTTP_PROXY", "MIDSCENE_GROUNDING_OPENAI_BASE_URL", "MIDSCENE_GROUNDING_OPENAI_API_KEY", "MIDSCENE_GROUNDING_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_GROUNDING_OPENAI_USE_AZURE", "MIDSCENE_GROUNDING_USE_AZURE_OPENAI", "MIDSCENE_GROUNDING_AZURE_OPENAI_SCOPE", "MIDSCENE_GROUNDING_AZURE_OPENAI_KEY", "MIDSCENE_GROUNDING_AZURE_OPENAI_ENDPOINT", "MIDSCENE_GROUNDING_AZURE_OPENAI_API_VERSION", "MIDSCENE_GROUNDING_AZURE_OPENAI_DEPLOYMENT", "MIDSCENE_GROUNDING_AZURE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_GROUNDING_USE_ANTHROPIC_SDK", "MIDSCENE_GROUNDING_ANTHROPIC_API_KEY", "MIDSCENE_GROUNDING_VL_MODE", "domain", "yht_access_token", "model", "baseURL", "chatType", "modelCategory", "stream", "temperature", "top_p"];
134
135
  export type TEnvKeys = (typeof ALL_ENV_KEYS)[number];
135
136
  export type TGlobalConfig = Record<TEnvKeys, string | undefined>;
136
137
  export type TVlModeValues = 'qwen-vl' | 'qwen3-vl' | 'doubao-vision' | 'gemini' | 'vlm-ui-tars' | 'vlm-ui-tars-doubao' | 'vlm-ui-tars-doubao-1.5';
@@ -4,4 +4,4 @@ import { type GLOBAL_ENV_KEYS, type MODEL_ENV_KEYS } from './types';
4
4
  export declare const globalModelConfigManager: ModelConfigManager;
5
5
  export declare const globalConfigManager: GlobalConfigManager;
6
6
  export declare const getPreferredLanguage: () => string;
7
- export declare const overrideAIConfig: (newConfig: Partial<Record<(typeof GLOBAL_ENV_KEYS)[number] | (typeof MODEL_ENV_KEYS)[number], string>>, extendMode?: boolean) => void;
7
+ export declare const overrideAIConfig: (newConfig: Partial<Record<(typeof GLOBAL_ENV_KEYS)[number] | (typeof MODEL_ENV_KEYS)[number], string | number>>, extendMode?: boolean) => void;
@@ -27,7 +27,7 @@ export interface ElementNode {
27
27
  }
28
28
  export { descriptionOfTree, traverseTree, treeToList, truncateText, trimAttributes, } from './tree';
29
29
  export { extractTextWithPosition as webExtractTextWithPosition } from './web-extractor';
30
- export { extractTreeNode as webExtractNodeTree } from './web-extractor';
30
+ export { extractTreeNode as webExtractNodeTree, getElementFromPoint } from './web-extractor';
31
31
  export { extractTreeNodeAsString as webExtractNodeTreeAsString } from './web-extractor';
32
32
  export { setNodeHashCacheListOnWindow, getNodeFromCacheList } from './util';
33
33
  export { getXpathsById, getXpathsByPoint, getNodeInfoByXpath, getElementInfoByXpath, getElementXpath, } from './locator';
@@ -16,4 +16,9 @@ export declare function mergeElementAndChildrenRects(node: Node, currentWindow:
16
16
  zoom: number;
17
17
  isVisible: boolean;
18
18
  } | null;
19
+ export declare const getElementFromPoint: (args: {
20
+ x: number;
21
+ y: number;
22
+ containerPaths: string[];
23
+ }) => Promise<any>;
19
24
  export {};
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@rpascene/shared",
3
3
  "description": "RPA shared",
4
- "version": "0.30.11",
4
+ "version": "0.30.13",
5
5
  "repository": "",
6
6
  "homepage": "",
7
7
  "types": "./dist/types/index.d.ts",
@@ -29,53 +29,104 @@ export const DEFAULT_WAIT_FOR_NETWORK_IDLE_CONCURRENCY = 2;
29
29
 
30
30
  export { PLAYWRIGHT_EXAMPLE_CODE, YAML_EXAMPLE_CODE } from './example-code';
31
31
 
32
+
33
+ export const rpasceneCommand = {
34
+ Tap: "Tap",
35
+ RightClick: "RightClick",
36
+ DoubleClick: "DoubleClick",
37
+ Hover: "Hover",
38
+ Input: "Input",
39
+ KeyboardPress: "KeyboardPress",
40
+ Scroll: "Scroll",
41
+ DragAndDrop: "DragAndDrop",
42
+ ClearInput: "ClearInput",
43
+ Sleep: "Sleep",
44
+ LongPress: "LongPress",
45
+ Swipe: "Swipe",
46
+ Reload: "Reload",
47
+ GoBack: "GoBack",
48
+ Navigate: 'Navigate'
49
+ }
50
+
51
+ export const rpastudioCommand = {
52
+ SeleniumClickCommand: "SeleniumClickCommand",
53
+ SeleniumRightClickCommand: "SeleniumRightClickCommand",
54
+ SeleniumDoubleClickCommand: "SeleniumDoubleClickCommand",
55
+ SeleniumMouseHoverCommand: "SeleniumMouseHoverCommand",
56
+ SeleniumSetValueCommand: "SeleniumSetValueCommand",
57
+ SeleniumElementSendKeysCommand: "SeleniumElementSendKeysCommand",
58
+ SeleniumSendKeysCommand: "SeleniumSendKeysCommand",
59
+ SeleniumElementScrollCommand: 'SeleniumElementScrollCommand',
60
+ SeleniumWindowScrollCommand: 'SeleniumWindowScrollCommand',
61
+ SeleniumDragAndDropCommand: 'SeleniumDragAndDropCommand',
62
+ SeleniumClearValueCommand: "SeleniumClearValueCommand",
63
+ PauseCommand: "PauseCommand",
64
+ SeleniumBrowserRefreshCommand: "SeleniumBrowserRefreshCommand",
65
+ SeleniumBrowserNavigateBackCommand: 'SeleniumBrowserNavigateBackCommand',
66
+ SeleniumBrowserNavigateCommand: 'SeleniumBrowserNavigateCommand'
67
+ }
68
+
69
+
70
+
32
71
  export const commandMap: any = {
33
- Tap: {
34
- CommandName: "SeleniumClickCommand",
72
+ [rpasceneCommand.Tap]: {
73
+ CommandName: rpastudioCommand.SeleniumClickCommand,
35
74
  DisplayName: '元素点击'
36
75
  },
37
- RightClick: {
38
- CommandName: "SeleniumRightClickCommand",
76
+ [rpasceneCommand.RightClick]: {
77
+ CommandName: rpastudioCommand.SeleniumRightClickCommand,
39
78
  DisplayName: "元素右键点击"
40
79
  },
41
- DoubleClick: {
42
- CommandName: "SeleniumDoubleClickCommand",
80
+ [rpasceneCommand.DoubleClick]: {
81
+ CommandName: rpastudioCommand.SeleniumDoubleClickCommand,
43
82
  DisplayName: "元素双击"
44
83
  },
45
- Hover: {
46
- CommandName: "SeleniumMouseHoverCommand",
84
+ [rpasceneCommand.Hover]: {
85
+ CommandName: rpastudioCommand.SeleniumMouseHoverCommand,
47
86
  DisplayName: "鼠标悬停"
48
87
  },
49
- Input: {
50
- CommandName: "SeleniumSetValueCommand",
88
+ [rpasceneCommand.Input]: {
89
+ CommandName: rpastudioCommand.SeleniumSetValueCommand,
51
90
  DisplayName: "设置文本"
52
91
  },
53
- KeyboardPress: {
54
- CommandName: "SeleniumSendKeysCommand",
92
+ [rpasceneCommand.KeyboardPress]: {
93
+ CommandName: rpastudioCommand.SeleniumElementSendKeysCommand,
55
94
  DisplayName: "模拟按键"
56
95
  },
57
- Scroll: {
58
- CommandName: "SeleniumElementScrollCommand",
96
+ [rpasceneCommand.Scroll]: {
97
+ CommandName: rpastudioCommand.SeleniumElementScrollCommand,
59
98
  DisplayName: "元素滚动"
60
99
  },
61
- DragAndDrop: {
62
- CommandName: "SeleniumDragDropCommand",
100
+ [rpasceneCommand.DragAndDrop]: {
101
+ CommandName: rpastudioCommand.SeleniumDragAndDropCommand,
63
102
  DisplayName: "元素拖拽(至指定元素)"
64
103
  },
65
- ClearInput: {
66
- CommandName: "SeleniumClearTextCommand",
104
+ [rpasceneCommand.ClearInput]: {
105
+ CommandName: rpastudioCommand.SeleniumClearValueCommand,
67
106
  DisplayName: "清空文本"
68
107
  },
69
- Sleep: {
70
- CommandName: "SeleniumPauseCommand",
108
+ [rpasceneCommand.Sleep]: {
109
+ CommandName: rpastudioCommand.PauseCommand,
71
110
  DisplayName: "暂停脚本"
72
111
  },
73
- LongPress: {
74
- CommandName: "SeleniumSendKeysCommand",
75
- DisplayName: "模拟按键"
112
+ [rpasceneCommand.LongPress]: {
113
+ CommandName: "",
114
+ DisplayName: ""
115
+ },
116
+ [rpasceneCommand.Swipe]: {
117
+ CommandName: "",
118
+ DisplayName: ""
119
+ },
120
+ [rpasceneCommand.Reload]: {
121
+ CommandName: rpastudioCommand.SeleniumBrowserRefreshCommand,
122
+ DisplayName: "刷新"
76
123
  },
77
- Swipe: {
78
- CommandName: "SeleniumClickCommand",
79
- DisplayName: "元素点击"
124
+ [rpasceneCommand.GoBack]: {
125
+ CommandName: rpastudioCommand.SeleniumBrowserNavigateBackCommand,
126
+ DisplayName: "后退"
80
127
  },
128
+ [rpasceneCommand.Navigate]: {
129
+ CommandName: rpastudioCommand.SeleniumBrowserNavigateCommand,
130
+ DisplayName: "打开页面"
131
+ }
81
132
  }
@@ -111,6 +111,20 @@ interface IModelConfigKeys {
111
111
  * Extra
112
112
  */
113
113
  vlMode: string;
114
+
115
+ domain: string,
116
+ yht_access_token: string,
117
+ model: string,
118
+ baseURL: string,
119
+
120
+ chatType: string,
121
+ modelCategory: string,
122
+ stream: string,
123
+ temperature: string,
124
+ top_p: string,
125
+
126
+
127
+
114
128
  }
115
129
 
116
130
  export const VQA_MODEL_CONFIG_KEYS: IModelConfigKeys = {
@@ -146,6 +160,18 @@ export const VQA_MODEL_CONFIG_KEYS: IModelConfigKeys = {
146
160
  * Extra
147
161
  */
148
162
  vlMode: MIDSCENE_VQA_VL_MODE,
163
+
164
+ // 友互通
165
+ domain: 'domain',
166
+ yht_access_token: 'yht_access_token',
167
+ model: 'model',
168
+ baseURL: 'baseURL',
169
+ chatType: 'chatType',
170
+ modelCategory: 'modelCategory',
171
+ stream: 'stream',
172
+ temperature: 'temperature',
173
+ top_p: 'top_p',
174
+
149
175
  } as const;
150
176
 
151
177
  export const GROUNDING_MODEL_CONFIG_KEYS: IModelConfigKeys = {
@@ -181,6 +207,17 @@ export const GROUNDING_MODEL_CONFIG_KEYS: IModelConfigKeys = {
181
207
  * Extra
182
208
  */
183
209
  vlMode: MIDSCENE_GROUNDING_VL_MODE,
210
+
211
+ // 友互通
212
+ domain: 'domain',
213
+ yht_access_token: 'yht_access_token',
214
+ model: 'model',
215
+ baseURL: 'baseURL',
216
+ chatType: 'chatType',
217
+ modelCategory: 'modelCategory',
218
+ stream: 'stream',
219
+ temperature: 'temperature',
220
+ top_p: 'top_p',
184
221
  } as const;
185
222
 
186
223
  export const PLANNING_MODEL_CONFIG_KEYS: IModelConfigKeys = {
@@ -216,6 +253,17 @@ export const PLANNING_MODEL_CONFIG_KEYS: IModelConfigKeys = {
216
253
  * Extra
217
254
  */
218
255
  vlMode: MIDSCENE_PLANNING_VL_MODE,
256
+
257
+ // 友互通
258
+ domain: 'domain',
259
+ yht_access_token: 'yht_access_token',
260
+ model: 'model',
261
+ baseURL: 'baseURL',
262
+ chatType: 'chatType',
263
+ modelCategory: 'modelCategory',
264
+ stream: 'stream',
265
+ temperature: 'temperature',
266
+ top_p: 'top_p',
219
267
  } as const;
220
268
 
221
269
  // modelConfig return default
@@ -252,6 +300,17 @@ export const DEFAULT_MODEL_CONFIG_KEYS: IModelConfigKeys = {
252
300
  * Extra
253
301
  */
254
302
  vlMode: MIDSCENE_VL_MODE,
303
+
304
+ // 友互通
305
+ domain: 'domain',
306
+ yht_access_token: 'yht_access_token',
307
+ model: 'model',
308
+ baseURL: 'baseURL',
309
+ chatType: 'chatType',
310
+ modelCategory: 'modelCategory',
311
+ stream: 'stream',
312
+ temperature: 'temperature',
313
+ top_p: 'top_p',
255
314
  } as const;
256
315
 
257
316
  // read from process.env
@@ -288,4 +347,16 @@ export const DEFAULT_MODEL_CONFIG_KEYS_LEGACY: IModelConfigKeys = {
288
347
  * Extra
289
348
  */
290
349
  vlMode: 'DEFAULT_MODEL_CONFIG_KEYS has no vlMode key',
291
- } as const;
350
+
351
+ // 友互通
352
+ domain: 'domain',
353
+ yht_access_token: 'yht_access_token',
354
+ model: 'model',
355
+ baseURL: 'baseURL',
356
+ chatType: 'chatType',
357
+ modelCategory: 'modelCategory',
358
+ stream: 'stream',
359
+ temperature: 'temperature',
360
+ top_p: 'top_p',
361
+
362
+ } as const;
@@ -30,10 +30,10 @@ type TModelConfigKeys =
30
30
  | typeof DEFAULT_MODEL_CONFIG_KEYS_LEGACY;
31
31
 
32
32
  const KEYS_MAP: Record<TIntent, TModelConfigKeys> = {
33
- VQA: VQA_MODEL_CONFIG_KEYS,
34
- grounding: GROUNDING_MODEL_CONFIG_KEYS,
35
- planning: PLANNING_MODEL_CONFIG_KEYS,
36
- default: DEFAULT_MODEL_CONFIG_KEYS,
33
+ VQA: VQA_MODEL_CONFIG_KEYS, // 验证、断言、提取数据
34
+ grounding: GROUNDING_MODEL_CONFIG_KEYS, // 元素定位
35
+ planning: PLANNING_MODEL_CONFIG_KEYS, // 规划
36
+ default: DEFAULT_MODEL_CONFIG_KEYS, // 默认
37
37
  } as const;
38
38
 
39
39
  /**
@@ -248,6 +248,7 @@ export const decideModelConfigFromEnv = (
248
248
  const keysForEnv =
249
249
  intent === 'default' ? DEFAULT_MODEL_CONFIG_KEYS_LEGACY : KEYS_MAP[intent];
250
250
 
251
+ // 按照modelName判断是否配置了对应的模型策略
251
252
  if (intent !== 'default' && allEnvConfig[keysForEnv.modelName]) {
252
253
  const modelName = allEnvConfig[keysForEnv.modelName]!;
253
254
 
@@ -283,7 +284,7 @@ export const decideModelConfigFromEnv = (
283
284
  }
284
285
 
285
286
  debugLog(`decideModelConfig as legacy logic with intent ${intent}.`);
286
-
287
+ // 走默认环境变量策略
287
288
  // TODO: when fallback to legacy logic, prefer to read MIDSCENE_OPENAI_API_KEY rather than OPENAI_API_KEY
288
289
  const result = decideOpenaiSdkConfig({
289
290
  keys: DEFAULT_MODEL_CONFIG_KEYS_LEGACY,
@@ -39,6 +39,7 @@ export class GlobalConfigManager {
39
39
  * recalculate allEnvConfig every time because process.env can be updated any time
40
40
  */
41
41
  public getAllEnvConfig() {
42
+ // .env中的配置
42
43
  const envConfig = ALL_ENV_KEYS.reduce(
43
44
  (p, name) => {
44
45
  p[name] = process.env[name];
@@ -50,8 +51,10 @@ export class GlobalConfigManager {
50
51
  if (this.override) {
51
52
  const { newConfig, extendMode } = this.override;
52
53
  if (extendMode) {
54
+ // 扩展模式
53
55
  return { ...envConfig, ...newConfig };
54
56
  } else {
57
+ // 覆盖模式
55
58
  return { ...newConfig };
56
59
  }
57
60
  } else {
@@ -134,22 +137,23 @@ export class GlobalConfigManager {
134
137
  >,
135
138
  extendMode = false, // true: merge with global config, false: override global config
136
139
  ) {
137
- // for (const key in newConfig) {
138
- // if (![...GLOBAL_ENV_KEYS, ...MODEL_ENV_KEYS].includes(key as never)) {
139
- // throw new Error(`Failed to override AI config, invalid key: ${key}`);
140
- // }
141
- // const value = newConfig[key as keyof typeof newConfig];
142
- // if (typeof value !== 'string') {
143
- // throw new Error(
144
- // `Failed to override AI config, value for key ${key} must be a string, but got with type ${typeof value}`,
145
- // );
146
- // }
147
- // if (this.keysHaveBeenRead[key]) {
148
- // console.warn(
149
- // `Warning: try to override AI config with key ${key} ,but it has been read.`,
150
- // );
151
- // }
152
- // }
140
+ for (const key in newConfig) {
141
+ if (![...GLOBAL_ENV_KEYS, ...MODEL_ENV_KEYS].includes(key as never)) {
142
+ throw new Error(`Failed to override AI config, invalid key: ${key}`);
143
+ }
144
+ const value = newConfig[key as keyof typeof newConfig];
145
+ if (typeof value !== 'string') {
146
+ throw new Error(
147
+ `Failed to override AI config, value for key ${key} must be a string, but got with type ${typeof value}`,
148
+ );
149
+ }
150
+ // 应该在使用前配置
151
+ if (this.keysHaveBeenRead[key]) {
152
+ console.warn(
153
+ `Warning: try to override AI config with key ${key} ,but it has been read.`,
154
+ );
155
+ }
156
+ }
153
157
  const savedNewConfig = extendMode
154
158
  ? {
155
159
  ...this.override?.newConfig,
package/src/env/types.ts CHANGED
@@ -228,6 +228,22 @@ export const GLOBAL_ENV_KEYS = [
228
228
  ...STRING_ENV_KEYS,
229
229
  ] as const;
230
230
 
231
+ export const YHT_STRING_ENV_KEYS = [
232
+ 'domain',
233
+ 'yht_access_token',
234
+ 'model',
235
+ 'baseURL'
236
+ ] as const;
237
+
238
+ export const YHT_NUMBER_ENV_KEYS = [
239
+ 'chatType',
240
+ 'modelCategory',
241
+ 'stream',
242
+ 'temperature',
243
+ 'top_p',
244
+ ] as const;
245
+
246
+
231
247
  /**
232
248
  * Model related eve keys, used for declare which model to use.
233
249
  * Can be override by both agent.modelConfig and overrideAIConfig
@@ -321,26 +337,15 @@ export const MODEL_ENV_KEYS = [
321
337
  MIDSCENE_GROUNDING_USE_ANTHROPIC_SDK,
322
338
  MIDSCENE_GROUNDING_ANTHROPIC_API_KEY,
323
339
  MIDSCENE_GROUNDING_VL_MODE,
340
+ ...YHT_STRING_ENV_KEYS,
341
+ ...YHT_NUMBER_ENV_KEYS
324
342
  ] as const;
325
343
 
326
- export const Yht_ENV_KEYS = [
327
- 'domain',
328
- 'yht_access_token',
329
- 'chatType',
330
- 'model',
331
- 'modelCategory',
332
- 'stream',
333
- 'temperature',
334
- 'top_p',
335
- 'baseURL'
336
- ]
337
-
338
344
  export const ALL_ENV_KEYS = [
339
345
  ...UNUSED_ENV_KEYS,
340
346
  ...BASIC_ENV_KEYS,
341
347
  ...GLOBAL_ENV_KEYS,
342
- ...MODEL_ENV_KEYS,
343
- ...Yht_ENV_KEYS
348
+ ...MODEL_ENV_KEYS
344
349
  ] as const;
345
350
 
346
351
  export type TEnvKeys = (typeof ALL_ENV_KEYS)[number];
package/src/env/utils.ts CHANGED
@@ -30,10 +30,19 @@ export const overrideAIConfig = (
30
30
  newConfig: Partial<
31
31
  Record<
32
32
  (typeof GLOBAL_ENV_KEYS)[number] | (typeof MODEL_ENV_KEYS)[number],
33
- string
33
+ string | number
34
34
  >
35
35
  >,
36
36
  extendMode = false, // true: merge with global config, false: override global config
37
37
  ) => {
38
- globalConfigManager.overrideAIConfig(newConfig, extendMode);
38
+
39
+ // 全部转string
40
+ const config = Object.entries(newConfig).reduce((pre, [key, value]) => {
41
+ return {
42
+ ...pre,
43
+ [key]: String(value)
44
+ }
45
+ }, {})
46
+
47
+ globalConfigManager.overrideAIConfig(config, extendMode);
39
48
  };
@@ -33,7 +33,7 @@ export {
33
33
 
34
34
  export { extractTextWithPosition as webExtractTextWithPosition } from './web-extractor';
35
35
 
36
- export { extractTreeNode as webExtractNodeTree } from './web-extractor';
36
+ export { extractTreeNode as webExtractNodeTree, getElementFromPoint } from './web-extractor';
37
37
 
38
38
  export { extractTreeNodeAsString as webExtractNodeTreeAsString } from './web-extractor';
39
39
 
@@ -554,6 +554,72 @@ if (typeof window !== 'undefined') {
554
554
  error: 'error message'
555
555
  }
556
556
  }
557
+ } else if (action === 'getElementFromPoint') {
558
+ // @ts-ignore
559
+ try {
560
+ return await getElementFromPoint({
561
+ x: data.x,
562
+ y: data.y,
563
+ containerPaths: data.containerPaths
564
+ })
565
+ } catch (error) {
566
+ return {
567
+ error: 'error message'
568
+ }
569
+ }
557
570
  }
558
571
  })
559
572
  }
573
+
574
+ const getElementOffset = (element: any) => {
575
+ const pLeft = parseInt(window.getComputedStyle(element, null).getPropertyValue('padding-left'), 10) || 0;
576
+ const pTop = parseInt(window.getComputedStyle(element, null).getPropertyValue('padding-top'), 10) || 0;
577
+ const bLeft = parseInt(window.getComputedStyle(element, null).getPropertyValue('border-left-width'), 10) || 0;
578
+ const bTop = parseInt(window.getComputedStyle(element, null).getPropertyValue('border-top-width'), 10) || 0;
579
+ return {
580
+ x: pLeft + bLeft,
581
+ y: pTop + bTop,
582
+ };
583
+ };
584
+
585
+ const getCssScale = (element: any) => {
586
+ try {
587
+ const { width, height } = element.getBoundingClientRect();
588
+ return {
589
+ cssScaleX: Number((width / element.offsetWidth).toFixed(1)),
590
+ cssScaleY: Number((height / element.offsetHeight).toFixed(1)),
591
+ };
592
+ } catch (error) {
593
+ return {
594
+ cssScaleX: 1,
595
+ cssScaleY: 1,
596
+ };
597
+ }
598
+ }
599
+
600
+ export const getElementFromPoint = async (args: { x: number, y: number, containerPaths: string[] }) => {
601
+ if (!args.containerPaths) {
602
+ args.containerPaths = []
603
+ }
604
+ const dom: any = document.elementFromPoint(args.x, args.y);
605
+ if (dom?.nodeName === 'IFRAME') {
606
+ const containerPath: any = getContainerPath(dom)
607
+ const bounding = dom.getBoundingClientRect();
608
+ const offset = getElementOffset(dom);
609
+ const { cssScaleX, cssScaleY } = getCssScale(dom);
610
+ return await postWindowMessage(dom.contentWindow, window, {
611
+ action: 'getElementFromPoint',
612
+ data: {
613
+ x: (args.x - bounding.x - offset.x) / cssScaleX,
614
+ y: (args.y - bounding.y - offset.y) / cssScaleY,
615
+ containerPaths: args.containerPaths.concat(containerPath)
616
+ }
617
+ })
618
+ } else {
619
+ const elementInfo = {
620
+ allPaths: getLocators(dom),
621
+ containerPaths: args.containerPaths
622
+ }
623
+ return elementInfo
624
+ }
625
+ }