@midscene/shared 1.0.1-beta-20251024063839.0 → 1.0.1-beta-20251024064637.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/dist/es/env/constants.mjs +28 -28
  2. package/dist/es/env/decide-model-config.mjs +2 -5
  3. package/dist/es/env/init-debug.mjs +6 -6
  4. package/dist/es/env/model-config-manager.mjs +1 -2
  5. package/dist/es/env/types.mjs +53 -57
  6. package/dist/es/extractor/dom-util.mjs +7 -9
  7. package/dist/es/extractor/index.mjs +1 -2
  8. package/dist/es/extractor/tree.mjs +1 -3
  9. package/dist/es/extractor/util.mjs +1 -25
  10. package/dist/es/node/fs.mjs +2 -2
  11. package/dist/lib/env/constants.js +27 -27
  12. package/dist/lib/env/decide-model-config.js +2 -5
  13. package/dist/lib/env/init-debug.js +5 -5
  14. package/dist/lib/env/model-config-manager.js +1 -2
  15. package/dist/lib/env/types.js +130 -140
  16. package/dist/lib/extractor/dom-util.js +7 -9
  17. package/dist/lib/extractor/index.js +7 -14
  18. package/dist/lib/extractor/tree.js +1 -3
  19. package/dist/lib/extractor/util.js +0 -33
  20. package/dist/lib/node/fs.js +2 -2
  21. package/dist/types/env/model-config-manager.d.ts +2 -2
  22. package/dist/types/env/types.d.ts +59 -76
  23. package/dist/types/extractor/dom-util.d.ts +2 -15
  24. package/dist/types/extractor/index.d.ts +0 -1
  25. package/dist/types/extractor/tree.d.ts +1 -4
  26. package/dist/types/extractor/util.d.ts +0 -3
  27. package/dist/types/types/index.d.ts +6 -2
  28. package/package.json +1 -1
  29. package/src/env/constants.ts +52 -54
  30. package/src/env/decide-model-config.ts +2 -20
  31. package/src/env/init-debug.ts +6 -11
  32. package/src/env/model-config-manager.ts +3 -9
  33. package/src/env/types.ts +95 -122
  34. package/src/extractor/dom-util.ts +12 -8
  35. package/src/extractor/index.ts +0 -2
  36. package/src/extractor/locator.ts +0 -1
  37. package/src/extractor/tree.ts +4 -4
  38. package/src/extractor/util.ts +0 -32
  39. package/src/node/fs.ts +1 -1
  40. package/src/types/index.ts +9 -2
@@ -1,4 +1,4 @@
1
- import { NodeType } from '../constants';
1
+ import type { LocateResultElement } from '../types';
2
2
  export declare function isFormElement(node: globalThis.Node): boolean;
3
3
  export declare function isButtonElement(node: globalThis.Node): node is globalThis.HTMLButtonElement;
4
4
  export declare function isAElement(node: globalThis.Node): node is globalThis.HTMLButtonElement;
@@ -10,17 +10,4 @@ export declare function isContainerElement(node: globalThis.Node): node is globa
10
10
  export declare function generateElementByPosition(position: {
11
11
  x: number;
12
12
  y: number;
13
- }): {
14
- id: string;
15
- attributes: {
16
- nodeType: NodeType;
17
- };
18
- rect: {
19
- left: number;
20
- top: number;
21
- width: number;
22
- height: number;
23
- };
24
- content: string;
25
- center: number[];
26
- };
13
+ }): LocateResultElement;
@@ -27,7 +27,6 @@ export { descriptionOfTree, traverseTree, treeToList, truncateText, trimAttribut
27
27
  export { extractTextWithPosition as webExtractTextWithPosition } from './web-extractor';
28
28
  export { extractTreeNode as webExtractNodeTree } from './web-extractor';
29
29
  export { extractTreeNodeAsString as webExtractNodeTreeAsString } from './web-extractor';
30
- export { setNodeHashCacheListOnWindow, getNodeFromCacheList } from './util';
31
30
  export { getXpathsByPoint, getNodeInfoByXpath, getElementInfoByXpath, getElementXpath, } from './locator';
32
31
  export { generateElementByPosition } from './dom-util';
33
32
  export { isNotContainerElement } from './dom-util';
@@ -1,9 +1,6 @@
1
1
  import type { BaseElement, ElementTreeNode } from '../types';
2
2
  export declare function truncateText(text: string | number | object | undefined, maxLength?: number): string;
3
- export declare function trimAttributes(attributes: Record<string, any>, truncateTextLength?: number): {
4
- [key: string]: string;
5
- nodeType: import("./constants").NodeType;
6
- };
3
+ export declare function trimAttributes(attributes: Record<string, any>, truncateTextLength?: number): Record<string, string>;
7
4
  export declare function descriptionOfTree<ElementType extends BaseElement = BaseElement>(tree: ElementTreeNode<ElementType>, truncateTextLength?: number, filterNonTextContent?: boolean, visibleOnly?: boolean): string;
8
5
  export declare function treeToList<T extends BaseElement>(tree: ElementTreeNode<T>): T[];
9
6
  export declare function traverseTree<T extends BaseElement, ReturnNodeType extends BaseElement>(tree: ElementTreeNode<T>, onNode: (node: T) => ReturnNodeType): ElementTreeNode<ReturnNodeType>;
@@ -33,9 +33,6 @@ export declare function elementRect(el: globalThis.HTMLElement | globalThis.Node
33
33
  export declare function validTextNodeContent(node: globalThis.Node): string | false;
34
34
  export declare function getNodeAttributes(node: globalThis.HTMLElement | globalThis.Node, currentWindow: typeof globalThis.window): Record<string, string>;
35
35
  export declare function midsceneGenerateHash(node: globalThis.Node | null, content: string, rect: Rect): string;
36
- export declare function setNodeHashCacheListOnWindow(): void;
37
- export declare function setNodeToCacheList(node: globalThis.Node, id: string): void;
38
- export declare function getNodeFromCacheList(id: string): any;
39
36
  export declare function generateId(numberId: number): string;
40
37
  export declare function setGenerateHashOnWindow(): void;
41
38
  export declare function setMidsceneVisibleRectOnWindow(): void;
@@ -14,7 +14,6 @@ export type Rect = Point & Size & {
14
14
  };
15
15
  export declare abstract class BaseElement {
16
16
  abstract id: string;
17
- abstract indexId?: number;
18
17
  abstract attributes: {
19
18
  nodeType: NodeType;
20
19
  [key: string]: string;
@@ -22,7 +21,6 @@ export declare abstract class BaseElement {
22
21
  abstract content: string;
23
22
  abstract rect: Rect;
24
23
  abstract center: [number, number];
25
- abstract xpaths?: string[];
26
24
  abstract isVisible: boolean;
27
25
  }
28
26
  export interface ElementTreeNode<ElementType extends BaseElement = BaseElement> {
@@ -32,3 +30,9 @@ export interface ElementTreeNode<ElementType extends BaseElement = BaseElement>
32
30
  export interface WebElementInfo extends ElementInfo {
33
31
  zoom: number;
34
32
  }
33
+ export type LocateResultElement = {
34
+ center: [number, number];
35
+ rect: Rect;
36
+ id: string;
37
+ isOrderSensitive?: boolean;
38
+ };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@midscene/shared",
3
- "version": "1.0.1-beta-20251024063839.0",
3
+ "version": "1.0.1-beta-20251024064637.0",
4
4
  "repository": "https://github.com/web-infra-dev/midscene",
5
5
  "homepage": "https://midscenejs.com/",
6
6
  "types": "./dist/types/index.d.ts",
@@ -1,35 +1,33 @@
1
1
  import {
2
- MIDSCENE_GROUNDING_LOCATOR_MODE,
3
- MIDSCENE_GROUNDING_MODEL_API_KEY,
4
- MIDSCENE_GROUNDING_MODEL_BASE_URL,
5
- MIDSCENE_GROUNDING_MODEL_HTTP_PROXY,
6
- MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON,
7
2
  MIDSCENE_GROUNDING_MODEL_NAME,
8
- MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY,
9
- MIDSCENE_LOCATOR_MODE,
10
- MIDSCENE_MODEL_API_KEY,
11
- MIDSCENE_MODEL_BASE_URL,
12
- MIDSCENE_MODEL_HTTP_PROXY,
13
- MIDSCENE_MODEL_INIT_CONFIG_JSON,
3
+ MIDSCENE_GROUNDING_OPENAI_API_KEY,
4
+ MIDSCENE_GROUNDING_OPENAI_BASE_URL,
5
+ MIDSCENE_GROUNDING_OPENAI_HTTP_PROXY,
6
+ MIDSCENE_GROUNDING_OPENAI_INIT_CONFIG_JSON,
7
+ MIDSCENE_GROUNDING_OPENAI_SOCKS_PROXY,
8
+ MIDSCENE_GROUNDING_VL_MODE,
14
9
  MIDSCENE_MODEL_NAME,
15
- MIDSCENE_MODEL_SOCKS_PROXY,
16
- MIDSCENE_PLANNING_LOCATOR_MODE,
17
- MIDSCENE_PLANNING_MODEL_API_KEY,
18
- MIDSCENE_PLANNING_MODEL_BASE_URL,
19
- MIDSCENE_PLANNING_MODEL_HTTP_PROXY,
20
- MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON,
10
+ MIDSCENE_OPENAI_API_KEY,
11
+ MIDSCENE_OPENAI_BASE_URL,
12
+ MIDSCENE_OPENAI_HTTP_PROXY,
13
+ MIDSCENE_OPENAI_INIT_CONFIG_JSON,
14
+ MIDSCENE_OPENAI_SOCKS_PROXY,
21
15
  MIDSCENE_PLANNING_MODEL_NAME,
22
- MIDSCENE_PLANNING_MODEL_SOCKS_PROXY,
23
- MIDSCENE_VQA_LOCATOR_MODE,
24
- MIDSCENE_VQA_MODEL_API_KEY,
25
- MIDSCENE_VQA_MODEL_BASE_URL,
26
- MIDSCENE_VQA_MODEL_HTTP_PROXY,
27
- MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON,
16
+ MIDSCENE_PLANNING_OPENAI_API_KEY,
17
+ MIDSCENE_PLANNING_OPENAI_BASE_URL,
18
+ MIDSCENE_PLANNING_OPENAI_HTTP_PROXY,
19
+ MIDSCENE_PLANNING_OPENAI_INIT_CONFIG_JSON,
20
+ MIDSCENE_PLANNING_OPENAI_SOCKS_PROXY,
21
+ MIDSCENE_PLANNING_VL_MODE,
22
+ MIDSCENE_VL_MODE,
28
23
  // VQA
29
24
  MIDSCENE_VQA_MODEL_NAME,
30
- MIDSCENE_VQA_MODEL_SOCKS_PROXY,
31
- MODEL_API_KEY,
32
- MODEL_BASE_URL,
25
+ MIDSCENE_VQA_OPENAI_API_KEY,
26
+ MIDSCENE_VQA_OPENAI_BASE_URL,
27
+ MIDSCENE_VQA_OPENAI_HTTP_PROXY,
28
+ MIDSCENE_VQA_OPENAI_INIT_CONFIG_JSON,
29
+ MIDSCENE_VQA_OPENAI_SOCKS_PROXY,
30
+ MIDSCENE_VQA_VL_MODE,
33
31
  OPENAI_API_KEY,
34
32
  OPENAI_BASE_URL,
35
33
  } from './types';
@@ -58,18 +56,18 @@ export const VQA_MODEL_CONFIG_KEYS: IModelConfigKeys = {
58
56
  /**
59
57
  * proxy
60
58
  */
61
- socksProxy: MIDSCENE_VQA_MODEL_SOCKS_PROXY,
62
- httpProxy: MIDSCENE_VQA_MODEL_HTTP_PROXY,
59
+ socksProxy: MIDSCENE_VQA_OPENAI_SOCKS_PROXY,
60
+ httpProxy: MIDSCENE_VQA_OPENAI_HTTP_PROXY,
63
61
  /**
64
62
  * OpenAI
65
63
  */
66
- openaiBaseURL: MIDSCENE_VQA_MODEL_BASE_URL,
67
- openaiApiKey: MIDSCENE_VQA_MODEL_API_KEY,
68
- openaiExtraConfig: MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON,
64
+ openaiBaseURL: MIDSCENE_VQA_OPENAI_BASE_URL,
65
+ openaiApiKey: MIDSCENE_VQA_OPENAI_API_KEY,
66
+ openaiExtraConfig: MIDSCENE_VQA_OPENAI_INIT_CONFIG_JSON,
69
67
  /**
70
68
  * Extra
71
69
  */
72
- vlMode: MIDSCENE_VQA_LOCATOR_MODE,
70
+ vlMode: MIDSCENE_VQA_VL_MODE,
73
71
  } as const;
74
72
 
75
73
  export const GROUNDING_MODEL_CONFIG_KEYS: IModelConfigKeys = {
@@ -77,18 +75,18 @@ export const GROUNDING_MODEL_CONFIG_KEYS: IModelConfigKeys = {
77
75
  /**
78
76
  * proxy
79
77
  */
80
- socksProxy: MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY,
81
- httpProxy: MIDSCENE_GROUNDING_MODEL_HTTP_PROXY,
78
+ socksProxy: MIDSCENE_GROUNDING_OPENAI_SOCKS_PROXY,
79
+ httpProxy: MIDSCENE_GROUNDING_OPENAI_HTTP_PROXY,
82
80
  /**
83
81
  * OpenAI
84
82
  */
85
- openaiBaseURL: MIDSCENE_GROUNDING_MODEL_BASE_URL,
86
- openaiApiKey: MIDSCENE_GROUNDING_MODEL_API_KEY,
87
- openaiExtraConfig: MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON,
83
+ openaiBaseURL: MIDSCENE_GROUNDING_OPENAI_BASE_URL,
84
+ openaiApiKey: MIDSCENE_GROUNDING_OPENAI_API_KEY,
85
+ openaiExtraConfig: MIDSCENE_GROUNDING_OPENAI_INIT_CONFIG_JSON,
88
86
  /**
89
87
  * Extra
90
88
  */
91
- vlMode: MIDSCENE_GROUNDING_LOCATOR_MODE,
89
+ vlMode: MIDSCENE_GROUNDING_VL_MODE,
92
90
  } as const;
93
91
 
94
92
  export const PLANNING_MODEL_CONFIG_KEYS: IModelConfigKeys = {
@@ -96,18 +94,18 @@ export const PLANNING_MODEL_CONFIG_KEYS: IModelConfigKeys = {
96
94
  /**
97
95
  * proxy
98
96
  */
99
- socksProxy: MIDSCENE_PLANNING_MODEL_SOCKS_PROXY,
100
- httpProxy: MIDSCENE_PLANNING_MODEL_HTTP_PROXY,
97
+ socksProxy: MIDSCENE_PLANNING_OPENAI_SOCKS_PROXY,
98
+ httpProxy: MIDSCENE_PLANNING_OPENAI_HTTP_PROXY,
101
99
  /**
102
100
  * OpenAI
103
101
  */
104
- openaiBaseURL: MIDSCENE_PLANNING_MODEL_BASE_URL,
105
- openaiApiKey: MIDSCENE_PLANNING_MODEL_API_KEY,
106
- openaiExtraConfig: MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON,
102
+ openaiBaseURL: MIDSCENE_PLANNING_OPENAI_BASE_URL,
103
+ openaiApiKey: MIDSCENE_PLANNING_OPENAI_API_KEY,
104
+ openaiExtraConfig: MIDSCENE_PLANNING_OPENAI_INIT_CONFIG_JSON,
107
105
  /**
108
106
  * Extra
109
107
  */
110
- vlMode: MIDSCENE_PLANNING_LOCATOR_MODE,
108
+ vlMode: MIDSCENE_PLANNING_VL_MODE,
111
109
  } as const;
112
110
 
113
111
  // modelConfig return default
@@ -116,18 +114,18 @@ export const DEFAULT_MODEL_CONFIG_KEYS: IModelConfigKeys = {
116
114
  /**
117
115
  * proxy
118
116
  */
119
- socksProxy: MIDSCENE_MODEL_SOCKS_PROXY,
120
- httpProxy: MIDSCENE_MODEL_HTTP_PROXY,
117
+ socksProxy: MIDSCENE_OPENAI_SOCKS_PROXY,
118
+ httpProxy: MIDSCENE_OPENAI_HTTP_PROXY,
121
119
  /**
122
120
  * OpenAI
123
121
  */
124
- openaiBaseURL: MIDSCENE_MODEL_BASE_URL,
125
- openaiApiKey: MIDSCENE_MODEL_API_KEY,
126
- openaiExtraConfig: MIDSCENE_MODEL_INIT_CONFIG_JSON,
122
+ openaiBaseURL: MIDSCENE_OPENAI_BASE_URL,
123
+ openaiApiKey: MIDSCENE_OPENAI_API_KEY,
124
+ openaiExtraConfig: MIDSCENE_OPENAI_INIT_CONFIG_JSON,
127
125
  /**
128
126
  * Extra
129
127
  */
130
- vlMode: MIDSCENE_LOCATOR_MODE,
128
+ vlMode: MIDSCENE_VL_MODE,
131
129
  } as const;
132
130
 
133
131
  // read from process.env
@@ -136,14 +134,14 @@ export const DEFAULT_MODEL_CONFIG_KEYS_LEGACY: IModelConfigKeys = {
136
134
  /**
137
135
  * proxy
138
136
  */
139
- socksProxy: MIDSCENE_MODEL_SOCKS_PROXY,
140
- httpProxy: MIDSCENE_MODEL_HTTP_PROXY,
137
+ socksProxy: MIDSCENE_OPENAI_SOCKS_PROXY,
138
+ httpProxy: MIDSCENE_OPENAI_HTTP_PROXY,
141
139
  /**
142
- * Model API - Uses legacy OPENAI_* variables for backward compatibility
140
+ * OpenAI
143
141
  */
144
142
  openaiBaseURL: OPENAI_BASE_URL,
145
143
  openaiApiKey: OPENAI_API_KEY,
146
- openaiExtraConfig: MIDSCENE_MODEL_INIT_CONFIG_JSON,
144
+ openaiExtraConfig: MIDSCENE_OPENAI_INIT_CONFIG_JSON,
147
145
  /**
148
146
  * Extra
149
147
  */
@@ -12,7 +12,6 @@ import {
12
12
  PLANNING_MODEL_CONFIG_KEYS,
13
13
  VQA_MODEL_CONFIG_KEYS,
14
14
  } from './constants';
15
- import { MODEL_API_KEY, MODEL_BASE_URL } from './types';
16
15
 
17
16
  import { getDebug } from '../logger';
18
17
  import { assert } from '../utils';
@@ -70,25 +69,8 @@ export const decideOpenaiSdkConfig = ({
70
69
 
71
70
  debugLog('enter decideOpenaiSdkConfig with keys:', keys);
72
71
 
73
- // Implement compatibility logic: prefer new variable names (MODEL_*), fallback to old ones (OPENAI_*)
74
- let openaiBaseURL: string | undefined;
75
- let openaiApiKey: string | undefined;
76
-
77
- // When using legacy keys (OPENAI_BASE_URL, OPENAI_API_KEY), check for new names first
78
- if (keys.openaiBaseURL === 'OPENAI_BASE_URL') {
79
- // Priority: MODEL_BASE_URL > OPENAI_BASE_URL
80
- openaiBaseURL = provider[MODEL_BASE_URL] || provider[keys.openaiBaseURL];
81
- } else {
82
- openaiBaseURL = provider[keys.openaiBaseURL];
83
- }
84
-
85
- if (keys.openaiApiKey === 'OPENAI_API_KEY') {
86
- // Priority: MODEL_API_KEY > OPENAI_API_KEY
87
- openaiApiKey = provider[MODEL_API_KEY] || provider[keys.openaiApiKey];
88
- } else {
89
- openaiApiKey = provider[keys.openaiApiKey];
90
- }
91
-
72
+ const openaiBaseURL = provider[keys.openaiBaseURL];
73
+ const openaiApiKey = provider[keys.openaiApiKey];
92
74
  const openaiExtraConfig = parseJson(
93
75
  keys.openaiExtraConfig,
94
76
  provider[keys.openaiExtraConfig],
@@ -1,26 +1,21 @@
1
1
  import { enableDebug } from '../logger';
2
2
  import { getBasicEnvValue } from './basic';
3
- import {
4
- MIDSCENE_DEBUG_MODEL_PROFILE,
5
- MIDSCENE_DEBUG_MODEL_RESPONSE,
6
- } from './types';
3
+ import { MIDSCENE_DEBUG_AI_PROFILE, MIDSCENE_DEBUG_AI_RESPONSE } from './types';
7
4
 
8
5
  export const initDebugConfig = () => {
9
- const shouldPrintTiming = getBasicEnvValue(MIDSCENE_DEBUG_MODEL_PROFILE);
6
+ const shouldPrintTiming = getBasicEnvValue(MIDSCENE_DEBUG_AI_PROFILE);
10
7
  let debugConfig = '';
11
8
  if (shouldPrintTiming) {
12
9
  console.warn(
13
- 'MIDSCENE_DEBUG_MODEL_PROFILE is deprecated, use DEBUG=midscene:ai:profile instead',
10
+ 'MIDSCENE_DEBUG_AI_PROFILE is deprecated, use DEBUG=midscene:ai:profile instead',
14
11
  );
15
12
  debugConfig = 'ai:profile';
16
13
  }
17
- const shouldPrintModelResponse = getBasicEnvValue(
18
- MIDSCENE_DEBUG_MODEL_RESPONSE,
19
- );
14
+ const shouldPrintAIResponse = getBasicEnvValue(MIDSCENE_DEBUG_AI_RESPONSE);
20
15
 
21
- if (shouldPrintModelResponse) {
16
+ if (shouldPrintAIResponse) {
22
17
  console.warn(
23
- 'MIDSCENE_DEBUG_MODEL_RESPONSE is deprecated, use DEBUG=midscene:ai:response instead',
18
+ 'MIDSCENE_DEBUG_AI_RESPONSE is deprecated, use DEBUG=midscene:ai:response instead',
24
19
  );
25
20
  if (debugConfig) {
26
21
  debugConfig = 'ai:*';
@@ -9,7 +9,6 @@ import type {
9
9
  IModelConfig,
10
10
  TIntent,
11
11
  TModelConfigFn,
12
- TModelConfigFnInternal,
13
12
  } from './types';
14
13
  import { VL_MODE_RAW_VALID_VALUES as VL_MODES } from './types';
15
14
 
@@ -17,7 +16,7 @@ const ALL_INTENTS: TIntent[] = ['VQA', 'default', 'grounding', 'planning'];
17
16
 
18
17
  export type TIntentConfigMap = Record<
19
18
  TIntent,
20
- ReturnType<TModelConfigFnInternal> | undefined
19
+ ReturnType<TModelConfigFn> | undefined
21
20
  >;
22
21
 
23
22
  export class ModelConfigManager {
@@ -38,18 +37,13 @@ export class ModelConfigManager {
38
37
  this.createOpenAIClientFn = createOpenAIClientFn;
39
38
  if (modelConfigFn) {
40
39
  this.isolatedMode = true;
41
- // Cast to internal type - user function can optionally use intent parameter
42
- // even though it's not shown in the type definition
43
- const internalFn = modelConfigFn as unknown as TModelConfigFnInternal;
44
- const intentConfigMap = this.calcIntentConfigMap(internalFn);
40
+ const intentConfigMap = this.calcIntentConfigMap(modelConfigFn);
45
41
  this.modelConfigMap =
46
42
  this.calcModelConfigMapBaseOnIntent(intentConfigMap);
47
43
  }
48
44
  }
49
45
 
50
- private calcIntentConfigMap(
51
- modelConfigFn: TModelConfigFnInternal,
52
- ): TIntentConfigMap {
46
+ private calcIntentConfigMap(modelConfigFn: TModelConfigFn): TIntentConfigMap {
53
47
  const intentConfigMap: TIntentConfigMap = {
54
48
  VQA: undefined,
55
49
  default: undefined,