@midscene/shared 0.30.10 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129)
  1. package/dist/es/build/rspack-config.mjs +4 -0
  2. package/dist/es/constants/example-code.mjs +4 -4
  3. package/dist/es/env/constants.mjs +27 -82
  4. package/dist/es/env/global-config-manager.mjs +2 -3
  5. package/dist/es/env/helper.mjs +12 -17
  6. package/dist/es/env/init-debug.mjs +6 -6
  7. package/dist/es/env/model-config-manager.mjs +45 -65
  8. package/dist/es/env/parse-model-config.mjs +112 -0
  9. package/dist/es/env/types.mjs +70 -162
  10. package/dist/es/extractor/dom-util.mjs +10 -18
  11. package/dist/es/extractor/index.mjs +2 -3
  12. package/dist/es/extractor/locator.mjs +8 -15
  13. package/dist/es/extractor/tree.mjs +2 -5
  14. package/dist/es/extractor/util.mjs +4 -28
  15. package/dist/es/extractor/web-extractor.mjs +7 -14
  16. package/dist/es/index.mjs +2 -1
  17. package/dist/es/mcp/base-server.mjs +250 -0
  18. package/dist/es/mcp/base-tools.mjs +84 -0
  19. package/dist/es/mcp/index.mjs +5 -0
  20. package/dist/es/mcp/inject-report-html-plugin.mjs +53 -0
  21. package/dist/es/mcp/tool-generator.mjs +207 -0
  22. package/dist/es/mcp/types.mjs +3 -0
  23. package/dist/es/node/fs.mjs +2 -2
  24. package/dist/es/utils.mjs +2 -3
  25. package/dist/es/zod-schema-utils.mjs +54 -0
  26. package/dist/lib/baseDB.js +2 -2
  27. package/dist/lib/build/copy-static.js +4 -4
  28. package/dist/lib/build/rspack-config.js +38 -0
  29. package/dist/lib/common.js +4 -4
  30. package/dist/lib/constants/example-code.js +6 -6
  31. package/dist/lib/constants/index.js +13 -13
  32. package/dist/lib/env/basic.js +2 -2
  33. package/dist/lib/env/constants.js +32 -90
  34. package/dist/lib/env/global-config-manager.js +4 -5
  35. package/dist/lib/env/helper.js +13 -22
  36. package/dist/lib/env/index.js +24 -28
  37. package/dist/lib/env/init-debug.js +7 -7
  38. package/dist/lib/env/model-config-manager.js +47 -67
  39. package/dist/lib/env/parse-model-config.js +155 -0
  40. package/dist/lib/env/types.js +146 -379
  41. package/dist/lib/env/utils.js +4 -4
  42. package/dist/lib/extractor/constants.js +4 -4
  43. package/dist/lib/extractor/debug.js +1 -1
  44. package/dist/lib/extractor/dom-util.js +18 -26
  45. package/dist/lib/extractor/index.js +11 -21
  46. package/dist/lib/extractor/locator.js +10 -20
  47. package/dist/lib/extractor/tree.js +4 -7
  48. package/dist/lib/extractor/util.js +17 -50
  49. package/dist/lib/extractor/web-extractor.js +12 -19
  50. package/dist/lib/img/box-select.js +4 -4
  51. package/dist/lib/img/draw-box.js +2 -2
  52. package/dist/lib/img/get-jimp.js +16 -34
  53. package/dist/lib/img/get-photon.js +24 -47
  54. package/dist/lib/img/get-sharp.js +16 -34
  55. package/dist/lib/img/index.js +18 -18
  56. package/dist/lib/img/info.js +4 -4
  57. package/dist/lib/img/transform.js +10 -10
  58. package/dist/lib/index.js +8 -4
  59. package/dist/lib/logger.js +4 -4
  60. package/dist/lib/mcp/base-server.js +300 -0
  61. package/dist/lib/mcp/base-tools.js +118 -0
  62. package/dist/lib/mcp/index.js +86 -0
  63. package/dist/lib/mcp/inject-report-html-plugin.js +98 -0
  64. package/dist/lib/mcp/tool-generator.js +244 -0
  65. package/dist/lib/mcp/types.js +40 -0
  66. package/dist/lib/node/fs.js +6 -6
  67. package/dist/lib/node/index.js +6 -8
  68. package/dist/lib/polyfills/async-hooks.js +2 -2
  69. package/dist/lib/polyfills/index.js +6 -8
  70. package/dist/lib/types/index.js +2 -2
  71. package/dist/lib/us-keyboard-layout.js +2 -2
  72. package/dist/lib/utils.js +13 -14
  73. package/dist/lib/zod-schema-utils.js +97 -0
  74. package/dist/types/build/rspack-config.d.ts +8 -0
  75. package/dist/types/constants/example-code.d.ts +1 -1
  76. package/dist/types/env/constants.d.ts +5 -18
  77. package/dist/types/env/global-config-manager.d.ts +1 -2
  78. package/dist/types/env/helper.d.ts +2 -4
  79. package/dist/types/env/model-config-manager.d.ts +8 -7
  80. package/dist/types/env/parse-model-config.d.ts +28 -0
  81. package/dist/types/env/types.d.ts +152 -191
  82. package/dist/types/extractor/dom-util.d.ts +2 -15
  83. package/dist/types/extractor/index.d.ts +1 -2
  84. package/dist/types/extractor/locator.d.ts +0 -1
  85. package/dist/types/extractor/tree.d.ts +1 -4
  86. package/dist/types/extractor/util.d.ts +0 -3
  87. package/dist/types/index.d.ts +1 -0
  88. package/dist/types/mcp/base-server.d.ts +77 -0
  89. package/dist/types/mcp/base-tools.d.ts +55 -0
  90. package/dist/types/mcp/index.d.ts +5 -0
  91. package/dist/types/mcp/inject-report-html-plugin.d.ts +18 -0
  92. package/dist/types/mcp/tool-generator.d.ts +11 -0
  93. package/dist/types/mcp/types.d.ts +100 -0
  94. package/dist/types/types/index.d.ts +5 -2
  95. package/dist/types/zod-schema-utils.d.ts +23 -0
  96. package/package.json +19 -4
  97. package/src/build/rspack-config.ts +12 -0
  98. package/src/constants/example-code.ts +4 -4
  99. package/src/env/constants.ts +58 -203
  100. package/src/env/global-config-manager.ts +7 -7
  101. package/src/env/helper.ts +10 -31
  102. package/src/env/init-debug.ts +11 -6
  103. package/src/env/model-config-manager.ts +91 -87
  104. package/src/env/parse-model-config.ts +265 -0
  105. package/src/env/types.ts +212 -344
  106. package/src/extractor/dom-util.ts +15 -12
  107. package/src/extractor/index.ts +0 -3
  108. package/src/extractor/locator.ts +3 -12
  109. package/src/extractor/tree.ts +4 -4
  110. package/src/extractor/util.ts +0 -32
  111. package/src/index.ts +2 -0
  112. package/src/mcp/base-server.ts +435 -0
  113. package/src/mcp/base-tools.ts +196 -0
  114. package/src/mcp/index.ts +5 -0
  115. package/src/mcp/inject-report-html-plugin.ts +119 -0
  116. package/src/mcp/tool-generator.ts +330 -0
  117. package/src/mcp/types.ts +108 -0
  118. package/src/node/fs.ts +1 -1
  119. package/src/types/index.ts +8 -2
  120. package/src/utils.ts +1 -1
  121. package/src/zod-schema-utils.ts +133 -0
  122. package/dist/es/env/decide-model-config.mjs +0 -172
  123. package/dist/es/env/parse.mjs +0 -69
  124. package/dist/lib/env/decide-model-config.js +0 -212
  125. package/dist/lib/env/parse.js +0 -106
  126. package/dist/types/env/decide-model-config.d.ts +0 -14
  127. package/dist/types/env/parse.d.ts +0 -12
  128. package/src/env/decide-model-config.ts +0 -319
  129. package/src/env/parse.ts +0 -131
@@ -0,0 +1,4 @@
1
+ const commonIgnoreWarnings = [
2
+ /Critical dependency: the request of a dependency is an expression/
3
+ ];
4
+ export { commonIgnoreWarnings };
@@ -113,12 +113,12 @@ tasks:
113
113
  # Auto Planning (.ai)
114
114
  # ----------------
115
115
 
116
- # Perform an interaction. \`ai\` is a shorthand for \`aiAction\`.
116
+ # Perform an interaction. \`ai\` is a shorthand for \`aiAct\`.
117
117
  - ai: <prompt>
118
118
  cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
119
119
 
120
120
  # This usage is the same as \`ai\`.
121
- - aiAction: <prompt>
121
+ - aiAct: <prompt>
122
122
  cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
123
123
 
124
124
  # Instant Action (.aiTap, .aiDoubleClick, .aiHover, .aiInput, .aiKeyboardPress, .aiScroll)
@@ -166,8 +166,8 @@ tasks:
166
166
  xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
167
167
  cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
168
168
 
169
- # Log the current screenshot with a description in the report file.
170
- - logScreenshot: <title> # Optional, the title of the screenshot. If not provided, the title will be 'untitled'.
169
+ # Record the current screenshot with a description in the report file.
170
+ - recordToReport: <title> # Optional, the title of the screenshot. If not provided, the title will be 'untitled'.
171
171
  content: <content> # Optional, the description of the screenshot.
172
172
 
173
173
  # Data Extraction
@@ -1,79 +1,33 @@
1
- import { ANTHROPIC_API_KEY, AZURE_OPENAI_API_VERSION, AZURE_OPENAI_DEPLOYMENT, AZURE_OPENAI_ENDPOINT, AZURE_OPENAI_KEY, MIDSCENE_ANTHROPIC_API_KEY, MIDSCENE_AZURE_OPENAI_API_VERSION, MIDSCENE_AZURE_OPENAI_DEPLOYMENT, MIDSCENE_AZURE_OPENAI_ENDPOINT, MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_AZURE_OPENAI_KEY, MIDSCENE_AZURE_OPENAI_SCOPE, MIDSCENE_GROUNDING_ANTHROPIC_API_KEY, MIDSCENE_GROUNDING_AZURE_OPENAI_API_VERSION, MIDSCENE_GROUNDING_AZURE_OPENAI_DEPLOYMENT, MIDSCENE_GROUNDING_AZURE_OPENAI_ENDPOINT, MIDSCENE_GROUNDING_AZURE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_GROUNDING_AZURE_OPENAI_KEY, MIDSCENE_GROUNDING_AZURE_OPENAI_SCOPE, MIDSCENE_GROUNDING_MODEL_NAME, MIDSCENE_GROUNDING_OPENAI_API_KEY, MIDSCENE_GROUNDING_OPENAI_BASE_URL, MIDSCENE_GROUNDING_OPENAI_HTTP_PROXY, MIDSCENE_GROUNDING_OPENAI_INIT_CONFIG_JSON, MIDSCENE_GROUNDING_OPENAI_SOCKS_PROXY, MIDSCENE_GROUNDING_OPENAI_USE_AZURE, MIDSCENE_GROUNDING_USE_ANTHROPIC_SDK, MIDSCENE_GROUNDING_USE_AZURE_OPENAI, MIDSCENE_GROUNDING_VL_MODE, MIDSCENE_MODEL_NAME, MIDSCENE_OPENAI_API_KEY, MIDSCENE_OPENAI_BASE_URL, MIDSCENE_OPENAI_HTTP_PROXY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MIDSCENE_OPENAI_USE_AZURE, MIDSCENE_PLANNING_ANTHROPIC_API_KEY, MIDSCENE_PLANNING_AZURE_OPENAI_API_VERSION, MIDSCENE_PLANNING_AZURE_OPENAI_DEPLOYMENT, MIDSCENE_PLANNING_AZURE_OPENAI_ENDPOINT, MIDSCENE_PLANNING_AZURE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_PLANNING_AZURE_OPENAI_KEY, MIDSCENE_PLANNING_AZURE_OPENAI_SCOPE, MIDSCENE_PLANNING_MODEL_NAME, MIDSCENE_PLANNING_OPENAI_API_KEY, MIDSCENE_PLANNING_OPENAI_BASE_URL, MIDSCENE_PLANNING_OPENAI_HTTP_PROXY, MIDSCENE_PLANNING_OPENAI_INIT_CONFIG_JSON, MIDSCENE_PLANNING_OPENAI_SOCKS_PROXY, MIDSCENE_PLANNING_OPENAI_USE_AZURE, MIDSCENE_PLANNING_USE_ANTHROPIC_SDK, MIDSCENE_PLANNING_USE_AZURE_OPENAI, MIDSCENE_PLANNING_VL_MODE, MIDSCENE_USE_ANTHROPIC_SDK, MIDSCENE_USE_AZURE_OPENAI, MIDSCENE_VL_MODE, MIDSCENE_VQA_ANTHROPIC_API_KEY, MIDSCENE_VQA_AZURE_OPENAI_API_VERSION, 
MIDSCENE_VQA_AZURE_OPENAI_DEPLOYMENT, MIDSCENE_VQA_AZURE_OPENAI_ENDPOINT, MIDSCENE_VQA_AZURE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_VQA_AZURE_OPENAI_KEY, MIDSCENE_VQA_AZURE_OPENAI_SCOPE, MIDSCENE_VQA_MODEL_NAME, MIDSCENE_VQA_OPENAI_API_KEY, MIDSCENE_VQA_OPENAI_BASE_URL, MIDSCENE_VQA_OPENAI_HTTP_PROXY, MIDSCENE_VQA_OPENAI_INIT_CONFIG_JSON, MIDSCENE_VQA_OPENAI_SOCKS_PROXY, MIDSCENE_VQA_OPENAI_USE_AZURE, MIDSCENE_VQA_USE_ANTHROPIC_SDK, MIDSCENE_VQA_USE_AZURE_OPENAI, MIDSCENE_VQA_VL_MODE, OPENAI_API_KEY, OPENAI_BASE_URL, OPENAI_USE_AZURE } from "./types.mjs";
2
- const VQA_MODEL_CONFIG_KEYS = {
3
- modelName: MIDSCENE_VQA_MODEL_NAME,
4
- socksProxy: MIDSCENE_VQA_OPENAI_SOCKS_PROXY,
5
- httpProxy: MIDSCENE_VQA_OPENAI_HTTP_PROXY,
6
- openaiBaseURL: MIDSCENE_VQA_OPENAI_BASE_URL,
7
- openaiApiKey: MIDSCENE_VQA_OPENAI_API_KEY,
8
- openaiExtraConfig: MIDSCENE_VQA_OPENAI_INIT_CONFIG_JSON,
9
- openaiUseAzureDeprecated: MIDSCENE_VQA_OPENAI_USE_AZURE,
10
- useAzureOpenai: MIDSCENE_VQA_USE_AZURE_OPENAI,
11
- azureOpenaiScope: MIDSCENE_VQA_AZURE_OPENAI_SCOPE,
12
- azureOpenaiKey: MIDSCENE_VQA_AZURE_OPENAI_KEY,
13
- azureOpenaiEndpoint: MIDSCENE_VQA_AZURE_OPENAI_ENDPOINT,
14
- azureOpenaiApiVersion: MIDSCENE_VQA_AZURE_OPENAI_API_VERSION,
15
- azureOpenaiDeployment: MIDSCENE_VQA_AZURE_OPENAI_DEPLOYMENT,
16
- azureExtraConfig: MIDSCENE_VQA_AZURE_OPENAI_INIT_CONFIG_JSON,
17
- useAnthropicSdk: MIDSCENE_VQA_USE_ANTHROPIC_SDK,
18
- anthropicApiKey: MIDSCENE_VQA_ANTHROPIC_API_KEY,
19
- vlMode: MIDSCENE_VQA_VL_MODE
20
- };
21
- const GROUNDING_MODEL_CONFIG_KEYS = {
22
- modelName: MIDSCENE_GROUNDING_MODEL_NAME,
23
- socksProxy: MIDSCENE_GROUNDING_OPENAI_SOCKS_PROXY,
24
- httpProxy: MIDSCENE_GROUNDING_OPENAI_HTTP_PROXY,
25
- openaiBaseURL: MIDSCENE_GROUNDING_OPENAI_BASE_URL,
26
- openaiApiKey: MIDSCENE_GROUNDING_OPENAI_API_KEY,
27
- openaiExtraConfig: MIDSCENE_GROUNDING_OPENAI_INIT_CONFIG_JSON,
28
- openaiUseAzureDeprecated: MIDSCENE_GROUNDING_OPENAI_USE_AZURE,
29
- useAzureOpenai: MIDSCENE_GROUNDING_USE_AZURE_OPENAI,
30
- azureOpenaiScope: MIDSCENE_GROUNDING_AZURE_OPENAI_SCOPE,
31
- azureOpenaiKey: MIDSCENE_GROUNDING_AZURE_OPENAI_KEY,
32
- azureOpenaiEndpoint: MIDSCENE_GROUNDING_AZURE_OPENAI_ENDPOINT,
33
- azureOpenaiApiVersion: MIDSCENE_GROUNDING_AZURE_OPENAI_API_VERSION,
34
- azureOpenaiDeployment: MIDSCENE_GROUNDING_AZURE_OPENAI_DEPLOYMENT,
35
- azureExtraConfig: MIDSCENE_GROUNDING_AZURE_OPENAI_INIT_CONFIG_JSON,
36
- useAnthropicSdk: MIDSCENE_GROUNDING_USE_ANTHROPIC_SDK,
37
- anthropicApiKey: MIDSCENE_GROUNDING_ANTHROPIC_API_KEY,
38
- vlMode: MIDSCENE_GROUNDING_VL_MODE
1
+ import { MIDSCENE_INSIGHT_MODEL_API_KEY, MIDSCENE_INSIGHT_MODEL_BASE_URL, MIDSCENE_INSIGHT_MODEL_HTTP_PROXY, MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON, MIDSCENE_INSIGHT_MODEL_NAME, MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY, MIDSCENE_INSIGHT_MODEL_TIMEOUT, MIDSCENE_MODEL_API_KEY, MIDSCENE_MODEL_BASE_URL, MIDSCENE_MODEL_FAMILY, MIDSCENE_MODEL_HTTP_PROXY, MIDSCENE_MODEL_INIT_CONFIG_JSON, MIDSCENE_MODEL_NAME, MIDSCENE_MODEL_SOCKS_PROXY, MIDSCENE_MODEL_TIMEOUT, MIDSCENE_OPENAI_HTTP_PROXY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MIDSCENE_PLANNING_MODEL_API_KEY, MIDSCENE_PLANNING_MODEL_BASE_URL, MIDSCENE_PLANNING_MODEL_HTTP_PROXY, MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON, MIDSCENE_PLANNING_MODEL_NAME, MIDSCENE_PLANNING_MODEL_SOCKS_PROXY, MIDSCENE_PLANNING_MODEL_TIMEOUT, OPENAI_API_KEY, OPENAI_BASE_URL } from "./types.mjs";
2
+ const INSIGHT_MODEL_CONFIG_KEYS = {
3
+ modelName: MIDSCENE_INSIGHT_MODEL_NAME,
4
+ socksProxy: MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY,
5
+ httpProxy: MIDSCENE_INSIGHT_MODEL_HTTP_PROXY,
6
+ openaiBaseURL: MIDSCENE_INSIGHT_MODEL_BASE_URL,
7
+ openaiApiKey: MIDSCENE_INSIGHT_MODEL_API_KEY,
8
+ openaiExtraConfig: MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON,
9
+ modelFamily: 'THERE_IS_NO_MODEL_FAMILY_FOR_INSIGHT',
10
+ timeout: MIDSCENE_INSIGHT_MODEL_TIMEOUT
39
11
  };
40
12
  const PLANNING_MODEL_CONFIG_KEYS = {
41
13
  modelName: MIDSCENE_PLANNING_MODEL_NAME,
42
- socksProxy: MIDSCENE_PLANNING_OPENAI_SOCKS_PROXY,
43
- httpProxy: MIDSCENE_PLANNING_OPENAI_HTTP_PROXY,
44
- openaiBaseURL: MIDSCENE_PLANNING_OPENAI_BASE_URL,
45
- openaiApiKey: MIDSCENE_PLANNING_OPENAI_API_KEY,
46
- openaiExtraConfig: MIDSCENE_PLANNING_OPENAI_INIT_CONFIG_JSON,
47
- openaiUseAzureDeprecated: MIDSCENE_PLANNING_OPENAI_USE_AZURE,
48
- useAzureOpenai: MIDSCENE_PLANNING_USE_AZURE_OPENAI,
49
- azureOpenaiScope: MIDSCENE_PLANNING_AZURE_OPENAI_SCOPE,
50
- azureOpenaiKey: MIDSCENE_PLANNING_AZURE_OPENAI_KEY,
51
- azureOpenaiEndpoint: MIDSCENE_PLANNING_AZURE_OPENAI_ENDPOINT,
52
- azureOpenaiApiVersion: MIDSCENE_PLANNING_AZURE_OPENAI_API_VERSION,
53
- azureOpenaiDeployment: MIDSCENE_PLANNING_AZURE_OPENAI_DEPLOYMENT,
54
- azureExtraConfig: MIDSCENE_PLANNING_AZURE_OPENAI_INIT_CONFIG_JSON,
55
- useAnthropicSdk: MIDSCENE_PLANNING_USE_ANTHROPIC_SDK,
56
- anthropicApiKey: MIDSCENE_PLANNING_ANTHROPIC_API_KEY,
57
- vlMode: MIDSCENE_PLANNING_VL_MODE
14
+ socksProxy: MIDSCENE_PLANNING_MODEL_SOCKS_PROXY,
15
+ httpProxy: MIDSCENE_PLANNING_MODEL_HTTP_PROXY,
16
+ openaiBaseURL: MIDSCENE_PLANNING_MODEL_BASE_URL,
17
+ openaiApiKey: MIDSCENE_PLANNING_MODEL_API_KEY,
18
+ openaiExtraConfig: MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON,
19
+ modelFamily: 'THERE_IS_NO_MODEL_FAMILY_FOR_PLANNING',
20
+ timeout: MIDSCENE_PLANNING_MODEL_TIMEOUT
58
21
  };
59
22
  const DEFAULT_MODEL_CONFIG_KEYS = {
60
23
  modelName: MIDSCENE_MODEL_NAME,
61
- socksProxy: MIDSCENE_OPENAI_SOCKS_PROXY,
62
- httpProxy: MIDSCENE_OPENAI_HTTP_PROXY,
63
- openaiBaseURL: MIDSCENE_OPENAI_BASE_URL,
64
- openaiApiKey: MIDSCENE_OPENAI_API_KEY,
65
- openaiExtraConfig: MIDSCENE_OPENAI_INIT_CONFIG_JSON,
66
- openaiUseAzureDeprecated: MIDSCENE_OPENAI_USE_AZURE,
67
- useAzureOpenai: MIDSCENE_USE_AZURE_OPENAI,
68
- azureOpenaiScope: MIDSCENE_AZURE_OPENAI_SCOPE,
69
- azureOpenaiKey: MIDSCENE_AZURE_OPENAI_KEY,
70
- azureOpenaiEndpoint: MIDSCENE_AZURE_OPENAI_ENDPOINT,
71
- azureOpenaiApiVersion: MIDSCENE_AZURE_OPENAI_API_VERSION,
72
- azureOpenaiDeployment: MIDSCENE_AZURE_OPENAI_DEPLOYMENT,
73
- azureExtraConfig: MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON,
74
- useAnthropicSdk: MIDSCENE_USE_ANTHROPIC_SDK,
75
- anthropicApiKey: MIDSCENE_ANTHROPIC_API_KEY,
76
- vlMode: MIDSCENE_VL_MODE
24
+ socksProxy: MIDSCENE_MODEL_SOCKS_PROXY,
25
+ httpProxy: MIDSCENE_MODEL_HTTP_PROXY,
26
+ openaiBaseURL: MIDSCENE_MODEL_BASE_URL,
27
+ openaiApiKey: MIDSCENE_MODEL_API_KEY,
28
+ openaiExtraConfig: MIDSCENE_MODEL_INIT_CONFIG_JSON,
29
+ modelFamily: MIDSCENE_MODEL_FAMILY,
30
+ timeout: MIDSCENE_MODEL_TIMEOUT
77
31
  };
78
32
  const DEFAULT_MODEL_CONFIG_KEYS_LEGACY = {
79
33
  modelName: MIDSCENE_MODEL_NAME,
@@ -82,16 +36,7 @@ const DEFAULT_MODEL_CONFIG_KEYS_LEGACY = {
82
36
  openaiBaseURL: OPENAI_BASE_URL,
83
37
  openaiApiKey: OPENAI_API_KEY,
84
38
  openaiExtraConfig: MIDSCENE_OPENAI_INIT_CONFIG_JSON,
85
- openaiUseAzureDeprecated: OPENAI_USE_AZURE,
86
- useAzureOpenai: MIDSCENE_USE_AZURE_OPENAI,
87
- azureOpenaiScope: MIDSCENE_AZURE_OPENAI_SCOPE,
88
- azureOpenaiKey: AZURE_OPENAI_KEY,
89
- azureOpenaiEndpoint: AZURE_OPENAI_ENDPOINT,
90
- azureOpenaiApiVersion: AZURE_OPENAI_API_VERSION,
91
- azureOpenaiDeployment: AZURE_OPENAI_DEPLOYMENT,
92
- azureExtraConfig: MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON,
93
- useAnthropicSdk: MIDSCENE_USE_ANTHROPIC_SDK,
94
- anthropicApiKey: ANTHROPIC_API_KEY,
95
- vlMode: 'DEFAULT_MODEL_CONFIG_KEYS has no vlMode key'
39
+ modelFamily: 'DEFAULT_MODEL_CONFIG_KEYS has no modelFamily key',
40
+ timeout: MIDSCENE_MODEL_TIMEOUT
96
41
  };
97
- export { DEFAULT_MODEL_CONFIG_KEYS, DEFAULT_MODEL_CONFIG_KEYS_LEGACY, GROUNDING_MODEL_CONFIG_KEYS, PLANNING_MODEL_CONFIG_KEYS, VQA_MODEL_CONFIG_KEYS };
42
+ export { DEFAULT_MODEL_CONFIG_KEYS, DEFAULT_MODEL_CONFIG_KEYS_LEGACY, INSIGHT_MODEL_CONFIG_KEYS, PLANNING_MODEL_CONFIG_KEYS };
@@ -30,8 +30,8 @@ class GlobalConfigManager {
30
30
  }
31
31
  getEnvConfigValue(key) {
32
32
  const allConfig = this.getAllEnvConfig();
33
+ if (key === MATCH_BY_POSITION) throw new Error('MATCH_BY_POSITION is discarded, use MIDSCENE_MODEL_FAMILY instead');
33
34
  if (!STRING_ENV_KEYS.includes(key)) throw new Error(`getEnvConfigValue with key ${key} is not supported.`);
34
- if (key === MATCH_BY_POSITION) throw new Error('MATCH_BY_POSITION is deprecated, use MIDSCENE_USE_VL_MODEL instead');
35
35
  const value = allConfig[key];
36
36
  this.keysHaveBeenRead[key] = true;
37
37
  if ('string' == typeof value) return value.trim();
@@ -58,7 +58,6 @@ class GlobalConfigManager {
58
58
  this.globalModelConfigManager = globalModelConfigManager;
59
59
  }
60
60
  overrideAIConfig(newConfig, extendMode = false) {
61
- var _this_override;
62
61
  for(const key in newConfig){
63
62
  if (![
64
63
  ...GLOBAL_ENV_KEYS,
@@ -69,7 +68,7 @@ class GlobalConfigManager {
69
68
  if (this.keysHaveBeenRead[key]) console.warn(`Warning: try to override AI config with key ${key} ,but it has been read.`);
70
69
  }
71
70
  const savedNewConfig = extendMode ? {
72
- ...null == (_this_override = this.override) ? void 0 : _this_override.newConfig,
71
+ ...this.override?.newConfig,
73
72
  ...newConfig
74
73
  } : newConfig;
75
74
  this.override = {
@@ -1,4 +1,3 @@
1
- import { assert } from "../utils.mjs";
2
1
  const maskKey = (key, maskChar = '*')=>{
3
2
  if ('string' != typeof key || 0 === key.length) return key;
4
3
  const prefixLen = 3;
@@ -12,21 +11,21 @@ const maskKey = (key, maskChar = '*')=>{
12
11
  return `${prefix}${mask}${suffix}`;
13
12
  };
14
13
  const maskConfig = (config)=>Object.fromEntries(Object.entries(config).map(([key, value])=>{
15
- if ([
16
- 'openaiApiKey',
17
- 'azureOpenaiKey',
18
- 'anthropicApiKey'
19
- ].includes(key)) return [
14
+ if (!value) return [
20
15
  key,
21
- maskKey(value)
16
+ value
22
17
  ];
23
- if ([
24
- 'openaiExtraConfig',
25
- 'azureExtraConfig'
26
- ].includes(key)) return [
18
+ if ('string' == typeof value && /key/i.test(key)) return [
27
19
  key,
28
- maskKey(JSON.stringify(value))
20
+ maskKey(value)
29
21
  ];
22
+ if ('object' == typeof value) {
23
+ const valueStr = JSON.stringify(value);
24
+ if (/key/i.test(valueStr)) return [
25
+ key,
26
+ maskKey(valueStr)
27
+ ];
28
+ }
30
29
  return [
31
30
  key,
32
31
  value
@@ -41,8 +40,4 @@ const parseJson = (key, value)=>{
41
40
  });
42
41
  }
43
42
  };
44
- const createAssert = (modelNameKey, provider, modelName)=>(value, key, modelVendorFlag)=>{
45
- if (modelName) modelVendorFlag ? assert(value, `The ${key} must be a non-empty string because of the ${modelNameKey} is declared as ${modelName} and ${modelVendorFlag} has also been specified in ${provider}, but got: ${value}. Please check your config.`) : assert(value, `The ${key} must be a non-empty string because of the ${modelNameKey} is declared as ${modelName} in ${provider}, but got: ${value}. Please check your config.`);
46
- else assert(value, `The ${key} must be a non-empty string, but got: ${value}. Please check your config.`);
47
- };
48
- export { createAssert, maskConfig, parseJson };
43
+ export { maskConfig, parseJson };
@@ -1,16 +1,16 @@
1
1
  import { enableDebug } from "../logger.mjs";
2
2
  import { getBasicEnvValue } from "./basic.mjs";
3
- import { MIDSCENE_DEBUG_AI_PROFILE, MIDSCENE_DEBUG_AI_RESPONSE } from "./types.mjs";
3
+ import { MIDSCENE_DEBUG_MODEL_PROFILE, MIDSCENE_DEBUG_MODEL_RESPONSE } from "./types.mjs";
4
4
  const initDebugConfig = ()=>{
5
- const shouldPrintTiming = getBasicEnvValue(MIDSCENE_DEBUG_AI_PROFILE);
5
+ const shouldPrintTiming = getBasicEnvValue(MIDSCENE_DEBUG_MODEL_PROFILE);
6
6
  let debugConfig = '';
7
7
  if (shouldPrintTiming) {
8
- console.warn('MIDSCENE_DEBUG_AI_PROFILE is deprecated, use DEBUG=midscene:ai:profile instead');
8
+ console.warn('MIDSCENE_DEBUG_MODEL_PROFILE is deprecated, use DEBUG=midscene:ai:profile instead');
9
9
  debugConfig = 'ai:profile';
10
10
  }
11
- const shouldPrintAIResponse = getBasicEnvValue(MIDSCENE_DEBUG_AI_RESPONSE);
12
- if (shouldPrintAIResponse) {
13
- console.warn('MIDSCENE_DEBUG_AI_RESPONSE is deprecated, use DEBUG=midscene:ai:response instead');
11
+ const shouldPrintModelResponse = getBasicEnvValue(MIDSCENE_DEBUG_MODEL_RESPONSE);
12
+ if (shouldPrintModelResponse) {
13
+ console.warn('MIDSCENE_DEBUG_MODEL_RESPONSE is deprecated, use DEBUG=midscene:ai:response instead');
14
14
  debugConfig = debugConfig ? 'ai:*' : 'ai:call';
15
15
  }
16
16
  if (debugConfig) enableDebug(debugConfig);
@@ -1,4 +1,4 @@
1
- import { decideModelConfigFromEnv, decideModelConfigFromIntentConfig } from "./decide-model-config.mjs";
1
+ import { decideModelConfigFromIntentConfig } from "./parse-model-config.mjs";
2
2
  function _define_property(obj, key, value) {
3
3
  if (key in obj) Object.defineProperty(obj, key, {
4
4
  value: value,
@@ -9,91 +9,71 @@ function _define_property(obj, key, value) {
9
9
  else obj[key] = value;
10
10
  return obj;
11
11
  }
12
- const ALL_INTENTS = [
13
- 'VQA',
14
- 'default',
15
- 'grounding',
16
- 'planning'
17
- ];
18
12
  class ModelConfigManager {
19
- calcIntentConfigMap(modelConfigFn) {
20
- const intentConfigMap = {
21
- VQA: void 0,
22
- default: void 0,
23
- grounding: void 0,
24
- planning: void 0
13
+ initialize() {
14
+ if (this.isInitialized) return;
15
+ let configMap;
16
+ if (this.modelConfig) {
17
+ this.isolatedMode = true;
18
+ configMap = this.normalizeModelConfig(this.modelConfig);
19
+ } else configMap = this.globalConfigManager?.getAllEnvConfig() || {};
20
+ const defaultConfig = decideModelConfigFromIntentConfig('default', configMap);
21
+ if (!defaultConfig) throw new Error('default model config is not found, which should not happen');
22
+ const insightConfig = decideModelConfigFromIntentConfig('insight', configMap);
23
+ const planningConfig = decideModelConfigFromIntentConfig('planning', configMap);
24
+ this.modelConfigMap = {
25
+ default: {
26
+ ...defaultConfig,
27
+ createOpenAIClient: this.createOpenAIClientFn
28
+ },
29
+ insight: {
30
+ ...insightConfig || defaultConfig,
31
+ createOpenAIClient: this.createOpenAIClientFn
32
+ },
33
+ planning: {
34
+ ...planningConfig || defaultConfig,
35
+ createOpenAIClient: this.createOpenAIClientFn
36
+ }
25
37
  };
26
- for (const i of ALL_INTENTS){
27
- const result = modelConfigFn({
28
- intent: i
29
- });
30
- if (!result) throw new Error(`The agent has an option named modelConfig is a function, but it return ${result} when call with intent ${i}, which should be a object.`);
31
- intentConfigMap[i] = result;
32
- }
33
- return intentConfigMap;
38
+ this.isInitialized = true;
34
39
  }
35
- calcModelConfigMapBaseOnIntent(intentConfigMap) {
36
- const modelConfigMap = {
37
- VQA: void 0,
38
- default: void 0,
39
- grounding: void 0,
40
- planning: void 0
41
- };
42
- for (const i of ALL_INTENTS){
43
- const result = decideModelConfigFromIntentConfig(i, intentConfigMap[i]);
44
- modelConfigMap[i] = result;
45
- }
46
- return modelConfigMap;
47
- }
48
- calcModelConfigMapBaseOnEnv(allEnvConfig) {
49
- const modelConfigMap = {
50
- VQA: void 0,
51
- default: void 0,
52
- grounding: void 0,
53
- planning: void 0
54
- };
55
- for (const i of ALL_INTENTS){
56
- const result = decideModelConfigFromEnv(i, allEnvConfig);
57
- modelConfigMap[i] = result;
58
- }
59
- return modelConfigMap;
40
+ normalizeModelConfig(config) {
41
+ return Object.entries(config).reduce((acc, [key, value])=>{
42
+ if (null == value) return acc;
43
+ acc[key] = String(value);
44
+ return acc;
45
+ }, Object.create(null));
60
46
  }
61
47
  clearModelConfigMap() {
62
48
  if (this.isolatedMode) throw new Error('ModelConfigManager work in isolated mode, so clearModelConfigMap should not be called');
63
- this.modelConfigMap = void 0;
49
+ this.isInitialized = false;
64
50
  }
65
51
  getModelConfig(intent) {
66
- if (this.isolatedMode) {
67
- if (!this.modelConfigMap) throw new Error('modelConfigMap is not initialized in isolated mode, which should not happen');
68
- return this.modelConfigMap[intent];
69
- }
70
- if (!this.modelConfigMap) {
71
- if (!this.globalConfigManager) throw new Error('globalConfigManager is not registered, which should not happen');
72
- this.modelConfigMap = this.calcModelConfigMapBaseOnEnv(this.globalConfigManager.getAllEnvConfig());
73
- }
52
+ if (!this.isInitialized) this.initialize();
53
+ if (!this.modelConfigMap) throw new Error('modelConfigMap is not initialized, which should not happen');
74
54
  return this.modelConfigMap[intent];
75
55
  }
76
56
  getUploadTestServerUrl() {
77
57
  const { openaiExtraConfig } = this.getModelConfig('default');
78
- const serverUrl = null == openaiExtraConfig ? void 0 : openaiExtraConfig.REPORT_SERVER_URL;
58
+ const serverUrl = openaiExtraConfig?.REPORT_SERVER_URL;
79
59
  return serverUrl;
80
60
  }
81
61
  registerGlobalConfigManager(globalConfigManager) {
82
62
  this.globalConfigManager = globalConfigManager;
83
63
  }
84
- throwErrorIfNonVLModel(intent = 'grounding') {
85
- const modelConfig = this.getModelConfig(intent);
86
- if (!modelConfig.vlMode) throw new Error('No visual language model (VL model) detected for the current scenario. Element localization may be inaccurate. Please verify your model configuration. Learn more: https://midscenejs.com/choose-a-model');
64
+ throwErrorIfNonVLModel() {
65
+ const modelConfig = this.getModelConfig('default');
66
+ if (!modelConfig.vlMode) throw new Error('MIDSCENE_MODEL_FAMILY is not set to a visual language model (VL model), the element localization can not be achieved. Check your model configuration. See https://midscenejs.com/model-strategy.html');
87
67
  }
88
- constructor(modelConfigFn){
68
+ constructor(modelConfig, createOpenAIClientFn){
89
69
  _define_property(this, "modelConfigMap", void 0);
70
+ _define_property(this, "isInitialized", false);
90
71
  _define_property(this, "isolatedMode", false);
91
72
  _define_property(this, "globalConfigManager", void 0);
92
- if (modelConfigFn) {
93
- this.isolatedMode = true;
94
- const intentConfigMap = this.calcIntentConfigMap(modelConfigFn);
95
- this.modelConfigMap = this.calcModelConfigMapBaseOnIntent(intentConfigMap);
96
- }
73
+ _define_property(this, "modelConfig", void 0);
74
+ _define_property(this, "createOpenAIClientFn", void 0);
75
+ this.modelConfig = modelConfig;
76
+ this.createOpenAIClientFn = createOpenAIClientFn;
97
77
  }
98
78
  }
99
79
  export { ModelConfigManager };
@@ -0,0 +1,112 @@
1
+ import { DEFAULT_MODEL_CONFIG_KEYS, INSIGHT_MODEL_CONFIG_KEYS, PLANNING_MODEL_CONFIG_KEYS } from "./constants.mjs";
2
+ import { MIDSCENE_OPENAI_HTTP_PROXY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MIDSCENE_USE_DOUBAO_VISION, MIDSCENE_USE_GEMINI, MIDSCENE_USE_QWEN3_VL, MIDSCENE_USE_QWEN_VL, MIDSCENE_USE_VLM_UI_TARS, MODEL_FAMILY_VALUES, OPENAI_API_KEY, OPENAI_BASE_URL, UITarsModelVersion } from "./types.mjs";
3
+ import { getDebug } from "../logger.mjs";
4
+ import { assert } from "../utils.mjs";
5
+ import { maskConfig, parseJson } from "./helper.mjs";
6
+ import { initDebugConfig } from "./init-debug.mjs";
7
+ const KEYS_MAP = {
8
+ insight: INSIGHT_MODEL_CONFIG_KEYS,
9
+ planning: PLANNING_MODEL_CONFIG_KEYS,
10
+ default: DEFAULT_MODEL_CONFIG_KEYS
11
+ };
12
+ const modelFamilyToVLConfig = (modelFamily)=>{
13
+ if (!modelFamily) return {
14
+ vlMode: void 0,
15
+ uiTarsVersion: void 0
16
+ };
17
+ if ('vlm-ui-tars' === modelFamily) return {
18
+ vlMode: 'vlm-ui-tars',
19
+ uiTarsVersion: UITarsModelVersion.V1_0
20
+ };
21
+ if ('vlm-ui-tars-doubao' === modelFamily || 'vlm-ui-tars-doubao-1.5' === modelFamily) return {
22
+ vlMode: 'vlm-ui-tars',
23
+ uiTarsVersion: UITarsModelVersion.DOUBAO_1_5_20B
24
+ };
25
+ if (!MODEL_FAMILY_VALUES.includes(modelFamily)) throw new Error(`Invalid MIDSCENE_MODEL_FAMILY value: ${modelFamily}`);
26
+ return {
27
+ vlMode: modelFamily,
28
+ uiTarsVersion: void 0
29
+ };
30
+ };
31
+ const legacyConfigToModelFamily = (provider)=>{
32
+ const isDoubao = provider[MIDSCENE_USE_DOUBAO_VISION];
33
+ const isQwen = provider[MIDSCENE_USE_QWEN_VL];
34
+ const isQwen3 = provider[MIDSCENE_USE_QWEN3_VL];
35
+ const isUiTars = provider[MIDSCENE_USE_VLM_UI_TARS];
36
+ const isGemini = provider[MIDSCENE_USE_GEMINI];
37
+ const enabledModes = [
38
+ isDoubao && MIDSCENE_USE_DOUBAO_VISION,
39
+ isQwen && MIDSCENE_USE_QWEN_VL,
40
+ isQwen3 && MIDSCENE_USE_QWEN3_VL,
41
+ isUiTars && MIDSCENE_USE_VLM_UI_TARS,
42
+ isGemini && MIDSCENE_USE_GEMINI
43
+ ].filter(Boolean);
44
+ if (enabledModes.length > 1) throw new Error(`Only one vision mode can be enabled at a time. Currently enabled modes: ${enabledModes.join(', ')}. Please disable all but one mode.`);
45
+ if (isQwen3) return 'qwen3-vl';
46
+ if (isQwen) return 'qwen2.5-vl';
47
+ if (isDoubao) return 'doubao-vision';
48
+ if (isGemini) return 'gemini';
49
+ if (isUiTars) if ('1' === isUiTars) return 'vlm-ui-tars';
50
+ else if ('DOUBAO' === isUiTars || 'DOUBAO-1.5' === isUiTars) return 'vlm-ui-tars-doubao-1.5';
51
+ else return 'vlm-ui-tars-doubao';
52
+ };
53
+ const parseOpenaiSdkConfig = ({ keys, provider, useLegacyLogic = false })=>{
54
+ initDebugConfig();
55
+ const debugLog = getDebug('ai:config');
56
+ debugLog('enter parseOpenaiSdkConfig with keys:', keys);
57
+ const legacyAPIKey = useLegacyLogic ? provider[OPENAI_API_KEY] : void 0;
58
+ const legacyBaseURL = useLegacyLogic ? provider[OPENAI_BASE_URL] : void 0;
59
+ const legacySocksProxy = useLegacyLogic ? provider[MIDSCENE_OPENAI_SOCKS_PROXY] : void 0;
60
+ const legacyHttpProxy = useLegacyLogic ? provider[MIDSCENE_OPENAI_HTTP_PROXY] : void 0;
61
+ const legacyOpenaiExtraConfig = useLegacyLogic ? provider[MIDSCENE_OPENAI_INIT_CONFIG_JSON] : void 0;
62
+ const legacyModelFamily = useLegacyLogic ? legacyConfigToModelFamily(provider) : void 0;
63
+ const modelFamilyRaw = provider[keys.modelFamily] || legacyModelFamily;
64
+ const openaiApiKey = provider[keys.openaiApiKey] || legacyAPIKey;
65
+ const openaiBaseURL = provider[keys.openaiBaseURL] || legacyBaseURL;
66
+ const socksProxy = provider[keys.socksProxy] || legacySocksProxy;
67
+ const httpProxy = provider[keys.httpProxy] || legacyHttpProxy;
68
+ const modelName = provider[keys.modelName];
69
+ const openaiExtraConfigStr = provider[keys.openaiExtraConfig];
70
+ const openaiExtraConfig = parseJson(keys.openaiExtraConfig, openaiExtraConfigStr || legacyOpenaiExtraConfig);
71
+ const { vlMode, uiTarsVersion } = modelFamilyToVLConfig(modelFamilyRaw);
72
+ const getModelDescription = (vlMode, uiTarsVersion)=>{
73
+ if (vlMode) if (uiTarsVersion) return `UI-TARS=${uiTarsVersion}`;
74
+ else return `${vlMode} mode`;
75
+ return '';
76
+ };
77
+ const modelDescription = getModelDescription(vlMode, uiTarsVersion);
78
+ return {
79
+ socksProxy,
80
+ httpProxy,
81
+ vlModeRaw: vlMode,
82
+ openaiBaseURL,
83
+ openaiApiKey,
84
+ openaiExtraConfig,
85
+ vlMode,
86
+ uiTarsModelVersion: uiTarsVersion,
87
+ modelName: modelName,
88
+ modelDescription,
89
+ intent: '-',
90
+ timeout: provider[keys.timeout] ? Number(provider[keys.timeout]) : void 0
91
+ };
92
+ };
93
+ const decideModelConfigFromIntentConfig = (intent, configMap)=>{
94
+ const debugLog = getDebug('ai:config');
95
+ debugLog('will decideModelConfig base on agent.modelConfig()', intent, maskConfig(configMap));
96
+ const keysForFn = KEYS_MAP[intent];
97
+ const modelName = configMap[keysForFn.modelName];
98
+ if (!modelName) return void debugLog('no modelName found for intent', intent);
99
+ const finalResult = parseOpenaiSdkConfig({
100
+ keys: keysForFn,
101
+ provider: configMap,
102
+ useLegacyLogic: 'default' === intent
103
+ });
104
+ finalResult.intent = intent;
105
+ debugLog('decideModelConfig result by agent.modelConfig() with intent', intent, maskConfig({
106
+ ...finalResult
107
+ }));
108
+ assert(finalResult.openaiBaseURL, `failed to get base URL of model (intent=${intent}). See https://midscenejs.com/model-strategy`);
109
+ if (!finalResult.modelName) console.warn(`modelName is not set for intent ${intent}, this may cause unexpected behavior. See https://midscenejs.com/model-strategy`);
110
+ return finalResult;
111
+ };
112
+ export { decideModelConfigFromIntentConfig, legacyConfigToModelFamily, modelFamilyToVLConfig, parseOpenaiSdkConfig };