@midscene/shared 0.30.10 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129)
  1. package/dist/es/build/rspack-config.mjs +4 -0
  2. package/dist/es/constants/example-code.mjs +4 -4
  3. package/dist/es/env/constants.mjs +27 -82
  4. package/dist/es/env/global-config-manager.mjs +2 -3
  5. package/dist/es/env/helper.mjs +12 -17
  6. package/dist/es/env/init-debug.mjs +6 -6
  7. package/dist/es/env/model-config-manager.mjs +45 -65
  8. package/dist/es/env/parse-model-config.mjs +112 -0
  9. package/dist/es/env/types.mjs +70 -162
  10. package/dist/es/extractor/dom-util.mjs +10 -18
  11. package/dist/es/extractor/index.mjs +2 -3
  12. package/dist/es/extractor/locator.mjs +8 -15
  13. package/dist/es/extractor/tree.mjs +2 -5
  14. package/dist/es/extractor/util.mjs +4 -28
  15. package/dist/es/extractor/web-extractor.mjs +7 -14
  16. package/dist/es/index.mjs +2 -1
  17. package/dist/es/mcp/base-server.mjs +250 -0
  18. package/dist/es/mcp/base-tools.mjs +84 -0
  19. package/dist/es/mcp/index.mjs +5 -0
  20. package/dist/es/mcp/inject-report-html-plugin.mjs +53 -0
  21. package/dist/es/mcp/tool-generator.mjs +207 -0
  22. package/dist/es/mcp/types.mjs +3 -0
  23. package/dist/es/node/fs.mjs +2 -2
  24. package/dist/es/utils.mjs +2 -3
  25. package/dist/es/zod-schema-utils.mjs +54 -0
  26. package/dist/lib/baseDB.js +2 -2
  27. package/dist/lib/build/copy-static.js +4 -4
  28. package/dist/lib/build/rspack-config.js +38 -0
  29. package/dist/lib/common.js +4 -4
  30. package/dist/lib/constants/example-code.js +6 -6
  31. package/dist/lib/constants/index.js +13 -13
  32. package/dist/lib/env/basic.js +2 -2
  33. package/dist/lib/env/constants.js +32 -90
  34. package/dist/lib/env/global-config-manager.js +4 -5
  35. package/dist/lib/env/helper.js +13 -22
  36. package/dist/lib/env/index.js +24 -28
  37. package/dist/lib/env/init-debug.js +7 -7
  38. package/dist/lib/env/model-config-manager.js +47 -67
  39. package/dist/lib/env/parse-model-config.js +155 -0
  40. package/dist/lib/env/types.js +146 -379
  41. package/dist/lib/env/utils.js +4 -4
  42. package/dist/lib/extractor/constants.js +4 -4
  43. package/dist/lib/extractor/debug.js +1 -1
  44. package/dist/lib/extractor/dom-util.js +18 -26
  45. package/dist/lib/extractor/index.js +11 -21
  46. package/dist/lib/extractor/locator.js +10 -20
  47. package/dist/lib/extractor/tree.js +4 -7
  48. package/dist/lib/extractor/util.js +17 -50
  49. package/dist/lib/extractor/web-extractor.js +12 -19
  50. package/dist/lib/img/box-select.js +4 -4
  51. package/dist/lib/img/draw-box.js +2 -2
  52. package/dist/lib/img/get-jimp.js +16 -34
  53. package/dist/lib/img/get-photon.js +24 -47
  54. package/dist/lib/img/get-sharp.js +16 -34
  55. package/dist/lib/img/index.js +18 -18
  56. package/dist/lib/img/info.js +4 -4
  57. package/dist/lib/img/transform.js +10 -10
  58. package/dist/lib/index.js +8 -4
  59. package/dist/lib/logger.js +4 -4
  60. package/dist/lib/mcp/base-server.js +300 -0
  61. package/dist/lib/mcp/base-tools.js +118 -0
  62. package/dist/lib/mcp/index.js +86 -0
  63. package/dist/lib/mcp/inject-report-html-plugin.js +98 -0
  64. package/dist/lib/mcp/tool-generator.js +244 -0
  65. package/dist/lib/mcp/types.js +40 -0
  66. package/dist/lib/node/fs.js +6 -6
  67. package/dist/lib/node/index.js +6 -8
  68. package/dist/lib/polyfills/async-hooks.js +2 -2
  69. package/dist/lib/polyfills/index.js +6 -8
  70. package/dist/lib/types/index.js +2 -2
  71. package/dist/lib/us-keyboard-layout.js +2 -2
  72. package/dist/lib/utils.js +13 -14
  73. package/dist/lib/zod-schema-utils.js +97 -0
  74. package/dist/types/build/rspack-config.d.ts +8 -0
  75. package/dist/types/constants/example-code.d.ts +1 -1
  76. package/dist/types/env/constants.d.ts +5 -18
  77. package/dist/types/env/global-config-manager.d.ts +1 -2
  78. package/dist/types/env/helper.d.ts +2 -4
  79. package/dist/types/env/model-config-manager.d.ts +8 -7
  80. package/dist/types/env/parse-model-config.d.ts +28 -0
  81. package/dist/types/env/types.d.ts +152 -191
  82. package/dist/types/extractor/dom-util.d.ts +2 -15
  83. package/dist/types/extractor/index.d.ts +1 -2
  84. package/dist/types/extractor/locator.d.ts +0 -1
  85. package/dist/types/extractor/tree.d.ts +1 -4
  86. package/dist/types/extractor/util.d.ts +0 -3
  87. package/dist/types/index.d.ts +1 -0
  88. package/dist/types/mcp/base-server.d.ts +77 -0
  89. package/dist/types/mcp/base-tools.d.ts +55 -0
  90. package/dist/types/mcp/index.d.ts +5 -0
  91. package/dist/types/mcp/inject-report-html-plugin.d.ts +18 -0
  92. package/dist/types/mcp/tool-generator.d.ts +11 -0
  93. package/dist/types/mcp/types.d.ts +100 -0
  94. package/dist/types/types/index.d.ts +5 -2
  95. package/dist/types/zod-schema-utils.d.ts +23 -0
  96. package/package.json +19 -4
  97. package/src/build/rspack-config.ts +12 -0
  98. package/src/constants/example-code.ts +4 -4
  99. package/src/env/constants.ts +58 -203
  100. package/src/env/global-config-manager.ts +7 -7
  101. package/src/env/helper.ts +10 -31
  102. package/src/env/init-debug.ts +11 -6
  103. package/src/env/model-config-manager.ts +91 -87
  104. package/src/env/parse-model-config.ts +265 -0
  105. package/src/env/types.ts +212 -344
  106. package/src/extractor/dom-util.ts +15 -12
  107. package/src/extractor/index.ts +0 -3
  108. package/src/extractor/locator.ts +3 -12
  109. package/src/extractor/tree.ts +4 -4
  110. package/src/extractor/util.ts +0 -32
  111. package/src/index.ts +2 -0
  112. package/src/mcp/base-server.ts +435 -0
  113. package/src/mcp/base-tools.ts +196 -0
  114. package/src/mcp/index.ts +5 -0
  115. package/src/mcp/inject-report-html-plugin.ts +119 -0
  116. package/src/mcp/tool-generator.ts +330 -0
  117. package/src/mcp/types.ts +108 -0
  118. package/src/node/fs.ts +1 -1
  119. package/src/types/index.ts +8 -2
  120. package/src/utils.ts +1 -1
  121. package/src/zod-schema-utils.ts +133 -0
  122. package/dist/es/env/decide-model-config.mjs +0 -172
  123. package/dist/es/env/parse.mjs +0 -69
  124. package/dist/lib/env/decide-model-config.js +0 -212
  125. package/dist/lib/env/parse.js +0 -106
  126. package/dist/types/env/decide-model-config.d.ts +0 -14
  127. package/dist/types/env/parse.d.ts +0 -12
  128. package/src/env/decide-model-config.ts +0 -319
  129. package/src/env/parse.ts +0 -131
package/src/env/types.ts CHANGED
@@ -1,10 +1,9 @@
1
1
  // config keys
2
- export const MIDSCENE_OPENAI_INIT_CONFIG_JSON =
3
- 'MIDSCENE_OPENAI_INIT_CONFIG_JSON';
2
+ export const MIDSCENE_MODEL_INIT_CONFIG_JSON =
3
+ 'MIDSCENE_MODEL_INIT_CONFIG_JSON';
4
4
  export const MIDSCENE_MODEL_NAME = 'MIDSCENE_MODEL_NAME';
5
- export const MIDSCENE_LANGSMITH_DEBUG = 'MIDSCENE_LANGSMITH_DEBUG';
6
- export const MIDSCENE_DEBUG_AI_PROFILE = 'MIDSCENE_DEBUG_AI_PROFILE';
7
- export const MIDSCENE_DEBUG_AI_RESPONSE = 'MIDSCENE_DEBUG_AI_RESPONSE';
5
+ export const MIDSCENE_DEBUG_MODEL_PROFILE = 'MIDSCENE_DEBUG_MODEL_PROFILE';
6
+ export const MIDSCENE_DEBUG_MODEL_RESPONSE = 'MIDSCENE_DEBUG_MODEL_RESPONSE';
8
7
  export const MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG =
9
8
  'MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG';
10
9
  export const MIDSCENE_DEBUG_MODE = 'MIDSCENE_DEBUG_MODE';
@@ -15,10 +14,43 @@ export const MIDSCENE_MCP_ANDROID_MODE = 'MIDSCENE_MCP_ANDROID_MODE';
15
14
  export const DOCKER_CONTAINER = 'DOCKER_CONTAINER';
16
15
  export const MIDSCENE_FORCE_DEEP_THINK = 'MIDSCENE_FORCE_DEEP_THINK';
17
16
 
18
- export const MIDSCENE_OPENAI_SOCKS_PROXY = 'MIDSCENE_OPENAI_SOCKS_PROXY';
19
- export const MIDSCENE_OPENAI_HTTP_PROXY = 'MIDSCENE_OPENAI_HTTP_PROXY';
17
+ // Observability
18
+ export const MIDSCENE_LANGSMITH_DEBUG = 'MIDSCENE_LANGSMITH_DEBUG';
19
+ export const MIDSCENE_LANGFUSE_DEBUG = 'MIDSCENE_LANGFUSE_DEBUG';
20
+
21
+ export const MIDSCENE_MODEL_SOCKS_PROXY = 'MIDSCENE_MODEL_SOCKS_PROXY';
22
+ export const MIDSCENE_MODEL_HTTP_PROXY = 'MIDSCENE_MODEL_HTTP_PROXY';
23
+
24
+ // New primary names for public API
25
+ export const MIDSCENE_MODEL_API_KEY = 'MIDSCENE_MODEL_API_KEY';
26
+ export const MIDSCENE_MODEL_BASE_URL = 'MIDSCENE_MODEL_BASE_URL';
27
+ export const MIDSCENE_MODEL_MAX_TOKENS = 'MIDSCENE_MODEL_MAX_TOKENS';
28
+ export const MIDSCENE_MODEL_TIMEOUT = 'MIDSCENE_MODEL_TIMEOUT';
29
+
30
+ /**
31
+ * @deprecated Use MIDSCENE_MODEL_API_KEY instead. This is kept for backward compatibility.
32
+ */
20
33
  export const OPENAI_API_KEY = 'OPENAI_API_KEY';
34
+ /**
35
+ * @deprecated Use MIDSCENE_MODEL_BASE_URL instead. This is kept for backward compatibility.
36
+ */
21
37
  export const OPENAI_BASE_URL = 'OPENAI_BASE_URL';
38
+ /**
39
+ * @deprecated Use MIDSCENE_MODEL_INIT_CONFIG_JSON instead. This is kept for backward compatibility.
40
+ */
41
+ export const MIDSCENE_OPENAI_INIT_CONFIG_JSON =
42
+ 'MIDSCENE_OPENAI_INIT_CONFIG_JSON';
43
+ /**
44
+ * @deprecated Use MIDSCENE_MODEL_HTTP_PROXY instead. This is kept for backward compatibility.
45
+ */
46
+ export const MIDSCENE_OPENAI_HTTP_PROXY = 'MIDSCENE_OPENAI_HTTP_PROXY';
47
+ /**
48
+ * @deprecated Use MIDSCENE_MODEL_SOCKS_PROXY instead. This is kept for backward compatibility.
49
+ */
50
+ export const MIDSCENE_OPENAI_SOCKS_PROXY = 'MIDSCENE_OPENAI_SOCKS_PROXY';
51
+ /**
52
+ * @deprecated Use MIDSCENE_MODEL_MAX_TOKENS instead. This is kept for backward compatibility.
53
+ */
22
54
  export const OPENAI_MAX_TOKENS = 'OPENAI_MAX_TOKENS';
23
55
 
24
56
  export const MIDSCENE_ADB_PATH = 'MIDSCENE_ADB_PATH';
@@ -37,141 +69,46 @@ export const MIDSCENE_USE_DOUBAO_VISION = 'MIDSCENE_USE_DOUBAO_VISION';
37
69
  export const MIDSCENE_USE_GEMINI = 'MIDSCENE_USE_GEMINI';
38
70
  export const MIDSCENE_USE_VL_MODEL = 'MIDSCENE_USE_VL_MODEL';
39
71
  export const MATCH_BY_POSITION = 'MATCH_BY_POSITION';
40
- export const MIDSCENE_API_TYPE = 'MIDSCENE-API-TYPE';
41
72
  export const MIDSCENE_REPORT_TAG_NAME = 'MIDSCENE_REPORT_TAG_NAME';
42
73
 
43
- export const MIDSCENE_REPLANNING_CYCLE_LIMIT =
44
- 'MIDSCENE_REPLANNING_CYCLE_LIMIT';
45
-
46
74
  export const MIDSCENE_PREFERRED_LANGUAGE = 'MIDSCENE_PREFERRED_LANGUAGE';
47
75
 
48
- export const MIDSCENE_USE_AZURE_OPENAI = 'MIDSCENE_USE_AZURE_OPENAI';
49
- export const MIDSCENE_AZURE_OPENAI_SCOPE = 'MIDSCENE_AZURE_OPENAI_SCOPE';
50
- export const MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON =
51
- 'MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON';
52
-
53
76
  export const MIDSCENE_CACHE_MAX_FILENAME_LENGTH =
54
77
  'MIDSCENE_CACHE_MAX_FILENAME_LENGTH';
55
78
 
56
- export const AZURE_OPENAI_ENDPOINT = 'AZURE_OPENAI_ENDPOINT';
57
- export const AZURE_OPENAI_KEY = 'AZURE_OPENAI_KEY';
58
- export const AZURE_OPENAI_API_VERSION = 'AZURE_OPENAI_API_VERSION';
59
- export const AZURE_OPENAI_DEPLOYMENT = 'AZURE_OPENAI_DEPLOYMENT';
60
-
61
- export const MIDSCENE_USE_ANTHROPIC_SDK = 'MIDSCENE_USE_ANTHROPIC_SDK';
62
- export const ANTHROPIC_API_KEY = 'ANTHROPIC_API_KEY';
79
+ export const MIDSCENE_REPLANNING_CYCLE_LIMIT =
80
+ 'MIDSCENE_REPLANNING_CYCLE_LIMIT';
63
81
 
64
82
  export const MIDSCENE_RUN_DIR = 'MIDSCENE_RUN_DIR';
65
83
 
66
- // default new
67
- export const MIDSCENE_OPENAI_BASE_URL = 'MIDSCENE_OPENAI_BASE_URL';
68
- export const MIDSCENE_OPENAI_API_KEY = 'MIDSCENE_OPENAI_API_KEY';
69
- export const MIDSCENE_OPENAI_USE_AZURE = 'MIDSCENE_OPENAI_USE_AZURE';
70
- export const MIDSCENE_AZURE_OPENAI_KEY = 'MIDSCENE_AZURE_OPENAI_KEY';
71
- export const MIDSCENE_AZURE_OPENAI_ENDPOINT = 'MIDSCENE_AZURE_OPENAI_ENDPOINT';
72
- export const MIDSCENE_AZURE_OPENAI_API_VERSION =
73
- 'MIDSCENE_AZURE_OPENAI_API_VERSION';
74
- export const MIDSCENE_AZURE_OPENAI_DEPLOYMENT =
75
- 'MIDSCENE_AZURE_OPENAI_DEPLOYMENT';
76
- export const MIDSCENE_ANTHROPIC_API_KEY = 'MIDSCENE_ANTHROPIC_API_KEY';
77
- export const MIDSCENE_VL_MODE = 'MIDSCENE_VL_MODE';
78
-
79
- // VQA
80
- export const MIDSCENE_VQA_MODEL_NAME = 'MIDSCENE_VQA_MODEL_NAME';
81
- export const MIDSCENE_VQA_OPENAI_SOCKS_PROXY =
82
- 'MIDSCENE_VQA_OPENAI_SOCKS_PROXY';
83
- export const MIDSCENE_VQA_OPENAI_HTTP_PROXY = 'MIDSCENE_VQA_OPENAI_HTTP_PROXY';
84
- export const MIDSCENE_VQA_OPENAI_BASE_URL = 'MIDSCENE_VQA_OPENAI_BASE_URL';
85
- export const MIDSCENE_VQA_OPENAI_API_KEY = 'MIDSCENE_VQA_OPENAI_API_KEY';
86
- export const MIDSCENE_VQA_OPENAI_INIT_CONFIG_JSON =
87
- 'MIDSCENE_VQA_OPENAI_INIT_CONFIG_JSON';
88
- export const MIDSCENE_VQA_OPENAI_USE_AZURE = 'MIDSCENE_VQA_OPENAI_USE_AZURE';
89
- export const MIDSCENE_VQA_USE_AZURE_OPENAI = 'MIDSCENE_VQA_USE_AZURE_OPENAI';
90
- export const MIDSCENE_VQA_AZURE_OPENAI_SCOPE =
91
- 'MIDSCENE_VQA_AZURE_OPENAI_SCOPE';
92
- export const MIDSCENE_VQA_AZURE_OPENAI_KEY = 'MIDSCENE_VQA_AZURE_OPENAI_KEY';
93
- export const MIDSCENE_VQA_AZURE_OPENAI_ENDPOINT =
94
- 'MIDSCENE_VQA_AZURE_OPENAI_ENDPOINT';
95
- export const MIDSCENE_VQA_AZURE_OPENAI_API_VERSION =
96
- 'MIDSCENE_VQA_AZURE_OPENAI_API_VERSION';
97
- export const MIDSCENE_VQA_AZURE_OPENAI_DEPLOYMENT =
98
- 'MIDSCENE_VQA_AZURE_OPENAI_DEPLOYMENT';
99
- export const MIDSCENE_VQA_AZURE_OPENAI_INIT_CONFIG_JSON =
100
- 'MIDSCENE_VQA_AZURE_OPENAI_INIT_CONFIG_JSON';
101
- export const MIDSCENE_VQA_USE_ANTHROPIC_SDK = 'MIDSCENE_VQA_USE_ANTHROPIC_SDK';
102
- export const MIDSCENE_VQA_ANTHROPIC_API_KEY = 'MIDSCENE_VQA_ANTHROPIC_API_KEY';
103
- export const MIDSCENE_VQA_VL_MODE = 'MIDSCENE_VQA_VL_MODE';
84
+ // INSIGHT (unified VQA and Grounding)
85
+ export const MIDSCENE_INSIGHT_MODEL_NAME = 'MIDSCENE_INSIGHT_MODEL_NAME';
86
+ export const MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY =
87
+ 'MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY';
88
+ export const MIDSCENE_INSIGHT_MODEL_HTTP_PROXY =
89
+ 'MIDSCENE_INSIGHT_MODEL_HTTP_PROXY';
90
+ export const MIDSCENE_INSIGHT_MODEL_BASE_URL =
91
+ 'MIDSCENE_INSIGHT_MODEL_BASE_URL';
92
+ export const MIDSCENE_INSIGHT_MODEL_API_KEY = 'MIDSCENE_INSIGHT_MODEL_API_KEY';
93
+ export const MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON =
94
+ 'MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON';
95
+ export const MIDSCENE_INSIGHT_MODEL_TIMEOUT = 'MIDSCENE_INSIGHT_MODEL_TIMEOUT';
104
96
 
105
97
  // PLANNING
106
98
  export const MIDSCENE_PLANNING_MODEL_NAME = 'MIDSCENE_PLANNING_MODEL_NAME';
107
- export const MIDSCENE_PLANNING_OPENAI_SOCKS_PROXY =
108
- 'MIDSCENE_PLANNING_OPENAI_SOCKS_PROXY';
109
- export const MIDSCENE_PLANNING_OPENAI_HTTP_PROXY =
110
- 'MIDSCENE_PLANNING_OPENAI_HTTP_PROXY';
111
- export const MIDSCENE_PLANNING_OPENAI_BASE_URL =
112
- 'MIDSCENE_PLANNING_OPENAI_BASE_URL';
113
- export const MIDSCENE_PLANNING_OPENAI_API_KEY =
114
- 'MIDSCENE_PLANNING_OPENAI_API_KEY';
115
- export const MIDSCENE_PLANNING_OPENAI_INIT_CONFIG_JSON =
116
- 'MIDSCENE_PLANNING_OPENAI_INIT_CONFIG_JSON';
117
- export const MIDSCENE_PLANNING_OPENAI_USE_AZURE =
118
- 'MIDSCENE_PLANNING_OPENAI_USE_AZURE';
119
- export const MIDSCENE_PLANNING_USE_AZURE_OPENAI =
120
- 'MIDSCENE_PLANNING_USE_AZURE_OPENAI';
121
- export const MIDSCENE_PLANNING_AZURE_OPENAI_SCOPE =
122
- 'MIDSCENE_PLANNING_AZURE_OPENAI_SCOPE';
123
- export const MIDSCENE_PLANNING_AZURE_OPENAI_KEY =
124
- 'MIDSCENE_PLANNING_AZURE_OPENAI_KEY';
125
- export const MIDSCENE_PLANNING_AZURE_OPENAI_ENDPOINT =
126
- 'MIDSCENE_PLANNING_AZURE_OPENAI_ENDPOINT';
127
- export const MIDSCENE_PLANNING_AZURE_OPENAI_API_VERSION =
128
- 'MIDSCENE_PLANNING_AZURE_OPENAI_API_VERSION';
129
- export const MIDSCENE_PLANNING_AZURE_OPENAI_DEPLOYMENT =
130
- 'MIDSCENE_PLANNING_AZURE_OPENAI_DEPLOYMENT';
131
- export const MIDSCENE_PLANNING_AZURE_OPENAI_INIT_CONFIG_JSON =
132
- 'MIDSCENE_PLANNING_AZURE_OPENAI_INIT_CONFIG_JSON';
133
- export const MIDSCENE_PLANNING_USE_ANTHROPIC_SDK =
134
- 'MIDSCENE_PLANNING_USE_ANTHROPIC_SDK';
135
- export const MIDSCENE_PLANNING_ANTHROPIC_API_KEY =
136
- 'MIDSCENE_PLANNING_ANTHROPIC_API_KEY';
137
- export const MIDSCENE_PLANNING_VL_MODE = 'MIDSCENE_PLANNING_VL_MODE';
138
-
139
- // GROUNDING
140
- export const MIDSCENE_GROUNDING_MODEL_NAME = 'MIDSCENE_GROUNDING_MODEL_NAME';
141
- export const MIDSCENE_GROUNDING_OPENAI_SOCKS_PROXY =
142
- 'MIDSCENE_GROUNDING_OPENAI_SOCKS_PROXY';
143
- export const MIDSCENE_GROUNDING_OPENAI_HTTP_PROXY =
144
- 'MIDSCENE_GROUNDING_OPENAI_HTTP_PROXY';
145
- export const MIDSCENE_GROUNDING_OPENAI_BASE_URL =
146
- 'MIDSCENE_GROUNDING_OPENAI_BASE_URL';
147
- export const MIDSCENE_GROUNDING_OPENAI_API_KEY =
148
- 'MIDSCENE_GROUNDING_OPENAI_API_KEY';
149
- export const MIDSCENE_GROUNDING_OPENAI_INIT_CONFIG_JSON =
150
- 'MIDSCENE_GROUNDING_OPENAI_INIT_CONFIG_JSON';
151
- export const MIDSCENE_GROUNDING_OPENAI_USE_AZURE =
152
- 'MIDSCENE_GROUNDING_OPENAI_USE_AZURE';
153
- export const MIDSCENE_GROUNDING_USE_AZURE_OPENAI =
154
- 'MIDSCENE_GROUNDING_USE_AZURE_OPENAI';
155
- export const MIDSCENE_GROUNDING_AZURE_OPENAI_SCOPE =
156
- 'MIDSCENE_GROUNDING_AZURE_OPENAI_SCOPE';
157
- export const MIDSCENE_GROUNDING_AZURE_OPENAI_KEY =
158
- 'MIDSCENE_GROUNDING_AZURE_OPENAI_KEY';
159
- export const MIDSCENE_GROUNDING_AZURE_OPENAI_ENDPOINT =
160
- 'MIDSCENE_GROUNDING_AZURE_OPENAI_ENDPOINT';
161
- export const MIDSCENE_GROUNDING_AZURE_OPENAI_API_VERSION =
162
- 'MIDSCENE_GROUNDING_AZURE_OPENAI_API_VERSION';
163
- export const MIDSCENE_GROUNDING_AZURE_OPENAI_DEPLOYMENT =
164
- 'MIDSCENE_GROUNDING_AZURE_OPENAI_DEPLOYMENT';
165
- export const MIDSCENE_GROUNDING_AZURE_OPENAI_INIT_CONFIG_JSON =
166
- 'MIDSCENE_GROUNDING_AZURE_OPENAI_INIT_CONFIG_JSON';
167
- export const MIDSCENE_GROUNDING_USE_ANTHROPIC_SDK =
168
- 'MIDSCENE_GROUNDING_USE_ANTHROPIC_SDK';
169
- export const MIDSCENE_GROUNDING_ANTHROPIC_API_KEY =
170
- 'MIDSCENE_GROUNDING_ANTHROPIC_API_KEY';
171
- export const MIDSCENE_GROUNDING_VL_MODE = 'MIDSCENE_GROUNDING_VL_MODE';
172
-
173
- // @deprecated
174
- export const OPENAI_USE_AZURE = 'OPENAI_USE_AZURE';
99
+ export const MIDSCENE_PLANNING_MODEL_SOCKS_PROXY =
100
+ 'MIDSCENE_PLANNING_MODEL_SOCKS_PROXY';
101
+ export const MIDSCENE_PLANNING_MODEL_HTTP_PROXY =
102
+ 'MIDSCENE_PLANNING_MODEL_HTTP_PROXY';
103
+ export const MIDSCENE_PLANNING_MODEL_BASE_URL =
104
+ 'MIDSCENE_PLANNING_MODEL_BASE_URL';
105
+ export const MIDSCENE_PLANNING_MODEL_API_KEY =
106
+ 'MIDSCENE_PLANNING_MODEL_API_KEY';
107
+ export const MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON =
108
+ 'MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON';
109
+ export const MIDSCENE_PLANNING_MODEL_TIMEOUT =
110
+ 'MIDSCENE_PLANNING_MODEL_TIMEOUT';
111
+ export const MIDSCENE_MODEL_FAMILY = 'MIDSCENE_MODEL_FAMILY';
175
112
 
176
113
  /**
177
114
  * env keys declared but unused
@@ -184,17 +121,18 @@ export const UNUSED_ENV_KEYS = [MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG];
184
121
  */
185
122
  export const BASIC_ENV_KEYS = [
186
123
  MIDSCENE_DEBUG_MODE,
187
- MIDSCENE_DEBUG_AI_PROFILE,
188
- MIDSCENE_DEBUG_AI_RESPONSE,
124
+ MIDSCENE_DEBUG_MODEL_PROFILE,
125
+ MIDSCENE_DEBUG_MODEL_RESPONSE,
189
126
  MIDSCENE_RUN_DIR,
190
127
  ] as const;
191
128
 
192
129
  export const BOOLEAN_ENV_KEYS = [
193
130
  MIDSCENE_CACHE,
194
- MIDSCENE_LANGSMITH_DEBUG,
195
131
  MIDSCENE_FORCE_DEEP_THINK,
196
132
  MIDSCENE_MCP_USE_PUPPETEER_MODE,
197
133
  MIDSCENE_MCP_ANDROID_MODE,
134
+ MIDSCENE_LANGSMITH_DEBUG,
135
+ MIDSCENE_LANGFUSE_DEBUG,
198
136
  ] as const;
199
137
 
200
138
  export const NUMBER_ENV_KEYS = [
@@ -203,6 +141,7 @@ export const NUMBER_ENV_KEYS = [
203
141
  ] as const;
204
142
 
205
143
  export const STRING_ENV_KEYS = [
144
+ MIDSCENE_MODEL_MAX_TOKENS,
206
145
  OPENAI_MAX_TOKENS,
207
146
  MIDSCENE_ADB_PATH,
208
147
  MIDSCENE_ADB_REMOTE_HOST,
@@ -236,91 +175,41 @@ export const GLOBAL_ENV_KEYS = [
236
175
  export const MODEL_ENV_KEYS = [
237
176
  // model default
238
177
  MIDSCENE_MODEL_NAME,
239
- MIDSCENE_OPENAI_INIT_CONFIG_JSON,
240
- MIDSCENE_OPENAI_API_KEY,
241
- MIDSCENE_OPENAI_BASE_URL,
242
- MIDSCENE_OPENAI_USE_AZURE,
243
- MIDSCENE_OPENAI_SOCKS_PROXY,
244
- MIDSCENE_OPENAI_HTTP_PROXY,
245
- MIDSCENE_USE_AZURE_OPENAI,
246
- MIDSCENE_AZURE_OPENAI_SCOPE,
247
- MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON,
248
- MIDSCENE_USE_ANTHROPIC_SDK,
178
+ MIDSCENE_MODEL_INIT_CONFIG_JSON,
179
+ MIDSCENE_MODEL_API_KEY,
180
+ MIDSCENE_MODEL_BASE_URL,
181
+ MIDSCENE_MODEL_SOCKS_PROXY,
182
+ MIDSCENE_MODEL_HTTP_PROXY,
183
+ MIDSCENE_MODEL_TIMEOUT,
249
184
  MIDSCENE_USE_VLM_UI_TARS,
250
185
  MIDSCENE_USE_QWEN_VL,
251
186
  MIDSCENE_USE_QWEN3_VL,
252
187
  MIDSCENE_USE_DOUBAO_VISION,
253
188
  MIDSCENE_USE_GEMINI,
254
189
  MIDSCENE_USE_VL_MODEL,
255
- ANTHROPIC_API_KEY,
256
- MIDSCENE_AZURE_OPENAI_ENDPOINT,
257
- MIDSCENE_AZURE_OPENAI_KEY,
258
- MIDSCENE_AZURE_OPENAI_API_VERSION,
259
- MIDSCENE_AZURE_OPENAI_DEPLOYMENT,
260
- MIDSCENE_VL_MODE,
261
190
  // model default legacy
262
191
  OPENAI_API_KEY,
263
192
  OPENAI_BASE_URL,
264
- OPENAI_USE_AZURE,
265
- ANTHROPIC_API_KEY,
266
- AZURE_OPENAI_ENDPOINT,
267
- AZURE_OPENAI_KEY,
268
- AZURE_OPENAI_API_VERSION,
269
- AZURE_OPENAI_DEPLOYMENT,
270
- // VQA
271
- MIDSCENE_VQA_MODEL_NAME,
272
- MIDSCENE_VQA_OPENAI_SOCKS_PROXY,
273
- MIDSCENE_VQA_OPENAI_HTTP_PROXY,
274
- MIDSCENE_VQA_OPENAI_BASE_URL,
275
- MIDSCENE_VQA_OPENAI_API_KEY,
276
- MIDSCENE_VQA_OPENAI_INIT_CONFIG_JSON,
277
- MIDSCENE_VQA_OPENAI_USE_AZURE,
278
- MIDSCENE_VQA_USE_AZURE_OPENAI,
279
- MIDSCENE_VQA_AZURE_OPENAI_SCOPE,
280
- MIDSCENE_VQA_AZURE_OPENAI_KEY,
281
- MIDSCENE_VQA_AZURE_OPENAI_ENDPOINT,
282
- MIDSCENE_VQA_AZURE_OPENAI_API_VERSION,
283
- MIDSCENE_VQA_AZURE_OPENAI_DEPLOYMENT,
284
- MIDSCENE_VQA_AZURE_OPENAI_INIT_CONFIG_JSON,
285
- MIDSCENE_VQA_USE_ANTHROPIC_SDK,
286
- MIDSCENE_VQA_ANTHROPIC_API_KEY,
287
- MIDSCENE_VQA_VL_MODE,
193
+ MIDSCENE_OPENAI_INIT_CONFIG_JSON,
194
+ MIDSCENE_OPENAI_HTTP_PROXY,
195
+ MIDSCENE_OPENAI_SOCKS_PROXY,
196
+ // INSIGHT (unified VQA and Grounding)
197
+ MIDSCENE_INSIGHT_MODEL_NAME,
198
+ MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY,
199
+ MIDSCENE_INSIGHT_MODEL_HTTP_PROXY,
200
+ MIDSCENE_INSIGHT_MODEL_BASE_URL,
201
+ MIDSCENE_INSIGHT_MODEL_API_KEY,
202
+ MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON,
203
+ MIDSCENE_INSIGHT_MODEL_TIMEOUT,
288
204
  // PLANNING
289
205
  MIDSCENE_PLANNING_MODEL_NAME,
290
- MIDSCENE_PLANNING_OPENAI_SOCKS_PROXY,
291
- MIDSCENE_PLANNING_OPENAI_HTTP_PROXY,
292
- MIDSCENE_PLANNING_OPENAI_BASE_URL,
293
- MIDSCENE_PLANNING_OPENAI_API_KEY,
294
- MIDSCENE_PLANNING_OPENAI_INIT_CONFIG_JSON,
295
- MIDSCENE_PLANNING_OPENAI_USE_AZURE,
296
- MIDSCENE_PLANNING_USE_AZURE_OPENAI,
297
- MIDSCENE_PLANNING_AZURE_OPENAI_SCOPE,
298
- MIDSCENE_PLANNING_AZURE_OPENAI_KEY,
299
- MIDSCENE_PLANNING_AZURE_OPENAI_ENDPOINT,
300
- MIDSCENE_PLANNING_AZURE_OPENAI_API_VERSION,
301
- MIDSCENE_PLANNING_AZURE_OPENAI_DEPLOYMENT,
302
- MIDSCENE_PLANNING_AZURE_OPENAI_INIT_CONFIG_JSON,
303
- MIDSCENE_PLANNING_USE_ANTHROPIC_SDK,
304
- MIDSCENE_PLANNING_ANTHROPIC_API_KEY,
305
- MIDSCENE_PLANNING_VL_MODE,
306
- // GROUNDING
307
- MIDSCENE_GROUNDING_MODEL_NAME,
308
- MIDSCENE_GROUNDING_OPENAI_SOCKS_PROXY,
309
- MIDSCENE_GROUNDING_OPENAI_HTTP_PROXY,
310
- MIDSCENE_GROUNDING_OPENAI_BASE_URL,
311
- MIDSCENE_GROUNDING_OPENAI_API_KEY,
312
- MIDSCENE_GROUNDING_OPENAI_INIT_CONFIG_JSON,
313
- MIDSCENE_GROUNDING_OPENAI_USE_AZURE,
314
- MIDSCENE_GROUNDING_USE_AZURE_OPENAI,
315
- MIDSCENE_GROUNDING_AZURE_OPENAI_SCOPE,
316
- MIDSCENE_GROUNDING_AZURE_OPENAI_KEY,
317
- MIDSCENE_GROUNDING_AZURE_OPENAI_ENDPOINT,
318
- MIDSCENE_GROUNDING_AZURE_OPENAI_API_VERSION,
319
- MIDSCENE_GROUNDING_AZURE_OPENAI_DEPLOYMENT,
320
- MIDSCENE_GROUNDING_AZURE_OPENAI_INIT_CONFIG_JSON,
321
- MIDSCENE_GROUNDING_USE_ANTHROPIC_SDK,
322
- MIDSCENE_GROUNDING_ANTHROPIC_API_KEY,
323
- MIDSCENE_GROUNDING_VL_MODE,
206
+ MIDSCENE_PLANNING_MODEL_SOCKS_PROXY,
207
+ MIDSCENE_PLANNING_MODEL_HTTP_PROXY,
208
+ MIDSCENE_PLANNING_MODEL_BASE_URL,
209
+ MIDSCENE_PLANNING_MODEL_API_KEY,
210
+ MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON,
211
+ MIDSCENE_PLANNING_MODEL_TIMEOUT,
212
+ MIDSCENE_MODEL_FAMILY,
324
213
  ] as const;
325
214
 
326
215
  export const ALL_ENV_KEYS = [
@@ -334,7 +223,7 @@ export type TEnvKeys = (typeof ALL_ENV_KEYS)[number];
334
223
  export type TGlobalConfig = Record<TEnvKeys, string | undefined>;
335
224
 
336
225
  export type TVlModeValues =
337
- | 'qwen-vl'
226
+ | 'qwen2.5-vl'
338
227
  | 'qwen3-vl'
339
228
  | 'doubao-vision'
340
229
  | 'gemini'
@@ -343,114 +232,91 @@ export type TVlModeValues =
343
232
  | 'vlm-ui-tars-doubao-1.5';
344
233
 
345
234
  export type TVlModeTypes =
346
- | 'qwen-vl'
235
+ | 'qwen2.5-vl'
347
236
  | 'qwen3-vl'
348
237
  | 'doubao-vision'
349
238
  | 'gemini'
350
239
  | 'vlm-ui-tars';
351
240
 
352
- export interface IModelConfigForVQA {
241
+ export const VL_MODE_RAW_VALID_VALUES: TVlModeValues[] = [
242
+ 'doubao-vision',
243
+ 'gemini',
244
+ 'qwen2.5-vl',
245
+ 'qwen3-vl',
246
+ 'vlm-ui-tars',
247
+ 'vlm-ui-tars-doubao',
248
+ 'vlm-ui-tars-doubao-1.5',
249
+ ];
250
+
251
+ /**
252
+ * Model family values - unified model configuration approach
253
+ * Replaces the old MIDSCENE_USE_* environment variables
254
+ *
255
+ * Note: These values directly correspond to VL_MODE_RAW_VALID_VALUES
256
+ * - 'qwen2.5-vl' is Qwen 2.5
257
+ * - 'qwen3-vl' is Qwen 3
258
+ */
259
+ export type TModelFamily = TVlModeValues;
260
+
261
+ export const MODEL_FAMILY_VALUES: TVlModeValues[] = [
262
+ ...VL_MODE_RAW_VALID_VALUES,
263
+ ];
264
+
265
+ export interface IModelConfigForInsight {
353
266
  // model name
354
- [MIDSCENE_VQA_MODEL_NAME]: string;
267
+ [MIDSCENE_INSIGHT_MODEL_NAME]: string;
355
268
  // proxy
356
- [MIDSCENE_VQA_OPENAI_SOCKS_PROXY]?: string;
357
- [MIDSCENE_VQA_OPENAI_HTTP_PROXY]?: string;
269
+ [MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY]?: string;
270
+ [MIDSCENE_INSIGHT_MODEL_HTTP_PROXY]?: string;
358
271
  // OpenAI
359
- [MIDSCENE_VQA_OPENAI_BASE_URL]?: string;
360
- [MIDSCENE_VQA_OPENAI_API_KEY]?: string;
361
- [MIDSCENE_VQA_OPENAI_INIT_CONFIG_JSON]?: string;
362
- // Azure
363
- [MIDSCENE_VQA_OPENAI_USE_AZURE]?: string;
364
- [MIDSCENE_VQA_USE_AZURE_OPENAI]?: string;
365
- [MIDSCENE_VQA_AZURE_OPENAI_SCOPE]?: string;
366
- [MIDSCENE_VQA_AZURE_OPENAI_KEY]?: string;
367
- [MIDSCENE_VQA_AZURE_OPENAI_ENDPOINT]?: string;
368
- [MIDSCENE_VQA_AZURE_OPENAI_API_VERSION]?: string;
369
- [MIDSCENE_VQA_AZURE_OPENAI_DEPLOYMENT]?: string;
370
- [MIDSCENE_VQA_AZURE_OPENAI_INIT_CONFIG_JSON]?: string;
371
- // Anthropic
372
- [MIDSCENE_VQA_USE_ANTHROPIC_SDK]?: string;
373
- [MIDSCENE_VQA_ANTHROPIC_API_KEY]?: string;
374
- // extra
375
- [MIDSCENE_VQA_VL_MODE]?: TVlModeValues;
272
+ [MIDSCENE_INSIGHT_MODEL_BASE_URL]?: string;
273
+ [MIDSCENE_INSIGHT_MODEL_API_KEY]?: string;
274
+ [MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON]?: string;
275
+ // timeout
276
+ [MIDSCENE_INSIGHT_MODEL_TIMEOUT]?: string;
376
277
  }
377
278
 
378
279
  export interface IModelConfigForPlanning {
379
280
  // model name
380
281
  [MIDSCENE_PLANNING_MODEL_NAME]: string;
381
282
  // proxy
382
- [MIDSCENE_PLANNING_OPENAI_SOCKS_PROXY]?: string;
383
- [MIDSCENE_PLANNING_OPENAI_HTTP_PROXY]?: string;
384
- // OpenAI
385
- [MIDSCENE_PLANNING_OPENAI_BASE_URL]?: string;
386
- [MIDSCENE_PLANNING_OPENAI_API_KEY]?: string;
387
- [MIDSCENE_PLANNING_OPENAI_INIT_CONFIG_JSON]?: string;
388
- // Azure
389
- [MIDSCENE_PLANNING_OPENAI_USE_AZURE]?: string;
390
- [MIDSCENE_PLANNING_USE_AZURE_OPENAI]?: string;
391
- [MIDSCENE_PLANNING_AZURE_OPENAI_SCOPE]?: string;
392
- [MIDSCENE_PLANNING_AZURE_OPENAI_KEY]?: string;
393
- [MIDSCENE_PLANNING_AZURE_OPENAI_ENDPOINT]?: string;
394
- [MIDSCENE_PLANNING_AZURE_OPENAI_API_VERSION]?: string;
395
- [MIDSCENE_PLANNING_AZURE_OPENAI_DEPLOYMENT]?: string;
396
- [MIDSCENE_PLANNING_AZURE_OPENAI_INIT_CONFIG_JSON]?: string;
397
- // Anthropic
398
- [MIDSCENE_PLANNING_USE_ANTHROPIC_SDK]?: string;
399
- [MIDSCENE_PLANNING_ANTHROPIC_API_KEY]?: string;
400
- // extra
401
- [MIDSCENE_PLANNING_VL_MODE]?: TVlModeValues;
402
- }
403
-
404
- export interface IModeConfigForGrounding {
405
- // model name
406
- [MIDSCENE_GROUNDING_MODEL_NAME]: string;
407
- // proxy
408
- [MIDSCENE_GROUNDING_OPENAI_SOCKS_PROXY]?: string;
409
- [MIDSCENE_GROUNDING_OPENAI_HTTP_PROXY]?: string;
283
+ [MIDSCENE_PLANNING_MODEL_SOCKS_PROXY]?: string;
284
+ [MIDSCENE_PLANNING_MODEL_HTTP_PROXY]?: string;
410
285
  // OpenAI
411
- [MIDSCENE_GROUNDING_OPENAI_BASE_URL]?: string;
412
- [MIDSCENE_GROUNDING_OPENAI_API_KEY]?: string;
413
- [MIDSCENE_GROUNDING_OPENAI_INIT_CONFIG_JSON]?: string;
414
- // Azure
415
- [MIDSCENE_GROUNDING_OPENAI_USE_AZURE]?: string;
416
- [MIDSCENE_GROUNDING_USE_AZURE_OPENAI]?: string;
417
- [MIDSCENE_GROUNDING_AZURE_OPENAI_SCOPE]?: string;
418
- [MIDSCENE_GROUNDING_AZURE_OPENAI_KEY]?: string;
419
- [MIDSCENE_GROUNDING_AZURE_OPENAI_ENDPOINT]?: string;
420
- [MIDSCENE_GROUNDING_AZURE_OPENAI_API_VERSION]?: string;
421
- [MIDSCENE_GROUNDING_AZURE_OPENAI_DEPLOYMENT]?: string;
422
- [MIDSCENE_GROUNDING_AZURE_OPENAI_INIT_CONFIG_JSON]?: string;
423
- // Anthropic
424
- [MIDSCENE_GROUNDING_USE_ANTHROPIC_SDK]?: string;
425
- [MIDSCENE_GROUNDING_ANTHROPIC_API_KEY]?: string;
426
- // extra
427
- [MIDSCENE_GROUNDING_VL_MODE]?: TVlModeValues;
286
+ [MIDSCENE_PLANNING_MODEL_BASE_URL]?: string;
287
+ [MIDSCENE_PLANNING_MODEL_API_KEY]?: string;
288
+ [MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON]?: string;
289
+ // timeout
290
+ [MIDSCENE_PLANNING_MODEL_TIMEOUT]?: string;
428
291
  }
429
292
 
293
+ /**
294
+ * Model configuration for Planning intent.
295
+ *
296
+ * IMPORTANT: Planning MUST use a vision language model (VL mode).
297
+ * DOM-based planning is not supported.
298
+ *
299
+ * Required: MIDSCENE_MODEL_FAMILY must be set to one of:
300
+ * - 'qwen2.5-vl'
301
+ * - 'qwen3-vl'
302
+ * - 'gemini'
303
+ * - 'doubao-vision'
304
+ * - 'vlm-ui-tars'
305
+ * - 'vlm-ui-tars-doubao'
306
+ * - 'vlm-ui-tars-doubao-1.5'
307
+ */
430
308
  export interface IModelConfigForDefault {
431
309
  // model name
432
310
  [MIDSCENE_MODEL_NAME]: string;
433
311
  // proxy
434
- [MIDSCENE_OPENAI_SOCKS_PROXY]?: string;
435
- [MIDSCENE_OPENAI_HTTP_PROXY]?: string;
312
+ [MIDSCENE_MODEL_SOCKS_PROXY]?: string;
313
+ [MIDSCENE_MODEL_HTTP_PROXY]?: string;
436
314
  // OpenAI
437
- [MIDSCENE_OPENAI_BASE_URL]?: string;
438
- [MIDSCENE_OPENAI_API_KEY]?: string;
439
- [MIDSCENE_OPENAI_INIT_CONFIG_JSON]?: string;
440
- // Azure
441
- [MIDSCENE_OPENAI_USE_AZURE]?: string;
442
- [MIDSCENE_USE_AZURE_OPENAI]?: string;
443
- [MIDSCENE_AZURE_OPENAI_SCOPE]?: string;
444
- [MIDSCENE_AZURE_OPENAI_KEY]?: string;
445
- [MIDSCENE_AZURE_OPENAI_ENDPOINT]?: string;
446
- [MIDSCENE_AZURE_OPENAI_API_VERSION]?: string;
447
- [MIDSCENE_AZURE_OPENAI_DEPLOYMENT]?: string;
448
- [MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON]?: string;
449
- // Anthropic
450
- [MIDSCENE_USE_ANTHROPIC_SDK]?: string;
451
- [MIDSCENE_ANTHROPIC_API_KEY]?: string;
315
+ [MIDSCENE_MODEL_BASE_URL]?: string;
316
+ [MIDSCENE_MODEL_API_KEY]?: string;
317
+ [MIDSCENE_MODEL_INIT_CONFIG_JSON]?: string;
452
318
  // extra
453
- [MIDSCENE_VL_MODE]?: TVlModeValues;
319
+ [MIDSCENE_MODEL_FAMILY]?: TVlModeValues;
454
320
  }
455
321
 
456
322
  export interface IModelConfigForDefaultLegacy {
@@ -463,37 +329,20 @@ export interface IModelConfigForDefaultLegacy {
463
329
  [OPENAI_BASE_URL]?: string;
464
330
  [OPENAI_API_KEY]?: string;
465
331
  [MIDSCENE_OPENAI_INIT_CONFIG_JSON]?: string;
466
- // Azure
467
- [OPENAI_USE_AZURE]?: string;
468
- [MIDSCENE_USE_AZURE_OPENAI]?: string;
469
- [MIDSCENE_AZURE_OPENAI_SCOPE]?: string;
470
- [AZURE_OPENAI_KEY]?: string;
471
- [AZURE_OPENAI_ENDPOINT]?: string;
472
- [AZURE_OPENAI_API_VERSION]?: string;
473
- [AZURE_OPENAI_DEPLOYMENT]?: string;
474
- [MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON]?: string;
475
- // Anthropic
476
- [MIDSCENE_USE_ANTHROPIC_SDK]?: string;
477
- [ANTHROPIC_API_KEY]?: string;
478
- // extra
479
- [MIDSCENE_VL_MODE]?: TVlModeValues;
480
332
  }
481
333
 
482
334
  /**
483
- * - VQA: Visual Question Answering
484
- * - grounding:short for Visual Grounding
335
+ * - insight: Visual Question Answering and Visual Grounding (unified)
485
336
  * - planning: planning
486
- * - default: all except VQAgrounding、planning
337
+ * - default: all except insight、planning
487
338
  */
488
- export type TIntent = 'VQA' | 'planning' | 'grounding' | 'default';
339
+ export type TIntent = 'insight' | 'planning' | 'default';
489
340
 
490
- export type TModelConfigFn = (options: {
491
- intent: TIntent;
492
- }) =>
493
- | IModelConfigForVQA
494
- | IModelConfigForPlanning
495
- | IModeConfigForGrounding
496
- | IModelConfigForDefault;
341
+ /**
342
+ * Env-style model configuration map supplied directly to the agent.
343
+ * Numbers are allowed so callers can pass numeric env values (e.g. limits) without casting.
344
+ */
345
+ export type TModelConfig = Record<string, string | number>;
497
346
 
498
347
  export enum UITarsModelVersion {
499
348
  V1_0 = '1.0',
@@ -502,15 +351,36 @@ export enum UITarsModelVersion {
502
351
  DOUBAO_1_5_20B = 'doubao-1.5-20B',
503
352
  }
504
353
 
505
- export const VL_MODE_RAW_VALID_VALUES: TVlModeValues[] = [
506
- 'doubao-vision',
507
- 'gemini',
508
- 'qwen-vl',
509
- 'qwen3-vl',
510
- 'vlm-ui-tars',
511
- 'vlm-ui-tars-doubao',
512
- 'vlm-ui-tars-doubao-1.5',
513
- ];
354
+ /**
355
+ * Callback to create custom OpenAI client instance
356
+ * @param options - Resolved model configuration including apiKey, baseURL, modelName, intent, etc.
357
+ * @returns OpenAI client instance (can be wrapped with langsmith, langfuse, etc.)
358
+ *
359
+ * Note: Wrapper functions like langsmith's wrapOpenAI() return the same OpenAI instance
360
+ * with enhanced behavior, so the return type remains compatible with OpenAI.
361
+ *
362
+ * Note: The return type is `any` in the shared package to avoid requiring openai as a dependency.
363
+ * The actual implementation should return an OpenAI instance.
364
+ *
365
+ * @example
366
+ * ```typescript
367
+ * import OpenAI from 'openai';
368
+ * import { wrapOpenAI } from 'langsmith/wrappers';
369
+ *
370
+ * createOpenAIClient: async (openai, opts) => {
371
+ * // Wrap with langsmith for planning tasks
372
+ * if (opts.baseURL?.includes('planning')) {
373
+ * return wrapOpenAI(openai, { metadata: { task: 'planning' } });
374
+ * }
375
+ *
376
+ * return openai;
377
+ * }
378
+ * ```
379
+ */
380
+ export type CreateOpenAIClientFn = (
381
+ openAIInstance: any,
382
+ options: Record<string, unknown>,
383
+ ) => Promise<any>; // OpenAI instance, but typed as `any` to avoid dependency
514
384
 
515
385
  export interface IModelConfig {
516
386
  /**
@@ -529,32 +399,30 @@ export interface IModelConfig {
529
399
  openaiApiKey?: string;
530
400
  openaiExtraConfig?: Record<string, unknown>;
531
401
  /**
532
- * Azure
533
- */
534
- openaiUseAzureDeprecated?: boolean;
535
- useAzureOpenai?: boolean;
536
- azureOpenaiScope?: string;
537
- azureOpenaiKey?: string;
538
- azureOpenaiEndpoint?: string;
539
- azureOpenaiApiVersion?: string;
540
- azureOpenaiDeployment?: string;
541
- azureExtraConfig?: Record<string, unknown>;
542
- /**
543
- * Anthropic
402
+ * Timeout for API calls in milliseconds.
403
+ * If not set, uses OpenAI SDK default (10 minutes).
544
404
  */
545
- useAnthropicSdk?: boolean;
546
- anthropicApiKey?: string;
405
+ timeout?: number;
547
406
  /**
548
- * - vlModeRaw: exists only in non-legacy logic. value can be 'doubao-vision', 'gemini', 'qwen-vl', 'vlm-ui-tars', 'vlm-ui-tars-doubao', 'vlm-ui-tars-doubao-1.5'
549
- * - vlMode: based on the results of the vlModoRaw classification,value can be 'doubao-vision', 'gemini', 'qwen-vl', 'vlm-ui-tars'
407
+ * - vlModeRaw: exists only in non-legacy logic. value can be 'doubao-vision', 'gemini', 'qwen2.5-vl', 'vlm-ui-tars', 'vlm-ui-tars-doubao', 'vlm-ui-tars-doubao-1.5'
408
+ * - vlMode: based on the results of the vlModeRaw classification; value can be 'doubao-vision', 'gemini', 'qwen2.5-vl', 'vlm-ui-tars'
550
409
  */
551
410
  vlModeRaw?: string;
552
411
  vlMode?: TVlModeTypes;
553
412
  uiTarsModelVersion?: UITarsModelVersion;
554
413
  modelDescription: string;
555
414
  /**
556
- * for debug
415
+ * original intent from the config
557
416
  */
558
417
  intent: TIntent;
559
- from: 'modelConfig' | 'env' | 'legacy-env';
418
+ /**
419
+ * Custom OpenAI client factory function
420
+ *
421
+ * If provided, this function will be called to create OpenAI client instances
422
+ * for each AI call, allowing you to:
423
+ * - Wrap clients with observability tools (langsmith, langfuse)
424
+ * - Use custom OpenAI-compatible clients
425
+ * - Apply different configurations based on intent
426
+ */
427
+ createOpenAIClient?: CreateOpenAIClientFn;
560
428
  }