@midscene/shared 0.30.10 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129)
  1. package/dist/es/build/rspack-config.mjs +4 -0
  2. package/dist/es/constants/example-code.mjs +4 -4
  3. package/dist/es/env/constants.mjs +27 -82
  4. package/dist/es/env/global-config-manager.mjs +2 -3
  5. package/dist/es/env/helper.mjs +12 -17
  6. package/dist/es/env/init-debug.mjs +6 -6
  7. package/dist/es/env/model-config-manager.mjs +45 -65
  8. package/dist/es/env/parse-model-config.mjs +112 -0
  9. package/dist/es/env/types.mjs +70 -162
  10. package/dist/es/extractor/dom-util.mjs +10 -18
  11. package/dist/es/extractor/index.mjs +2 -3
  12. package/dist/es/extractor/locator.mjs +8 -15
  13. package/dist/es/extractor/tree.mjs +2 -5
  14. package/dist/es/extractor/util.mjs +4 -28
  15. package/dist/es/extractor/web-extractor.mjs +7 -14
  16. package/dist/es/index.mjs +2 -1
  17. package/dist/es/mcp/base-server.mjs +250 -0
  18. package/dist/es/mcp/base-tools.mjs +84 -0
  19. package/dist/es/mcp/index.mjs +5 -0
  20. package/dist/es/mcp/inject-report-html-plugin.mjs +53 -0
  21. package/dist/es/mcp/tool-generator.mjs +207 -0
  22. package/dist/es/mcp/types.mjs +3 -0
  23. package/dist/es/node/fs.mjs +2 -2
  24. package/dist/es/utils.mjs +2 -3
  25. package/dist/es/zod-schema-utils.mjs +54 -0
  26. package/dist/lib/baseDB.js +2 -2
  27. package/dist/lib/build/copy-static.js +4 -4
  28. package/dist/lib/build/rspack-config.js +38 -0
  29. package/dist/lib/common.js +4 -4
  30. package/dist/lib/constants/example-code.js +6 -6
  31. package/dist/lib/constants/index.js +13 -13
  32. package/dist/lib/env/basic.js +2 -2
  33. package/dist/lib/env/constants.js +32 -90
  34. package/dist/lib/env/global-config-manager.js +4 -5
  35. package/dist/lib/env/helper.js +13 -22
  36. package/dist/lib/env/index.js +24 -28
  37. package/dist/lib/env/init-debug.js +7 -7
  38. package/dist/lib/env/model-config-manager.js +47 -67
  39. package/dist/lib/env/parse-model-config.js +155 -0
  40. package/dist/lib/env/types.js +146 -379
  41. package/dist/lib/env/utils.js +4 -4
  42. package/dist/lib/extractor/constants.js +4 -4
  43. package/dist/lib/extractor/debug.js +1 -1
  44. package/dist/lib/extractor/dom-util.js +18 -26
  45. package/dist/lib/extractor/index.js +11 -21
  46. package/dist/lib/extractor/locator.js +10 -20
  47. package/dist/lib/extractor/tree.js +4 -7
  48. package/dist/lib/extractor/util.js +17 -50
  49. package/dist/lib/extractor/web-extractor.js +12 -19
  50. package/dist/lib/img/box-select.js +4 -4
  51. package/dist/lib/img/draw-box.js +2 -2
  52. package/dist/lib/img/get-jimp.js +16 -34
  53. package/dist/lib/img/get-photon.js +24 -47
  54. package/dist/lib/img/get-sharp.js +16 -34
  55. package/dist/lib/img/index.js +18 -18
  56. package/dist/lib/img/info.js +4 -4
  57. package/dist/lib/img/transform.js +10 -10
  58. package/dist/lib/index.js +8 -4
  59. package/dist/lib/logger.js +4 -4
  60. package/dist/lib/mcp/base-server.js +300 -0
  61. package/dist/lib/mcp/base-tools.js +118 -0
  62. package/dist/lib/mcp/index.js +86 -0
  63. package/dist/lib/mcp/inject-report-html-plugin.js +98 -0
  64. package/dist/lib/mcp/tool-generator.js +244 -0
  65. package/dist/lib/mcp/types.js +40 -0
  66. package/dist/lib/node/fs.js +6 -6
  67. package/dist/lib/node/index.js +6 -8
  68. package/dist/lib/polyfills/async-hooks.js +2 -2
  69. package/dist/lib/polyfills/index.js +6 -8
  70. package/dist/lib/types/index.js +2 -2
  71. package/dist/lib/us-keyboard-layout.js +2 -2
  72. package/dist/lib/utils.js +13 -14
  73. package/dist/lib/zod-schema-utils.js +97 -0
  74. package/dist/types/build/rspack-config.d.ts +8 -0
  75. package/dist/types/constants/example-code.d.ts +1 -1
  76. package/dist/types/env/constants.d.ts +5 -18
  77. package/dist/types/env/global-config-manager.d.ts +1 -2
  78. package/dist/types/env/helper.d.ts +2 -4
  79. package/dist/types/env/model-config-manager.d.ts +8 -7
  80. package/dist/types/env/parse-model-config.d.ts +28 -0
  81. package/dist/types/env/types.d.ts +152 -191
  82. package/dist/types/extractor/dom-util.d.ts +2 -15
  83. package/dist/types/extractor/index.d.ts +1 -2
  84. package/dist/types/extractor/locator.d.ts +0 -1
  85. package/dist/types/extractor/tree.d.ts +1 -4
  86. package/dist/types/extractor/util.d.ts +0 -3
  87. package/dist/types/index.d.ts +1 -0
  88. package/dist/types/mcp/base-server.d.ts +77 -0
  89. package/dist/types/mcp/base-tools.d.ts +55 -0
  90. package/dist/types/mcp/index.d.ts +5 -0
  91. package/dist/types/mcp/inject-report-html-plugin.d.ts +18 -0
  92. package/dist/types/mcp/tool-generator.d.ts +11 -0
  93. package/dist/types/mcp/types.d.ts +100 -0
  94. package/dist/types/types/index.d.ts +5 -2
  95. package/dist/types/zod-schema-utils.d.ts +23 -0
  96. package/package.json +19 -4
  97. package/src/build/rspack-config.ts +12 -0
  98. package/src/constants/example-code.ts +4 -4
  99. package/src/env/constants.ts +58 -203
  100. package/src/env/global-config-manager.ts +7 -7
  101. package/src/env/helper.ts +10 -31
  102. package/src/env/init-debug.ts +11 -6
  103. package/src/env/model-config-manager.ts +91 -87
  104. package/src/env/parse-model-config.ts +265 -0
  105. package/src/env/types.ts +212 -344
  106. package/src/extractor/dom-util.ts +15 -12
  107. package/src/extractor/index.ts +0 -3
  108. package/src/extractor/locator.ts +3 -12
  109. package/src/extractor/tree.ts +4 -4
  110. package/src/extractor/util.ts +0 -32
  111. package/src/index.ts +2 -0
  112. package/src/mcp/base-server.ts +435 -0
  113. package/src/mcp/base-tools.ts +196 -0
  114. package/src/mcp/index.ts +5 -0
  115. package/src/mcp/inject-report-html-plugin.ts +119 -0
  116. package/src/mcp/tool-generator.ts +330 -0
  117. package/src/mcp/types.ts +108 -0
  118. package/src/node/fs.ts +1 -1
  119. package/src/types/index.ts +8 -2
  120. package/src/utils.ts +1 -1
  121. package/src/zod-schema-utils.ts +133 -0
  122. package/dist/es/env/decide-model-config.mjs +0 -172
  123. package/dist/es/env/parse.mjs +0 -69
  124. package/dist/lib/env/decide-model-config.js +0 -212
  125. package/dist/lib/env/parse.js +0 -106
  126. package/dist/types/env/decide-model-config.d.ts +0 -14
  127. package/dist/types/env/parse.d.ts +0 -12
  128. package/src/env/decide-model-config.ts +0 -319
  129. package/src/env/parse.ts +0 -131
@@ -0,0 +1,97 @@
1
+ "use strict";
2
+ var __webpack_require__ = {};
3
+ (()=>{
4
+ __webpack_require__.d = (exports1, definition)=>{
5
+ for(var key in definition)if (__webpack_require__.o(definition, key) && !__webpack_require__.o(exports1, key)) Object.defineProperty(exports1, key, {
6
+ enumerable: true,
7
+ get: definition[key]
8
+ });
9
+ };
10
+ })();
11
+ (()=>{
12
+ __webpack_require__.o = (obj, prop)=>Object.prototype.hasOwnProperty.call(obj, prop);
13
+ })();
14
+ (()=>{
15
+ __webpack_require__.r = (exports1)=>{
16
+ if ('undefined' != typeof Symbol && Symbol.toStringTag) Object.defineProperty(exports1, Symbol.toStringTag, {
17
+ value: 'Module'
18
+ });
19
+ Object.defineProperty(exports1, '__esModule', {
20
+ value: true
21
+ });
22
+ };
23
+ })();
24
+ var __webpack_exports__ = {};
25
+ __webpack_require__.r(__webpack_exports__);
26
+ __webpack_require__.d(__webpack_exports__, {
27
+ getZodDescription: ()=>getZodDescription,
28
+ getZodTypeName: ()=>getZodTypeName,
29
+ isMidsceneLocatorField: ()=>isMidsceneLocatorField,
30
+ unwrapZodField: ()=>unwrapZodField
31
+ });
32
+ function unwrapZodField(field) {
33
+ const f = field;
34
+ if (!f._def) return f;
35
+ const typeName = f._def.typeName;
36
+ if ('ZodOptional' === typeName || 'ZodNullable' === typeName || 'ZodDefault' === typeName) return unwrapZodField(f._def.innerType);
37
+ if ('ZodEffects' === typeName) {
38
+ if (f._def.schema) return unwrapZodField(f._def.schema);
39
+ }
40
+ return f;
41
+ }
42
+ function isMidsceneLocatorField(field) {
43
+ const actualField = unwrapZodField(field);
44
+ if (actualField._def?.typeName === 'ZodObject') {
45
+ const shape = actualField._def.shape?.();
46
+ if (shape) {
47
+ if ('midscene_location_field_flag' in shape) return true;
48
+ if ('prompt' in shape && shape.prompt) return true;
49
+ }
50
+ }
51
+ return false;
52
+ }
53
+ function getZodTypeName(field, locatorTypeDescription) {
54
+ const actualField = unwrapZodField(field);
55
+ const fieldTypeName = actualField._def?.typeName;
56
+ if ('ZodString' === fieldTypeName) return 'string';
57
+ if ('ZodNumber' === fieldTypeName) return 'number';
58
+ if ('ZodBoolean' === fieldTypeName) return 'boolean';
59
+ if ('ZodArray' === fieldTypeName) return 'array';
60
+ if ('ZodObject' === fieldTypeName) {
61
+ if (isMidsceneLocatorField(actualField)) return locatorTypeDescription || 'object';
62
+ return 'object';
63
+ }
64
+ if ('ZodEnum' === fieldTypeName) {
65
+ const values = actualField._def?.values?.map((option)=>String(`'${option}'`)).join(', ') ?? 'enum';
66
+ return `enum(${values})`;
67
+ }
68
+ if ('ZodUnion' === fieldTypeName) {
69
+ const options = actualField._def?.options;
70
+ if (options && options.length > 0) {
71
+ const types = options.map((opt)=>getZodTypeName(opt, locatorTypeDescription));
72
+ return types.join(' | ');
73
+ }
74
+ return 'union';
75
+ }
76
+ return 'unknown';
77
+ }
78
+ function getZodDescription(field) {
79
+ if ("description" in field) return field.description || null;
80
+ const actualField = unwrapZodField(field);
81
+ if ("description" in actualField) return actualField.description || null;
82
+ if (isMidsceneLocatorField(actualField)) return 'Location information for the target element';
83
+ return null;
84
+ }
85
+ exports.getZodDescription = __webpack_exports__.getZodDescription;
86
+ exports.getZodTypeName = __webpack_exports__.getZodTypeName;
87
+ exports.isMidsceneLocatorField = __webpack_exports__.isMidsceneLocatorField;
88
+ exports.unwrapZodField = __webpack_exports__.unwrapZodField;
89
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
90
+ "getZodDescription",
91
+ "getZodTypeName",
92
+ "isMidsceneLocatorField",
93
+ "unwrapZodField"
94
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
95
+ Object.defineProperty(exports, '__esModule', {
96
+ value: true
97
+ });
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Common Rspack configuration helpers for rsbuild projects
3
+ */
4
+ /**
5
+ * Common warning patterns to ignore in Rspack builds.
6
+ * These warnings are typically from optional dependencies or known non-critical issues.
7
+ */
8
+ export declare const commonIgnoreWarnings: RegExp[];
@@ -1,2 +1,2 @@
1
1
  export declare const PLAYWRIGHT_EXAMPLE_CODE = "\n// Reference the following code to generate Midscene test cases\n// The following is test code for Midscene AI, for reference\n// The following is Playwright syntax, you can use Playwright to assist in test generation\nIMPORTANT: Follow these exact type signatures for AI functions:\n\n// Type signatures for AI functions:\naiInput(value: string, locator: string): Promise<void>\naiTap(locator: string): Promise<void>\naiDoubleClick(locator: string): Promise<void>\naiScroll(scrollParam: {\n direction: 'up' | 'down' | 'left' | 'right',\n scrollType: 'once' | 'untilBottom' | 'untilTop' | 'untilRight' | 'untilLeft',\n distance: number - scroll distance, px is the unit\n}): Promise<void>\naiAssert(assertion: string): Promise<void>\naiQuery<T>(queryObject: Record<string, string>): Promise<T> // Extracts data from page based on descriptions\n\n// examples:\n// Reference the following code to generate Midscene test cases\n// The following is test code for Midscene AI, for reference\n// The following is Playwright syntax, you can use Playwright to assist in test generation\nimport { test as base } from '@playwright/test';\nimport type { PlayWrightAiFixtureType } from '@midscene/web/playwright';\nimport { PlaywrightAiFixture } from '@midscene/web/playwright';\n\nconst test = base.extend<PlayWrightAiFixtureType>(PlaywrightAiFixture({\n waitForNetworkIdleTimeout: 2000, // optional, the timeout for waiting for network idle between each action, default is 2000ms\n}));\n\n\ntest.beforeEach(async ({ page }) => {\n await page.goto('https://www.xxx.com/');\n await page.setViewportSize({ width: 1920, height: 1080 });\n});\n\ntest('ai shop', async ({\n aiInput,\n aiAssert,\n aiQuery,\n aiKeyboardPress,\n aiHover,\n aiTap,\n agentForPage,\n page,\n}) => {\n // login\n await aiAssert('The page shows the login interface');\n await aiInput('user_name', 'in user name input');\n await aiInput('password', 'in password input');\n await 
aiKeyboardPress('Enter', 'Login Button');\n\n // check the login success\n await aiWaitFor('The page shows that the loading is complete');\n await aiAssert('The current page shows the product detail page');\n\n // check the product info\n const dataA = await aiQuery({\n userInfo: 'User information in the format {name: string}',\n theFirstProductInfo: 'The first product info in the format {name: string, price: number}',\n });\n expect(dataA.theFirstProductInfo.name).toBe('xxx');\n expect(dataA.theFirstProductInfo.price).toBe(100);\n\n\n // add to cart\n await aiTap('click add to cart button');\n \n await aiTap('click right top cart icon');\n await aiAssert('The cart icon shows the number 1');\n});\n";
2
- export declare const YAML_EXAMPLE_CODE = "\n1. Format:\n\nweb:\n url: \"starting_url\"\n viewportWidth: 1280\n viewportHeight: 960\n\ntasks:\n - name: \"descriptive task name\"\n flow:\n - aiTap: \"element description\"\n - aiInput: 'text value'\n locate: 'input field description'\n - aiScroll:\n direction: down/up\n scrollType: untilBottom/untilTop/page\n - aiAssert: \"expected state\"\n - sleep: milliseconds\n\n2. Action Types:\n- aiTap: for clicks (natural language targeting)\n- aiInput: for text input with 'locate' field\n- aiScroll: with direction and scrollType\n- aiAssert: for validations\n- sleep: for delays (milliseconds)\n\n3. Best Practices:\n- Group related actions into logical tasks\n- Use natural language descriptions\n- Add deepThink: true for complex interactions\n- Keep task names concise but descriptive\n\n\n\nYAML type\ntasks:\n - name: <name>\n continueOnError: <boolean> # Optional, whether to continue to the next task on error, defaults to false.\n flow:\n # Auto Planning (.ai)\n # ----------------\n\n # Perform an interaction. `ai` is a shorthand for `aiAction`.\n - ai: <prompt>\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # This usage is the same as `ai`.\n - aiAction: <prompt>\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Instant Action (.aiTap, .aiDoubleClick, .aiHover, .aiInput, .aiKeyboardPress, .aiScroll)\n # ----------------\n\n # Tap an element described by a prompt.\n - aiTap: <prompt>\n deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. 
Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Double click an element described by a prompt.\n - aiDoubleClick: <prompt>\n deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Hover over an element described by a prompt.\n - aiHover: <prompt>\n deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Input text into an element described by a prompt.\n - aiInput: <final text content of the input>\n locate: <prompt>\n deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. 
Defaults to True.\n\n # Press a key (e.g., Enter, Tab, Escape) on an element described by a prompt.\n - aiKeyboardPress: <key>\n locate: <prompt>\n deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Scroll globally or on an element described by a prompt.\n - aiScroll:\n direction: 'up' # or 'down' | 'left' | 'right'\n scrollType: 'once' # or 'untilTop' | 'untilBottom' | 'untilLeft' | 'untilRight'\n distance: <number> # Optional, the scroll distance in pixels.\n locate: <prompt> # Optional, the element to scroll on.\n deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Log the current screenshot with a description in the report file.\n - logScreenshot: <title> # Optional, the title of the screenshot. 
If not provided, the title will be 'untitled'.\n content: <content> # Optional, the description of the screenshot.\n\n # Data Extraction\n # ----------------\n\n # Perform a query that returns a JSON object.\n - aiQuery: <prompt> # Remember to describe the format of the result in the prompt.\n name: <name> # The key for the query result in the JSON output.\n\n # More APIs\n # ----------------\n\n # Wait for a condition to be met, with a timeout (in ms, optional, defaults to 30000).\n - aiWaitFor: <prompt>\n timeout: <ms>\n\n # Perform an assertion.\n - aiAssert: <prompt>\n errorMessage: <error-message> # Optional, the error message to print if the assertion fails.\n\n # Wait for a specified amount of time.\n - sleep: <ms>\n\n # Execute a piece of JavaScript code in the web page context.\n - javascript: <javascript>\n name: <name> # Optional, assign a name to the return value, which will be used as a key in the JSON output.\n\n - name: <name>\n flow:\n # ...\n";
2
+ export declare const YAML_EXAMPLE_CODE = "\n1. Format:\n\nweb:\n url: \"starting_url\"\n viewportWidth: 1280\n viewportHeight: 960\n\ntasks:\n - name: \"descriptive task name\"\n flow:\n - aiTap: \"element description\"\n - aiInput: 'text value'\n locate: 'input field description'\n - aiScroll:\n direction: down/up\n scrollType: untilBottom/untilTop/page\n - aiAssert: \"expected state\"\n - sleep: milliseconds\n\n2. Action Types:\n- aiTap: for clicks (natural language targeting)\n- aiInput: for text input with 'locate' field\n- aiScroll: with direction and scrollType\n- aiAssert: for validations\n- sleep: for delays (milliseconds)\n\n3. Best Practices:\n- Group related actions into logical tasks\n- Use natural language descriptions\n- Add deepThink: true for complex interactions\n- Keep task names concise but descriptive\n\n\n\nYAML type\ntasks:\n - name: <name>\n continueOnError: <boolean> # Optional, whether to continue to the next task on error, defaults to false.\n flow:\n # Auto Planning (.ai)\n # ----------------\n\n # Perform an interaction. `ai` is a shorthand for `aiAct`.\n - ai: <prompt>\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # This usage is the same as `ai`.\n - aiAct: <prompt>\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Instant Action (.aiTap, .aiDoubleClick, .aiHover, .aiInput, .aiKeyboardPress, .aiScroll)\n # ----------------\n\n # Tap an element described by a prompt.\n - aiTap: <prompt>\n deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. 
Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Double click an element described by a prompt.\n - aiDoubleClick: <prompt>\n deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Hover over an element described by a prompt.\n - aiHover: <prompt>\n deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Input text into an element described by a prompt.\n - aiInput: <final text content of the input>\n locate: <prompt>\n deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. 
Defaults to True.\n\n # Press a key (e.g., Enter, Tab, Escape) on an element described by a prompt.\n - aiKeyboardPress: <key>\n locate: <prompt>\n deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Scroll globally or on an element described by a prompt.\n - aiScroll:\n direction: 'up' # or 'down' | 'left' | 'right'\n scrollType: 'once' # or 'untilTop' | 'untilBottom' | 'untilLeft' | 'untilRight'\n distance: <number> # Optional, the scroll distance in pixels.\n locate: <prompt> # Optional, the element to scroll on.\n deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Record the current screenshot with a description in the report file.\n - recordToReport: <title> # Optional, the title of the screenshot. 
If not provided, the title will be 'untitled'.\n content: <content> # Optional, the description of the screenshot.\n\n # Data Extraction\n # ----------------\n\n # Perform a query that returns a JSON object.\n - aiQuery: <prompt> # Remember to describe the format of the result in the prompt.\n name: <name> # The key for the query result in the JSON output.\n\n # More APIs\n # ----------------\n\n # Wait for a condition to be met, with a timeout (in ms, optional, defaults to 30000).\n - aiWaitFor: <prompt>\n timeout: <ms>\n\n # Perform an assertion.\n - aiAssert: <prompt>\n errorMessage: <error-message> # Optional, the error message to print if the assertion fails.\n\n # Wait for a specified amount of time.\n - sleep: <ms>\n\n # Execute a piece of JavaScript code in the web page context.\n - javascript: <javascript>\n name: <name> # Optional, assign a name to the return value, which will be used as a key in the JSON output.\n\n - name: <name>\n flow:\n # ...\n";
@@ -12,28 +12,15 @@ interface IModelConfigKeys {
12
12
  openaiApiKey: string;
13
13
  openaiExtraConfig: string;
14
14
  /**
15
- * Azure
16
- */
17
- openaiUseAzureDeprecated: string;
18
- useAzureOpenai: string;
19
- azureOpenaiScope: string;
20
- azureOpenaiKey: string;
21
- azureOpenaiEndpoint: string;
22
- azureOpenaiApiVersion: string;
23
- azureOpenaiDeployment: string;
24
- azureExtraConfig: string;
25
- /**
26
- * Anthropic
15
+ * Extra
27
16
  */
28
- useAnthropicSdk: string;
29
- anthropicApiKey: string;
17
+ modelFamily: string;
30
18
  /**
31
- * Extra
19
+ * Timeout
32
20
  */
33
- vlMode: string;
21
+ timeout: string;
34
22
  }
35
- export declare const VQA_MODEL_CONFIG_KEYS: IModelConfigKeys;
36
- export declare const GROUNDING_MODEL_CONFIG_KEYS: IModelConfigKeys;
23
+ export declare const INSIGHT_MODEL_CONFIG_KEYS: IModelConfigKeys;
37
24
  export declare const PLANNING_MODEL_CONFIG_KEYS: IModelConfigKeys;
38
25
  export declare const DEFAULT_MODEL_CONFIG_KEYS: IModelConfigKeys;
39
26
  export declare const DEFAULT_MODEL_CONFIG_KEYS_LEGACY: IModelConfigKeys;
@@ -25,8 +25,7 @@ export declare class GlobalConfigManager {
25
25
  getEnvConfigInBoolean(key: (typeof BOOLEAN_ENV_KEYS)[number]): boolean;
26
26
  registerModelConfigManager(globalModelConfigManager: ModelConfigManager): void;
27
27
  /**
28
- * for overrideAIConfig
29
- * can only override keys in MODEL_ENV_KEYS
28
+ * @deprecated use the modelConfig param in Agent constructor instead
30
29
  */
31
30
  overrideAIConfig(newConfig: Partial<Record<(typeof GLOBAL_ENV_KEYS)[number] | (typeof MODEL_ENV_KEYS)[number], string>>, extendMode?: boolean): void;
32
31
  }
@@ -1,6 +1,4 @@
1
- import type { IModelConfig } from './types';
2
- export declare const maskConfig: (config: IModelConfig) => {
3
- [k: string]: any;
1
+ export declare const maskConfig: (config: Record<string, unknown>) => {
2
+ [k: string]: unknown;
4
3
  };
5
4
  export declare const parseJson: (key: string, value: string | undefined) => any;
6
- export declare const createAssert: (modelNameKey: string, provider: "process.env" | "modelConfig", modelName?: string) => (value: string | undefined, key: string, modelVendorFlag?: string) => void;
@@ -1,14 +1,15 @@
1
1
  import type { GlobalConfigManager } from './global-config-manager';
2
- import type { IModelConfig, TIntent, TModelConfigFn } from './types';
3
- export type TIntentConfigMap = Record<TIntent, ReturnType<TModelConfigFn> | undefined>;
2
+ import type { CreateOpenAIClientFn, IModelConfig, TIntent, TModelConfig } from './types';
4
3
  export declare class ModelConfigManager {
5
4
  private modelConfigMap;
5
+ private isInitialized;
6
6
  private isolatedMode;
7
7
  private globalConfigManager;
8
- constructor(modelConfigFn?: TModelConfigFn);
9
- private calcIntentConfigMap;
10
- private calcModelConfigMapBaseOnIntent;
11
- private calcModelConfigMapBaseOnEnv;
8
+ private modelConfig?;
9
+ private createOpenAIClientFn?;
10
+ constructor(modelConfig?: TModelConfig, createOpenAIClientFn?: CreateOpenAIClientFn);
11
+ private initialize;
12
+ private normalizeModelConfig;
12
13
  /**
13
14
  * should only be called by GlobalConfigManager
14
15
  */
@@ -20,5 +21,5 @@ export declare class ModelConfigManager {
20
21
  getModelConfig(intent: TIntent): IModelConfig;
21
22
  getUploadTestServerUrl(): string | undefined;
22
23
  registerGlobalConfigManager(globalConfigManager: GlobalConfigManager): void;
23
- throwErrorIfNonVLModel(intent?: TIntent): void;
24
+ throwErrorIfNonVLModel(): void;
24
25
  }
@@ -0,0 +1,28 @@
1
+ import { DEFAULT_MODEL_CONFIG_KEYS, type DEFAULT_MODEL_CONFIG_KEYS_LEGACY, INSIGHT_MODEL_CONFIG_KEYS, PLANNING_MODEL_CONFIG_KEYS } from './constants';
2
+ import { type IModelConfig, type TIntent, type TModelFamily, type TVlModeTypes, UITarsModelVersion } from './types';
3
+ type TModelConfigKeys = typeof INSIGHT_MODEL_CONFIG_KEYS | typeof PLANNING_MODEL_CONFIG_KEYS | typeof DEFAULT_MODEL_CONFIG_KEYS | typeof DEFAULT_MODEL_CONFIG_KEYS_LEGACY;
4
+ /**
5
+ * Convert model family to VL configuration
6
+ * @param modelFamily - The model family value
7
+ * @returns Object containing vlMode and uiTarsVersion
8
+ */
9
+ export declare const modelFamilyToVLConfig: (modelFamily?: TModelFamily) => {
10
+ vlMode?: TVlModeTypes;
11
+ uiTarsVersion?: UITarsModelVersion;
12
+ };
13
+ /**
14
+ * Convert legacy environment variables to model family
15
+ * @param provider - Environment variable provider (e.g., process.env)
16
+ * @returns The corresponding model family value, or undefined if no legacy config is found
17
+ */
18
+ export declare const legacyConfigToModelFamily: (provider: Record<string, string | undefined>) => TModelFamily | undefined;
19
+ /**
20
+ * Parse OpenAI SDK config
21
+ */
22
+ export declare const parseOpenaiSdkConfig: ({ keys, provider, useLegacyLogic, }: {
23
+ keys: TModelConfigKeys;
24
+ provider: Record<string, string | undefined>;
25
+ useLegacyLogic?: boolean;
26
+ }) => IModelConfig;
27
+ export declare const decideModelConfigFromIntentConfig: (intent: TIntent, configMap: Record<string, string | undefined>) => IModelConfig | undefined;
28
+ export {};