@midscene/shared 0.30.10 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129) hide show
  1. package/dist/es/build/rspack-config.mjs +4 -0
  2. package/dist/es/constants/example-code.mjs +4 -4
  3. package/dist/es/env/constants.mjs +27 -82
  4. package/dist/es/env/global-config-manager.mjs +2 -3
  5. package/dist/es/env/helper.mjs +12 -17
  6. package/dist/es/env/init-debug.mjs +6 -6
  7. package/dist/es/env/model-config-manager.mjs +45 -65
  8. package/dist/es/env/parse-model-config.mjs +112 -0
  9. package/dist/es/env/types.mjs +70 -162
  10. package/dist/es/extractor/dom-util.mjs +10 -18
  11. package/dist/es/extractor/index.mjs +2 -3
  12. package/dist/es/extractor/locator.mjs +8 -15
  13. package/dist/es/extractor/tree.mjs +2 -5
  14. package/dist/es/extractor/util.mjs +4 -28
  15. package/dist/es/extractor/web-extractor.mjs +7 -14
  16. package/dist/es/index.mjs +2 -1
  17. package/dist/es/mcp/base-server.mjs +250 -0
  18. package/dist/es/mcp/base-tools.mjs +84 -0
  19. package/dist/es/mcp/index.mjs +5 -0
  20. package/dist/es/mcp/inject-report-html-plugin.mjs +53 -0
  21. package/dist/es/mcp/tool-generator.mjs +207 -0
  22. package/dist/es/mcp/types.mjs +3 -0
  23. package/dist/es/node/fs.mjs +2 -2
  24. package/dist/es/utils.mjs +2 -3
  25. package/dist/es/zod-schema-utils.mjs +54 -0
  26. package/dist/lib/baseDB.js +2 -2
  27. package/dist/lib/build/copy-static.js +4 -4
  28. package/dist/lib/build/rspack-config.js +38 -0
  29. package/dist/lib/common.js +4 -4
  30. package/dist/lib/constants/example-code.js +6 -6
  31. package/dist/lib/constants/index.js +13 -13
  32. package/dist/lib/env/basic.js +2 -2
  33. package/dist/lib/env/constants.js +32 -90
  34. package/dist/lib/env/global-config-manager.js +4 -5
  35. package/dist/lib/env/helper.js +13 -22
  36. package/dist/lib/env/index.js +24 -28
  37. package/dist/lib/env/init-debug.js +7 -7
  38. package/dist/lib/env/model-config-manager.js +47 -67
  39. package/dist/lib/env/parse-model-config.js +155 -0
  40. package/dist/lib/env/types.js +146 -379
  41. package/dist/lib/env/utils.js +4 -4
  42. package/dist/lib/extractor/constants.js +4 -4
  43. package/dist/lib/extractor/debug.js +1 -1
  44. package/dist/lib/extractor/dom-util.js +18 -26
  45. package/dist/lib/extractor/index.js +11 -21
  46. package/dist/lib/extractor/locator.js +10 -20
  47. package/dist/lib/extractor/tree.js +4 -7
  48. package/dist/lib/extractor/util.js +17 -50
  49. package/dist/lib/extractor/web-extractor.js +12 -19
  50. package/dist/lib/img/box-select.js +4 -4
  51. package/dist/lib/img/draw-box.js +2 -2
  52. package/dist/lib/img/get-jimp.js +16 -34
  53. package/dist/lib/img/get-photon.js +24 -47
  54. package/dist/lib/img/get-sharp.js +16 -34
  55. package/dist/lib/img/index.js +18 -18
  56. package/dist/lib/img/info.js +4 -4
  57. package/dist/lib/img/transform.js +10 -10
  58. package/dist/lib/index.js +8 -4
  59. package/dist/lib/logger.js +4 -4
  60. package/dist/lib/mcp/base-server.js +300 -0
  61. package/dist/lib/mcp/base-tools.js +118 -0
  62. package/dist/lib/mcp/index.js +86 -0
  63. package/dist/lib/mcp/inject-report-html-plugin.js +98 -0
  64. package/dist/lib/mcp/tool-generator.js +244 -0
  65. package/dist/lib/mcp/types.js +40 -0
  66. package/dist/lib/node/fs.js +6 -6
  67. package/dist/lib/node/index.js +6 -8
  68. package/dist/lib/polyfills/async-hooks.js +2 -2
  69. package/dist/lib/polyfills/index.js +6 -8
  70. package/dist/lib/types/index.js +2 -2
  71. package/dist/lib/us-keyboard-layout.js +2 -2
  72. package/dist/lib/utils.js +13 -14
  73. package/dist/lib/zod-schema-utils.js +97 -0
  74. package/dist/types/build/rspack-config.d.ts +8 -0
  75. package/dist/types/constants/example-code.d.ts +1 -1
  76. package/dist/types/env/constants.d.ts +5 -18
  77. package/dist/types/env/global-config-manager.d.ts +1 -2
  78. package/dist/types/env/helper.d.ts +2 -4
  79. package/dist/types/env/model-config-manager.d.ts +8 -7
  80. package/dist/types/env/parse-model-config.d.ts +28 -0
  81. package/dist/types/env/types.d.ts +152 -191
  82. package/dist/types/extractor/dom-util.d.ts +2 -15
  83. package/dist/types/extractor/index.d.ts +1 -2
  84. package/dist/types/extractor/locator.d.ts +0 -1
  85. package/dist/types/extractor/tree.d.ts +1 -4
  86. package/dist/types/extractor/util.d.ts +0 -3
  87. package/dist/types/index.d.ts +1 -0
  88. package/dist/types/mcp/base-server.d.ts +77 -0
  89. package/dist/types/mcp/base-tools.d.ts +55 -0
  90. package/dist/types/mcp/index.d.ts +5 -0
  91. package/dist/types/mcp/inject-report-html-plugin.d.ts +18 -0
  92. package/dist/types/mcp/tool-generator.d.ts +11 -0
  93. package/dist/types/mcp/types.d.ts +100 -0
  94. package/dist/types/types/index.d.ts +5 -2
  95. package/dist/types/zod-schema-utils.d.ts +23 -0
  96. package/package.json +19 -4
  97. package/src/build/rspack-config.ts +12 -0
  98. package/src/constants/example-code.ts +4 -4
  99. package/src/env/constants.ts +58 -203
  100. package/src/env/global-config-manager.ts +7 -7
  101. package/src/env/helper.ts +10 -31
  102. package/src/env/init-debug.ts +11 -6
  103. package/src/env/model-config-manager.ts +91 -87
  104. package/src/env/parse-model-config.ts +265 -0
  105. package/src/env/types.ts +212 -344
  106. package/src/extractor/dom-util.ts +15 -12
  107. package/src/extractor/index.ts +0 -3
  108. package/src/extractor/locator.ts +3 -12
  109. package/src/extractor/tree.ts +4 -4
  110. package/src/extractor/util.ts +0 -32
  111. package/src/index.ts +2 -0
  112. package/src/mcp/base-server.ts +435 -0
  113. package/src/mcp/base-tools.ts +196 -0
  114. package/src/mcp/index.ts +5 -0
  115. package/src/mcp/inject-report-html-plugin.ts +119 -0
  116. package/src/mcp/tool-generator.ts +330 -0
  117. package/src/mcp/types.ts +108 -0
  118. package/src/node/fs.ts +1 -1
  119. package/src/types/index.ts +8 -2
  120. package/src/utils.ts +1 -1
  121. package/src/zod-schema-utils.ts +133 -0
  122. package/dist/es/env/decide-model-config.mjs +0 -172
  123. package/dist/es/env/parse.mjs +0 -69
  124. package/dist/lib/env/decide-model-config.js +0 -212
  125. package/dist/lib/env/parse.js +0 -106
  126. package/dist/types/env/decide-model-config.d.ts +0 -14
  127. package/dist/types/env/parse.d.ts +0 -12
  128. package/src/env/decide-model-config.ts +0 -319
  129. package/src/env/parse.ts +0 -131
@@ -0,0 +1,108 @@
1
+ import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
2
+ import type { z } from 'zod';
3
+
4
+ // Avoid circular dependency: don't import from @midscene/core
5
+ // Instead, use generic types that will be provided by implementation
6
+
/**
 * Default timing values used for app loading verification.
 * Both values are expressed in milliseconds, as the `Ms` suffix indicates.
 */
// Default timeout: 10 seconds.
export const defaultAppLoadingTimeoutMs = 10_000;
// Default check interval: 2 seconds.
export const defaultAppLoadingCheckIntervalMs = 2_000;
12
+
/**
 * A single content item carried by a tool result (MCP compatible).
 *
 * The union is discriminated by `type`:
 * - `text`: plain text in `text`
 * - `image` / `audio`: a `data` string plus its `mimeType`
 * - `resource`: an embedded resource identified by `uri`, carrying either
 *   `text` or `blob`, with an optional `mimeType`
 */
export type ToolResultContent =
  | { type: 'text'; text: string }
  | { type: 'image'; data: string; mimeType: string }
  | { type: 'audio'; data: string; mimeType: string }
  | {
      type: 'resource';
      resource:
        | { uri: string; text: string; mimeType?: string }
        | { uri: string; blob: string; mimeType?: string };
    };
26
+
/**
 * Value returned by a tool execution (MCP compatible).
 *
 * The index signature allows arbitrary extra keys next to the known ones.
 */
export interface ToolResult {
  [key: string]: unknown;
  /** Content items produced by the tool. */
  content: Array<ToolResultContent>;
  /** Optional flag marking the result as an error. */
  isError?: boolean;
  /** Optional free-form metadata. */
  _meta?: Record<string, unknown>;
}
36
+
/**
 * Signature of a tool handler.
 *
 * Receives the parsed arguments (`T`, defaulting to a string-keyed record)
 * and resolves to a `ToolResult`.
 */
export type ToolHandler<T = Record<string, unknown>> = (args: T) => Promise<ToolResult>;
44
+
/**
 * Schema of a tool's arguments: a map from argument name to its Zod type.
 */
export type ToolSchema = { [argName: string]: z.ZodTypeAny };
49
+
/**
 * Describes one tool exposed by the MCP server.
 */
export interface ToolDefinition<T = Record<string, unknown>> {
  /** Tool name. */
  name: string;
  /** Human-readable description of the tool. */
  description: string;
  /** Zod schema describing the tool's arguments. */
  schema: ToolSchema;
  /** Function invoked with the parsed arguments. */
  handler: ToolHandler<T>;
  // NOTE(review): presumably asks the server to destroy the agent after the
  // call; the consumer is not visible here, confirm against the server code.
  autoDestroy?: boolean;
}
60
+
/**
 * One entry of an agent's or device's action space.
 *
 * Deliberately declared without an index signature so that it stays
 * compatible with DeviceAction.
 */
export interface ActionSpaceItem {
  /** Action name. */
  name: string;
  /** Optional human-readable description. */
  description?: string;
  /** Optional string-keyed argument record. */
  args?: Record<string, unknown>;
  /** Optional Zod schema for the action's parameters. */
  paramSchema?: z.ZodTypeAny;
}
71
+
/**
 * Minimal contract of a platform-specific agent (Android, iOS, Web).
 *
 * Results are typed as `unknown` so that platform implementations with more
 * specific return types remain assignable to this interface.
 */
export interface BaseAgent {
  /** Resolves to the actions this agent exposes. */
  getActionSpace(): Promise<ActionSpaceItem[]>;
  /** Optional asynchronous teardown. */
  destroy?(): Promise<void>;
  /** Optional page handle; `screenshotBase64` resolves to a string. */
  page?: { screenshotBase64(): Promise<string> };
  /** Optional: takes a description plus optional params. */
  aiAction?: (description: string, params?: Record<string, unknown>) => Promise<unknown>;
  /** Optional: takes an assertion string and an options record. */
  aiWaitFor?: (assertion: string, options: Record<string, unknown>) => Promise<unknown>;
}
92
+
/**
 * Minimal contract of a device used for temporary device instances.
 */
export interface BaseDevice {
  /** Synchronously returns the actions the device exposes. */
  actionSpace(): ActionSpaceItem[];
  /** Optional asynchronous teardown. */
  destroy?(): Promise<void>;
}
100
+
/**
 * Contract implemented by a platform-specific MCP tools manager.
 */
export interface IMidsceneTools {
  /** Attaches this manager to the given MCP server instance. */
  attachToServer(server: McpServer): void;
  /** Asynchronously initialises the tools. */
  initTools(): Promise<void>;
  /** Optional asynchronous browser shutdown hook. */
  closeBrowser?(): Promise<void>;
}
package/src/node/fs.ts CHANGED
@@ -75,7 +75,7 @@ export async function getExtraReturnLogic(tree = false) {
75
75
  return null;
76
76
  }
77
77
 
78
- const elementInfosScriptContent = `${getElementInfosScriptContent()}midscene_element_inspector.setNodeHashCacheListOnWindow();`;
78
+ const elementInfosScriptContent = `${getElementInfosScriptContent()};`;
79
79
 
80
80
  if (tree) {
81
81
  return `${elementInfosScriptContent}midscene_element_inspector.webExtractNodeTree()`;
@@ -17,7 +17,7 @@ export type Rect = Point & Size & { zoom?: number };
17
17
  export abstract class BaseElement {
18
18
  abstract id: string;
19
19
 
20
- abstract indexId?: number; // markerId for web
20
+ // abstract indexId?: number; // markerId for web
21
21
 
22
22
  abstract attributes: {
23
23
  nodeType: NodeType;
@@ -30,7 +30,7 @@ export abstract class BaseElement {
30
30
 
31
31
  abstract center: [number, number];
32
32
 
33
- abstract xpaths?: string[];
33
+ // abstract xpaths?: string[];
34
34
 
35
35
  abstract isVisible: boolean;
36
36
  }
@@ -45,3 +45,9 @@ export interface ElementTreeNode<
/**
 * `ElementInfo` extended with a numeric `zoom` value.
 */
export interface WebElementInfo extends ElementInfo {
  // NOTE(review): presumably the page zoom factor; confirm against callers.
  zoom: number;
}
48
+
/**
 * An element produced by a locate operation.
 */
export type LocateResultElement = {
  /** The description of the element. */
  description: string;
  /** Centre of the element as a two-number tuple. */
  center: [number, number];
  /** Rectangle of the element. */
  rect: Rect;
};
package/src/utils.ts CHANGED
@@ -123,5 +123,5 @@ export const antiEscapeScriptTag = (html: string) => {
123
123
 
/**
 * Replace every `:`, `*`, `?`, `"`, `<`, `>`, `|`, `#` and space character in
 * `str` with `-`.
 *
 * Path separators (`/` and `\`) are not in the character class, so they are
 * left untouched.
 */
export function replaceIllegalPathCharsAndSpace(str: string) {
  const illegalCharsAndSpace = /[:*?"<>|# ]/g;
  return str.replace(illegalCharsAndSpace, '-');
}
@@ -0,0 +1,133 @@
1
+ import type { z } from 'zod';
2
+
/**
 * Strip optional, nullable, default, and effects wrappers from a Zod field
 * and return the innermost schema.
 *
 * Wrappers are recognised by `_def.typeName`:
 * - `ZodOptional`, `ZodNullable`, `ZodDefault` are unwrapped via `_def.innerType`
 * - `ZodEffects` is unwrapped via `_def.schema` when that is set
 *
 * Anything else is returned unchanged. That includes `null`, `undefined`,
 * values without a `_def`, and a `ZodEffects` without a `schema`. A wrapper
 * with no `innerType` therefore yields `undefined` instead of throwing.
 *
 * @param field - The (possibly wrapped) Zod schema field
 * @returns The innermost field reached after removing all known wrappers
 */
export function unwrapZodField(field: unknown): unknown {
  let current: unknown = field;

  for (;;) {
    // Optional chaining keeps null/undefined inputs from throwing.
    const def = (
      current as
        | {
            _def?: { typeName?: string; innerType?: unknown; schema?: unknown };
          }
        | null
        | undefined
    )?._def;
    if (!def) return current;

    const { typeName } = def;

    // Wrapper types that keep the wrapped schema in `innerType`
    if (
      typeName === 'ZodOptional' ||
      typeName === 'ZodNullable' ||
      typeName === 'ZodDefault'
    ) {
      current = def.innerType;
      continue;
    }

    // ZodEffects (transformations, refinements, preprocessors)
    if (typeName === 'ZodEffects' && def.schema) {
      current = def.schema;
      continue;
    }

    return current;
  }
}
33
+
/**
 * Tell whether a field is a Midscene locator field.
 *
 * The field is unwrapped first. It counts as a locator only when it is a
 * `ZodObject` whose shape either:
 * 1. contains the key `midscene_location_field_flag` (result schema), or
 * 2. contains a truthy `prompt` entry (input schema)
 *
 * @param field - Zod schema field, possibly wrapped
 * @returns `true` when one of the two markers is found, otherwise `false`
 */
export function isMidsceneLocatorField(field: unknown): boolean {
  const unwrapped = unwrapZodField(field) as {
    _def?: { typeName?: string; shape?: () => Record<string, unknown> };
  };
  const def = unwrapped._def;

  // Only object schemas can be locator fields.
  if (def?.typeName !== 'ZodObject') return false;

  const shape = def.shape?.();
  if (!shape) return false;

  // Marker 1: explicit location flag (result schema)
  if ('midscene_location_field_flag' in shape) return true;

  // Marker 2: a `prompt` entry (input schema)
  return 'prompt' in shape && Boolean(shape.prompt);
}
60
+
/**
 * Produce a short, human-readable type name for a Zod schema field.
 *
 * The field is unwrapped first, then mapped by its `_def.typeName`:
 * string / number / boolean / array / object, `enum('a', 'b')` for enums,
 * `A | B` for unions, and `unknown` for everything else.
 *
 * @param field - Zod schema field
 * @param locatorTypeDescription - Optional description for MidsceneLocation fields (used by core);
 *   returned instead of `object` when the field is a Midscene locator field
 * @returns The type name string
 */
export function getZodTypeName(
  field: unknown,
  locatorTypeDescription?: string,
): string {
  const unwrapped = unwrapZodField(field) as {
    _def?: { typeName?: string; values?: unknown[]; options?: unknown[] };
  };
  const def = unwrapped._def;

  switch (def?.typeName) {
    case 'ZodString':
      return 'string';
    case 'ZodNumber':
      return 'number';
    case 'ZodBoolean':
      return 'boolean';
    case 'ZodArray':
      return 'array';
    case 'ZodObject':
      // Locator fields may carry a caller-supplied description
      return isMidsceneLocatorField(unwrapped)
        ? locatorTypeDescription || 'object'
        : 'object';
    case 'ZodEnum': {
      const enumValues = def?.values as unknown[] | undefined;
      const rendered =
        enumValues == null
          ? 'enum'
          : enumValues.map((option: unknown) => `'${option}'`).join(', ');
      return `enum(${rendered})`;
    }
    case 'ZodUnion': {
      // List the type name of every union member
      const members = def?.options as unknown[] | undefined;
      if (!members || members.length === 0) return 'union';
      return members
        .map((member: unknown) => getZodTypeName(member, locatorTypeDescription))
        .join(' | ');
    }
    default:
      return 'unknown';
  }
}
107
+
/**
 * Get the description of a Zod schema field.
 *
 * Lookup order:
 * 1. a non-empty `description` on the field itself (a wrapper may carry one)
 * 2. a non-empty `description` on the unwrapped inner field
 * 3. a fixed default text when the field is a Midscene locator field
 *
 * The checks test the description's value, not the presence of the
 * `description` property. Zod schemas expose `description` as an accessor on
 * every instance, so a presence check (`'description' in field`) always
 * succeeds and would make steps 2 and 3 unreachable.
 *
 * @param field - Zod schema field
 * @returns The description, or `null` when none is available
 */
export function getZodDescription(field: z.ZodTypeAny): string | null {
  // 1. Description attached directly to the (possibly wrapping) field
  const directDescription = (field as { description?: string }).description;
  if (directDescription) {
    return directDescription;
  }

  const actualField = unwrapZodField(field) as {
    description?: string;
    _def?: { typeName?: string; shape?: () => Record<string, unknown> };
  };

  // 2. Description attached to the unwrapped field
  if (actualField.description) {
    return actualField.description;
  }

  // 3. Default description for MidsceneLocation fields
  if (isMidsceneLocatorField(actualField)) {
    return 'Location information for the target element';
  }

  return null;
}
@@ -1,172 +0,0 @@
1
- import { DEFAULT_MODEL_CONFIG_KEYS, DEFAULT_MODEL_CONFIG_KEYS_LEGACY, GROUNDING_MODEL_CONFIG_KEYS, PLANNING_MODEL_CONFIG_KEYS, VQA_MODEL_CONFIG_KEYS } from "./constants.mjs";
2
- import { getDebug } from "../logger.mjs";
3
- import { assert } from "../utils.mjs";
4
- import { createAssert, maskConfig, parseJson } from "./helper.mjs";
5
- import { initDebugConfig } from "./init-debug.mjs";
6
- import { parseVlModeAndUiTarsFromGlobalConfig, parseVlModeAndUiTarsModelVersionFromRawValue } from "./parse.mjs";
// Maps each intent name to the set of config key names used for that intent.
const KEYS_MAP = {
  'VQA': VQA_MODEL_CONFIG_KEYS,
  'grounding': GROUNDING_MODEL_CONFIG_KEYS,
  'planning': PLANNING_MODEL_CONFIG_KEYS,
  'default': DEFAULT_MODEL_CONFIG_KEYS,
};
/**
 * Build the SDK settings object for one model from `provider`, reading each
 * value under the key names listed in `keys`.
 *
 * The first matching flag in `provider` selects the shape of the result:
 * 1. `keys.openaiUseAzureDeprecated`: OpenAI settings flagged as deprecated Azure
 * 2. `keys.useAzureOpenai`: Azure OpenAI settings
 * 3. `keys.useAnthropicSdk`: Anthropic settings (no `vlModeRaw` in the result)
 * 4. otherwise: plain OpenAI settings
 *
 * Extra-config values go through `parseJson` before `valueAssert` is called,
 * so a malformed extra config surfaces first.
 *
 * @param {object} options
 * @param {object} options.keys - Key names to look up in `provider`
 * @param {object} options.provider - Key/value source of the configuration
 * @param {Function} options.valueAssert - Called as `(value, key, modeKey?)`;
 *   presumably throws when the value is missing (defined elsewhere, confirm)
 * @returns {object} Settings for the selected SDK, always including
 *   `socksProxy` and `httpProxy`
 */
const decideOpenaiSdkConfig = ({ keys, provider, valueAssert }) => {
  initDebugConfig();
  const debugLog = getDebug('ai:config');

  // Values shared by every branch.
  const socksProxy = provider[keys.socksProxy];
  const httpProxy = provider[keys.httpProxy];
  const vlModeRaw = provider[keys.vlMode];

  // Parse a JSON-encoded extra-config value stored under `key`.
  const readExtraConfig = (key) => parseJson(key, provider[key]);

  debugLog('enter decideOpenaiSdkConfig with keys:', keys);

  if (provider[keys.openaiUseAzureDeprecated]) {
    debugLog(
      `provider has ${keys.openaiUseAzureDeprecated} with value ${provider[keys.openaiUseAzureDeprecated]}`,
    );
    const openaiBaseURL = provider[keys.openaiBaseURL];
    const openaiApiKey = provider[keys.openaiApiKey];
    const openaiExtraConfig = readExtraConfig(keys.openaiExtraConfig);
    valueAssert(openaiApiKey, keys.openaiApiKey, keys.openaiUseAzureDeprecated);
    return {
      socksProxy,
      httpProxy,
      vlModeRaw,
      openaiUseAzureDeprecated: true,
      openaiApiKey,
      openaiBaseURL,
      openaiExtraConfig,
    };
  }

  if (provider[keys.useAzureOpenai]) {
    debugLog(`provider has ${keys.useAzureOpenai} with value ${provider[keys.useAzureOpenai]}`);
    const azureOpenaiScope = provider[keys.azureOpenaiScope];
    const azureOpenaiKey = provider[keys.azureOpenaiKey];
    const azureOpenaiEndpoint = provider[keys.azureOpenaiEndpoint];
    const azureOpenaiDeployment = provider[keys.azureOpenaiDeployment];
    const azureOpenaiApiVersion = provider[keys.azureOpenaiApiVersion];
    const azureExtraConfig = readExtraConfig(keys.azureExtraConfig);
    const openaiExtraConfig = readExtraConfig(keys.openaiExtraConfig);
    valueAssert(azureOpenaiKey, keys.azureOpenaiKey, keys.useAzureOpenai);
    return {
      socksProxy,
      httpProxy,
      vlModeRaw,
      useAzureOpenai: true,
      azureOpenaiScope,
      azureOpenaiKey,
      azureOpenaiEndpoint,
      azureOpenaiDeployment,
      azureOpenaiApiVersion,
      azureExtraConfig,
      openaiExtraConfig,
    };
  }

  if (provider[keys.useAnthropicSdk]) {
    debugLog(`provider has ${keys.useAnthropicSdk} with value ${provider[keys.useAnthropicSdk]}`);
    const anthropicApiKey = provider[keys.anthropicApiKey];
    valueAssert(anthropicApiKey, keys.anthropicApiKey, keys.useAnthropicSdk);
    return {
      socksProxy,
      httpProxy,
      useAnthropicSdk: true,
      anthropicApiKey,
    };
  }

  // No SDK flag set: fall back to plain OpenAI settings.
  debugLog('provider has no specific model SDK declared');
  const openaiBaseURL = provider[keys.openaiBaseURL];
  const openaiApiKey = provider[keys.openaiApiKey];
  const openaiExtraConfig = readExtraConfig(keys.openaiExtraConfig);
  valueAssert(openaiApiKey, keys.openaiApiKey);
  return {
    socksProxy,
    httpProxy,
    vlModeRaw,
    openaiBaseURL,
    openaiApiKey,
    openaiExtraConfig,
  };
};
/**
 * Build a short description string for the selected vision-language mode.
 *
 * @param {string|undefined} vlMode - The vision-language mode, if any
 * @param {string|undefined} uiTarsVersion - The UI-TARS version, if any
 * @returns {string} `UI-TARS=<version>` when both are set, `<vlMode> mode`
 *   when only `vlMode` is set, and an empty string when `vlMode` is falsy
 */
const getModelDescription = (vlMode, uiTarsVersion) => {
  if (!vlMode) {
    return '';
  }
  return uiTarsVersion ? `UI-TARS=${uiTarsVersion}` : `${vlMode} mode`;
};
/**
 * Resolve the model config for `intent` from the object returned by
 * `agent.modelConfig()`.
 *
 * When `intentConfig` holds a model name under the intent-specific key, the
 * intent-specific keys are used for every other value. Otherwise the default
 * keys are used, and a model name under the default key is asserted.
 *
 * @param {string} intent - A key of `KEYS_MAP`
 * @param {object} intentConfig - Key/value config supplied by the caller
 * @returns {object} The SDK settings plus `modelName`, `vlMode`,
 *   `uiTarsModelVersion`, `modelDescription`, `from: 'modelConfig'` and `intent`
 */
const decideModelConfigFromIntentConfig = (intent, intentConfig) => {
  const debugLog = getDebug('ai:config');
  debugLog('decideModelConfig base on agent.modelConfig()');

  const intentKeys = KEYS_MAP[intent];
  const intentModelName = intentConfig[intentKeys.modelName];
  debugLog('Got modelName from modelConfigFn', intentModelName);

  let chosenKeys;
  if (intentModelName) {
    debugLog('query modelConfig from fn by intent got corresponding modelName, will get other corresponding keys');
    chosenKeys = intentKeys;
  } else {
    debugLog('query modelConfig from fn by intent got no corresponding modelName, will get other keys by default');
    assert(
      intentConfig[DEFAULT_MODEL_CONFIG_KEYS.modelName],
      `The return value of agent.modelConfig do not have a valid value with key ${DEFAULT_MODEL_CONFIG_KEYS.modelName}.`,
    );
    chosenKeys = DEFAULT_MODEL_CONFIG_KEYS;
  }

  const sdkConfig = decideOpenaiSdkConfig({
    keys: chosenKeys,
    provider: intentConfig,
    valueAssert: createAssert(chosenKeys.modelName, 'modelConfig', intentModelName),
  });
  const parsedMode = parseVlModeAndUiTarsModelVersionFromRawValue(sdkConfig.vlModeRaw);
  const modelDescription = getModelDescription(parsedMode.vlMode, parsedMode.uiTarsVersion);

  const finalResult = {
    ...sdkConfig,
    modelName: intentConfig[chosenKeys.modelName],
    vlMode: parsedMode.vlMode,
    uiTarsModelVersion: parsedMode.uiTarsVersion,
    modelDescription,
    from: 'modelConfig',
    intent,
  };
  debugLog(`decideModelConfig result by agent.modelConfig() with intent ${intent}:`, maskConfig(finalResult));
  return finalResult;
};
/**
 * Resolve the model config for `intent` from the environment config.
 *
 * For a non-default intent whose intent-specific model name is set, all
 * values are read under the intent-specific keys (`from: 'env'`). In every
 * other case the legacy default keys are used, the vision mode comes from the
 * global flags, and the model name falls back to `'gpt-4o'`
 * (`from: 'legacy-env'`).
 *
 * @param {string} intent - `'default'` or another key of `KEYS_MAP`
 * @param {object} allEnvConfig - Key/value environment configuration
 * @returns {object} The SDK settings plus `modelName`, `vlMode`,
 *   `uiTarsModelVersion`, `modelDescription`, `from` and `intent`
 */
const decideModelConfigFromEnv = (intent, allEnvConfig) => {
  initDebugConfig();
  const debugLog = getDebug('ai:config');

  // Assemble the final result; the key order is the same for both paths.
  const buildResult = (sdkConfig, modelName, { vlMode, uiTarsVersion }, from) => ({
    ...sdkConfig,
    modelName,
    vlMode,
    uiTarsModelVersion: uiTarsVersion,
    modelDescription: getModelDescription(vlMode, uiTarsVersion),
    from,
    intent,
  });

  const keysForEnv = intent === 'default' ? DEFAULT_MODEL_CONFIG_KEYS_LEGACY : KEYS_MAP[intent];

  if (intent !== 'default' && allEnvConfig[keysForEnv.modelName]) {
    const modelName = allEnvConfig[keysForEnv.modelName];
    debugLog(`Got intent ${intent} corresponding modelName ${modelName} by key ${keysForEnv.modelName} from globalConfig, will get other config by intent.`);
    const sdkConfig = decideOpenaiSdkConfig({
      keys: keysForEnv,
      provider: allEnvConfig,
      valueAssert: createAssert(keysForEnv.modelName, 'process.env', modelName),
    });
    const intentResult = buildResult(
      sdkConfig,
      modelName,
      parseVlModeAndUiTarsModelVersionFromRawValue(sdkConfig.vlModeRaw),
      'env',
    );
    debugLog(`decideModelConfig result by process.env with intent ${intent}:`, maskConfig(intentResult));
    return intentResult;
  }

  debugLog(`decideModelConfig as legacy logic with intent ${intent}.`);
  const legacySdkConfig = decideOpenaiSdkConfig({
    keys: DEFAULT_MODEL_CONFIG_KEYS_LEGACY,
    provider: allEnvConfig,
    valueAssert: createAssert(DEFAULT_MODEL_CONFIG_KEYS_LEGACY.modelName, 'process.env'),
  });
  const legacyResult = buildResult(
    legacySdkConfig,
    allEnvConfig[DEFAULT_MODEL_CONFIG_KEYS_LEGACY.modelName] || 'gpt-4o',
    parseVlModeAndUiTarsFromGlobalConfig(allEnvConfig),
    'legacy-env',
  );
  debugLog(`decideModelConfig result by legacy logic with intent ${intent}:`, maskConfig(legacyResult));
  return legacyResult;
};
172
- export { decideModelConfigFromEnv, decideModelConfigFromIntentConfig, decideOpenaiSdkConfig };
@@ -1,69 +0,0 @@
1
- import { MIDSCENE_USE_DOUBAO_VISION, MIDSCENE_USE_GEMINI, MIDSCENE_USE_QWEN3_VL, MIDSCENE_USE_QWEN_VL, MIDSCENE_USE_VLM_UI_TARS, UITarsModelVersion, VL_MODE_RAW_VALID_VALUES } from "./types.mjs";
/**
 * Translate a raw VL mode value into a `{ vlMode, uiTarsVersion }` pair.
 *
 * - falsy input: both fields `undefined`
 * - `'vlm-ui-tars'`: UI-TARS mode with version `UITarsModelVersion.V1_0`
 * - `'vlm-ui-tars-doubao'` / `'vlm-ui-tars-doubao-1.5'`: UI-TARS mode with
 *   version `UITarsModelVersion.DOUBAO_1_5_20B`
 * - any other valid value: returned as `vlMode`, with no UI-TARS version
 *
 * @param {string|undefined} vlModeRaw - Raw mode value
 * @returns {{vlMode: (string|undefined), uiTarsVersion: (string|undefined)}}
 * @throws {Error} When `vlModeRaw` is set but not in `VL_MODE_RAW_VALID_VALUES`
 */
const parseVlModeAndUiTarsModelVersionFromRawValue = (vlModeRaw) => {
  if (!vlModeRaw) {
    return { vlMode: undefined, uiTarsVersion: undefined };
  }
  if (!VL_MODE_RAW_VALID_VALUES.includes(vlModeRaw)) {
    throw new Error(`the value ${vlModeRaw} is not a valid VL_MODE value, must be one of ${VL_MODE_RAW_VALID_VALUES}`);
  }

  switch (vlModeRaw) {
    case 'vlm-ui-tars':
      return { vlMode: 'vlm-ui-tars', uiTarsVersion: UITarsModelVersion.V1_0 };
    case 'vlm-ui-tars-doubao':
    case 'vlm-ui-tars-doubao-1.5':
      return { vlMode: 'vlm-ui-tars', uiTarsVersion: UITarsModelVersion.DOUBAO_1_5_20B };
    default:
      return { vlMode: vlModeRaw, uiTarsVersion: undefined };
  }
};
/**
 * Derive `{ vlMode, uiTarsVersion }` from the vision-mode flags in `provider`.
 *
 * At most one of the five flags may be truthy. For the UI-TARS flag the value
 * selects the version: `'1'` maps to `UITarsModelVersion.V1_0`, `'DOUBAO'` and
 * `'DOUBAO-1.5'` map to `UITarsModelVersion.DOUBAO_1_5_20B`, and any other
 * value is used as the version string itself.
 *
 * @param {object} provider - Key/value configuration holding the flags
 * @returns {{vlMode: (string|undefined), uiTarsVersion: (string|undefined)}}
 *   Both fields are `undefined` when no flag is set
 * @throws {Error} When more than one vision-mode flag is enabled
 */
const parseVlModeAndUiTarsFromGlobalConfig = (provider) => {
  // Read every flag once, in the order used for the error message.
  const flagValues = new Map(
    [
      MIDSCENE_USE_DOUBAO_VISION,
      MIDSCENE_USE_QWEN_VL,
      MIDSCENE_USE_QWEN3_VL,
      MIDSCENE_USE_VLM_UI_TARS,
      MIDSCENE_USE_GEMINI,
    ].map((flagKey) => [flagKey, provider[flagKey]]),
  );

  const enabledModes = [...flagValues]
    .filter(([, flagValue]) => flagValue)
    .map(([flagKey]) => flagKey);
  if (enabledModes.length > 1) {
    throw new Error(`Only one vision mode can be enabled at a time. Currently enabled modes: ${enabledModes.join(', ')}. Please disable all but one mode.`);
  }

  // Flags that map straight to a mode name, in precedence order.
  const simpleModes = [
    [MIDSCENE_USE_QWEN3_VL, 'qwen3-vl'],
    [MIDSCENE_USE_QWEN_VL, 'qwen-vl'],
    [MIDSCENE_USE_DOUBAO_VISION, 'doubao-vision'],
    [MIDSCENE_USE_GEMINI, 'gemini'],
  ];
  for (const [flagKey, vlMode] of simpleModes) {
    if (flagValues.get(flagKey)) {
      return { vlMode, uiTarsVersion: undefined };
    }
  }

  const uiTarsFlag = flagValues.get(MIDSCENE_USE_VLM_UI_TARS);
  if (!uiTarsFlag) {
    return { vlMode: undefined, uiTarsVersion: undefined };
  }

  let uiTarsVersion;
  if (uiTarsFlag === '1') {
    uiTarsVersion = UITarsModelVersion.V1_0;
  } else if (uiTarsFlag === 'DOUBAO' || uiTarsFlag === 'DOUBAO-1.5') {
    uiTarsVersion = UITarsModelVersion.DOUBAO_1_5_20B;
  } else {
    uiTarsVersion = `${uiTarsFlag}`;
  }
  return { vlMode: 'vlm-ui-tars', uiTarsVersion };
};
69
- export { parseVlModeAndUiTarsFromGlobalConfig, parseVlModeAndUiTarsModelVersionFromRawValue };