@midscene/shared 0.30.10 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129)
  1. package/dist/es/build/rspack-config.mjs +4 -0
  2. package/dist/es/constants/example-code.mjs +4 -4
  3. package/dist/es/env/constants.mjs +27 -82
  4. package/dist/es/env/global-config-manager.mjs +2 -3
  5. package/dist/es/env/helper.mjs +12 -17
  6. package/dist/es/env/init-debug.mjs +6 -6
  7. package/dist/es/env/model-config-manager.mjs +45 -65
  8. package/dist/es/env/parse-model-config.mjs +112 -0
  9. package/dist/es/env/types.mjs +70 -162
  10. package/dist/es/extractor/dom-util.mjs +10 -18
  11. package/dist/es/extractor/index.mjs +2 -3
  12. package/dist/es/extractor/locator.mjs +8 -15
  13. package/dist/es/extractor/tree.mjs +2 -5
  14. package/dist/es/extractor/util.mjs +4 -28
  15. package/dist/es/extractor/web-extractor.mjs +7 -14
  16. package/dist/es/index.mjs +2 -1
  17. package/dist/es/mcp/base-server.mjs +250 -0
  18. package/dist/es/mcp/base-tools.mjs +84 -0
  19. package/dist/es/mcp/index.mjs +5 -0
  20. package/dist/es/mcp/inject-report-html-plugin.mjs +53 -0
  21. package/dist/es/mcp/tool-generator.mjs +207 -0
  22. package/dist/es/mcp/types.mjs +3 -0
  23. package/dist/es/node/fs.mjs +2 -2
  24. package/dist/es/utils.mjs +2 -3
  25. package/dist/es/zod-schema-utils.mjs +54 -0
  26. package/dist/lib/baseDB.js +2 -2
  27. package/dist/lib/build/copy-static.js +4 -4
  28. package/dist/lib/build/rspack-config.js +38 -0
  29. package/dist/lib/common.js +4 -4
  30. package/dist/lib/constants/example-code.js +6 -6
  31. package/dist/lib/constants/index.js +13 -13
  32. package/dist/lib/env/basic.js +2 -2
  33. package/dist/lib/env/constants.js +32 -90
  34. package/dist/lib/env/global-config-manager.js +4 -5
  35. package/dist/lib/env/helper.js +13 -22
  36. package/dist/lib/env/index.js +24 -28
  37. package/dist/lib/env/init-debug.js +7 -7
  38. package/dist/lib/env/model-config-manager.js +47 -67
  39. package/dist/lib/env/parse-model-config.js +155 -0
  40. package/dist/lib/env/types.js +146 -379
  41. package/dist/lib/env/utils.js +4 -4
  42. package/dist/lib/extractor/constants.js +4 -4
  43. package/dist/lib/extractor/debug.js +1 -1
  44. package/dist/lib/extractor/dom-util.js +18 -26
  45. package/dist/lib/extractor/index.js +11 -21
  46. package/dist/lib/extractor/locator.js +10 -20
  47. package/dist/lib/extractor/tree.js +4 -7
  48. package/dist/lib/extractor/util.js +17 -50
  49. package/dist/lib/extractor/web-extractor.js +12 -19
  50. package/dist/lib/img/box-select.js +4 -4
  51. package/dist/lib/img/draw-box.js +2 -2
  52. package/dist/lib/img/get-jimp.js +16 -34
  53. package/dist/lib/img/get-photon.js +24 -47
  54. package/dist/lib/img/get-sharp.js +16 -34
  55. package/dist/lib/img/index.js +18 -18
  56. package/dist/lib/img/info.js +4 -4
  57. package/dist/lib/img/transform.js +10 -10
  58. package/dist/lib/index.js +8 -4
  59. package/dist/lib/logger.js +4 -4
  60. package/dist/lib/mcp/base-server.js +300 -0
  61. package/dist/lib/mcp/base-tools.js +118 -0
  62. package/dist/lib/mcp/index.js +86 -0
  63. package/dist/lib/mcp/inject-report-html-plugin.js +98 -0
  64. package/dist/lib/mcp/tool-generator.js +244 -0
  65. package/dist/lib/mcp/types.js +40 -0
  66. package/dist/lib/node/fs.js +6 -6
  67. package/dist/lib/node/index.js +6 -8
  68. package/dist/lib/polyfills/async-hooks.js +2 -2
  69. package/dist/lib/polyfills/index.js +6 -8
  70. package/dist/lib/types/index.js +2 -2
  71. package/dist/lib/us-keyboard-layout.js +2 -2
  72. package/dist/lib/utils.js +13 -14
  73. package/dist/lib/zod-schema-utils.js +97 -0
  74. package/dist/types/build/rspack-config.d.ts +8 -0
  75. package/dist/types/constants/example-code.d.ts +1 -1
  76. package/dist/types/env/constants.d.ts +5 -18
  77. package/dist/types/env/global-config-manager.d.ts +1 -2
  78. package/dist/types/env/helper.d.ts +2 -4
  79. package/dist/types/env/model-config-manager.d.ts +8 -7
  80. package/dist/types/env/parse-model-config.d.ts +28 -0
  81. package/dist/types/env/types.d.ts +152 -191
  82. package/dist/types/extractor/dom-util.d.ts +2 -15
  83. package/dist/types/extractor/index.d.ts +1 -2
  84. package/dist/types/extractor/locator.d.ts +0 -1
  85. package/dist/types/extractor/tree.d.ts +1 -4
  86. package/dist/types/extractor/util.d.ts +0 -3
  87. package/dist/types/index.d.ts +1 -0
  88. package/dist/types/mcp/base-server.d.ts +77 -0
  89. package/dist/types/mcp/base-tools.d.ts +55 -0
  90. package/dist/types/mcp/index.d.ts +5 -0
  91. package/dist/types/mcp/inject-report-html-plugin.d.ts +18 -0
  92. package/dist/types/mcp/tool-generator.d.ts +11 -0
  93. package/dist/types/mcp/types.d.ts +100 -0
  94. package/dist/types/types/index.d.ts +5 -2
  95. package/dist/types/zod-schema-utils.d.ts +23 -0
  96. package/package.json +19 -4
  97. package/src/build/rspack-config.ts +12 -0
  98. package/src/constants/example-code.ts +4 -4
  99. package/src/env/constants.ts +58 -203
  100. package/src/env/global-config-manager.ts +7 -7
  101. package/src/env/helper.ts +10 -31
  102. package/src/env/init-debug.ts +11 -6
  103. package/src/env/model-config-manager.ts +91 -87
  104. package/src/env/parse-model-config.ts +265 -0
  105. package/src/env/types.ts +212 -344
  106. package/src/extractor/dom-util.ts +15 -12
  107. package/src/extractor/index.ts +0 -3
  108. package/src/extractor/locator.ts +3 -12
  109. package/src/extractor/tree.ts +4 -4
  110. package/src/extractor/util.ts +0 -32
  111. package/src/index.ts +2 -0
  112. package/src/mcp/base-server.ts +435 -0
  113. package/src/mcp/base-tools.ts +196 -0
  114. package/src/mcp/index.ts +5 -0
  115. package/src/mcp/inject-report-html-plugin.ts +119 -0
  116. package/src/mcp/tool-generator.ts +330 -0
  117. package/src/mcp/types.ts +108 -0
  118. package/src/node/fs.ts +1 -1
  119. package/src/types/index.ts +8 -2
  120. package/src/utils.ts +1 -1
  121. package/src/zod-schema-utils.ts +133 -0
  122. package/dist/es/env/decide-model-config.mjs +0 -172
  123. package/dist/es/env/parse.mjs +0 -69
  124. package/dist/lib/env/decide-model-config.js +0 -212
  125. package/dist/lib/env/parse.js +0 -106
  126. package/dist/types/env/decide-model-config.d.ts +0 -14
  127. package/dist/types/env/parse.d.ts +0 -12
  128. package/src/env/decide-model-config.ts +0 -319
  129. package/src/env/parse.ts +0 -131
@@ -0,0 +1,55 @@
1
+ import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
2
+ import type { BaseAgent, BaseDevice, IMidsceneTools, ToolDefinition } from './types';
3
+ /**
4
+ * Base class for platform-specific MCP tools
5
+ * Generic type TAgent allows subclasses to use their specific agent types
6
+ */
7
+ export declare abstract class BaseMidsceneTools<TAgent extends BaseAgent = BaseAgent> implements IMidsceneTools {
8
+ protected mcpServer?: McpServer;
9
+ protected agent?: TAgent;
10
+ protected toolDefinitions: ToolDefinition[];
11
+ /**
12
+ * Ensure agent is initialized and ready for use.
13
+ * Must be implemented by subclasses to create platform-specific agent.
14
+ * @param initParam Optional initialization parameter (platform-specific, e.g., URL, device ID)
15
+ * @returns Promise resolving to initialized agent instance
16
+ * @throws Error if agent initialization fails
17
+ */
18
+ protected abstract ensureAgent(initParam?: string): Promise<TAgent>;
19
+ /**
20
+ * Optional: prepare platform-specific tools (e.g., device connection)
21
+ */
22
+ protected preparePlatformTools(): ToolDefinition[];
23
+ /**
24
+ * Must be implemented by subclasses to create a temporary device instance
25
+ * This allows getting real actionSpace without connecting to device
26
+ */
27
+ protected abstract createTemporaryDevice(): BaseDevice;
28
+ /**
29
+ * Initialize all tools by querying actionSpace
30
+ * Uses two-layer fallback strategy:
31
+ * 1. Try to get actionSpace from connected agent (if available)
32
+ * 2. Create temporary device instance to read actionSpace (always succeeds)
33
+ */
34
+ initTools(): Promise<void>;
35
+ /**
36
+ * Attach to MCP server and register all tools
37
+ */
38
+ attachToServer(server: McpServer): void;
39
+ /**
40
+ * Wrapper for auto-destroy behavior
41
+ */
42
+ private toolWithAutoDestroy;
43
+ /**
44
+ * Cleanup method - destroy agent and release resources
45
+ */
46
+ closeBrowser(): Promise<void>;
47
+ /**
48
+ * Helper: Convert base64 screenshot to image content array
49
+ */
50
+ protected buildScreenshotContent(screenshot: string): {
51
+ type: "image";
52
+ data: string;
53
+ mimeType: string;
54
+ }[];
55
+ }
@@ -0,0 +1,5 @@
1
+ export * from './base-server';
2
+ export * from './base-tools';
3
+ export * from './tool-generator';
4
+ export * from './types';
5
+ export * from './inject-report-html-plugin';
@@ -0,0 +1,18 @@
1
+ interface RslibPluginApi {
2
+ onAfterBuild: (callback: () => void) => void;
3
+ }
4
+ /**
5
+ * Rslib plugin to inject report HTML from @midscene/core dist into MCP bundle.
6
+ * This runs after build and reads the already-injected HTML from core.
7
+ *
8
+ * Prerequisites:
9
+ * - @midscene/report must be in devDependencies to ensure correct build order
10
+ * - @midscene/core dist must exist with injected HTML
11
+ *
12
+ * @param packageDir - The directory of the MCP package (use __dirname)
13
+ */
14
+ export declare function injectReportHtmlFromCore(packageDir: string): {
15
+ name: string;
16
+ setup(api: RslibPluginApi): void;
17
+ };
18
+ export {};
@@ -0,0 +1,11 @@
1
+ import type { ActionSpaceItem, BaseAgent, ToolDefinition } from './types';
2
+ /**
3
+ * Converts DeviceAction from actionSpace into MCP ToolDefinition
4
+ * This is the core logic that removes need for hardcoded tool definitions
5
+ */
6
+ export declare function generateToolsFromActionSpace(actionSpace: ActionSpaceItem[], getAgent: () => Promise<BaseAgent>): ToolDefinition[];
7
+ /**
8
+ * Generate common tools (screenshot, waitFor)
9
+ * SIMPLIFIED: Only keep essential helper tools, removed assert
10
+ */
11
+ export declare function generateCommonTools(getAgent: () => Promise<BaseAgent>): ToolDefinition[];
@@ -0,0 +1,100 @@
1
+ import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
2
+ import type { z } from 'zod';
3
+ /**
4
+ * Default timeout constants for app loading verification
5
+ */
6
+ export declare const defaultAppLoadingTimeoutMs = 10000;
7
+ export declare const defaultAppLoadingCheckIntervalMs = 2000;
8
+ /**
9
+ * Content item types for tool results (MCP compatible)
10
+ */
11
+ export type ToolResultContent = {
12
+ type: 'text';
13
+ text: string;
14
+ } | {
15
+ type: 'image';
16
+ data: string;
17
+ mimeType: string;
18
+ } | {
19
+ type: 'audio';
20
+ data: string;
21
+ mimeType: string;
22
+ } | {
23
+ type: 'resource';
24
+ resource: {
25
+ text: string;
26
+ uri: string;
27
+ mimeType?: string;
28
+ } | {
29
+ uri: string;
30
+ blob: string;
31
+ mimeType?: string;
32
+ };
33
+ };
34
+ /**
35
+ * Result type for tool execution (MCP compatible)
36
+ */
37
+ export interface ToolResult {
38
+ [x: string]: unknown;
39
+ content: ToolResultContent[];
40
+ isError?: boolean;
41
+ _meta?: Record<string, unknown>;
42
+ }
43
+ /**
44
+ * Tool handler function type
45
+ * Takes parsed arguments and returns a tool result
46
+ */
47
+ export type ToolHandler<T = Record<string, unknown>> = (args: T) => Promise<ToolResult>;
48
+ /**
49
+ * Tool schema type using Zod
50
+ */
51
+ export type ToolSchema = Record<string, z.ZodTypeAny>;
52
+ /**
53
+ * Tool definition for MCP server
54
+ */
55
+ export interface ToolDefinition<T = Record<string, unknown>> {
56
+ name: string;
57
+ description: string;
58
+ schema: ToolSchema;
59
+ handler: ToolHandler<T>;
60
+ autoDestroy?: boolean;
61
+ }
62
+ /**
63
+ * Action space item definition
64
+ * Note: Intentionally no index signature to maintain compatibility with DeviceAction
65
+ */
66
+ export interface ActionSpaceItem {
67
+ name: string;
68
+ description?: string;
69
+ args?: Record<string, unknown>;
70
+ paramSchema?: z.ZodTypeAny;
71
+ }
72
+ /**
73
+ * Base agent interface
74
+ * Represents a platform-specific agent (Android, iOS, Web)
75
+ * Note: Return types use `unknown` for compatibility with platform-specific implementations
76
+ */
77
+ export interface BaseAgent {
78
+ getActionSpace(): Promise<ActionSpaceItem[]>;
79
+ destroy?(): Promise<void>;
80
+ page?: {
81
+ screenshotBase64(): Promise<string>;
82
+ };
83
+ aiAction?: (description: string, params?: Record<string, unknown>) => Promise<unknown>;
84
+ aiWaitFor?: (assertion: string, options: Record<string, unknown>) => Promise<unknown>;
85
+ }
86
+ /**
87
+ * Base device interface for temporary device instances
88
+ */
89
+ export interface BaseDevice {
90
+ actionSpace(): ActionSpaceItem[];
91
+ destroy?(): Promise<void>;
92
+ }
93
+ /**
94
+ * Interface for platform-specific MCP tools manager
95
+ */
96
+ export interface IMidsceneTools {
97
+ attachToServer(server: McpServer): void;
98
+ initTools(): Promise<void>;
99
+ closeBrowser?(): Promise<void>;
100
+ }
@@ -14,7 +14,6 @@ export type Rect = Point & Size & {
14
14
  };
15
15
  export declare abstract class BaseElement {
16
16
  abstract id: string;
17
- abstract indexId?: number;
18
17
  abstract attributes: {
19
18
  nodeType: NodeType;
20
19
  [key: string]: string;
@@ -22,7 +21,6 @@ export declare abstract class BaseElement {
22
21
  abstract content: string;
23
22
  abstract rect: Rect;
24
23
  abstract center: [number, number];
25
- abstract xpaths?: string[];
26
24
  abstract isVisible: boolean;
27
25
  }
28
26
  export interface ElementTreeNode<ElementType extends BaseElement = BaseElement> {
@@ -32,3 +30,8 @@ export interface ElementTreeNode<ElementType extends BaseElement = BaseElement>
32
30
  export interface WebElementInfo extends ElementInfo {
33
31
  zoom: number;
34
32
  }
33
+ export type LocateResultElement = {
34
+ description: string;
35
+ center: [number, number];
36
+ rect: Rect;
37
+ };
@@ -0,0 +1,23 @@
1
+ import type { z } from 'zod';
2
+ /**
3
+ * Recursively unwrap optional, nullable, default, and effects wrapper types
4
+ * to get the actual inner Zod type
5
+ */
6
+ export declare function unwrapZodField(field: unknown): unknown;
7
+ /**
8
+ * Check if a field is a Midscene locator field
9
+ * Checks for either:
10
+ * 1. midscene_location_field_flag in shape (result schema)
11
+ * 2. prompt field in shape (input schema)
12
+ */
13
+ export declare function isMidsceneLocatorField(field: unknown): boolean;
14
+ /**
15
+ * Get type name string from a Zod schema field
16
+ * @param field - Zod schema field
17
+ * @param locatorTypeDescription - Optional description for MidsceneLocation fields (used by core)
18
+ */
19
+ export declare function getZodTypeName(field: unknown, locatorTypeDescription?: string): string;
20
+ /**
21
+ * Get description from a Zod schema field
22
+ */
23
+ export declare function getZodDescription(field: z.ZodTypeAny): string | null;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@midscene/shared",
3
- "version": "0.30.10",
3
+ "version": "1.0.0",
4
4
  "repository": "https://github.com/web-infra-dev/midscene",
5
5
  "homepage": "https://midscenejs.com/",
6
6
  "types": "./dist/types/index.d.ts",
@@ -57,6 +57,16 @@
57
57
  "import": "./dist/es/common.mjs",
58
58
  "require": "./dist/lib/common.js"
59
59
  },
60
+ "./mcp": {
61
+ "types": "./dist/types/mcp/index.d.ts",
62
+ "import": "./dist/es/mcp/index.mjs",
63
+ "require": "./dist/lib/mcp/index.js"
64
+ },
65
+ "./logger": {
66
+ "types": "./dist/types/logger.d.ts",
67
+ "import": "./dist/es/logger.mjs",
68
+ "require": "./dist/lib/logger.js"
69
+ },
60
70
  "./*": {
61
71
  "types": "./dist/types/*.d.ts",
62
72
  "import": "./dist/es/*.mjs",
@@ -72,20 +82,25 @@
72
82
  "@silvia-odwyer/photon": "0.3.3",
73
83
  "@silvia-odwyer/photon-node": "0.3.3",
74
84
  "debug": "4.4.0",
85
+ "express": "^4.21.2",
75
86
  "jimp": "0.22.12",
76
87
  "js-sha256": "0.11.0",
77
88
  "sharp": "^0.34.3",
78
89
  "uuid": "11.1.0"
79
90
  },
80
91
  "devDependencies": {
81
- "@rslib/core": "^0.11.2",
92
+ "@rslib/core": "^0.18.3",
93
+ "@modelcontextprotocol/sdk": "1.10.2",
82
94
  "@types/debug": "4.1.12",
95
+ "@types/express": "^4.17.21",
83
96
  "@types/node": "^18.0.0",
84
97
  "@ui-tars/shared": "1.2.0",
85
98
  "dotenv": "^16.4.5",
99
+ "openai": "6.3.0",
86
100
  "rimraf": "~3.0.2",
87
101
  "typescript": "^5.8.3",
88
- "vitest": "3.0.5"
102
+ "vitest": "3.0.5",
103
+ "zod": "3.24.3"
89
104
  },
90
105
  "sideEffects": [],
91
106
  "publishConfig": {
@@ -98,7 +113,7 @@
98
113
  "build": "npm run build:script && npm run build:pkg",
99
114
  "build:pkg": "rslib build",
100
115
  "build:script": "rslib build -c ./rslib.inspect.config.ts",
101
- "build:watch": "npm run build:script && rslib build --watch",
116
+ "build:watch": "npm run build:script && rslib build --watch --no-clean",
102
117
  "reset": "rimraf ./**/node_modules",
103
118
  "test": "vitest --run",
104
119
  "test:u": "vitest --run -u"
@@ -0,0 +1,12 @@
1
+ /**
2
+ * Common Rspack configuration helpers for rsbuild projects
3
+ */
4
+
5
+ /**
6
+ * Common warning patterns to ignore in Rspack builds.
7
+ * These warnings are typically from optional dependencies or known non-critical issues.
8
+ */
9
+ export const commonIgnoreWarnings = [
10
+ // Ignore dynamic import warnings from langsmith/langfuse optional dependencies
11
+ /Critical dependency: the request of a dependency is an expression/,
12
+ ];
@@ -114,12 +114,12 @@ tasks:
114
114
  # Auto Planning (.ai)
115
115
  # ----------------
116
116
 
117
- # Perform an interaction. \`ai\` is a shorthand for \`aiAction\`.
117
+ # Perform an interaction. \`ai\` is a shorthand for \`aiAct\`.
118
118
  - ai: <prompt>
119
119
  cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
120
120
 
121
121
  # This usage is the same as \`ai\`.
122
- - aiAction: <prompt>
122
+ - aiAct: <prompt>
123
123
  cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
124
124
 
125
125
  # Instant Action (.aiTap, .aiDoubleClick, .aiHover, .aiInput, .aiKeyboardPress, .aiScroll)
@@ -167,8 +167,8 @@ tasks:
167
167
  xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
168
168
  cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
169
169
 
170
- # Log the current screenshot with a description in the report file.
171
- - logScreenshot: <title> # Optional, the title of the screenshot. If not provided, the title will be 'untitled'.
170
+ # Record the current screenshot with a description in the report file.
171
+ - recordToReport: <title> # Optional, the title of the screenshot. If not provided, the title will be 'untitled'.
172
172
  content: <content> # Optional, the description of the screenshot.
173
173
 
174
174
  # Data Extraction
@@ -1,81 +1,31 @@
1
1
  import {
2
- ANTHROPIC_API_KEY,
3
- AZURE_OPENAI_API_VERSION,
4
- AZURE_OPENAI_DEPLOYMENT,
5
- AZURE_OPENAI_ENDPOINT,
6
- AZURE_OPENAI_KEY,
7
- MIDSCENE_ANTHROPIC_API_KEY,
8
- MIDSCENE_AZURE_OPENAI_API_VERSION,
9
- MIDSCENE_AZURE_OPENAI_DEPLOYMENT,
10
- MIDSCENE_AZURE_OPENAI_ENDPOINT,
11
- MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON,
12
- MIDSCENE_AZURE_OPENAI_KEY,
13
- MIDSCENE_AZURE_OPENAI_SCOPE,
14
- MIDSCENE_GROUNDING_ANTHROPIC_API_KEY,
15
- MIDSCENE_GROUNDING_AZURE_OPENAI_API_VERSION,
16
- MIDSCENE_GROUNDING_AZURE_OPENAI_DEPLOYMENT,
17
- MIDSCENE_GROUNDING_AZURE_OPENAI_ENDPOINT,
18
- MIDSCENE_GROUNDING_AZURE_OPENAI_INIT_CONFIG_JSON,
19
- MIDSCENE_GROUNDING_AZURE_OPENAI_KEY,
20
- MIDSCENE_GROUNDING_AZURE_OPENAI_SCOPE,
21
- MIDSCENE_GROUNDING_MODEL_NAME,
22
- MIDSCENE_GROUNDING_OPENAI_API_KEY,
23
- MIDSCENE_GROUNDING_OPENAI_BASE_URL,
24
- MIDSCENE_GROUNDING_OPENAI_HTTP_PROXY,
25
- MIDSCENE_GROUNDING_OPENAI_INIT_CONFIG_JSON,
26
- MIDSCENE_GROUNDING_OPENAI_SOCKS_PROXY,
27
- MIDSCENE_GROUNDING_OPENAI_USE_AZURE,
28
- MIDSCENE_GROUNDING_USE_ANTHROPIC_SDK,
29
- MIDSCENE_GROUNDING_USE_AZURE_OPENAI,
30
- MIDSCENE_GROUNDING_VL_MODE,
2
+ MIDSCENE_INSIGHT_MODEL_API_KEY,
3
+ MIDSCENE_INSIGHT_MODEL_BASE_URL,
4
+ MIDSCENE_INSIGHT_MODEL_HTTP_PROXY,
5
+ MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON,
6
+ MIDSCENE_INSIGHT_MODEL_NAME,
7
+ MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY,
8
+ MIDSCENE_INSIGHT_MODEL_TIMEOUT,
9
+ MIDSCENE_MODEL_API_KEY,
10
+ MIDSCENE_MODEL_BASE_URL,
11
+ MIDSCENE_MODEL_FAMILY,
12
+ MIDSCENE_MODEL_HTTP_PROXY,
13
+ MIDSCENE_MODEL_INIT_CONFIG_JSON,
31
14
  MIDSCENE_MODEL_NAME,
32
- MIDSCENE_OPENAI_API_KEY,
33
- MIDSCENE_OPENAI_BASE_URL,
15
+ MIDSCENE_MODEL_SOCKS_PROXY,
16
+ MIDSCENE_MODEL_TIMEOUT,
34
17
  MIDSCENE_OPENAI_HTTP_PROXY,
35
18
  MIDSCENE_OPENAI_INIT_CONFIG_JSON,
36
19
  MIDSCENE_OPENAI_SOCKS_PROXY,
37
- MIDSCENE_OPENAI_USE_AZURE,
38
- MIDSCENE_PLANNING_ANTHROPIC_API_KEY,
39
- MIDSCENE_PLANNING_AZURE_OPENAI_API_VERSION,
40
- MIDSCENE_PLANNING_AZURE_OPENAI_DEPLOYMENT,
41
- MIDSCENE_PLANNING_AZURE_OPENAI_ENDPOINT,
42
- MIDSCENE_PLANNING_AZURE_OPENAI_INIT_CONFIG_JSON,
43
- MIDSCENE_PLANNING_AZURE_OPENAI_KEY,
44
- MIDSCENE_PLANNING_AZURE_OPENAI_SCOPE,
20
+ MIDSCENE_PLANNING_MODEL_API_KEY,
21
+ MIDSCENE_PLANNING_MODEL_BASE_URL,
22
+ MIDSCENE_PLANNING_MODEL_HTTP_PROXY,
23
+ MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON,
45
24
  MIDSCENE_PLANNING_MODEL_NAME,
46
- MIDSCENE_PLANNING_OPENAI_API_KEY,
47
- MIDSCENE_PLANNING_OPENAI_BASE_URL,
48
- MIDSCENE_PLANNING_OPENAI_HTTP_PROXY,
49
- MIDSCENE_PLANNING_OPENAI_INIT_CONFIG_JSON,
50
- MIDSCENE_PLANNING_OPENAI_SOCKS_PROXY,
51
- MIDSCENE_PLANNING_OPENAI_USE_AZURE,
52
- MIDSCENE_PLANNING_USE_ANTHROPIC_SDK,
53
- MIDSCENE_PLANNING_USE_AZURE_OPENAI,
54
- MIDSCENE_PLANNING_VL_MODE,
55
- MIDSCENE_USE_ANTHROPIC_SDK,
56
- MIDSCENE_USE_AZURE_OPENAI,
57
- MIDSCENE_VL_MODE,
58
- MIDSCENE_VQA_ANTHROPIC_API_KEY,
59
- MIDSCENE_VQA_AZURE_OPENAI_API_VERSION,
60
- MIDSCENE_VQA_AZURE_OPENAI_DEPLOYMENT,
61
- MIDSCENE_VQA_AZURE_OPENAI_ENDPOINT,
62
- MIDSCENE_VQA_AZURE_OPENAI_INIT_CONFIG_JSON,
63
- MIDSCENE_VQA_AZURE_OPENAI_KEY,
64
- MIDSCENE_VQA_AZURE_OPENAI_SCOPE,
65
- // VQA
66
- MIDSCENE_VQA_MODEL_NAME,
67
- MIDSCENE_VQA_OPENAI_API_KEY,
68
- MIDSCENE_VQA_OPENAI_BASE_URL,
69
- MIDSCENE_VQA_OPENAI_HTTP_PROXY,
70
- MIDSCENE_VQA_OPENAI_INIT_CONFIG_JSON,
71
- MIDSCENE_VQA_OPENAI_SOCKS_PROXY,
72
- MIDSCENE_VQA_OPENAI_USE_AZURE,
73
- MIDSCENE_VQA_USE_ANTHROPIC_SDK,
74
- MIDSCENE_VQA_USE_AZURE_OPENAI,
75
- MIDSCENE_VQA_VL_MODE,
25
+ MIDSCENE_PLANNING_MODEL_SOCKS_PROXY,
26
+ MIDSCENE_PLANNING_MODEL_TIMEOUT,
76
27
  OPENAI_API_KEY,
77
28
  OPENAI_BASE_URL,
78
- OPENAI_USE_AZURE,
79
29
  } from './types';
80
30
 
81
31
  interface IModelConfigKeys {
@@ -92,95 +42,36 @@ interface IModelConfigKeys {
92
42
  openaiApiKey: string;
93
43
  openaiExtraConfig: string;
94
44
  /**
95
- * Azure
45
+ * Extra
96
46
  */
97
- openaiUseAzureDeprecated: string;
98
- useAzureOpenai: string;
99
- azureOpenaiScope: string;
100
- azureOpenaiKey: string;
101
- azureOpenaiEndpoint: string;
102
- azureOpenaiApiVersion: string;
103
- azureOpenaiDeployment: string;
104
- azureExtraConfig: string;
47
+ modelFamily: string;
105
48
  /**
106
- * Anthropic
49
+ * Timeout
107
50
  */
108
- useAnthropicSdk: string;
109
- anthropicApiKey: string;
110
- /**
111
- * Extra
112
- */
113
- vlMode: string;
51
+ timeout: string;
114
52
  }
115
53
 
116
- export const VQA_MODEL_CONFIG_KEYS: IModelConfigKeys = {
117
- modelName: MIDSCENE_VQA_MODEL_NAME,
54
+ export const INSIGHT_MODEL_CONFIG_KEYS: IModelConfigKeys = {
55
+ modelName: MIDSCENE_INSIGHT_MODEL_NAME,
118
56
  /**
119
57
  * proxy
120
58
  */
121
- socksProxy: MIDSCENE_VQA_OPENAI_SOCKS_PROXY,
122
- httpProxy: MIDSCENE_VQA_OPENAI_HTTP_PROXY,
59
+ socksProxy: MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY,
60
+ httpProxy: MIDSCENE_INSIGHT_MODEL_HTTP_PROXY,
123
61
  /**
124
62
  * OpenAI
125
63
  */
126
- openaiBaseURL: MIDSCENE_VQA_OPENAI_BASE_URL,
127
- openaiApiKey: MIDSCENE_VQA_OPENAI_API_KEY,
128
- openaiExtraConfig: MIDSCENE_VQA_OPENAI_INIT_CONFIG_JSON,
129
- /**
130
- * Azure
131
- */
132
- openaiUseAzureDeprecated: MIDSCENE_VQA_OPENAI_USE_AZURE,
133
- useAzureOpenai: MIDSCENE_VQA_USE_AZURE_OPENAI,
134
- azureOpenaiScope: MIDSCENE_VQA_AZURE_OPENAI_SCOPE,
135
- azureOpenaiKey: MIDSCENE_VQA_AZURE_OPENAI_KEY,
136
- azureOpenaiEndpoint: MIDSCENE_VQA_AZURE_OPENAI_ENDPOINT,
137
- azureOpenaiApiVersion: MIDSCENE_VQA_AZURE_OPENAI_API_VERSION,
138
- azureOpenaiDeployment: MIDSCENE_VQA_AZURE_OPENAI_DEPLOYMENT,
139
- azureExtraConfig: MIDSCENE_VQA_AZURE_OPENAI_INIT_CONFIG_JSON,
140
- /**
141
- * Anthropic
142
- */
143
- useAnthropicSdk: MIDSCENE_VQA_USE_ANTHROPIC_SDK,
144
- anthropicApiKey: MIDSCENE_VQA_ANTHROPIC_API_KEY,
64
+ openaiBaseURL: MIDSCENE_INSIGHT_MODEL_BASE_URL,
65
+ openaiApiKey: MIDSCENE_INSIGHT_MODEL_API_KEY,
66
+ openaiExtraConfig: MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON,
145
67
  /**
146
68
  * Extra
147
69
  */
148
- vlMode: MIDSCENE_VQA_VL_MODE,
149
- } as const;
150
-
151
- export const GROUNDING_MODEL_CONFIG_KEYS: IModelConfigKeys = {
152
- modelName: MIDSCENE_GROUNDING_MODEL_NAME,
153
- /**
154
- * proxy
155
- */
156
- socksProxy: MIDSCENE_GROUNDING_OPENAI_SOCKS_PROXY,
157
- httpProxy: MIDSCENE_GROUNDING_OPENAI_HTTP_PROXY,
158
- /**
159
- * OpenAI
160
- */
161
- openaiBaseURL: MIDSCENE_GROUNDING_OPENAI_BASE_URL,
162
- openaiApiKey: MIDSCENE_GROUNDING_OPENAI_API_KEY,
163
- openaiExtraConfig: MIDSCENE_GROUNDING_OPENAI_INIT_CONFIG_JSON,
164
- /**
165
- * Azure
166
- */
167
- openaiUseAzureDeprecated: MIDSCENE_GROUNDING_OPENAI_USE_AZURE,
168
- useAzureOpenai: MIDSCENE_GROUNDING_USE_AZURE_OPENAI,
169
- azureOpenaiScope: MIDSCENE_GROUNDING_AZURE_OPENAI_SCOPE,
170
- azureOpenaiKey: MIDSCENE_GROUNDING_AZURE_OPENAI_KEY,
171
- azureOpenaiEndpoint: MIDSCENE_GROUNDING_AZURE_OPENAI_ENDPOINT,
172
- azureOpenaiApiVersion: MIDSCENE_GROUNDING_AZURE_OPENAI_API_VERSION,
173
- azureOpenaiDeployment: MIDSCENE_GROUNDING_AZURE_OPENAI_DEPLOYMENT,
174
- azureExtraConfig: MIDSCENE_GROUNDING_AZURE_OPENAI_INIT_CONFIG_JSON,
175
- /**
176
- * Anthropic
177
- */
178
- useAnthropicSdk: MIDSCENE_GROUNDING_USE_ANTHROPIC_SDK,
179
- anthropicApiKey: MIDSCENE_GROUNDING_ANTHROPIC_API_KEY,
70
+ modelFamily: 'THERE_IS_NO_MODEL_FAMILY_FOR_INSIGHT',
180
71
  /**
181
- * Extra
72
+ * Timeout
182
73
  */
183
- vlMode: MIDSCENE_GROUNDING_VL_MODE,
74
+ timeout: MIDSCENE_INSIGHT_MODEL_TIMEOUT,
184
75
  } as const;
185
76
 
186
77
  export const PLANNING_MODEL_CONFIG_KEYS: IModelConfigKeys = {
@@ -188,34 +79,22 @@ export const PLANNING_MODEL_CONFIG_KEYS: IModelConfigKeys = {
188
79
  /**
189
80
  * proxy
190
81
  */
191
- socksProxy: MIDSCENE_PLANNING_OPENAI_SOCKS_PROXY,
192
- httpProxy: MIDSCENE_PLANNING_OPENAI_HTTP_PROXY,
82
+ socksProxy: MIDSCENE_PLANNING_MODEL_SOCKS_PROXY,
83
+ httpProxy: MIDSCENE_PLANNING_MODEL_HTTP_PROXY,
193
84
  /**
194
85
  * OpenAI
195
86
  */
196
- openaiBaseURL: MIDSCENE_PLANNING_OPENAI_BASE_URL,
197
- openaiApiKey: MIDSCENE_PLANNING_OPENAI_API_KEY,
198
- openaiExtraConfig: MIDSCENE_PLANNING_OPENAI_INIT_CONFIG_JSON,
87
+ openaiBaseURL: MIDSCENE_PLANNING_MODEL_BASE_URL,
88
+ openaiApiKey: MIDSCENE_PLANNING_MODEL_API_KEY,
89
+ openaiExtraConfig: MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON,
199
90
  /**
200
- * Azure
201
- */
202
- openaiUseAzureDeprecated: MIDSCENE_PLANNING_OPENAI_USE_AZURE,
203
- useAzureOpenai: MIDSCENE_PLANNING_USE_AZURE_OPENAI,
204
- azureOpenaiScope: MIDSCENE_PLANNING_AZURE_OPENAI_SCOPE,
205
- azureOpenaiKey: MIDSCENE_PLANNING_AZURE_OPENAI_KEY,
206
- azureOpenaiEndpoint: MIDSCENE_PLANNING_AZURE_OPENAI_ENDPOINT,
207
- azureOpenaiApiVersion: MIDSCENE_PLANNING_AZURE_OPENAI_API_VERSION,
208
- azureOpenaiDeployment: MIDSCENE_PLANNING_AZURE_OPENAI_DEPLOYMENT,
209
- azureExtraConfig: MIDSCENE_PLANNING_AZURE_OPENAI_INIT_CONFIG_JSON,
210
- /**
211
- * Anthropic
91
+ * Extra
212
92
  */
213
- useAnthropicSdk: MIDSCENE_PLANNING_USE_ANTHROPIC_SDK,
214
- anthropicApiKey: MIDSCENE_PLANNING_ANTHROPIC_API_KEY,
93
+ modelFamily: 'THERE_IS_NO_MODEL_FAMILY_FOR_PLANNING',
215
94
  /**
216
- * Extra
95
+ * Timeout
217
96
  */
218
- vlMode: MIDSCENE_PLANNING_VL_MODE,
97
+ timeout: MIDSCENE_PLANNING_MODEL_TIMEOUT,
219
98
  } as const;
220
99
 
221
100
  // modelConfig return default
@@ -224,68 +103,44 @@ export const DEFAULT_MODEL_CONFIG_KEYS: IModelConfigKeys = {
224
103
  /**
225
104
  * proxy
226
105
  */
227
- socksProxy: MIDSCENE_OPENAI_SOCKS_PROXY,
228
- httpProxy: MIDSCENE_OPENAI_HTTP_PROXY,
106
+ socksProxy: MIDSCENE_MODEL_SOCKS_PROXY,
107
+ httpProxy: MIDSCENE_MODEL_HTTP_PROXY,
229
108
  /**
230
109
  * OpenAI
231
110
  */
232
- openaiBaseURL: MIDSCENE_OPENAI_BASE_URL,
233
- openaiApiKey: MIDSCENE_OPENAI_API_KEY,
234
- openaiExtraConfig: MIDSCENE_OPENAI_INIT_CONFIG_JSON,
111
+ openaiBaseURL: MIDSCENE_MODEL_BASE_URL,
112
+ openaiApiKey: MIDSCENE_MODEL_API_KEY,
113
+ openaiExtraConfig: MIDSCENE_MODEL_INIT_CONFIG_JSON,
235
114
  /**
236
- * Azure
237
- */
238
- openaiUseAzureDeprecated: MIDSCENE_OPENAI_USE_AZURE,
239
- useAzureOpenai: MIDSCENE_USE_AZURE_OPENAI,
240
- azureOpenaiScope: MIDSCENE_AZURE_OPENAI_SCOPE,
241
- azureOpenaiKey: MIDSCENE_AZURE_OPENAI_KEY,
242
- azureOpenaiEndpoint: MIDSCENE_AZURE_OPENAI_ENDPOINT,
243
- azureOpenaiApiVersion: MIDSCENE_AZURE_OPENAI_API_VERSION,
244
- azureOpenaiDeployment: MIDSCENE_AZURE_OPENAI_DEPLOYMENT,
245
- azureExtraConfig: MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON,
246
- /**
247
- * Anthropic
115
+ * Extra
248
116
  */
249
- useAnthropicSdk: MIDSCENE_USE_ANTHROPIC_SDK,
250
- anthropicApiKey: MIDSCENE_ANTHROPIC_API_KEY,
117
+ modelFamily: MIDSCENE_MODEL_FAMILY,
251
118
  /**
252
- * Extra
119
+ * Timeout
253
120
  */
254
- vlMode: MIDSCENE_VL_MODE,
121
+ timeout: MIDSCENE_MODEL_TIMEOUT,
255
122
  } as const;
256
123
 
257
124
  // read from process.env
258
125
  export const DEFAULT_MODEL_CONFIG_KEYS_LEGACY: IModelConfigKeys = {
259
126
  modelName: MIDSCENE_MODEL_NAME,
260
127
  /**
261
- * proxy
128
+ * proxy - Uses legacy MIDSCENE_OPENAI_* variables for backward compatibility
262
129
  */
263
130
  socksProxy: MIDSCENE_OPENAI_SOCKS_PROXY,
264
131
  httpProxy: MIDSCENE_OPENAI_HTTP_PROXY,
265
132
  /**
266
- * OpenAI
133
+ * Model API - Uses legacy OPENAI_* variables for backward compatibility
267
134
  */
268
135
  openaiBaseURL: OPENAI_BASE_URL,
269
136
  openaiApiKey: OPENAI_API_KEY,
270
137
  openaiExtraConfig: MIDSCENE_OPENAI_INIT_CONFIG_JSON,
271
138
  /**
272
- * Azure
273
- */
274
- openaiUseAzureDeprecated: OPENAI_USE_AZURE,
275
- useAzureOpenai: MIDSCENE_USE_AZURE_OPENAI,
276
- azureOpenaiScope: MIDSCENE_AZURE_OPENAI_SCOPE,
277
- azureOpenaiKey: AZURE_OPENAI_KEY,
278
- azureOpenaiEndpoint: AZURE_OPENAI_ENDPOINT,
279
- azureOpenaiApiVersion: AZURE_OPENAI_API_VERSION,
280
- azureOpenaiDeployment: AZURE_OPENAI_DEPLOYMENT,
281
- azureExtraConfig: MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON,
282
- /**
283
- * Anthropic
139
+ * Extra
284
140
  */
285
- useAnthropicSdk: MIDSCENE_USE_ANTHROPIC_SDK,
286
- anthropicApiKey: ANTHROPIC_API_KEY,
141
+ modelFamily: 'DEFAULT_MODEL_CONFIG_KEYS has no modelFamily key',
287
142
  /**
288
- * Extra
143
+ * Timeout - use the new key for legacy mode too
289
144
  */
290
- vlMode: 'DEFAULT_MODEL_CONFIG_KEYS has no vlMode key',
145
+ timeout: MIDSCENE_MODEL_TIMEOUT,
291
146
  } as const;