@midscene/shared 1.0.4 → 1.0.5-beta-20251230124359.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/constants/example-code.mjs +19 -10
- package/dist/es/mcp/base-server.mjs +30 -2
- package/dist/es/mcp/base-tools.mjs +6 -0
- package/dist/es/mcp/index.mjs +1 -0
- package/dist/es/mcp/launcher-helper.mjs +52 -0
- package/dist/lib/constants/example-code.js +19 -10
- package/dist/lib/mcp/base-server.js +30 -2
- package/dist/lib/mcp/base-tools.js +6 -0
- package/dist/lib/mcp/index.js +7 -0
- package/dist/lib/mcp/launcher-helper.js +86 -0
- package/dist/types/constants/example-code.d.ts +2 -2
- package/dist/types/mcp/base-server.d.ts +20 -4
- package/dist/types/mcp/base-tools.d.ts +8 -0
- package/dist/types/mcp/index.d.ts +1 -0
- package/dist/types/mcp/launcher-helper.d.ts +94 -0
- package/dist/types/mcp/types.d.ts +4 -0
- package/package.json +2 -2
- package/src/constants/example-code.ts +19 -10
- package/src/mcp/base-server.ts +65 -5
- package/src/mcp/base-tools.ts +14 -0
- package/src/mcp/index.ts +1 -0
- package/src/mcp/launcher-helper.ts +200 -0
- package/src/mcp/types.ts +5 -0
|
@@ -5,15 +5,22 @@ const PLAYWRIGHT_EXAMPLE_CODE = `
|
|
|
5
5
|
IMPORTANT: Follow these exact type signatures for AI functions:
|
|
6
6
|
|
|
7
7
|
// Type signatures for AI functions:
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
8
|
+
aiAct(prompt: string, options?: { cacheable?: boolean }): Promise<void>
|
|
9
|
+
aiInput(text: string, locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
|
|
10
|
+
aiTap(locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
|
|
11
|
+
aiHover(locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
|
|
12
|
+
aiDoubleClick(locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
|
|
13
|
+
aiKeyboardPress(key: string, locate?: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
|
|
14
|
+
aiScroll(locate: string | undefined, options: {
|
|
15
|
+
direction?: 'up' | 'down' | 'left' | 'right',
|
|
16
|
+
scrollType?: 'singleAction' | 'scrollToBottom' | 'scrollToTop' | 'scrollToRight' | 'scrollToLeft',
|
|
17
|
+
distance?: number | null,
|
|
18
|
+
deepThink?: boolean,
|
|
19
|
+
xpath?: string,
|
|
20
|
+
cacheable?: boolean
|
|
15
21
|
}): Promise<void>
|
|
16
|
-
aiAssert(assertion: string): Promise<void>
|
|
22
|
+
aiAssert(assertion: string, options?: { errorMessage?: string }): Promise<void>
|
|
23
|
+
aiWaitFor(prompt: string, options?: { timeout?: number }): Promise<void>
|
|
17
24
|
aiQuery<T>(queryObject: Record<string, string>): Promise<T> // Extracts data from page based on descriptions
|
|
18
25
|
|
|
19
26
|
// examples:
|
|
@@ -35,12 +42,14 @@ test.beforeEach(async ({ page }) => {
|
|
|
35
42
|
});
|
|
36
43
|
|
|
37
44
|
test('ai shop', async ({
|
|
45
|
+
aiAct,
|
|
38
46
|
aiInput,
|
|
39
47
|
aiAssert,
|
|
40
48
|
aiQuery,
|
|
41
49
|
aiKeyboardPress,
|
|
42
50
|
aiHover,
|
|
43
51
|
aiTap,
|
|
52
|
+
aiWaitFor,
|
|
44
53
|
agentForPage,
|
|
45
54
|
page,
|
|
46
55
|
}) => {
|
|
@@ -86,7 +95,7 @@ tasks:
|
|
|
86
95
|
locate: 'input field description'
|
|
87
96
|
- aiScroll:
|
|
88
97
|
direction: down/up
|
|
89
|
-
scrollType:
|
|
98
|
+
scrollType: scrollToBottom/scrollToTop/singleAction
|
|
90
99
|
- aiAssert: "expected state"
|
|
91
100
|
- sleep: milliseconds
|
|
92
101
|
|
|
@@ -159,7 +168,7 @@ tasks:
|
|
|
159
168
|
# Scroll globally or on an element described by a prompt.
|
|
160
169
|
- aiScroll:
|
|
161
170
|
direction: 'up' # or 'down' | 'left' | 'right'
|
|
162
|
-
scrollType: '
|
|
171
|
+
scrollType: 'singleAction' # or 'scrollToTop' | 'scrollToBottom' | 'scrollToLeft' | 'scrollToRight'
|
|
163
172
|
distance: <number> # Optional, the scroll distance in pixels.
|
|
164
173
|
locate: <prompt> # Optional, the element to scroll on.
|
|
165
174
|
deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
|
|
@@ -41,7 +41,7 @@ const MAX_SESSIONS = 100;
|
|
|
41
41
|
class BaseMCPServer {
|
|
42
42
|
async initializeToolsManager() {
|
|
43
43
|
setIsMcp(true);
|
|
44
|
-
this.toolsManager = this.createToolsManager();
|
|
44
|
+
this.toolsManager = this.providedToolsManager || this.createToolsManager();
|
|
45
45
|
try {
|
|
46
46
|
await this.toolsManager.initTools();
|
|
47
47
|
} catch (error) {
|
|
@@ -91,6 +91,11 @@ class BaseMCPServer {
|
|
|
91
91
|
};
|
|
92
92
|
process.once('SIGINT', cleanup);
|
|
93
93
|
process.once('SIGTERM', cleanup);
|
|
94
|
+
return {
|
|
95
|
+
close: async ()=>{
|
|
96
|
+
this.performCleanup();
|
|
97
|
+
}
|
|
98
|
+
};
|
|
94
99
|
}
|
|
95
100
|
async launchHttp(options) {
|
|
96
101
|
if (!Number.isInteger(options.port) || options.port < 1 || options.port > 65535) throw new Error(`Invalid port number: ${options.port}. Port must be between 1 and 65535.`);
|
|
@@ -151,6 +156,27 @@ class BaseMCPServer {
|
|
|
151
156
|
});
|
|
152
157
|
const cleanupInterval = this.startSessionCleanup(sessions);
|
|
153
158
|
this.setupHttpShutdownHandlers(server, sessions, cleanupInterval);
|
|
159
|
+
return {
|
|
160
|
+
port: options.port,
|
|
161
|
+
host,
|
|
162
|
+
close: async ()=>{
|
|
163
|
+
clearInterval(cleanupInterval);
|
|
164
|
+
for (const session of sessions.values())try {
|
|
165
|
+
await session.transport.close();
|
|
166
|
+
} catch (error) {
|
|
167
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
168
|
+
console.error(`Failed to close session ${session.transport.sessionId}: ${message}`);
|
|
169
|
+
}
|
|
170
|
+
sessions.clear();
|
|
171
|
+
return new Promise((resolve)=>{
|
|
172
|
+
server.close((err)=>{
|
|
173
|
+
if (err) console.error('Error closing HTTP server:', err);
|
|
174
|
+
this.performCleanup();
|
|
175
|
+
resolve();
|
|
176
|
+
});
|
|
177
|
+
});
|
|
178
|
+
}
|
|
179
|
+
};
|
|
154
180
|
}
|
|
155
181
|
async createHttpSession(sessions) {
|
|
156
182
|
const transport = new StreamableHTTPServerTransport({
|
|
@@ -235,16 +261,18 @@ class BaseMCPServer {
|
|
|
235
261
|
getToolsManager() {
|
|
236
262
|
return this.toolsManager;
|
|
237
263
|
}
|
|
238
|
-
constructor(config){
|
|
264
|
+
constructor(config, toolsManager){
|
|
239
265
|
_define_property(this, "mcpServer", void 0);
|
|
240
266
|
_define_property(this, "toolsManager", void 0);
|
|
241
267
|
_define_property(this, "config", void 0);
|
|
268
|
+
_define_property(this, "providedToolsManager", void 0);
|
|
242
269
|
this.config = config;
|
|
243
270
|
this.mcpServer = new McpServer({
|
|
244
271
|
name: config.name,
|
|
245
272
|
version: config.version,
|
|
246
273
|
description: config.description
|
|
247
274
|
});
|
|
275
|
+
this.providedToolsManager = toolsManager;
|
|
248
276
|
}
|
|
249
277
|
}
|
|
250
278
|
export { BaseMCPServer, CLI_ARGS_CONFIG, launchMCPServer };
|
|
@@ -65,6 +65,12 @@ class BaseMidsceneTools {
|
|
|
65
65
|
async closeBrowser() {
|
|
66
66
|
await this.agent?.destroy?.();
|
|
67
67
|
}
|
|
68
|
+
getToolDefinitions() {
|
|
69
|
+
return this.toolDefinitions;
|
|
70
|
+
}
|
|
71
|
+
setAgent(agent) {
|
|
72
|
+
this.agent = agent;
|
|
73
|
+
}
|
|
68
74
|
buildScreenshotContent(screenshot) {
|
|
69
75
|
const { mimeType, body } = parseBase64(screenshot);
|
|
70
76
|
return [
|
package/dist/es/mcp/index.mjs
CHANGED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
function createMCPServerLauncher(config) {
|
|
2
|
+
const { agent, platformName, ToolsManagerClass, MCPServerClass } = config;
|
|
3
|
+
function validateAgent() {
|
|
4
|
+
const device = agent.interface;
|
|
5
|
+
if (!device) throw new Error(`Agent must have an 'interface' property that references the underlying device.
|
|
6
|
+
Please ensure your agent instance is properly initialized with a device interface.
|
|
7
|
+
Expected: agent.interface to be defined, but got: ${typeof device}
|
|
8
|
+
Solution: Check that your agent constructor properly sets the interface property.`);
|
|
9
|
+
}
|
|
10
|
+
function createToolsManager() {
|
|
11
|
+
const toolsManager = new ToolsManagerClass();
|
|
12
|
+
toolsManager.agent = agent;
|
|
13
|
+
return toolsManager;
|
|
14
|
+
}
|
|
15
|
+
function logStartupInfo(mode, additionalInfo) {
|
|
16
|
+
const device = agent.interface;
|
|
17
|
+
console.log(`Starting Midscene ${platformName} MCP Server (${mode})...`);
|
|
18
|
+
console.log(`Agent: ${agent.constructor.name}`);
|
|
19
|
+
console.log(`Device: ${device.constructor.name}`);
|
|
20
|
+
if (additionalInfo?.port !== void 0) console.log(`Port: ${additionalInfo.port}`);
|
|
21
|
+
if (additionalInfo?.host) console.log(`Host: ${additionalInfo.host}`);
|
|
22
|
+
}
|
|
23
|
+
return {
|
|
24
|
+
async launch (options = {}) {
|
|
25
|
+
const { verbose = true } = options;
|
|
26
|
+
validateAgent();
|
|
27
|
+
if (verbose) logStartupInfo('stdio');
|
|
28
|
+
const toolsManager = createToolsManager();
|
|
29
|
+
const server = new MCPServerClass(toolsManager);
|
|
30
|
+
const result = await server.launch();
|
|
31
|
+
if (verbose) console.log(`${platformName} MCP Server started (stdio mode)`);
|
|
32
|
+
return result;
|
|
33
|
+
},
|
|
34
|
+
async launchHttp (options) {
|
|
35
|
+
const { port, host = 'localhost', verbose = true } = options;
|
|
36
|
+
validateAgent();
|
|
37
|
+
if (verbose) logStartupInfo('HTTP', {
|
|
38
|
+
port,
|
|
39
|
+
host
|
|
40
|
+
});
|
|
41
|
+
const toolsManager = createToolsManager();
|
|
42
|
+
const server = new MCPServerClass(toolsManager);
|
|
43
|
+
const result = await server.launchHttp({
|
|
44
|
+
port,
|
|
45
|
+
host
|
|
46
|
+
});
|
|
47
|
+
if (verbose) console.log(`${platformName} MCP Server started on http://${result.host}:${result.port}/mcp`);
|
|
48
|
+
return result;
|
|
49
|
+
}
|
|
50
|
+
};
|
|
51
|
+
}
|
|
52
|
+
export { createMCPServerLauncher };
|
|
@@ -34,15 +34,22 @@ const PLAYWRIGHT_EXAMPLE_CODE = `
|
|
|
34
34
|
IMPORTANT: Follow these exact type signatures for AI functions:
|
|
35
35
|
|
|
36
36
|
// Type signatures for AI functions:
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
37
|
+
aiAct(prompt: string, options?: { cacheable?: boolean }): Promise<void>
|
|
38
|
+
aiInput(text: string, locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
|
|
39
|
+
aiTap(locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
|
|
40
|
+
aiHover(locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
|
|
41
|
+
aiDoubleClick(locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
|
|
42
|
+
aiKeyboardPress(key: string, locate?: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
|
|
43
|
+
aiScroll(locate: string | undefined, options: {
|
|
44
|
+
direction?: 'up' | 'down' | 'left' | 'right',
|
|
45
|
+
scrollType?: 'singleAction' | 'scrollToBottom' | 'scrollToTop' | 'scrollToRight' | 'scrollToLeft',
|
|
46
|
+
distance?: number | null,
|
|
47
|
+
deepThink?: boolean,
|
|
48
|
+
xpath?: string,
|
|
49
|
+
cacheable?: boolean
|
|
44
50
|
}): Promise<void>
|
|
45
|
-
aiAssert(assertion: string): Promise<void>
|
|
51
|
+
aiAssert(assertion: string, options?: { errorMessage?: string }): Promise<void>
|
|
52
|
+
aiWaitFor(prompt: string, options?: { timeout?: number }): Promise<void>
|
|
46
53
|
aiQuery<T>(queryObject: Record<string, string>): Promise<T> // Extracts data from page based on descriptions
|
|
47
54
|
|
|
48
55
|
// examples:
|
|
@@ -64,12 +71,14 @@ test.beforeEach(async ({ page }) => {
|
|
|
64
71
|
});
|
|
65
72
|
|
|
66
73
|
test('ai shop', async ({
|
|
74
|
+
aiAct,
|
|
67
75
|
aiInput,
|
|
68
76
|
aiAssert,
|
|
69
77
|
aiQuery,
|
|
70
78
|
aiKeyboardPress,
|
|
71
79
|
aiHover,
|
|
72
80
|
aiTap,
|
|
81
|
+
aiWaitFor,
|
|
73
82
|
agentForPage,
|
|
74
83
|
page,
|
|
75
84
|
}) => {
|
|
@@ -115,7 +124,7 @@ tasks:
|
|
|
115
124
|
locate: 'input field description'
|
|
116
125
|
- aiScroll:
|
|
117
126
|
direction: down/up
|
|
118
|
-
scrollType:
|
|
127
|
+
scrollType: scrollToBottom/scrollToTop/singleAction
|
|
119
128
|
- aiAssert: "expected state"
|
|
120
129
|
- sleep: milliseconds
|
|
121
130
|
|
|
@@ -188,7 +197,7 @@ tasks:
|
|
|
188
197
|
# Scroll globally or on an element described by a prompt.
|
|
189
198
|
- aiScroll:
|
|
190
199
|
direction: 'up' # or 'down' | 'left' | 'right'
|
|
191
|
-
scrollType: '
|
|
200
|
+
scrollType: 'singleAction' # or 'scrollToTop' | 'scrollToBottom' | 'scrollToLeft' | 'scrollToRight'
|
|
192
201
|
distance: <number> # Optional, the scroll distance in pixels.
|
|
193
202
|
locate: <prompt> # Optional, the element to scroll on.
|
|
194
203
|
deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
|
|
@@ -81,7 +81,7 @@ const MAX_SESSIONS = 100;
|
|
|
81
81
|
class BaseMCPServer {
|
|
82
82
|
async initializeToolsManager() {
|
|
83
83
|
(0, utils_namespaceObject.setIsMcp)(true);
|
|
84
|
-
this.toolsManager = this.createToolsManager();
|
|
84
|
+
this.toolsManager = this.providedToolsManager || this.createToolsManager();
|
|
85
85
|
try {
|
|
86
86
|
await this.toolsManager.initTools();
|
|
87
87
|
} catch (error) {
|
|
@@ -131,6 +131,11 @@ class BaseMCPServer {
|
|
|
131
131
|
};
|
|
132
132
|
process.once('SIGINT', cleanup);
|
|
133
133
|
process.once('SIGTERM', cleanup);
|
|
134
|
+
return {
|
|
135
|
+
close: async ()=>{
|
|
136
|
+
this.performCleanup();
|
|
137
|
+
}
|
|
138
|
+
};
|
|
134
139
|
}
|
|
135
140
|
async launchHttp(options) {
|
|
136
141
|
if (!Number.isInteger(options.port) || options.port < 1 || options.port > 65535) throw new Error(`Invalid port number: ${options.port}. Port must be between 1 and 65535.`);
|
|
@@ -191,6 +196,27 @@ class BaseMCPServer {
|
|
|
191
196
|
});
|
|
192
197
|
const cleanupInterval = this.startSessionCleanup(sessions);
|
|
193
198
|
this.setupHttpShutdownHandlers(server, sessions, cleanupInterval);
|
|
199
|
+
return {
|
|
200
|
+
port: options.port,
|
|
201
|
+
host,
|
|
202
|
+
close: async ()=>{
|
|
203
|
+
clearInterval(cleanupInterval);
|
|
204
|
+
for (const session of sessions.values())try {
|
|
205
|
+
await session.transport.close();
|
|
206
|
+
} catch (error) {
|
|
207
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
208
|
+
console.error(`Failed to close session ${session.transport.sessionId}: ${message}`);
|
|
209
|
+
}
|
|
210
|
+
sessions.clear();
|
|
211
|
+
return new Promise((resolve)=>{
|
|
212
|
+
server.close((err)=>{
|
|
213
|
+
if (err) console.error('Error closing HTTP server:', err);
|
|
214
|
+
this.performCleanup();
|
|
215
|
+
resolve();
|
|
216
|
+
});
|
|
217
|
+
});
|
|
218
|
+
}
|
|
219
|
+
};
|
|
194
220
|
}
|
|
195
221
|
async createHttpSession(sessions) {
|
|
196
222
|
const transport = new streamableHttp_js_namespaceObject.StreamableHTTPServerTransport({
|
|
@@ -275,16 +301,18 @@ class BaseMCPServer {
|
|
|
275
301
|
getToolsManager() {
|
|
276
302
|
return this.toolsManager;
|
|
277
303
|
}
|
|
278
|
-
constructor(config){
|
|
304
|
+
constructor(config, toolsManager){
|
|
279
305
|
_define_property(this, "mcpServer", void 0);
|
|
280
306
|
_define_property(this, "toolsManager", void 0);
|
|
281
307
|
_define_property(this, "config", void 0);
|
|
308
|
+
_define_property(this, "providedToolsManager", void 0);
|
|
282
309
|
this.config = config;
|
|
283
310
|
this.mcpServer = new mcp_js_namespaceObject.McpServer({
|
|
284
311
|
name: config.name,
|
|
285
312
|
version: config.version,
|
|
286
313
|
description: config.description
|
|
287
314
|
});
|
|
315
|
+
this.providedToolsManager = toolsManager;
|
|
288
316
|
}
|
|
289
317
|
}
|
|
290
318
|
exports.BaseMCPServer = __webpack_exports__.BaseMCPServer;
|
|
@@ -93,6 +93,12 @@ class BaseMidsceneTools {
|
|
|
93
93
|
async closeBrowser() {
|
|
94
94
|
await this.agent?.destroy?.();
|
|
95
95
|
}
|
|
96
|
+
getToolDefinitions() {
|
|
97
|
+
return this.toolDefinitions;
|
|
98
|
+
}
|
|
99
|
+
setAgent(agent) {
|
|
100
|
+
this.agent = agent;
|
|
101
|
+
}
|
|
96
102
|
buildScreenshotContent(screenshot) {
|
|
97
103
|
const { mimeType, body } = (0, img_namespaceObject.parseBase64)(screenshot);
|
|
98
104
|
return [
|
package/dist/lib/mcp/index.js
CHANGED
|
@@ -9,6 +9,9 @@ var __webpack_modules__ = {
|
|
|
9
9
|
"./inject-report-html-plugin" (module) {
|
|
10
10
|
module.exports = require("./inject-report-html-plugin.js");
|
|
11
11
|
},
|
|
12
|
+
"./launcher-helper" (module) {
|
|
13
|
+
module.exports = require("./launcher-helper.js");
|
|
14
|
+
},
|
|
12
15
|
"./tool-generator" (module) {
|
|
13
16
|
module.exports = require("./tool-generator.js");
|
|
14
17
|
},
|
|
@@ -79,6 +82,10 @@ var __webpack_exports__ = {};
|
|
|
79
82
|
var __rspack_reexport = {};
|
|
80
83
|
for(const __rspack_import_key in _inject_report_html_plugin__rspack_import_4)if ("default" !== __rspack_import_key) __rspack_reexport[__rspack_import_key] = ()=>_inject_report_html_plugin__rspack_import_4[__rspack_import_key];
|
|
81
84
|
__webpack_require__.d(__webpack_exports__, __rspack_reexport);
|
|
85
|
+
var _launcher_helper__rspack_import_5 = __webpack_require__("./launcher-helper");
|
|
86
|
+
var __rspack_reexport = {};
|
|
87
|
+
for(const __rspack_import_key in _launcher_helper__rspack_import_5)if ("default" !== __rspack_import_key) __rspack_reexport[__rspack_import_key] = ()=>_launcher_helper__rspack_import_5[__rspack_import_key];
|
|
88
|
+
__webpack_require__.d(__webpack_exports__, __rspack_reexport);
|
|
82
89
|
})();
|
|
83
90
|
for(var __rspack_i in __webpack_exports__)exports[__rspack_i] = __webpack_exports__[__rspack_i];
|
|
84
91
|
Object.defineProperty(exports, '__esModule', {
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __webpack_require__ = {};
|
|
3
|
+
(()=>{
|
|
4
|
+
__webpack_require__.d = (exports1, definition)=>{
|
|
5
|
+
for(var key in definition)if (__webpack_require__.o(definition, key) && !__webpack_require__.o(exports1, key)) Object.defineProperty(exports1, key, {
|
|
6
|
+
enumerable: true,
|
|
7
|
+
get: definition[key]
|
|
8
|
+
});
|
|
9
|
+
};
|
|
10
|
+
})();
|
|
11
|
+
(()=>{
|
|
12
|
+
__webpack_require__.o = (obj, prop)=>Object.prototype.hasOwnProperty.call(obj, prop);
|
|
13
|
+
})();
|
|
14
|
+
(()=>{
|
|
15
|
+
__webpack_require__.r = (exports1)=>{
|
|
16
|
+
if ('undefined' != typeof Symbol && Symbol.toStringTag) Object.defineProperty(exports1, Symbol.toStringTag, {
|
|
17
|
+
value: 'Module'
|
|
18
|
+
});
|
|
19
|
+
Object.defineProperty(exports1, '__esModule', {
|
|
20
|
+
value: true
|
|
21
|
+
});
|
|
22
|
+
};
|
|
23
|
+
})();
|
|
24
|
+
var __webpack_exports__ = {};
|
|
25
|
+
__webpack_require__.r(__webpack_exports__);
|
|
26
|
+
__webpack_require__.d(__webpack_exports__, {
|
|
27
|
+
createMCPServerLauncher: ()=>createMCPServerLauncher
|
|
28
|
+
});
|
|
29
|
+
function createMCPServerLauncher(config) {
|
|
30
|
+
const { agent, platformName, ToolsManagerClass, MCPServerClass } = config;
|
|
31
|
+
function validateAgent() {
|
|
32
|
+
const device = agent.interface;
|
|
33
|
+
if (!device) throw new Error(`Agent must have an 'interface' property that references the underlying device.
|
|
34
|
+
Please ensure your agent instance is properly initialized with a device interface.
|
|
35
|
+
Expected: agent.interface to be defined, but got: ${typeof device}
|
|
36
|
+
Solution: Check that your agent constructor properly sets the interface property.`);
|
|
37
|
+
}
|
|
38
|
+
function createToolsManager() {
|
|
39
|
+
const toolsManager = new ToolsManagerClass();
|
|
40
|
+
toolsManager.agent = agent;
|
|
41
|
+
return toolsManager;
|
|
42
|
+
}
|
|
43
|
+
function logStartupInfo(mode, additionalInfo) {
|
|
44
|
+
const device = agent.interface;
|
|
45
|
+
console.log(`Starting Midscene ${platformName} MCP Server (${mode})...`);
|
|
46
|
+
console.log(`Agent: ${agent.constructor.name}`);
|
|
47
|
+
console.log(`Device: ${device.constructor.name}`);
|
|
48
|
+
if (additionalInfo?.port !== void 0) console.log(`Port: ${additionalInfo.port}`);
|
|
49
|
+
if (additionalInfo?.host) console.log(`Host: ${additionalInfo.host}`);
|
|
50
|
+
}
|
|
51
|
+
return {
|
|
52
|
+
async launch (options = {}) {
|
|
53
|
+
const { verbose = true } = options;
|
|
54
|
+
validateAgent();
|
|
55
|
+
if (verbose) logStartupInfo('stdio');
|
|
56
|
+
const toolsManager = createToolsManager();
|
|
57
|
+
const server = new MCPServerClass(toolsManager);
|
|
58
|
+
const result = await server.launch();
|
|
59
|
+
if (verbose) console.log(`${platformName} MCP Server started (stdio mode)`);
|
|
60
|
+
return result;
|
|
61
|
+
},
|
|
62
|
+
async launchHttp (options) {
|
|
63
|
+
const { port, host = 'localhost', verbose = true } = options;
|
|
64
|
+
validateAgent();
|
|
65
|
+
if (verbose) logStartupInfo('HTTP', {
|
|
66
|
+
port,
|
|
67
|
+
host
|
|
68
|
+
});
|
|
69
|
+
const toolsManager = createToolsManager();
|
|
70
|
+
const server = new MCPServerClass(toolsManager);
|
|
71
|
+
const result = await server.launchHttp({
|
|
72
|
+
port,
|
|
73
|
+
host
|
|
74
|
+
});
|
|
75
|
+
if (verbose) console.log(`${platformName} MCP Server started on http://${result.host}:${result.port}/mcp`);
|
|
76
|
+
return result;
|
|
77
|
+
}
|
|
78
|
+
};
|
|
79
|
+
}
|
|
80
|
+
exports.createMCPServerLauncher = __webpack_exports__.createMCPServerLauncher;
|
|
81
|
+
for(var __rspack_i in __webpack_exports__)if (-1 === [
|
|
82
|
+
"createMCPServerLauncher"
|
|
83
|
+
].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
|
|
84
|
+
Object.defineProperty(exports, '__esModule', {
|
|
85
|
+
value: true
|
|
86
|
+
});
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export declare const PLAYWRIGHT_EXAMPLE_CODE = "\n// Reference the following code to generate Midscene test cases\n// The following is test code for Midscene AI, for reference\n// The following is Playwright syntax, you can use Playwright to assist in test generation\nIMPORTANT: Follow these exact type signatures for AI functions:\n\n// Type signatures for AI functions:\naiInput(
|
|
2
|
-
export declare const YAML_EXAMPLE_CODE = "\n1. Format:\n\nweb:\n url: \"starting_url\"\n viewportWidth: 1280\n viewportHeight: 960\n\ntasks:\n - name: \"descriptive task name\"\n flow:\n - aiTap: \"element description\"\n - aiInput: 'text value'\n locate: 'input field description'\n - aiScroll:\n direction: down/up\n scrollType:
|
|
1
|
+
export declare const PLAYWRIGHT_EXAMPLE_CODE = "\n// Reference the following code to generate Midscene test cases\n// The following is test code for Midscene AI, for reference\n// The following is Playwright syntax, you can use Playwright to assist in test generation\nIMPORTANT: Follow these exact type signatures for AI functions:\n\n// Type signatures for AI functions:\naiAct(prompt: string, options?: { cacheable?: boolean }): Promise<void>\naiInput(text: string, locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>\naiTap(locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>\naiHover(locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>\naiDoubleClick(locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>\naiKeyboardPress(key: string, locate?: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>\naiScroll(locate: string | undefined, options: {\n direction?: 'up' | 'down' | 'left' | 'right',\n scrollType?: 'singleAction' | 'scrollToBottom' | 'scrollToTop' | 'scrollToRight' | 'scrollToLeft',\n distance?: number | null,\n deepThink?: boolean,\n xpath?: string,\n cacheable?: boolean\n}): Promise<void>\naiAssert(assertion: string, options?: { errorMessage?: string }): Promise<void>\naiWaitFor(prompt: string, options?: { timeout?: number }): Promise<void>\naiQuery<T>(queryObject: Record<string, string>): Promise<T> // Extracts data from page based on descriptions\n\n// examples:\n// Reference the following code to generate Midscene test cases\n// The following is test code for Midscene AI, for reference\n// The following is Playwright syntax, you can use Playwright to assist in test generation\nimport { test as base } from '@playwright/test';\nimport type { PlayWrightAiFixtureType } from '@midscene/web/playwright';\nimport { 
PlaywrightAiFixture } from '@midscene/web/playwright';\n\nconst test = base.extend<PlayWrightAiFixtureType>(PlaywrightAiFixture({\n waitForNetworkIdleTimeout: 2000, // optional, the timeout for waiting for network idle between each action, default is 2000ms\n}));\n\n\ntest.beforeEach(async ({ page }) => {\n await page.goto('https://www.xxx.com/');\n await page.setViewportSize({ width: 1920, height: 1080 });\n});\n\ntest('ai shop', async ({\n aiAct,\n aiInput,\n aiAssert,\n aiQuery,\n aiKeyboardPress,\n aiHover,\n aiTap,\n aiWaitFor,\n agentForPage,\n page,\n}) => {\n // login\n await aiAssert('The page shows the login interface');\n await aiInput('user_name', 'in user name input');\n await aiInput('password', 'in password input');\n await aiKeyboardPress('Enter', 'Login Button');\n\n // check the login success\n await aiWaitFor('The page shows that the loading is complete');\n await aiAssert('The current page shows the product detail page');\n\n // check the product info\n const dataA = await aiQuery({\n userInfo: 'User information in the format {name: string}',\n theFirstProductInfo: 'The first product info in the format {name: string, price: number}',\n });\n expect(dataA.theFirstProductInfo.name).toBe('xxx');\n expect(dataA.theFirstProductInfo.price).toBe(100);\n\n\n // add to cart\n await aiTap('click add to cart button');\n \n await aiTap('click right top cart icon');\n await aiAssert('The cart icon shows the number 1');\n});\n";
|
|
2
|
+
export declare const YAML_EXAMPLE_CODE = "\n1. Format:\n\nweb:\n url: \"starting_url\"\n viewportWidth: 1280\n viewportHeight: 960\n\ntasks:\n - name: \"descriptive task name\"\n flow:\n - aiTap: \"element description\"\n - aiInput: 'text value'\n locate: 'input field description'\n - aiScroll:\n direction: down/up\n scrollType: scrollToBottom/scrollToTop/singleAction\n - aiAssert: \"expected state\"\n - sleep: milliseconds\n\n2. Action Types:\n- aiTap: for clicks (natural language targeting)\n- aiInput: for text input with 'locate' field\n- aiScroll: with direction and scrollType\n- aiAssert: for validations\n- sleep: for delays (milliseconds)\n\n3. Best Practices:\n- Group related actions into logical tasks\n- Use natural language descriptions\n- Add deepThink: true for complex interactions\n- Keep task names concise but descriptive\n\n\n\nYAML type\ntasks:\n - name: <name>\n continueOnError: <boolean> # Optional, whether to continue to the next task on error, defaults to false.\n flow:\n # Auto Planning (.ai)\n # ----------------\n\n # Perform an interaction. `ai` is a shorthand for `aiAct`.\n - ai: <prompt>\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # This usage is the same as `ai`.\n - aiAct: <prompt>\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Instant Action (.aiTap, .aiDoubleClick, .aiHover, .aiInput, .aiKeyboardPress, .aiScroll)\n # ----------------\n\n # Tap an element described by a prompt.\n - aiTap: <prompt>\n deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. 
Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Double click an element described by a prompt.\n - aiDoubleClick: <prompt>\n deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Hover over an element described by a prompt.\n - aiHover: <prompt>\n deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Input text into an element described by a prompt.\n - aiInput: <final text content of the input>\n locate: <prompt>\n deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. 
Defaults to True.\n\n # Press a key (e.g., Enter, Tab, Escape) on an element described by a prompt.\n - aiKeyboardPress: <key>\n locate: <prompt>\n deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Scroll globally or on an element described by a prompt.\n - aiScroll:\n direction: 'up' # or 'down' | 'left' | 'right'\n scrollType: 'singleAction' # or 'scrollToTop' | 'scrollToBottom' | 'scrollToLeft' | 'scrollToRight'\n distance: <number> # Optional, the scroll distance in pixels.\n locate: <prompt> # Optional, the element to scroll on.\n deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Record the current screenshot with a description in the report file.\n - recordToReport: <title> # Optional, the title of the screenshot. 
If not provided, the title will be 'untitled'.\n content: <content> # Optional, the description of the screenshot.\n\n # Data Extraction\n # ----------------\n\n # Perform a query that returns a JSON object.\n - aiQuery: <prompt> # Remember to describe the format of the result in the prompt.\n name: <name> # The key for the query result in the JSON output.\n\n # More APIs\n # ----------------\n\n # Wait for a condition to be met, with a timeout (in ms, optional, defaults to 30000).\n - aiWaitFor: <prompt>\n timeout: <ms>\n\n # Perform an assertion.\n - aiAssert: <prompt>\n errorMessage: <error-message> # Optional, the error message to print if the assertion fails.\n\n # Wait for a specified amount of time.\n - sleep: <ms>\n\n # Execute a piece of JavaScript code in the web page context.\n - javascript: <javascript>\n name: <name> # Optional, assign a name to the return value, which will be used as a key in the JSON output.\n\n - name: <name>\n flow:\n # ...\n";
|
|
@@ -10,6 +10,20 @@ export interface HttpLaunchOptions {
|
|
|
10
10
|
port: number;
|
|
11
11
|
host?: string;
|
|
12
12
|
}
|
|
13
|
+
export interface LaunchMCPServerResult {
|
|
14
|
+
/**
|
|
15
|
+
* The MCP server port (for HTTP mode)
|
|
16
|
+
*/
|
|
17
|
+
port?: number;
|
|
18
|
+
/**
|
|
19
|
+
* The server host (for HTTP mode)
|
|
20
|
+
*/
|
|
21
|
+
host?: string;
|
|
22
|
+
/**
|
|
23
|
+
* Function to gracefully shutdown the MCP server
|
|
24
|
+
*/
|
|
25
|
+
close: () => Promise<void>;
|
|
26
|
+
}
|
|
13
27
|
/**
|
|
14
28
|
* CLI argument configuration for MCP servers
|
|
15
29
|
*/
|
|
@@ -23,7 +37,7 @@ export interface CLIArgs {
|
|
|
23
37
|
* Launch an MCP server based on CLI arguments
|
|
24
38
|
* Shared helper to reduce duplication across platform CLI entry points
|
|
25
39
|
*/
|
|
26
|
-
export declare function launchMCPServer(server: BaseMCPServer, args: CLIArgs): Promise<
|
|
40
|
+
export declare function launchMCPServer(server: BaseMCPServer, args: CLIArgs): Promise<LaunchMCPServerResult>;
|
|
27
41
|
/**
|
|
28
42
|
* Base MCP Server class with programmatic launch() API
|
|
29
43
|
* Each platform extends this to provide their own tools manager
|
|
@@ -32,9 +46,11 @@ export declare abstract class BaseMCPServer {
|
|
|
32
46
|
protected mcpServer: McpServer;
|
|
33
47
|
protected toolsManager?: IMidsceneTools;
|
|
34
48
|
protected config: BaseMCPServerConfig;
|
|
35
|
-
|
|
49
|
+
protected providedToolsManager?: IMidsceneTools;
|
|
50
|
+
constructor(config: BaseMCPServerConfig, toolsManager?: IMidsceneTools);
|
|
36
51
|
/**
|
|
37
52
|
* Platform-specific: create tools manager instance
|
|
53
|
+
* This is only called if no tools manager was provided in constructor
|
|
38
54
|
*/
|
|
39
55
|
protected abstract createToolsManager(): IMidsceneTools;
|
|
40
56
|
/**
|
|
@@ -48,12 +64,12 @@ export declare abstract class BaseMCPServer {
|
|
|
48
64
|
/**
|
|
49
65
|
* Initialize and launch the MCP server with stdio transport
|
|
50
66
|
*/
|
|
51
|
-
launch(): Promise<
|
|
67
|
+
launch(): Promise<LaunchMCPServerResult>;
|
|
52
68
|
/**
|
|
53
69
|
* Launch MCP server with HTTP transport
|
|
54
70
|
* Supports stateful sessions for web applications and service integration
|
|
55
71
|
*/
|
|
56
|
-
launchHttp(options: HttpLaunchOptions): Promise<
|
|
72
|
+
launchHttp(options: HttpLaunchOptions): Promise<LaunchMCPServerResult>;
|
|
57
73
|
/**
|
|
58
74
|
* Create a new HTTP session with transport
|
|
59
75
|
*/
|
|
@@ -44,6 +44,14 @@ export declare abstract class BaseMidsceneTools<TAgent extends BaseAgent = BaseA
|
|
|
44
44
|
* Cleanup method - destroy agent and release resources
|
|
45
45
|
*/
|
|
46
46
|
closeBrowser(): Promise<void>;
|
|
47
|
+
/**
|
|
48
|
+
* Get tool definitions
|
|
49
|
+
*/
|
|
50
|
+
getToolDefinitions(): ToolDefinition[];
|
|
51
|
+
/**
|
|
52
|
+
* Set agent for the tools manager
|
|
53
|
+
*/
|
|
54
|
+
setAgent(agent: TAgent): void;
|
|
47
55
|
/**
|
|
48
56
|
* Helper: Convert base64 screenshot to image content array
|
|
49
57
|
*/
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import type { BaseMCPServer } from './base-server';
|
|
2
|
+
import type { HttpLaunchOptions, LaunchMCPServerResult } from './base-server';
|
|
3
|
+
import type { IMidsceneTools } from './types';
|
|
4
|
+
export interface LaunchMCPServerOptions extends HttpLaunchOptions {
|
|
5
|
+
/**
|
|
6
|
+
* Whether to show server logs
|
|
7
|
+
* @default true
|
|
8
|
+
*/
|
|
9
|
+
verbose?: boolean;
|
|
10
|
+
}
|
|
11
|
+
/**
|
|
12
|
+
* Generic agent type (avoid importing from @midscene/core to prevent circular deps)
|
|
13
|
+
*/
|
|
14
|
+
export interface GenericAgent<TDevice = any> {
|
|
15
|
+
interface: TDevice;
|
|
16
|
+
constructor: {
|
|
17
|
+
name: string;
|
|
18
|
+
};
|
|
19
|
+
}
|
|
20
|
+
/**
|
|
21
|
+
* Additional information for logging server startup
|
|
22
|
+
*/
|
|
23
|
+
export interface StartupInfo {
|
|
24
|
+
port?: number;
|
|
25
|
+
host?: string;
|
|
26
|
+
}
|
|
27
|
+
export interface MCPServerLauncherConfig<AgentType extends GenericAgent = GenericAgent, ToolsManagerType extends IMidsceneTools = IMidsceneTools> {
|
|
28
|
+
agent: AgentType;
|
|
29
|
+
platformName: string;
|
|
30
|
+
ToolsManagerClass: new (...args: any[]) => ToolsManagerType;
|
|
31
|
+
MCPServerClass: new (toolsManager?: ToolsManagerType) => BaseMCPServer;
|
|
32
|
+
}
|
|
33
|
+
/**
|
|
34
|
+
* Create a generic MCP server launcher for a given agent, tools manager, and MCP server.
|
|
35
|
+
*
|
|
36
|
+
* This helper centralizes the common wiring logic used by platform-specific launchers:
|
|
37
|
+
* it constructs a tools manager, attaches the provided `agent` to it, then instantiates
|
|
38
|
+
* the `MCPServerClass` and exposes convenience methods to start the server over stdio
|
|
39
|
+
* (`launch`) or HTTP (`launchHttp`).
|
|
40
|
+
*
|
|
41
|
+
* Use this helper when adding a new platform-specific launcher or when you want to
|
|
42
|
+
* avoid duplicating boilerplate code for starting an MCP server. Typically, callers
|
|
43
|
+
* provide:
|
|
44
|
+
* - an `agent` instance that contains the underlying device on its `interface` property
|
|
45
|
+
* - a `ToolsManagerClass` that knows how to expose tools for that agent
|
|
46
|
+
* - an `MCPServerClass` that implements the MCP protocol and supports `launch` and
|
|
47
|
+
* `launchHttp` methods.
|
|
48
|
+
*
|
|
49
|
+
* The returned object has two methods:
|
|
50
|
+
* - `launch(options?)` to start the server using stdio transport
|
|
51
|
+
* - `launchHttp(options)` to start the server using HTTP transport
|
|
52
|
+
* Both methods accept a `verbose` flag to control console logging.
|
|
53
|
+
*
|
|
54
|
+
* @param config Configuration describing the agent, platform name (for logging),
|
|
55
|
+
* tools manager implementation, and MCP server implementation.
|
|
56
|
+
*
|
|
57
|
+
* @returns An object with `launch` and `launchHttp` methods to start the MCP server.
|
|
58
|
+
*
|
|
59
|
+
* @example
|
|
60
|
+
* ```typescript
|
|
61
|
+
* import { createMCPServerLauncher } from '@midscene/shared/mcp';
|
|
62
|
+
* import { Agent } from '@midscene/core/agent';
|
|
63
|
+
* import { WebMidsceneTools } from './web-tools';
|
|
64
|
+
* import { WebMCPServer } from './server';
|
|
65
|
+
*
|
|
66
|
+
* const agent = new Agent();
|
|
67
|
+
* const launcher = createMCPServerLauncher({
|
|
68
|
+
* agent,
|
|
69
|
+
* platformName: 'Web',
|
|
70
|
+
* ToolsManagerClass: WebMidsceneTools,
|
|
71
|
+
* MCPServerClass: WebMCPServer,
|
|
72
|
+
* });
|
|
73
|
+
*
|
|
74
|
+
* // Start with stdio
|
|
75
|
+
* await launcher.launch({ verbose: true });
|
|
76
|
+
*
|
|
77
|
+
* // Or start with HTTP
|
|
78
|
+
* await launcher.launchHttp({ port: 3000, host: 'localhost' });
|
|
79
|
+
* ```
|
|
80
|
+
*
|
|
81
|
+
* @internal
|
|
82
|
+
*/
|
|
83
|
+
export declare function createMCPServerLauncher<AgentType extends GenericAgent, ToolsManagerType extends IMidsceneTools>(config: MCPServerLauncherConfig<AgentType, ToolsManagerType>): {
|
|
84
|
+
/**
|
|
85
|
+
* Launch the MCP server with stdio transport
|
|
86
|
+
*/
|
|
87
|
+
launch(options?: {
|
|
88
|
+
verbose?: boolean;
|
|
89
|
+
}): Promise<LaunchMCPServerResult>;
|
|
90
|
+
/**
|
|
91
|
+
* Launch the MCP server with HTTP transport
|
|
92
|
+
*/
|
|
93
|
+
launchHttp(options: LaunchMCPServerOptions): Promise<LaunchMCPServerResult>;
|
|
94
|
+
};
|
|
@@ -59,6 +59,10 @@ export interface ToolDefinition<T = Record<string, unknown>> {
|
|
|
59
59
|
handler: ToolHandler<T>;
|
|
60
60
|
autoDestroy?: boolean;
|
|
61
61
|
}
|
|
62
|
+
/**
|
|
63
|
+
* Tool type for mcpKitForAgent return value
|
|
64
|
+
*/
|
|
65
|
+
export type Tool = ToolDefinition;
|
|
62
66
|
/**
|
|
63
67
|
* Action space item definition
|
|
64
68
|
* Note: Intentionally no index signature to maintain compatibility with DeviceAction
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@midscene/shared",
|
|
3
|
-
"version": "1.0.4",
|
|
3
|
+
"version": "1.0.5-beta-20251230124359.0",
|
|
4
4
|
"repository": "https://github.com/web-infra-dev/midscene",
|
|
5
5
|
"homepage": "https://midscenejs.com/",
|
|
6
6
|
"types": "./dist/types/index.d.ts",
|
|
@@ -79,6 +79,7 @@
|
|
|
79
79
|
"README.md"
|
|
80
80
|
],
|
|
81
81
|
"dependencies": {
|
|
82
|
+
"@modelcontextprotocol/sdk": "1.10.2",
|
|
82
83
|
"@silvia-odwyer/photon": "0.3.3",
|
|
83
84
|
"@silvia-odwyer/photon-node": "0.3.3",
|
|
84
85
|
"debug": "4.4.0",
|
|
@@ -90,7 +91,6 @@
|
|
|
90
91
|
},
|
|
91
92
|
"devDependencies": {
|
|
92
93
|
"@rslib/core": "^0.18.3",
|
|
93
|
-
"@modelcontextprotocol/sdk": "1.10.2",
|
|
94
94
|
"@types/debug": "4.1.12",
|
|
95
95
|
"@types/express": "^4.17.21",
|
|
96
96
|
"@types/node": "^18.0.0",
|
|
@@ -5,15 +5,22 @@ export const PLAYWRIGHT_EXAMPLE_CODE = `
|
|
|
5
5
|
IMPORTANT: Follow these exact type signatures for AI functions:
|
|
6
6
|
|
|
7
7
|
// Type signatures for AI functions:
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
8
|
+
aiAct(prompt: string, options?: { cacheable?: boolean }): Promise<void>
|
|
9
|
+
aiInput(text: string, locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
|
|
10
|
+
aiTap(locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
|
|
11
|
+
aiHover(locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
|
|
12
|
+
aiDoubleClick(locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
|
|
13
|
+
aiKeyboardPress(key: string, locate?: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
|
|
14
|
+
aiScroll(locate: string | undefined, options: {
|
|
15
|
+
direction?: 'up' | 'down' | 'left' | 'right',
|
|
16
|
+
scrollType?: 'singleAction' | 'scrollToBottom' | 'scrollToTop' | 'scrollToRight' | 'scrollToLeft',
|
|
17
|
+
distance?: number | null,
|
|
18
|
+
deepThink?: boolean,
|
|
19
|
+
xpath?: string,
|
|
20
|
+
cacheable?: boolean
|
|
15
21
|
}): Promise<void>
|
|
16
|
-
aiAssert(assertion: string): Promise<void>
|
|
22
|
+
aiAssert(assertion: string, options?: { errorMessage?: string }): Promise<void>
|
|
23
|
+
aiWaitFor(prompt: string, options?: { timeout?: number }): Promise<void>
|
|
17
24
|
aiQuery<T>(queryObject: Record<string, string>): Promise<T> // Extracts data from page based on descriptions
|
|
18
25
|
|
|
19
26
|
// examples:
|
|
@@ -35,12 +42,14 @@ test.beforeEach(async ({ page }) => {
|
|
|
35
42
|
});
|
|
36
43
|
|
|
37
44
|
test('ai shop', async ({
|
|
45
|
+
aiAct,
|
|
38
46
|
aiInput,
|
|
39
47
|
aiAssert,
|
|
40
48
|
aiQuery,
|
|
41
49
|
aiKeyboardPress,
|
|
42
50
|
aiHover,
|
|
43
51
|
aiTap,
|
|
52
|
+
aiWaitFor,
|
|
44
53
|
agentForPage,
|
|
45
54
|
page,
|
|
46
55
|
}) => {
|
|
@@ -87,7 +96,7 @@ tasks:
|
|
|
87
96
|
locate: 'input field description'
|
|
88
97
|
- aiScroll:
|
|
89
98
|
direction: down/up
|
|
90
|
-
scrollType:
|
|
99
|
+
scrollType: scrollToBottom/scrollToTop/singleAction
|
|
91
100
|
- aiAssert: "expected state"
|
|
92
101
|
- sleep: milliseconds
|
|
93
102
|
|
|
@@ -160,7 +169,7 @@ tasks:
|
|
|
160
169
|
# Scroll globally or on an element described by a prompt.
|
|
161
170
|
- aiScroll:
|
|
162
171
|
direction: 'up' # or 'down' | 'left' | 'right'
|
|
163
|
-
scrollType: '
|
|
172
|
+
scrollType: 'singleAction' # or 'scrollToTop' | 'scrollToBottom' | 'scrollToLeft' | 'scrollToRight'
|
|
164
173
|
distance: <number> # Optional, the scroll distance in pixels.
|
|
165
174
|
locate: <prompt> # Optional, the element to scroll on.
|
|
166
175
|
deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
|
package/src/mcp/base-server.ts
CHANGED
|
@@ -22,6 +22,23 @@ export interface HttpLaunchOptions {
|
|
|
22
22
|
host?: string;
|
|
23
23
|
}
|
|
24
24
|
|
|
25
|
+
export interface LaunchMCPServerResult {
|
|
26
|
+
/**
|
|
27
|
+
* The MCP server port (for HTTP mode)
|
|
28
|
+
*/
|
|
29
|
+
port?: number;
|
|
30
|
+
|
|
31
|
+
/**
|
|
32
|
+
* The server host (for HTTP mode)
|
|
33
|
+
*/
|
|
34
|
+
host?: string;
|
|
35
|
+
|
|
36
|
+
/**
|
|
37
|
+
* Function to gracefully shutdown the MCP server
|
|
38
|
+
*/
|
|
39
|
+
close: () => Promise<void>;
|
|
40
|
+
}
|
|
41
|
+
|
|
25
42
|
interface SessionData {
|
|
26
43
|
transport: StreamableHTTPServerTransport;
|
|
27
44
|
createdAt: Date;
|
|
@@ -50,7 +67,7 @@ export interface CLIArgs {
|
|
|
50
67
|
export function launchMCPServer(
|
|
51
68
|
server: BaseMCPServer,
|
|
52
69
|
args: CLIArgs,
|
|
53
|
-
): Promise<
|
|
70
|
+
): Promise<LaunchMCPServerResult> {
|
|
54
71
|
if (args.mode === 'http') {
|
|
55
72
|
return server.launchHttp({
|
|
56
73
|
port: Number.parseInt(args.port || '3000', 10),
|
|
@@ -72,18 +89,21 @@ export abstract class BaseMCPServer {
|
|
|
72
89
|
protected mcpServer: McpServer;
|
|
73
90
|
protected toolsManager?: IMidsceneTools;
|
|
74
91
|
protected config: BaseMCPServerConfig;
|
|
92
|
+
protected providedToolsManager?: IMidsceneTools;
|
|
75
93
|
|
|
76
|
-
constructor(config: BaseMCPServerConfig) {
|
|
94
|
+
constructor(config: BaseMCPServerConfig, toolsManager?: IMidsceneTools) {
|
|
77
95
|
this.config = config;
|
|
78
96
|
this.mcpServer = new McpServer({
|
|
79
97
|
name: config.name,
|
|
80
98
|
version: config.version,
|
|
81
99
|
description: config.description,
|
|
82
100
|
});
|
|
101
|
+
this.providedToolsManager = toolsManager;
|
|
83
102
|
}
|
|
84
103
|
|
|
85
104
|
/**
|
|
86
105
|
* Platform-specific: create tools manager instance
|
|
106
|
+
* This is only called if no tools manager was provided in constructor
|
|
87
107
|
*/
|
|
88
108
|
protected abstract createToolsManager(): IMidsceneTools;
|
|
89
109
|
|
|
@@ -92,7 +112,9 @@ export abstract class BaseMCPServer {
|
|
|
92
112
|
*/
|
|
93
113
|
private async initializeToolsManager(): Promise<void> {
|
|
94
114
|
setIsMcp(true);
|
|
95
|
-
|
|
115
|
+
|
|
116
|
+
// Use provided tools manager if available, otherwise create new one
|
|
117
|
+
this.toolsManager = this.providedToolsManager || this.createToolsManager();
|
|
96
118
|
|
|
97
119
|
try {
|
|
98
120
|
await this.toolsManager.initTools();
|
|
@@ -117,7 +139,7 @@ export abstract class BaseMCPServer {
|
|
|
117
139
|
/**
|
|
118
140
|
* Initialize and launch the MCP server with stdio transport
|
|
119
141
|
*/
|
|
120
|
-
public async launch(): Promise<
|
|
142
|
+
public async launch(): Promise<LaunchMCPServerResult> {
|
|
121
143
|
// Hijack stdout-based console methods to stderr for stdio mode
|
|
122
144
|
// This prevents them from breaking MCP JSON-RPC protocol on stdout
|
|
123
145
|
// Note: console.warn and console.error already output to stderr
|
|
@@ -170,13 +192,21 @@ export abstract class BaseMCPServer {
|
|
|
170
192
|
|
|
171
193
|
process.once('SIGINT', cleanup);
|
|
172
194
|
process.once('SIGTERM', cleanup);
|
|
195
|
+
|
|
196
|
+
return {
|
|
197
|
+
close: async () => {
|
|
198
|
+
this.performCleanup();
|
|
199
|
+
},
|
|
200
|
+
};
|
|
173
201
|
}
|
|
174
202
|
|
|
175
203
|
/**
|
|
176
204
|
* Launch MCP server with HTTP transport
|
|
177
205
|
* Supports stateful sessions for web applications and service integration
|
|
178
206
|
*/
|
|
179
|
-
public async launchHttp(
|
|
207
|
+
public async launchHttp(
|
|
208
|
+
options: HttpLaunchOptions,
|
|
209
|
+
): Promise<LaunchMCPServerResult> {
|
|
180
210
|
// Validate port number
|
|
181
211
|
if (
|
|
182
212
|
!Number.isInteger(options.port) ||
|
|
@@ -286,6 +316,36 @@ export abstract class BaseMCPServer {
|
|
|
286
316
|
|
|
287
317
|
const cleanupInterval = this.startSessionCleanup(sessions);
|
|
288
318
|
this.setupHttpShutdownHandlers(server, sessions, cleanupInterval);
|
|
319
|
+
|
|
320
|
+
return {
|
|
321
|
+
port: options.port,
|
|
322
|
+
host,
|
|
323
|
+
close: async () => {
|
|
324
|
+
clearInterval(cleanupInterval);
|
|
325
|
+
for (const session of sessions.values()) {
|
|
326
|
+
try {
|
|
327
|
+
await session.transport.close();
|
|
328
|
+
} catch (error: unknown) {
|
|
329
|
+
const message =
|
|
330
|
+
error instanceof Error ? error.message : String(error);
|
|
331
|
+
console.error(
|
|
332
|
+
`Failed to close session ${session.transport.sessionId}: ${message}`,
|
|
333
|
+
);
|
|
334
|
+
}
|
|
335
|
+
}
|
|
336
|
+
sessions.clear();
|
|
337
|
+
|
|
338
|
+
return new Promise<void>((resolve) => {
|
|
339
|
+
server.close((err) => {
|
|
340
|
+
if (err) {
|
|
341
|
+
console.error('Error closing HTTP server:', err);
|
|
342
|
+
}
|
|
343
|
+
this.performCleanup();
|
|
344
|
+
resolve();
|
|
345
|
+
});
|
|
346
|
+
});
|
|
347
|
+
},
|
|
348
|
+
};
|
|
289
349
|
}
|
|
290
350
|
|
|
291
351
|
/**
|
package/src/mcp/base-tools.ts
CHANGED
|
@@ -180,6 +180,20 @@ export abstract class BaseMidsceneTools<TAgent extends BaseAgent = BaseAgent>
|
|
|
180
180
|
await this.agent?.destroy?.();
|
|
181
181
|
}
|
|
182
182
|
|
|
183
|
+
/**
|
|
184
|
+
* Get tool definitions
|
|
185
|
+
*/
|
|
186
|
+
public getToolDefinitions(): ToolDefinition[] {
|
|
187
|
+
return this.toolDefinitions;
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
/**
|
|
191
|
+
* Set agent for the tools manager
|
|
192
|
+
*/
|
|
193
|
+
public setAgent(agent: TAgent): void {
|
|
194
|
+
this.agent = agent;
|
|
195
|
+
}
|
|
196
|
+
|
|
183
197
|
/**
|
|
184
198
|
* Helper: Convert base64 screenshot to image content array
|
|
185
199
|
*/
|
package/src/mcp/index.ts
CHANGED
(hunk not captured in this view: +1 -0)
package/src/mcp/launcher-helper.ts
ADDED
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
import type { BaseMCPServer } from './base-server';
|
|
2
|
+
import type { HttpLaunchOptions, LaunchMCPServerResult } from './base-server';
|
|
3
|
+
import type { IMidsceneTools } from './types';
|
|
4
|
+
|
|
5
|
+
/**
 * Options accepted by the launcher's HTTP entry point: the plain HTTP launch
 * options plus a switch for console output.
 */
export interface LaunchMCPServerOptions extends HttpLaunchOptions {
  /**
   * Print startup/ready messages to the console.
   * @default true
   */
  verbose?: boolean;
}
|
|
12
|
+
|
|
13
|
+
/**
 * Minimal structural view of an agent.
 *
 * Declared locally instead of importing the agent type from `@midscene/core`,
 * which would introduce a circular dependency.
 */
export interface GenericAgent<TDevice = any> {
  /** The underlying device the agent operates on. */
  interface: TDevice;
  /** Only the constructor's `name` is relied upon (for log output). */
  constructor: {
    name: string;
  };
}
|
|
20
|
+
|
|
21
|
+
/**
 * Optional transport details included in the startup log output.
 */
export interface StartupInfo {
  /** Port the server listens on, when applicable. */
  port?: number;
  /** Host the server binds to, when applicable. */
  host?: string;
}
|
|
28
|
+
|
|
29
|
+
/**
 * Everything `createMCPServerLauncher` needs to wire an agent into an MCP server.
 */
export interface MCPServerLauncherConfig<
  AgentType extends GenericAgent = GenericAgent,
  ToolsManagerType extends IMidsceneTools = IMidsceneTools,
> {
  /** Agent instance handed to the tools manager; must expose a device on `interface`. */
  agent: AgentType;
  /** Human-readable platform label used in log messages. */
  platformName: string;
  /** Tools manager class; the launcher constructs it without arguments. */
  ToolsManagerClass: new (...args: any[]) => ToolsManagerType;
  /** MCP server class; the launcher constructs it with the prepared tools manager. */
  MCPServerClass: new (toolsManager?: ToolsManagerType) => BaseMCPServer;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Create a generic MCP server launcher for a given agent, tools manager, and MCP server.
 *
 * This helper centralizes the wiring shared by platform-specific launchers: it
 * constructs a tools manager, attaches the provided `agent` to it, instantiates
 * `MCPServerClass` with that tools manager, and starts the server over stdio
 * (`launch`) or HTTP (`launchHttp`).
 *
 * Logging:
 * - Both methods accept a `verbose` flag (default `true`).
 * - In stdio mode every message is written with `console.error`. stdout carries
 *   the MCP JSON-RPC stream, and the server only redirects `console.log` once its
 *   own `launch()` runs, so the startup banner must not be printed with
 *   `console.log`.
 * - In HTTP mode messages are written with `console.log`.
 *
 * @param config Agent, platform name (for logging), tools manager class, and
 *   MCP server class.
 * @returns An object with `launch` and `launchHttp` methods. Each call builds a
 *   fresh tools manager and server instance.
 * @throws {Error} (as a rejected promise from `launch`/`launchHttp`) when
 *   `agent.interface` is missing.
 *
 * @example
 * ```typescript
 * import { createMCPServerLauncher } from '@midscene/shared/mcp';
 * import { Agent } from '@midscene/core/agent';
 * import { WebMidsceneTools } from './web-tools';
 * import { WebMCPServer } from './server';
 *
 * const agent = new Agent();
 * const launcher = createMCPServerLauncher({
 *   agent,
 *   platformName: 'Web',
 *   ToolsManagerClass: WebMidsceneTools,
 *   MCPServerClass: WebMCPServer,
 * });
 *
 * // Start with stdio
 * await launcher.launch({ verbose: true });
 *
 * // Or start with HTTP
 * await launcher.launchHttp({ port: 3000, host: 'localhost' });
 * ```
 *
 * @internal
 */
export function createMCPServerLauncher<
  AgentType extends GenericAgent,
  ToolsManagerType extends IMidsceneTools,
>(config: MCPServerLauncherConfig<AgentType, ToolsManagerType>) {
  const { agent, platformName, ToolsManagerClass, MCPServerClass } = config;

  type TransportMode = 'stdio' | 'HTTP';

  /**
   * Pick the console channel for a transport mode.
   * stdio must never write to stdout (it would corrupt the JSON-RPC stream);
   * the console method is looked up at call time so later console patching
   * by the server is still honoured.
   */
  function loggerFor(mode: TransportMode): (message: string) => void {
    return mode === 'stdio'
      ? (message) => console.error(message)
      : (message) => console.log(message);
  }

  /**
   * Validate that the agent has the required interface property
   * @throws {Error} If agent.interface is missing
   */
  function validateAgent(): void {
    const device = agent.interface;
    if (!device) {
      // `typeof null` is "object", which would make the message misleading.
      const actual = device === null ? 'null' : typeof device;
      throw new Error(
        [
          "Agent must have an 'interface' property that references the underlying device.",
          'Please ensure your agent instance is properly initialized with a device interface.',
          `Expected: agent.interface to be defined, but got: ${actual}`,
          'Solution: Check that your agent constructor properly sets the interface property.',
        ].join('\n'),
      );
    }
  }

  /**
   * Create a tools manager and hand it the agent.
   * Uses the manager's public `setAgent()` when it has one and falls back to
   * assigning the `agent` property directly for managers that do not.
   * @returns Configured tools manager instance
   */
  function createToolsManager(): ToolsManagerType {
    const toolsManager = new ToolsManagerClass();
    const receiver = toolsManager as ToolsManagerType & {
      setAgent?: (value: AgentType) => void;
      agent?: AgentType;
    };

    if (typeof receiver.setAgent === 'function') {
      receiver.setAgent(agent);
    } else {
      receiver.agent = agent;
    }
    return toolsManager;
  }

  /**
   * Log server startup information
   * @param mode - Transport mode ('stdio' or 'HTTP')
   * @param additionalInfo - Additional info to log (e.g., port, host)
   */
  function logStartupInfo(
    mode: TransportMode,
    additionalInfo?: StartupInfo,
  ): void {
    const log = loggerFor(mode);
    const device = agent.interface;

    log(`Starting Midscene ${platformName} MCP Server (${mode})...`);
    log(`Agent: ${agent.constructor.name}`);
    // Devices without a prototype/constructor should not break logging.
    log(`Device: ${device?.constructor?.name ?? 'unknown'}`);

    if (additionalInfo?.port !== undefined) {
      log(`Port: ${additionalInfo.port}`);
    }
    if (additionalInfo?.host) {
      log(`Host: ${additionalInfo.host}`);
    }
  }

  return {
    /**
     * Launch the MCP server with stdio transport
     */
    async launch(
      options: { verbose?: boolean } = {},
    ): Promise<LaunchMCPServerResult> {
      const { verbose = true } = options;

      validateAgent();

      if (verbose) {
        logStartupInfo('stdio');
      }

      const server = new MCPServerClass(createToolsManager());
      const result = await server.launch();

      if (verbose) {
        loggerFor('stdio')(`${platformName} MCP Server started (stdio mode)`);
      }

      return result;
    },

    /**
     * Launch the MCP server with HTTP transport
     */
    async launchHttp(
      options: LaunchMCPServerOptions,
    ): Promise<LaunchMCPServerResult> {
      const { port, host = 'localhost', verbose = true } = options;

      validateAgent();

      if (verbose) {
        logStartupInfo('HTTP', { port, host });
      }

      const server = new MCPServerClass(createToolsManager());
      const result = await server.launchHttp({ port, host });

      if (verbose) {
        // `host`/`port` are optional on the result; fall back to what was requested.
        const shownHost = result.host ?? host;
        const shownPort = result.port ?? port;
        loggerFor('HTTP')(
          `${platformName} MCP Server started on http://${shownHost}:${shownPort}/mcp`,
        );
      }

      return result;
    },
  };
}
|
package/src/mcp/types.ts
CHANGED
|
@@ -58,6 +58,11 @@ export interface ToolDefinition<T = Record<string, unknown>> {
|
|
|
58
58
|
autoDestroy?: boolean;
|
|
59
59
|
}
|
|
60
60
|
|
|
61
|
+
/**
|
|
62
|
+
* Tool type for mcpKitForAgent return value
|
|
63
|
+
*/
|
|
64
|
+
export type Tool = ToolDefinition;
|
|
65
|
+
|
|
61
66
|
/**
|
|
62
67
|
* Action space item definition
|
|
63
68
|
* Note: Intentionally no index signature to maintain compatibility with DeviceAction
|