midscene 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
midscene/__init__.py ADDED
@@ -0,0 +1,73 @@
1
+ """
2
+ midscene
3
+ ========
4
+
5
+ 将 [Midscene.js](https://github.com/web-infra-dev/midscene) AI 驱动的 UI 自动化
6
+ 能力桥接到 Python 测试框架。单包内按模块划分:
7
+
8
+ - Android 自动化::class:`MidsceneAgent`(``agent_android``)
9
+ - 网页自动化::class:`MidsceneWebAgent` + 驱动(``agent_web`` / ``drivers``)
10
+ - 共享底层:配置、异常、Node 运行时桥接、RPC 服务管理、:class:`BaseAgent`
11
+
12
+ 快速开始(Android)
13
+ -------------------
14
+ from midscene import MidsceneAgent
15
+
16
+ agent = MidsceneAgent("emulator-5554")
17
+ agent.ai_action("点击登录按钮")
18
+ agent.ai_assert("已进入用户首页")
19
+ agent.destroy()
20
+
21
+ 快速开始(Web)
22
+ ---------------
23
+ from midscene import MidsceneWebAgent
24
+
25
+ agent = MidsceneWebAgent("https://example.com")
26
+ agent.ai_action("点击更多信息链接")
27
+ agent.ai_assert("页面已跳转")
28
+ agent.destroy()
29
+ """
30
+
31
+ from __future__ import annotations
32
+
33
+ from .agent_android import ANDROID_SERVICE_SPEC, MidsceneAgent
34
+ from .agent_web import WEB_SERVICE_SPEC, MidsceneWebAgent
35
+ from .base_agent import BaseAgent
36
+ from .config import MidsceneConfig
37
+ from .drivers import BridgeDriver, PlaywrightDriver, PuppeteerDriver, WebDriver
38
+ from .exceptions import (
39
+ MidsceneConfigError,
40
+ MidsceneError,
41
+ MidsceneNodeServiceError,
42
+ MidsceneRPCError,
43
+ MidsceneSetupError,
44
+ )
45
+ from .node_service import NodeServiceManager
46
+ from .runtime import ServiceSpec
47
+
48
+ __all__ = [
49
+ "ANDROID_SERVICE_SPEC",
50
+ "WEB_SERVICE_SPEC",
51
+ "BaseAgent",
52
+ "BridgeDriver",
53
+ "MidsceneAgent",
54
+ "MidsceneConfig",
55
+ "MidsceneConfigError",
56
+ "MidsceneError",
57
+ "MidsceneNodeServiceError",
58
+ "MidsceneRPCError",
59
+ "MidsceneSetupError",
60
+ "MidsceneWebAgent",
61
+ "NodeServiceManager",
62
+ "PlaywrightDriver",
63
+ "PuppeteerDriver",
64
+ "ServiceSpec",
65
+ "WebDriver",
66
+ ]
67
+
68
# Resolve the installed distribution's version string. Any failure (metadata
# API unavailable, distribution not installed, ...) falls back to "dev".
try:
    from importlib.metadata import version
except Exception:
    __version__ = "dev"
else:
    try:
        __version__ = version("midscene")
    except Exception:
        __version__ = "dev"
@@ -0,0 +1,13 @@
1
+ {
2
+ "name": "midscene-android-service",
3
+ "version": "0.0.1",
4
+ "description": "Local JSON-RPC bridge service for midscene-android Python package",
5
+ "main": "service.js",
6
+ "private": true,
7
+ "engines": {
8
+ "node": ">=18.0.0"
9
+ },
10
+ "dependencies": {
11
+ "@midscene/android": "1.8.7"
12
+ }
13
+ }
@@ -0,0 +1,470 @@
1
+ /**
2
+ * midscene-android Node.js RPC Service
3
+ *
4
+ * 作为 Python 侧 MidsceneAgent 的后端,通过 JSON-RPC 2.0 over HTTP 通信。
5
+ * 由 Python 的 NodeServiceManager 启动,进程级单例,支持多 session 并发。
6
+ **/
7
+
8
+ 'use strict';
9
+
10
+ const {execFile} = require('child_process');
11
+ const http = require('http');
12
+ const fs = require('fs');
13
+ const path = require('path');
14
+ const {
15
+ AndroidAgent,
16
+ AndroidDevice,
17
+ getConnectedDevices
18
+ } = require('@midscene/android');
19
+
20
// Live sessions keyed by session id; each value is {device, agent, deviceId}.
const sessions = new Map();
// Monotonic counter mixed into generated session ids.
let sessionCounter = 0;

// Version string reported by `ping`, taken from the sibling package.json.
let SERVICE_VERSION = 'unknown';
try {
  const {version: declaredVersion} = require('./package.json');
  SERVICE_VERSION = declaredVersion || 'unknown';
} catch {
  // package.json could not be loaded: keep the 'unknown' fallback.
}
29
+
30
+ // ====================== Logging ======================
31
// Log file lives in the working directory the service was started from.
const LOG_FILE = path.join(process.cwd(), "midscene_service.log");
// Size cap for a single log file (5 MB); beyond it the file is rotated once.
const LOG_MAX_BYTES = 5 * 1024 * 1024;
// Set after the first failed write so the warning is printed only once.
let logFileWriteFailed = false;

/**
 * Rename the log file to `<LOG_FILE>.1` once it exceeds LOG_MAX_BYTES.
 * Only a single rotated generation is kept.
 */
function rotateLogIfNeeded() {
  try {
    if (fs.statSync(LOG_FILE).size > LOG_MAX_BYTES) {
      fs.renameSync(LOG_FILE, `${LOG_FILE}.1`);
    }
  } catch (_) {
    // File does not exist yet or stat/rename failed: nothing to rotate.
  }
}

/**
 * Append a timestamped line to the log file and echo it to stdout.
 *
 * File logging is best-effort: a failed write (read-only working directory,
 * full disk, ...) must not make the caller fail, so it is reported once on
 * stderr and the message is still echoed to stdout.
 *
 * @param {string} message Text to record.
 */
function log(message) {
  // hourCycle 'h23' keeps the hour in 00-23; `hour12: false` with en-US can
  // render midnight as "24".
  const timestamp = new Date().toLocaleTimeString("en-US", {hourCycle: "h23"});
  const line = `[${timestamp}] ${message}\n`;
  try {
    rotateLogIfNeeded();
    fs.appendFileSync(LOG_FILE, line, "utf-8");
  } catch (error) {
    if (!logFileWriteFailed) {
      logFileWriteFailed = true;
      console.error(`Unable to write log file ${LOG_FILE}: ${error.message}`);
    }
  }
  console.log(line.trim());
}
51
+
52
/**
 * Produce a new session id of the form `session_<epoch ms>_<counter>`.
 * Bumps the module-level `sessionCounter` on every call.
 */
function nextSessionId() {
  const sequence = ++sessionCounter;
  return `session_${Date.now()}_${sequence}`;
}
56
+
57
/**
 * Promise wrapper around `execFile`.
 *
 * @param {string} command Executable to run.
 * @param {string[]} args Arguments passed to the executable.
 * @param {object} [options] Extra `execFile` options; `windowsHide` is always forced to true.
 * @returns {Promise<string>} Resolves with the captured stdout. On failure it
 *   rejects with the `execFile` error, with `stdout` and `stderr` attached.
 */
function runCommand(command, args, options = {}) {
  const execOptions = {...options, windowsHide: true};
  return new Promise((resolve, reject) => {
    const onFinished = (failure, stdout, stderr) => {
      if (!failure) {
        resolve(stdout);
        return;
      }
      // Attach the captured output so callers can inspect it.
      failure.stdout = stdout;
      failure.stderr = stderr;
      reject(failure);
    };
    execFile(command, args, execOptions, onFinished);
  });
}
70
+
71
/**
 * Look up a live session in the `sessions` map.
 *
 * @param {string} sessionId Id returned by `createSession`.
 * @returns {object} The stored session record.
 * @throws {Error} `Session not found: <id>` when the id is unknown.
 */
function getSession(sessionId) {
  const found = sessions.get(sessionId);
  if (found) {
    return found;
  }
  throw new Error(`Session not found: ${sessionId}`);
}
78
+
79
/**
 * Return a shallow copy of `options` without entries whose value is `null`
 * or `undefined`, so they are not forwarded to the agent as explicit options.
 *
 * Only the object's own enumerable properties are copied; inherited ones are
 * ignored. Falsy-but-meaningful values (`0`, `''`, `false`) are kept.
 *
 * @param {object|null|undefined} options Raw option bag (may be missing).
 * @returns {object} A new plain object holding only the defined entries.
 */
function cleanOptions(options) {
  return Object.fromEntries(
    // `!= null` matches both null and undefined.
    Object.entries(options ?? {}).filter(([, value]) => value != null),
  );
}
91
+
92
/**
 * Parse the text printed by `adb devices` into `{udid, state}` records.
 *
 * When the `List of devices ...` header is present, only the lines after it
 * are considered, so informational output printed before the header (for
 * example an adb server version-mismatch notice) is not mistaken for a
 * device. Lines starting with `*` (adb daemon notices) are always skipped.
 * Without a header every non-empty line is treated as a candidate.
 *
 * @param {string|null|undefined} stdout Raw command output.
 * @returns {Array<{udid: string, state: string}>} One record per line that
 *   has at least two whitespace-separated fields; empty for empty input.
 */
function parseAdbDevices(stdout) {
  const lines = String(stdout ?? '')
    .split(/\r?\n/)
    .map((line) => line.trim());

  const headerIndex = lines.findIndex((line) => line.startsWith('List of devices'));
  const candidates = headerIndex === -1 ? lines : lines.slice(headerIndex + 1);

  return candidates
    .filter((line) => line && !line.startsWith('*'))
    .map((line) => {
      const [udid, state] = line.split(/\s+/);
      return {udid, state};
    })
    .filter((device) => device.udid && device.state);
}
103
+
104
/**
 * List connected Android devices as `{udid, state}` records.
 *
 * Strategy:
 *  1. Run `adb devices` directly (works when adb is only reachable via PATH).
 *  2. If that fails or yields nothing, fall back to the discovery helper
 *     exported by `@midscene/android`.
 *
 * @returns {Promise<Array<{udid: string, state: string}>>}
 * @throws {Error} When the fallback discovery fails as well.
 */
async function listConnectedDevices() {
  try {
    const stdout = await runCommand('adb', ['devices'], {timeout: 10000});
    const devices = parseAdbDevices(stdout);
    if (devices.length > 0) {
      return devices;
    }
  } catch (e) {
    log(`Manual adb devices failed: ${e.message}`);
  }

  try {
    const discovered = await getConnectedDevices();
    // The helper's entries are normalised defensively: a plain string is
    // treated as the udid, an object is expected to carry `udid` (and
    // optionally `state`). Mapping an object entry straight into `udid`
    // would nest the whole record inside it.
    return discovered
      .map((entry) =>
        typeof entry === 'string'
          ? {udid: entry, state: 'device'}
          : {udid: entry?.udid, state: entry?.state ?? 'device'},
      )
      .filter((device) => device.udid);
  } catch (error) {
    log(`Internal getConnectedDevices failed: ${error.message}`);
    throw new Error(`Unable to list Android devices. Please ensure 'adb' is in your PATH or ANDROID_HOME is set. Error: ${error.message}`);
  }
}
125
+
126
+ // ─── RPC Handlers ────────────────────────────────────────────────────────────
127
+
128
/**
 * JSON-RPC method table: each key is an RPC method name and each value
 * receives the request's `params` object and returns the `result` payload.
 * Handlers that need a session resolve it through `getSession`, which throws
 * for unknown ids (reported to the client as an RPC error).
 */
const handlers = {
  /**
   * Create a session: connect an AndroidDevice and wrap it in an AndroidAgent.
   *
   * When `deviceId` is omitted the connected devices are listed and the first
   * one whose state is `device` is used; if none is in that state the first
   * listed entry is used.
   *
   * params: { deviceId?, aiActionContext? }
   * returns: { sessionId, deviceId }
   */
  async createSession({deviceId, aiActionContext}) {
    let targetDeviceId = deviceId;
    if (!targetDeviceId) {
      const devices = await listConnectedDevices();
      if (devices.length === 0) {
        throw new Error("No connected Android devices found via ADB");
      }
      // Prefer a device that is actually usable over an offline or
      // unauthorized entry that merely happens to be listed first.
      const readyDevice = devices.find((candidate) => candidate.state === 'device');
      targetDeviceId = (readyDevice ?? devices[0]).udid;
      log(`Using targetDeviceId: ${targetDeviceId}`);
      log(`Using devices: ${JSON.stringify(devices)}`);
    }

    log(`Creating session for device: ${targetDeviceId}`);
    const device = new AndroidDevice(targetDeviceId);
    await device.connect();

    const agentOptions = aiActionContext ? {aiActionContext} : {};
    const agent = new AndroidAgent(device, agentOptions);

    const sessionId = nextSessionId();
    sessions.set(sessionId, {device, agent, deviceId: targetDeviceId});

    log(`Session created: ${sessionId} (device: ${targetDeviceId})`);
    return {sessionId, deviceId: targetDeviceId};
  },

  /**
   * Destroy a session and release its device connection.
   * Unknown ids are treated as already destroyed. Errors from the agent or
   * device teardown are logged and do not fail the call.
   *
   * params: { sessionId }
   */
  async destroySession({sessionId}) {
    const session = sessions.get(sessionId);
    if (!session) {
      return {ok: true};
    }
    log(`Destroying session: ${sessionId}`);
    try {
      await session.agent.destroy?.();
    } catch (e) {
      log(`Error destroying agent: ${e.message}`);
    }
    try {
      await session.device.disconnect?.();
    } catch (e) {
      log(`Error disconnecting device: ${e.message}`);
    }
    sessions.delete(sessionId);
    return {ok: true};
  },

  // ── Auto Planning ───────────────────────────────────────────────────────────

  /**
   * agent.aiAct() - let the AI plan and execute the prompt.
   * params: { sessionId, prompt }
   */
  async aiAct({sessionId, prompt}) {
    await getSession(sessionId).agent.aiAct(prompt);
    return {ok: true};
  },

  // ── Instant Actions ─────────────────────────────────────────────────────────

  /**
   * agent.aiTap() - tap the located element.
   * params: { sessionId, locate }
   */
  async aiTap({sessionId, locate}) {
    await getSession(sessionId).agent.aiTap(locate);
    return {ok: true};
  },

  /**
   * agent.aiInput() - type text into the located element.
   * params: { sessionId, locate, value }
   */
  async aiInput({sessionId, locate, value}) {
    await getSession(sessionId).agent.aiInput(locate, {value});
    return {ok: true};
  },

  /**
   * agent.aiClearInput() - clear the located input.
   * params: { sessionId, locate }
   */
  async aiClearInput({sessionId, locate}) {
    await getSession(sessionId).agent.aiClearInput(locate);
    return {ok: true};
  },

  /**
   * agent.aiScroll() - scroll.
   * params: { sessionId, locate, direction, scrollType?, distance? }
   * null/undefined options are dropped; `direction` is only sent when truthy.
   */
  async aiScroll({sessionId, locate, direction, scrollType, distance}) {
    const options = cleanOptions({scrollType, distance});
    if (direction) {
      options.direction = direction;
    }
    await getSession(sessionId).agent.aiScroll(locate, options);
    return {ok: true};
  },

  /**
   * agent.aiPinch() - pinch gesture.
   * params: { sessionId, locate, ...options } - every other non-null param is
   * forwarded to the agent as an option.
   */
  async aiPinch({sessionId, locate, ...rest}) {
    const options = cleanOptions(rest);
    await getSession(sessionId).agent.aiPinch(locate, options);
    return {ok: true};
  },

  /**
   * agent.aiLongPress() - long press.
   * params: { sessionId, locate, duration? }
   */
  async aiLongPress({sessionId, locate, duration}) {
    // A JSON `null` is treated like an omitted duration, so no explicit
    // `{duration: null}` reaches the agent.
    const options = duration == null ? undefined : {duration};
    await getSession(sessionId).agent.aiLongPress(locate, options);
    return {ok: true};
  },

  /**
   * agent.aiDoubleClick() - double click the located element.
   * params: { sessionId, locate }
   */
  async aiDoubleClick({sessionId, locate}) {
    await getSession(sessionId).agent.aiDoubleClick(locate);
    return {ok: true};
  },

  /**
   * agent.aiKeyboardPress() - press a key.
   * params: { sessionId, locate?, keyName }
   * Without `locate` only the key name is passed to the agent.
   */
  async aiKeyboardPress({sessionId, locate, keyName}) {
    if (locate === undefined || locate === null) {
      await getSession(sessionId).agent.aiKeyboardPress(keyName);
    } else {
      await getSession(sessionId).agent.aiKeyboardPress(locate, {keyName});
    }
    return {ok: true};
  },

  /**
   * agent.aiAsk() - free-form question.
   * params: { sessionId, prompt } -> { data }
   */
  async aiAsk({sessionId, prompt}) {
    const data = await getSession(sessionId).agent.aiAsk(prompt);
    return {data};
  },

  /**
   * agent.aiQuery() - structured data extraction.
   * params: { sessionId, dataDemand } -> { data }
   * dataDemand: the query description handed to the agent as-is, e.g. the
   * string '{title: string, price: number}[]'.
   */
  async aiQuery({sessionId, dataDemand}) {
    const data = await getSession(sessionId).agent.aiQuery(dataDemand);
    return {data};
  },

  /** agent.aiBoolean() - params: { sessionId, prompt } -> { data } */
  async aiBoolean({sessionId, prompt}) {
    const data = await getSession(sessionId).agent.aiBoolean(prompt);
    return {data};
  },

  /** agent.aiNumber() - params: { sessionId, prompt } -> { data } */
  async aiNumber({sessionId, prompt}) {
    const data = await getSession(sessionId).agent.aiNumber(prompt);
    return {data};
  },

  /** agent.aiString() - params: { sessionId, prompt } -> { data } */
  async aiString({sessionId, prompt}) {
    const data = await getSession(sessionId).agent.aiString(prompt);
    return {data};
  },

  /** agent.aiLocate() - params: { sessionId, locate } -> { data } */
  async aiLocate({sessionId, locate}) {
    const data = await getSession(sessionId).agent.aiLocate(locate);
    return {data};
  },

  /**
   * agent.aiAssert() - evaluate an assertion.
   * params: { sessionId, assertion } -> { pass, reason }
   * An error thrown by the agent is reported as `{pass: false, reason}`.
   */
  async aiAssert({sessionId, assertion}) {
    // Resolve the session outside the try so an unknown session surfaces as
    // an RPC error instead of being reported as a failed assertion.
    const {agent} = getSession(sessionId);
    try {
      await agent.aiAssert(assertion);
      return {pass: true, reason: null};
    } catch (error) {
      return {pass: false, reason: error.message};
    }
  },

  /**
   * agent.aiWaitFor() - wait until the assertion holds.
   * params: { sessionId, assertion, timeoutMs? }
   */
  async aiWaitFor({sessionId, assertion, timeoutMs}) {
    // Drop a null/undefined timeout, as the other option-taking handlers do.
    await getSession(sessionId).agent.aiWaitFor(assertion, cleanOptions({timeoutMs}));
    return {ok: true};
  },

  // ── Device & System Actions ──────────────────────────────────────────────────

  /** device.back() - params: { sessionId } */
  async back({sessionId}) {
    await getSession(sessionId).device.back();
    return {ok: true};
  },

  /** device.home() - params: { sessionId } */
  async home({sessionId}) {
    await getSession(sessionId).device.home();
    return {ok: true};
  },

  /** device.recentApps() - params: { sessionId } */
  async recentApps({sessionId}) {
    await getSession(sessionId).device.recentApps();
    return {ok: true};
  },

  /** device.launch() - params: { sessionId, packageName } */
  async launchApp({sessionId, packageName}) {
    await getSession(sessionId).device.launch(packageName);
    return {ok: true};
  },

  /** device.terminate() - params: { sessionId, packageName } */
  async terminateApp({sessionId, packageName}) {
    await getSession(sessionId).device.terminate(packageName);
    return {ok: true};
  },

  /** device.screenshotBase64() - params: { sessionId } -> { screenshot } */
  async getScreenshot({sessionId}) {
    const base64 = await getSession(sessionId).device.screenshotBase64();
    return {screenshot: base64};
  },

  // ── Advanced Automation ──────────────────────────────────────────────────────

  /** agent.setAIActContext() - params: { sessionId, aiActionContext } */
  async setAIActContext({sessionId, aiActionContext}) {
    getSession(sessionId).agent.setAIActContext(aiActionContext);
    return {ok: true};
  },

  /** agent.runYaml() - params: { sessionId, yamlContent } -> { result } */
  async runYaml({sessionId, yamlContent}) {
    const result = await getSession(sessionId).agent.runYaml(yamlContent);
    return {result};
  },

  /**
   * Read the agent's `reportFile` property.
   * params: { sessionId } -> { reportPath } (null when the property is unset)
   */
  async getReportFile({sessionId}) {
    const reportPath = getSession(sessionId).agent.reportFile;
    return {reportPath: reportPath || null};
  },

  /**
   * Describe a live session.
   * params: { sessionId } -> { status, deviceId, sessionId }
   * `status` is always "connected"; an unknown session throws instead.
   */
  async getStatus({sessionId}) {
    const session = getSession(sessionId);
    return {
      status: "connected",
      deviceId: session.deviceId,
      sessionId: sessionId
    };
  },

  /**
   * agent.runAdbShell() - run a shell command through the agent.
   * params: { sessionId, command, timeoutMs? } -> { output }
   */
  async runAdbShell({sessionId, command, timeoutMs}) {
    const output = await getSession(sessionId).agent.runAdbShell(
      command,
      // A JSON `null` timeout is treated like an omitted one.
      timeoutMs == null ? undefined : {timeout: timeoutMs},
    );
    return {output};
  },

  /** List connected devices - no params -> { devices: [{udid, state}] } */
  async getConnectedDevices() {
    return {devices: await listConnectedDevices()};
  },

  /** Liveness probe - returns the process id, service version and session count. */
  ping() {
    return {
      pong: true,
      pid: process.pid,
      version: SERVICE_VERSION,
      activeSessions: sessions.size
    };
  },
};
390
+
391
/**
 * HTTP front end of the RPC service. Only `POST /rpc` is served.
 *
 * The request body is a JSON object `{jsonrpc, id, method, params}`. The
 * response always has HTTP status 200 and carries either `result` or `error`,
 * except for transport-level problems (unknown route, unreadable body,
 * invalid JSON, non-object payload), which are answered with 404/400 and
 * `{error}`.
 */
const server = http.createServer(async (req, res) => {
  const sendJson = (statusCode, payload) => {
    res.writeHead(statusCode, {'Content-Type': 'application/json'});
    res.end(typeof payload === 'string' ? payload : JSON.stringify(payload));
  };

  if (req.method !== 'POST' || req.url !== '/rpc') {
    sendJson(404, {error: 'Not found'});
    return;
  }

  let body = '';
  try {
    body = await new Promise((resolve, reject) => {
      // Collect raw Buffers and decode once at the end: decoding chunk by
      // chunk corrupts multi-byte UTF-8 characters that straddle a chunk
      // boundary.
      const chunks = [];
      req.on('data', (chunk) => {
        chunks.push(chunk);
      });
      req.on('end', () => resolve(Buffer.concat(chunks).toString('utf-8')));
      req.on('error', reject);
    });
  } catch (_) {
    sendJson(400, {error: 'Failed to read request body'});
    return;
  }

  let rpcRequest;
  try {
    rpcRequest = JSON.parse(body);
  } catch (_) {
    sendJson(400, {error: 'Invalid JSON'});
    return;
  }

  // `null` (and other non-object JSON values) cannot be destructured as a
  // request; reject them here rather than throwing outside any try block.
  if (rpcRequest === null || typeof rpcRequest !== 'object') {
    sendJson(400, {error: 'Invalid Request'});
    return;
  }

  const {jsonrpc, id, method} = rpcRequest;
  // A JSON `null` for params is treated like an omitted params object.
  const params = rpcRequest.params ?? {};
  // Only methods defined on the table itself are callable; inherited names
  // such as "constructor" or "toString" must not resolve to a handler.
  const handler =
    typeof method === 'string' && Object.hasOwn(handlers, method)
      ? handlers[method]
      : undefined;

  let rpcResponse;
  if (!handler) {
    rpcResponse = {
      jsonrpc,
      id,
      error: {code: -32601, message: `Method not found: ${method}`},
    };
  } else {
    try {
      rpcResponse = {jsonrpc, id, result: await handler(params)};
    } catch (error) {
      rpcResponse = {
        jsonrpc,
        id,
        error: {
          code: -1,
          // Tolerate thrown values that are not Error instances.
          message: error?.message ?? String(error),
          stack: error?.stack,
        },
      };
    }
  }

  let payload;
  try {
    payload = JSON.stringify(rpcResponse);
  } catch (error) {
    // A result that cannot be serialised (e.g. circular) becomes an RPC
    // error instead of an unhandled rejection.
    payload = JSON.stringify({
      jsonrpc,
      id,
      error: {code: -32603, message: `Failed to serialize result: ${error.message}`},
    });
  }
  sendJson(200, payload);
});
448
+
449
// Port to bind; defaults to 0 when the PORT environment variable is not set.
const PORT = parseInt(process.env.PORT || '0', 10);

// Bind to 127.0.0.1 and, once listening, write the
// `MIDSCENE_SERVICE_READY:<port>` line to stdout.
server.listen(PORT, '127.0.0.1', () => {
  const {port} = server.address();
  log(`Service started on port ${port} (PID: ${process.pid})`);
  log(`Environment: PATH=${process.env.PATH}`);
  log(`Environment: ANDROID_HOME=${process.env.ANDROID_HOME}`);
  process.stdout.write(`MIDSCENE_SERVICE_READY:${port}\n`);
});
459
+
460
/**
 * Signal handler: destroy every open session (failures are ignored), then
 * stop the HTTP server and exit with code 0 once it has closed.
 */
async function gracefulShutdown() {
  const pendingTeardowns = [];
  // Iterate over a snapshot of the ids taken before any teardown starts.
  for (const sessionId of [...sessions.keys()]) {
    pendingTeardowns.push(
      handlers.destroySession({sessionId}).catch(() => {
      }),
    );
  }
  await Promise.allSettled(pendingTeardowns);
  server.close(() => process.exit(0));
}

// Both termination signals trigger the same shutdown path.
for (const signal of ['SIGTERM', 'SIGINT']) {
  process.on(signal, gracefulShutdown);
}
@@ -0,0 +1,14 @@
1
+ {
2
+ "name": "midscene-web-service",
3
+ "version": "0.1.0",
4
+ "description": "Local JSON-RPC bridge service for midscene-web Python package",
5
+ "main": "service.js",
6
+ "private": true,
7
+ "engines": {
8
+ "node": ">=18.0.0"
9
+ },
10
+ "dependencies": {
11
+ "@midscene/web": "1.8.7",
12
+ "puppeteer": "23.11.1"
13
+ }
14
+ }