@midscene/playground 1.8.0 → 1.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,4 @@
1
1
  import { existsSync, readFileSync, writeFileSync } from "node:fs";
2
- import node_http from "node:http";
3
2
  import { dirname, join, resolve as external_node_path_resolve } from "node:path";
4
3
  import { fileURLToPath } from "node:url";
5
4
  import { ReportActionDump, runConnectivityTest } from "@midscene/core";
@@ -11,7 +10,8 @@ import { getDebug } from "@midscene/shared/logger";
11
10
  import { uuid as utils_uuid } from "@midscene/shared/utils";
12
11
  import express from "express";
13
12
  import { executeAction, formatErrorMessage } from "./common.mjs";
14
- import { createInterfaceMjpegHub, writeMjpegFrame } from "./mjpeg-hub.mjs";
13
+ import { MjpegStreamHandler } from "./mjpeg-stream-handler.mjs";
14
+ import { PointerInputError, dispatchPointer } from "./pointer-dispatch.mjs";
15
15
  import { buildRuntimeInfo } from "./runtime-metadata.mjs";
16
16
  import "dotenv/config";
17
17
  function _define_property(obj, key, value) {
@@ -81,6 +81,19 @@ function locateFromPoint(x, y, fieldX, fieldY, description) {
81
81
  Math.round(requireNumber(y, fieldY))
82
82
  ], description);
83
83
  }
84
+ const POINTER_INTERACT_ACTIONS = new Set([
85
+ 'Tap',
86
+ 'DoubleClick',
87
+ 'LongPress',
88
+ 'Swipe',
89
+ 'DragAndDrop',
90
+ 'KeyboardPress',
91
+ 'Input',
92
+ 'Pinch'
93
+ ]);
94
+ function isPointerInteractActionType(actionType) {
95
+ return POINTER_INTERACT_ACTIONS.has(actionType);
96
+ }
84
97
  const buildLocateActionParams = (body, actionType)=>{
85
98
  const params = {
86
99
  locate: locateFromPoint(body.x, body.y, 'x', 'y', `manual ${actionType}`)
@@ -171,6 +184,10 @@ function isRecoverablePageSessionError(error) {
171
184
  return RECOVERABLE_PAGE_SESSION_ERROR_PATTERN.test(message);
172
185
  }
173
186
  class PlaygroundServer {
187
+ setActiveAgent(agent) {
188
+ this._activeConnection.agent = agent;
189
+ this._mjpegHandler.reset();
190
+ }
174
191
  get agent() {
175
192
  return this._activeConnection.agent;
176
193
  }
@@ -223,6 +240,7 @@ class PlaygroundServer {
223
240
  executionHooks: this._baseExecutionHooks,
224
241
  sidecars: this._baseSidecars
225
242
  };
243
+ this._mjpegHandler.reset();
226
244
  this.syncRuntimeState();
227
245
  }
228
246
  setPreparedPlatform(prepared) {
@@ -318,7 +336,7 @@ class PlaygroundServer {
318
336
  } catch (error) {
319
337
  console.warn('Failed to destroy old agent:', error);
320
338
  } finally{
321
- this._activeConnection.agent = null;
339
+ this.setActiveAgent(null);
322
340
  this._configDirty = false;
323
341
  }
324
342
  }
@@ -357,6 +375,7 @@ class PlaygroundServer {
357
375
  executionHooks: session.executionHooks || this._baseExecutionHooks,
358
376
  sidecars: sessionSidecars
359
377
  };
378
+ this._mjpegHandler.reset();
360
379
  this.sessionSetupState = 'ready';
361
380
  this.sessionSetupBlockingReason = void 0;
362
381
  this.syncRuntimeState();
@@ -414,7 +433,7 @@ class PlaygroundServer {
414
433
  console.log('Recreating agent to cancel current task...');
415
434
  await this.destroyCurrentAgent();
416
435
  if (this._activeConnection.agentFactory) try {
417
- this._activeConnection.agent = await this._activeConnection.agentFactory();
436
+ this.setActiveAgent(await this._activeConnection.agentFactory());
418
437
  this._agentReady = true;
419
438
  console.log('Agent recreated successfully');
420
439
  } catch (error) {
@@ -431,7 +450,7 @@ class PlaygroundServer {
431
450
  if (!this._activeConnection.agentFactory || !isRecoverablePageSessionError(error)) return null;
432
451
  debugMjpeg(`Recovering active agent after ${reason}:`, error);
433
452
  try {
434
- this._interfaceMjpegHub.stopProducer();
453
+ this._mjpegHandler.reset();
435
454
  await this.recreateAgent();
436
455
  return this._activeConnection.agent;
437
456
  } catch (recreateError) {
@@ -505,7 +524,7 @@ class PlaygroundServer {
505
524
  await this.destroyCurrentSession();
506
525
  const created = await this.sessionManager.createSession(req.body || {});
507
526
  await this.applyCreatedSession(created);
508
- if (!this._activeConnection.agent && this._activeConnection.agentFactory) this._activeConnection.agent = await this._activeConnection.agentFactory();
527
+ if (!this._activeConnection.agent && this._activeConnection.agentFactory) this.setActiveAgent(await this._activeConnection.agentFactory());
509
528
  if (this._configDirty && this._activeConnection.agentFactory) {
510
529
  this._configDirty = false;
511
530
  await this.recreateAgent();
@@ -637,7 +656,7 @@ class PlaygroundServer {
637
656
  console.log('AI config changed, recreating agent...');
638
657
  try {
639
658
  await this.destroyCurrentAgent();
640
- this._activeConnection.agent = await this._activeConnection.agentFactory();
659
+ this.setActiveAgent(await this._activeConnection.agentFactory());
641
660
  agent = this.getActiveAgentOrThrow();
642
661
  this._agentReady = true;
643
662
  console.log('Agent recreated with new config');
@@ -807,19 +826,7 @@ class PlaygroundServer {
807
826
  if (!agent) return res.status(409).json({
808
827
  error: 'No active session'
809
828
  });
810
- const nativeUrl = agent.interface?.mjpegStreamUrl;
811
- const recentlyFailed = false === this._nativeMjpegAvailable && null !== this._nativeMjpegFailedAt && Date.now() - this._nativeMjpegFailedAt < PlaygroundServer.MJPEG_NEGATIVE_CACHE_MS;
812
- if (nativeUrl && !recentlyFailed) {
813
- const proxyOk = await this.probeAndProxyNativeMjpeg(nativeUrl, req, res);
814
- if (proxyOk) return;
815
- }
816
- const interfaceStreamStarted = await this._interfaceMjpegHub.streamRequest(req, res, agent.interface, async (startupError)=>(await this.recoverActiveAgentAfterPreviewError(startupError, 'interface MJPEG startup'))?.interface ?? null);
817
- if (interfaceStreamStarted) return;
818
- const fallbackAgent = this._activeConnection.agent;
819
- if ('function' != typeof fallbackAgent?.interface?.screenshotBase64) return res.status(500).json({
820
- error: 'Screenshot method not available on current interface'
821
- });
822
- await this.startPollingMjpegStream(req, res);
829
+ await this._mjpegHandler.serve(req, res);
823
830
  });
824
831
  this._app.get('/interface-info', async (_req, res)=>{
825
832
  try {
@@ -878,26 +885,26 @@ class PlaygroundServer {
878
885
  if ('string' != typeof actionType || !actionType) return res.status(400).json({
879
886
  error: 'actionType is required'
880
887
  });
881
- if (!this.findInteractAction(agent, actionType) && !this.canRunBrowserChromeInteractAction(agent, actionType)) return res.status(404).json({
882
- error: `Action "${actionType}" is not available on the current device`
883
- });
884
- let params;
885
888
  try {
886
- params = buildInteractParams(actionType, req.body ?? {});
887
- } catch (error) {
888
- if (error instanceof InteractParamsValidationError) return res.status(400).json({
889
- error: error.message
890
- });
891
- const errorMessage = error instanceof Error ? error.message : 'Unknown error';
892
- console.error(`Failed to build interact params for "${actionType}": ${errorMessage}`);
893
- return res.status(500).json({
894
- error: errorMessage
889
+ const inputPrimitives = agent.interface.inputPrimitives;
890
+ if (inputPrimitives) {
891
+ await dispatchPointer(inputPrimitives, req.body ?? {}, ()=>agent.interface.size());
892
+ res.json({});
893
+ return;
894
+ }
895
+ if (!this.findInteractAction(agent, actionType) && !this.canRunBrowserChromeInteractAction(agent, actionType)) return res.status(404).json({
896
+ error: isPointerInteractActionType(actionType) ? 'Manual control is not supported on this device' : `Action "${actionType}" is not available on the current device`
895
897
  });
896
- }
897
- try {
898
+ const params = buildInteractParams(actionType, req.body ?? {});
898
899
  await this.runInteractAction(agent, actionType, params);
899
900
  res.json({});
900
901
  } catch (error) {
902
+ if (error instanceof PointerInputError) return res.status(error.statusCode).json({
903
+ error: error.message
904
+ });
905
+ if (error instanceof InteractParamsValidationError) return res.status(400).json({
906
+ error: error.message
907
+ });
901
908
  const recoveredAgent = await this.recoverActiveAgentAfterPreviewError(error, `manual interact action "${actionType}"`);
902
909
  if (recoveredAgent) return res.status(409).json({
903
910
  error: 'The page session was closed and has been recreated. Please retry the action.'
@@ -979,123 +986,6 @@ class PlaygroundServer {
979
986
  }
980
987
  });
981
988
  }
982
- probeAndProxyNativeMjpeg(nativeUrl, req, res) {
983
- return new Promise((resolve)=>{
984
- console.log(`MJPEG: trying native stream from ${nativeUrl}`);
985
- const proxyReq = node_http.get(nativeUrl, (proxyRes)=>{
986
- const statusCode = proxyRes.statusCode ?? 0;
987
- if (statusCode >= 400) {
988
- this._nativeMjpegAvailable = false;
989
- this._nativeMjpegFailedAt = Date.now();
990
- proxyRes.resume();
991
- debugMjpeg(`native stream returned HTTP ${statusCode}, using polling mode`);
992
- resolve(false);
993
- return;
994
- }
995
- this._nativeMjpegAvailable = true;
996
- this._nativeMjpegFailedAt = null;
997
- console.log('MJPEG: streaming via native WDA MJPEG server');
998
- const contentType = proxyRes.headers['content-type'];
999
- if (contentType) res.setHeader('Content-Type', contentType);
1000
- res.setHeader('Cache-Control', 'no-cache, no-store, must-revalidate');
1001
- res.setHeader('Connection', 'keep-alive');
1002
- proxyRes.pipe(res);
1003
- req.on('close', ()=>proxyReq.destroy());
1004
- resolve(true);
1005
- });
1006
- proxyReq.on('error', (err)=>{
1007
- this._nativeMjpegAvailable = false;
1008
- this._nativeMjpegFailedAt = Date.now();
1009
- debugMjpeg(`MJPEG: native stream unavailable (${err.message}), using polling mode`);
1010
- resolve(false);
1011
- });
1012
- });
1013
- }
1014
- probeNativeMjpegLiveness(nativeUrl) {
1015
- return new Promise((resolve)=>{
1016
- const probe = node_http.get(nativeUrl, (probeRes)=>{
1017
- const statusCode = probeRes.statusCode ?? 0;
1018
- const reachable = statusCode >= 200 && statusCode < 400;
1019
- probeRes.destroy();
1020
- resolve(reachable);
1021
- });
1022
- probe.setTimeout(1000, ()=>{
1023
- probe.destroy();
1024
- resolve(false);
1025
- });
1026
- probe.on('error', ()=>resolve(false));
1027
- });
1028
- }
1029
- async startPollingMjpegStream(req, res) {
1030
- const defaultMjpegFps = 10;
1031
- const maxMjpegFps = 30;
1032
- const maxErrorBackoffMs = 3000;
1033
- const errorLogThreshold = 3;
1034
- const nativeProbeIntervalMs = 3000;
1035
- const parsedFps = Number(req.query.fps);
1036
- const fps = Math.min(Math.max(Number.isNaN(parsedFps) ? defaultMjpegFps : parsedFps, 1), maxMjpegFps);
1037
- const interval = Math.round(1000 / fps);
1038
- const boundary = 'mjpeg-boundary';
1039
- console.log(`MJPEG: streaming via polling mode (${fps}fps)`);
1040
- res.setHeader('Content-Type', `multipart/x-mixed-replace; boundary=${boundary}`);
1041
- res.setHeader('Cache-Control', 'no-cache, no-store, must-revalidate');
1042
- res.setHeader('Connection', 'keep-alive');
1043
- let stopped = false;
1044
- let consecutiveErrors = 0;
1045
- const nativeUrl = this._activeConnection.agent?.interface?.mjpegStreamUrl;
1046
- let probeTimer;
1047
- if (nativeUrl) probeTimer = setInterval(async ()=>{
1048
- if (stopped) return;
1049
- const reachable = await this.probeNativeMjpegLiveness(nativeUrl);
1050
- if (reachable && !stopped) {
1051
- console.log('MJPEG: native stream came online, ending polling so client reconnects');
1052
- this._nativeMjpegAvailable = true;
1053
- this._nativeMjpegFailedAt = null;
1054
- stopped = true;
1055
- try {
1056
- res.destroy();
1057
- } catch {}
1058
- }
1059
- }, nativeProbeIntervalMs);
1060
- req.on('close', ()=>{
1061
- stopped = true;
1062
- if (probeTimer) clearInterval(probeTimer);
1063
- });
1064
- while(!stopped){
1065
- if (!this._agentReady) {
1066
- await new Promise((r)=>setTimeout(r, 200));
1067
- continue;
1068
- }
1069
- const frameStart = Date.now();
1070
- try {
1071
- const agent = this.getActiveAgentOrThrow();
1072
- const base64 = await agent.interface.screenshotBase64();
1073
- if (stopped) break;
1074
- consecutiveErrors = 0;
1075
- writeMjpegFrame(res, boundary, {
1076
- data: base64,
1077
- contentType: 'image/jpeg'
1078
- });
1079
- } catch (err) {
1080
- if (stopped) break;
1081
- const recoveredAgent = await this.recoverActiveAgentAfterPreviewError(err, 'polling MJPEG frame capture');
1082
- if (recoveredAgent) {
1083
- consecutiveErrors = 0;
1084
- continue;
1085
- }
1086
- consecutiveErrors++;
1087
- if (consecutiveErrors <= errorLogThreshold) console.error('MJPEG frame error:', err);
1088
- else if (consecutiveErrors === errorLogThreshold + 1) console.error('MJPEG: suppressing further errors, retrying silently...');
1089
- const backoff = Math.min(1000 * consecutiveErrors, maxErrorBackoffMs);
1090
- await new Promise((r)=>setTimeout(r, backoff));
1091
- continue;
1092
- }
1093
- const elapsed = Date.now() - frameStart;
1094
- const remaining = interval - elapsed;
1095
- if (remaining > 0) await new Promise((r)=>setTimeout(r, remaining));
1096
- }
1097
- if (probeTimer) clearInterval(probeTimer);
1098
- }
1099
989
  setupStaticRoutes() {
1100
990
  this._app.get('/', (_req, res)=>{
1101
991
  this.serveHtmlWithPorts(res);
@@ -1129,7 +1019,7 @@ class PlaygroundServer {
1129
1019
  async launch(port) {
1130
1020
  if (this._activeConnection.agentFactory && !this.sessionManager) {
1131
1021
  console.log('Initializing agent from factory function...');
1132
- this._activeConnection.agent = await this._activeConnection.agentFactory();
1022
+ this.setActiveAgent(await this._activeConnection.agentFactory());
1133
1023
  this._activeConnection.session = {
1134
1024
  connected: true,
1135
1025
  metadata: {}
@@ -1151,6 +1041,7 @@ class PlaygroundServer {
1151
1041
  await this.destroyCurrentSession().catch((error)=>{
1152
1042
  console.warn('Failed to destroy current session during shutdown:', error);
1153
1043
  });
1044
+ this._mjpegHandler.shutdown();
1154
1045
  return new Promise((resolve, reject)=>{
1155
1046
  if (this.server) {
1156
1047
  this.taskExecutionDumps = {};
@@ -1174,12 +1065,13 @@ class PlaygroundServer {
1174
1065
  _define_property(this, "id", void 0);
1175
1066
  _define_property(this, "scrcpyPort", void 0);
1176
1067
  _define_property(this, "_initialized", false);
1177
- _define_property(this, "_nativeMjpegAvailable", null);
1178
- _define_property(this, "_nativeMjpegFailedAt", null);
1179
- _define_property(this, "_interfaceMjpegHub", createInterfaceMjpegHub({
1180
- initialFrameTimeoutMs: PlaygroundServer.INTERFACE_MJPEG_INITIAL_FRAME_TIMEOUT_MS,
1181
- idleStopMs: PlaygroundServer.INTERFACE_MJPEG_IDLE_STOP_MS,
1182
- debug: debugMjpeg
1068
+ _define_property(this, "_mjpegHandler", new MjpegStreamHandler({
1069
+ getNativeUrl: ()=>this._activeConnection.agent?.interface?.mjpegStreamUrl,
1070
+ getActiveInterface: ()=>this._activeConnection.agent?.interface ?? null,
1071
+ takeScreenshot: ()=>this.getActiveAgentOrThrow().interface.screenshotBase64(),
1072
+ canTakeScreenshot: ()=>'function' == typeof this._activeConnection.agent?.interface?.screenshotBase64,
1073
+ isAgentReady: ()=>this._agentReady,
1074
+ recoverFromPreviewError: async (error, reason)=>(await this.recoverActiveAgentAfterPreviewError(error, reason))?.interface ?? null
1183
1075
  }));
1184
1076
  _define_property(this, "sessionManager", void 0);
1185
1077
  _define_property(this, "sessionSetupState", 'ready');
@@ -1206,12 +1098,9 @@ class PlaygroundServer {
1206
1098
  this.taskExecutionDumps = {};
1207
1099
  this.id = id || utils_uuid();
1208
1100
  if ('function' == typeof agent) this._activeConnection.agentFactory = agent;
1209
- else this._activeConnection.agent = agent || null;
1101
+ else this.setActiveAgent(agent || null);
1210
1102
  }
1211
1103
  }
1212
- _define_property(PlaygroundServer, "MJPEG_NEGATIVE_CACHE_MS", 10000);
1213
- _define_property(PlaygroundServer, "INTERFACE_MJPEG_INITIAL_FRAME_TIMEOUT_MS", 1500);
1214
- _define_property(PlaygroundServer, "INTERFACE_MJPEG_IDLE_STOP_MS", 2000);
1215
1104
  const server = PlaygroundServer;
1216
1105
  export { InteractParamsValidationError, PlaygroundServer, buildInteractParams, createManualExecutorContext, server as default, serializeZodField };
1217
1106