@dyyz1993/agent-browser 0.9.2 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110):
  1. package/dist/__tests__/utils/parseCli.d.ts +1 -0
  2. package/dist/__tests__/utils/parseCli.d.ts.map +1 -1
  3. package/dist/__tests__/utils/parseCli.js +18 -10
  4. package/dist/__tests__/utils/parseCli.js.map +1 -1
  5. package/dist/actions.d.ts.map +1 -1
  6. package/dist/actions.js +63 -3
  7. package/dist/actions.js.map +1 -1
  8. package/dist/browser.d.ts +46 -2
  9. package/dist/browser.d.ts.map +1 -1
  10. package/dist/browser.js +343 -13
  11. package/dist/browser.js.map +1 -1
  12. package/dist/cli/commands.d.ts.map +1 -1
  13. package/dist/cli/commands.js +8 -3
  14. package/dist/cli/commands.js.map +1 -1
  15. package/dist/cli/connection.d.ts.map +1 -1
  16. package/dist/cli/connection.js +39 -1
  17. package/dist/cli/connection.js.map +1 -1
  18. package/dist/cli/help.d.ts.map +1 -1
  19. package/dist/cli/help.js +27 -20
  20. package/dist/cli/help.js.map +1 -1
  21. package/dist/cli/output.d.ts.map +1 -1
  22. package/dist/cli/output.js +5 -0
  23. package/dist/cli/output.js.map +1 -1
  24. package/dist/cli.js +20 -0
  25. package/dist/cli.js.map +1 -1
  26. package/dist/daemon.d.ts.map +1 -1
  27. package/dist/daemon.js +147 -1
  28. package/dist/daemon.js.map +1 -1
  29. package/dist/message-bridge.d.ts.map +1 -1
  30. package/dist/message-bridge.js +22 -4
  31. package/dist/message-bridge.js.map +1 -1
  32. package/dist/openapi.d.ts +22 -0
  33. package/dist/openapi.d.ts.map +1 -0
  34. package/dist/openapi.js +382 -0
  35. package/dist/openapi.js.map +1 -0
  36. package/dist/protocol.d.ts.map +1 -1
  37. package/dist/protocol.js +18 -0
  38. package/dist/protocol.js.map +1 -1
  39. package/dist/recorder/inject.js +61 -134
  40. package/dist/stream-server-standalone.d.ts +10 -0
  41. package/dist/stream-server-standalone.d.ts.map +1 -1
  42. package/dist/stream-server-standalone.js +594 -74
  43. package/dist/stream-server-standalone.js.map +1 -1
  44. package/dist/stream-server.d.ts +67 -2
  45. package/dist/stream-server.d.ts.map +1 -1
  46. package/dist/stream-server.js +371 -51
  47. package/dist/stream-server.js.map +1 -1
  48. package/dist/swagger-ui.d.ts +6 -0
  49. package/dist/swagger-ui.d.ts.map +1 -0
  50. package/dist/swagger-ui.js +51 -0
  51. package/dist/swagger-ui.js.map +1 -0
  52. package/dist/test-live.d.ts +2 -0
  53. package/dist/test-live.d.ts.map +1 -0
  54. package/dist/test-live.js +333 -0
  55. package/dist/test-live.js.map +1 -0
  56. package/dist/types.d.ts +7 -1
  57. package/dist/types.d.ts.map +1 -1
  58. package/dist/types.js.map +1 -1
  59. package/dist/viewer-html.d.ts.map +1 -1
  60. package/dist/viewer-html.js +270 -58
  61. package/dist/viewer-html.js.map +1 -1
  62. package/dist/viewer-script.d.ts +20 -2
  63. package/dist/viewer-script.d.ts.map +1 -1
  64. package/dist/viewer-script.js +858 -102
  65. package/dist/viewer-script.js.map +1 -1
  66. package/package.json +1 -1
  67. package/scripts/postinstall.js +6 -32
  68. package/scripts/test-cli-help.sh +51 -0
  69. package/scripts/verify-form.sh +67 -0
  70. package/scripts/verify-login.sh +65 -0
  71. package/scripts/verify-recording.sh +80 -0
  72. package/scripts/verify-upload.sh +41 -0
  73. package/skills/agent-browser/SKILL.md +88 -1
  74. package/skills/agent-browser/references/commands.md +3 -0
  75. package/skills/agent-browser/references/network-monitoring.md +232 -0
  76. package/skills/agent-browser/references/profiling.md +120 -0
  77. package/skills/agent-browser/references/recorder.md +319 -0
  78. package/skills/agent-browser/templates/network-intercept-crawl.sh +255 -0
  79. package/dist/__tests__/test-iframe.d.ts +0 -2
  80. package/dist/__tests__/test-iframe.d.ts.map +0 -1
  81. package/dist/__tests__/test-iframe.js +0 -52
  82. package/dist/__tests__/test-iframe.js.map +0 -1
  83. package/dist/cli-new.d.ts +0 -3
  84. package/dist/cli-new.d.ts.map +0 -1
  85. package/dist/cli-new.js +0 -308
  86. package/dist/cli-new.js.map +0 -1
  87. package/dist/cli-old.d.ts +0 -3
  88. package/dist/cli-old.d.ts.map +0 -1
  89. package/dist/cli-old.js +0 -1101
  90. package/dist/cli-old.js.map +0 -1
  91. package/dist/recorder/binding.d.ts +0 -24
  92. package/dist/recorder/binding.d.ts.map +0 -1
  93. package/dist/recorder/binding.js +0 -215
  94. package/dist/recorder/binding.js.map +0 -1
  95. package/dist/recorder/index.d.ts +0 -4
  96. package/dist/recorder/index.d.ts.map +0 -1
  97. package/dist/recorder/index.js +0 -4
  98. package/dist/recorder/index.js.map +0 -1
  99. package/dist/recorder/recorder.d.ts +0 -19
  100. package/dist/recorder/recorder.d.ts.map +0 -1
  101. package/dist/recorder/recorder.js +0 -101
  102. package/dist/recorder/recorder.js.map +0 -1
  103. package/dist/recorder/store.d.ts +0 -22
  104. package/dist/recorder/store.d.ts.map +0 -1
  105. package/dist/recorder/store.js +0 -150
  106. package/dist/recorder/store.js.map +0 -1
  107. package/dist/recorder/types.d.ts +0 -73
  108. package/dist/recorder/types.d.ts.map +0 -1
  109. package/dist/recorder/types.js +0 -5
  110. package/dist/recorder/types.js.map +0 -1
@@ -1,10 +1,18 @@
1
1
  import * as net from 'net';
2
2
  import * as fs from 'fs';
3
+ const LOG_FILE = '/tmp/standalone-diag.log';
4
+ function logDiag(msg) {
5
+ fs.appendFileSync(LOG_FILE, new Date().toISOString().substring(11, 23) + ' ' + msg + '\n');
6
+ }
3
7
  import * as path from 'path';
4
8
  import * as http from 'http';
5
9
  import { WebSocketServer, WebSocket } from 'ws';
10
+ import sharp from 'sharp';
6
11
  import { getViewerHtml } from './viewer-html.js';
12
+ import { isAllowedOrigin } from './stream-server.js';
7
13
  import { getSocketDir } from './daemon.js';
14
+ import { openApiSpec } from './openapi.js';
15
+ import { getSwaggerUiHtml } from './swagger-ui.js';
8
16
  const DEFAULT_STREAM_PORT = parseInt(process.env.AGENT_BROWSER_STREAM_PORT || '5005', 10);
9
17
  const STREAM_SERVER_PID_FILE = 'stream-server.pid';
10
18
  const STREAM_SERVER_IPC_FILE = 'stream-server.ipc';
@@ -19,9 +27,12 @@ class StreamServerStandalone {
19
27
  sessions = new Map();
20
28
  clients = new Map();
21
29
  daemonSockets = new Map();
30
+ outboundSockets = new Map();
22
31
  frameBuffers = new Map();
23
32
  instanceIdToSession = new Map();
24
33
  latestFrames = new Map();
34
+ clientStates = new Map();
35
+ browser;
25
36
  constructor(port = DEFAULT_STREAM_PORT) {
26
37
  this.port = port;
27
38
  }
@@ -86,7 +97,7 @@ class StreamServerStandalone {
86
97
  res.end(JSON.stringify({
87
98
  status: 'ok',
88
99
  sessions: Array.from(this.sessions.keys()),
89
- clients: this.getTotalClientCount()
100
+ clients: this.getTotalClientCount(),
90
101
  }));
91
102
  return;
92
103
  }
@@ -100,34 +111,101 @@ class StreamServerStandalone {
100
111
  res.end(JSON.stringify({ sessions: Array.from(this.sessions.keys()) }));
101
112
  return;
102
113
  }
114
+ // HTTP API: Execute command
115
+ if (req.url === '/api/command' && req.method === 'POST') {
116
+ let body = '';
117
+ req.on('data', (chunk) => (body += chunk));
118
+ req.on('end', async () => {
119
+ try {
120
+ const response = await this.sendCommandToDaemon(body);
121
+ res.writeHead(200, { 'Content-Type': 'application/json' });
122
+ res.end(response);
123
+ }
124
+ catch (err) {
125
+ const error = err instanceof Error ? err.message : String(err);
126
+ res.writeHead(500, { 'Content-Type': 'application/json' });
127
+ res.end(JSON.stringify({ id: 'unknown', success: false, error }));
128
+ }
129
+ });
130
+ return;
131
+ }
132
+ // HTTP API: OpenAPI specification
133
+ if (req.url === '/api/openapi.json' && req.method === 'GET') {
134
+ res.writeHead(200, { 'Content-Type': 'application/json' });
135
+ res.end(JSON.stringify(openApiSpec));
136
+ return;
137
+ }
138
+ // HTTP API: Swagger UI
139
+ if (req.url === '/api/docs' && req.method === 'GET') {
140
+ res.setHeader('Content-Type', 'text/html; charset=utf-8');
141
+ res.end(getSwaggerUiHtml());
142
+ return;
143
+ }
144
+ // HTTP API: Help - list available commands
145
+ if (req.url === '/api/help' && req.method === 'GET') {
146
+ res.writeHead(200, { 'Content-Type': 'application/json' });
147
+ res.end(JSON.stringify({
148
+ title: 'agent-browser HTTP API',
149
+ version: '0.10.0',
150
+ endpoints: {
151
+ 'POST /api/command': {
152
+ description: 'Execute a browser command',
153
+ example: { id: '1', action: 'navigate', url: 'https://example.com' },
154
+ },
155
+ 'GET /api/help': { description: 'Show this help message' },
156
+ 'GET /api/openapi.json': { description: 'OpenAPI 3.0 specification' },
157
+ 'GET /api/docs': { description: 'Swagger UI documentation' },
158
+ 'GET /health': { description: 'Health check' },
159
+ 'GET /sessions': { description: 'List active sessions' },
160
+ },
161
+ availableActions: [
162
+ { action: 'launch', description: 'Launch browser', required: [] },
163
+ { action: 'navigate', description: 'Navigate to URL', required: ['url'] },
164
+ { action: 'click', description: 'Click element', required: ['selector'] },
165
+ { action: 'fill', description: 'Fill form field', required: ['selector', 'value'] },
166
+ { action: 'type', description: 'Type text', required: ['selector', 'text'] },
167
+ { action: 'snapshot', description: 'Get page snapshot', required: [] },
168
+ { action: 'screenshot', description: 'Take screenshot', required: [] },
169
+ { action: 'evaluate', description: 'Execute JavaScript', required: ['script'] },
170
+ { action: 'wait', description: 'Wait for element/condition', required: [] },
171
+ { action: 'scroll', description: 'Scroll page', required: [] },
172
+ { action: 'hover', description: 'Hover element', required: ['selector'] },
173
+ { action: 'press', description: 'Press key', required: ['key'] },
174
+ {
175
+ action: 'select',
176
+ description: 'Select dropdown option',
177
+ required: ['selector', 'values'],
178
+ },
179
+ { action: 'back', description: 'Go back', required: [] },
180
+ { action: 'forward', description: 'Go forward', required: [] },
181
+ { action: 'reload', description: 'Reload page', required: [] },
182
+ { action: 'close', description: 'Close browser', required: [] },
183
+ { action: 'url', description: 'Get current URL', required: [] },
184
+ { action: 'title', description: 'Get page title', required: [] },
185
+ { action: 'cookies_get', description: 'Get cookies', required: [] },
186
+ { action: 'cookies_set', description: 'Set cookie', required: ['cookies'] },
187
+ { action: 'state_save', description: 'Save browser state', required: ['path'] },
188
+ { action: 'state_load', description: 'Load browser state', required: ['path'] },
189
+ ],
190
+ docs: 'See http://localhost:5005/api/docs for interactive documentation',
191
+ }));
192
+ return;
193
+ }
103
194
  res.writeHead(404, { 'Content-Type': 'application/json' });
104
195
  res.end(JSON.stringify({ error: 'Not found' }));
105
196
  });
106
197
  this.wss = new WebSocketServer({
107
198
  server: this.httpServer,
108
199
  verifyClient: (info) => {
109
- const origin = info.origin;
110
- if (!origin)
111
- return true;
112
- if (origin.startsWith('file://'))
113
- return true;
114
- try {
115
- const url = new URL(origin);
116
- const host = url.hostname;
117
- if (host === 'localhost' || host === '127.0.0.1' || host === '::1' || host === '[::1]') {
118
- return true;
119
- }
120
- }
121
- catch { }
122
- return false;
123
- }
200
+ return isAllowedOrigin(info.origin);
201
+ },
124
202
  });
125
203
  this.wss.on('connection', (ws, req) => {
126
204
  this.handleWebSocketConnection(ws, req);
127
205
  });
128
206
  this.wss.on('error', reject);
129
- this.httpServer.listen(this.port, '127.0.0.1', () => {
130
- console.log(`[StreamServer] Server listening on port ${this.port}`);
207
+ this.httpServer.listen(this.port, '0.0.0.0', () => {
208
+ console.log(`[StreamServer] Server listening on port ${this.port} (HTTP API enabled)`);
131
209
  resolve();
132
210
  });
133
211
  this.httpServer.on('error', reject);
@@ -137,72 +215,118 @@ class StreamServerStandalone {
137
215
  const url = new URL(req.url || '/', `http://localhost:${this.port}`);
138
216
  const sessionParam = url.searchParams.get('session') || 'default';
139
217
  const instanceIdParam = url.searchParams.get('instanceId');
140
- // 优先使用 instanceId 查找 session
218
+ const rawSelector = url.searchParams.get('selector');
219
+ const clientState = {};
220
+ if (rawSelector) {
221
+ clientState.selector = decodeURIComponent(rawSelector);
222
+ }
141
223
  let session;
224
+ let connected = false;
142
225
  if (instanceIdParam) {
143
226
  const foundSession = this.instanceIdToSession.get(instanceIdParam);
144
227
  if (foundSession) {
145
228
  session = foundSession;
229
+ connected = true;
146
230
  }
147
231
  else {
148
- // instanceId 不存在,返回错误
149
- console.log(`[StreamServer] Invalid instanceId: ${instanceIdParam}`);
150
- ws.send(JSON.stringify({ type: 'status', connected: false, error: 'Invalid instanceId' }));
151
- ws.close();
152
- return;
232
+ session = sessionParam;
153
233
  }
154
234
  }
155
235
  else {
156
236
  session = sessionParam;
157
237
  }
158
- console.log(`[StreamServer] WebSocket client connected for session: ${session}`);
159
238
  if (!this.clients.has(session)) {
160
239
  this.clients.set(session, new Set());
161
240
  }
162
241
  const wasEmpty = this.clients.get(session).size === 0;
163
242
  this.clients.get(session).add(ws);
164
- this.sendStatus(ws, session);
165
- // 如果有最新帧,立即发送给新客户端
243
+ this.clientStates.set(ws, clientState);
244
+ if (clientState.selector) {
245
+ this.requestElementBox(session, clientState.selector);
246
+ clientState.elementCheckTimer = setInterval(() => {
247
+ if (!clientState.selector)
248
+ return;
249
+ this.requestElementBox(session, clientState.selector);
250
+ }, 2500);
251
+ }
252
+ this.sendStatus(ws, session, clientState);
166
253
  const latestFrame = this.latestFrames.get(session);
167
254
  if (latestFrame) {
168
- ws.send(latestFrame.header);
169
- ws.send(latestFrame.data);
255
+ this.sendCroppedFrame(ws, latestFrame, clientState);
170
256
  }
171
- // 如果这是该 session 的第一个客户端,通知 daemon 启动 screencast
257
+ logDiag('[WSCONN] viewer session=' +
258
+ session +
259
+ ' instanceId=' +
260
+ instanceIdParam +
261
+ ' sessions.has=' +
262
+ this.sessions.has(session) +
263
+ ' daemonSockets.has=' +
264
+ this.daemonSockets.has(session) +
265
+ ' clients before=' +
266
+ wasEmpty);
172
267
  if (wasEmpty && this.daemonSockets.has(session)) {
173
- this.daemonSockets.get(session)?.write(JSON.stringify({ type: 'client_connected', session }) + '\n');
268
+ this.daemonSockets
269
+ .get(session)
270
+ ?.write(JSON.stringify({ type: 'client_connected', session }) + '\n');
174
271
  }
175
- if (this.sessions.has(session) && !this.daemonSockets.has(session)) {
272
+ if (this.sessions.has(session)) {
176
273
  this.connectToDaemon(session);
177
274
  }
178
275
  ws.on('message', (data) => {
179
276
  try {
180
277
  const message = JSON.parse(data.toString());
181
- this.handleClientMessage(session, message);
278
+ if (message.type === 'status') {
279
+ this.sendStatus(ws, session, clientState);
280
+ if (clientState.selector) {
281
+ this.requestElementBox(session, clientState.selector);
282
+ }
283
+ }
284
+ else {
285
+ this.handleClientMessage(session, message);
286
+ }
182
287
  }
183
288
  catch (error) {
184
289
  console.error('[StreamServer] Failed to parse client message:', error);
185
290
  }
186
291
  });
187
292
  ws.on('close', () => {
188
- console.log(`[StreamServer] WebSocket client disconnected for session: ${session}`);
293
+ if (clientState.elementCheckTimer) {
294
+ clearInterval(clientState.elementCheckTimer);
295
+ clientState.elementCheckTimer = undefined;
296
+ }
189
297
  this.clients.get(session)?.delete(ws);
298
+ this.clientStates.delete(ws);
190
299
  if (this.clients.get(session)?.size === 0) {
191
300
  this.clients.delete(session);
192
- // 如果该 session 没有客户端了,通知 daemon 停止 screencast
193
301
  if (this.daemonSockets.has(session)) {
194
- this.daemonSockets.get(session)?.write(JSON.stringify({ type: 'client_disconnected', session }) + '\n');
302
+ this.daemonSockets
303
+ .get(session)
304
+ ?.write(JSON.stringify({ type: 'client_disconnected', session }) + '\n');
195
305
  }
196
306
  }
197
307
  });
198
308
  ws.on('error', (error) => {
199
- console.error(`[StreamServer] WebSocket error for session ${session}:`, error);
309
+ if (clientState.elementCheckTimer) {
310
+ clearInterval(clientState.elementCheckTimer);
311
+ clientState.elementCheckTimer = undefined;
312
+ }
200
313
  this.clients.get(session)?.delete(ws);
314
+ this.clientStates.delete(ws);
201
315
  });
202
316
  }
203
317
  handleClientMessage(session, message) {
318
+ const msgType = message.type;
319
+ if (msgType === 'input_fill') {
320
+ logDiag('[CM] input_fill SESSION=' +
321
+ session +
322
+ ' socket_exists=' +
323
+ !!this.daemonSockets.get(session) +
324
+ ' text=' +
325
+ (message.text || ''));
326
+ }
204
327
  const daemonSocket = this.daemonSockets.get(session);
205
328
  if (!daemonSocket) {
329
+ logDiag('[CM] NO DAEMON SOCKET for session=' + session);
206
330
  return;
207
331
  }
208
332
  const forwardableTypes = [
@@ -213,17 +337,109 @@ class StreamServerStandalone {
213
337
  'user_activity',
214
338
  'keyboard_down',
215
339
  'keyboard_up',
216
- 'keyboard_insert_text'
340
+ 'keyboard_insert_text',
341
+ 'input_focused',
342
+ 'input_value',
343
+ 'input_blur',
344
+ 'input_fill',
345
+ 'input_blur_element',
217
346
  ];
218
347
  if (forwardableTypes.includes(message.type)) {
219
348
  try {
220
349
  daemonSocket.write(JSON.stringify(message) + '\n');
350
+ if (msgType === 'input_fill') {
351
+ logDiag('[CM] input_fill WRITTEN TO SOCKET');
352
+ }
221
353
  }
222
354
  catch (error) {
223
355
  console.error(`[StreamServer] Failed to send message to daemon for session ${session}:`, error);
224
356
  }
225
357
  }
226
358
  }
359
+ requestElementBox(session, selector) {
360
+ if (!this.daemonSockets.has(session)) {
361
+ console.log(`[StreamServer] requestElementBox: no daemon socket for session ${session}`);
362
+ return;
363
+ }
364
+ const daemonSocket = this.daemonSockets.get(session);
365
+ if (daemonSocket) {
366
+ console.log(`[StreamServer] requestElementBox: session=${session} selector=${selector}`);
367
+ daemonSocket.write(JSON.stringify({
368
+ type: 'request_element_box',
369
+ session,
370
+ selector,
371
+ }) + '\n');
372
+ }
373
+ }
374
+ async sendCroppedFrame(ws, frame, clientState) {
375
+ if (clientState.selector && clientState.elementBox && ws.readyState === WebSocket.OPEN) {
376
+ try {
377
+ const box = clientState.elementBox;
378
+ const header = JSON.parse(frame.header);
379
+ const meta = header.metadata;
380
+ let left = Math.round(box.x);
381
+ let top = Math.round(box.y);
382
+ let w = Math.round(box.width);
383
+ let h = Math.round(box.height);
384
+ if (meta?.deviceWidth && meta?.deviceHeight) {
385
+ const imgInfo = await sharp(frame.data).metadata();
386
+ const actualW = imgInfo.width || meta.deviceWidth;
387
+ const actualH = imgInfo.height || meta.deviceHeight;
388
+ const scaleX = actualW / meta.deviceWidth;
389
+ const scaleY = actualH / meta.deviceHeight;
390
+ if (scaleX !== 1 || scaleY !== 1) {
391
+ left = Math.round(box.x * scaleX);
392
+ top = Math.round(box.y * scaleY);
393
+ w = Math.round(box.width * scaleX);
394
+ h = Math.round(box.height * scaleY);
395
+ }
396
+ left = Math.max(0, Math.min(left, actualW - 1));
397
+ top = Math.max(0, Math.min(top, actualH - 1));
398
+ w = Math.min(w, actualW - left);
399
+ h = Math.min(h, actualH - top);
400
+ }
401
+ if (w <= 0 || h <= 0) {
402
+ ws.send(frame.header);
403
+ ws.send(frame.data);
404
+ return;
405
+ }
406
+ const buf = await sharp(frame.data)
407
+ .extract({ left, top, width: w, height: h })
408
+ .resize(box.width, box.height)
409
+ .jpeg({ quality: 80 })
410
+ .toBuffer();
411
+ if (ws.readyState !== WebSocket.OPEN)
412
+ return;
413
+ const croppedHeader = {
414
+ ...header,
415
+ metadata: {
416
+ ...header.metadata,
417
+ deviceWidth: box.width,
418
+ deviceHeight: box.height,
419
+ element: {
420
+ selector: clientState.selector,
421
+ x: box.x,
422
+ y: box.y,
423
+ width: box.width,
424
+ height: box.height,
425
+ },
426
+ },
427
+ };
428
+ ws.send(JSON.stringify(croppedHeader));
429
+ ws.send(buf);
430
+ }
431
+ catch {
432
+ if (ws.readyState !== WebSocket.OPEN)
433
+ return;
434
+ ws.send(frame.header);
435
+ ws.send(frame.data);
436
+ }
437
+ }
438
+ else {
439
+ ws.send(frame.header);
440
+ ws.send(frame.data);
441
+ }
442
+ }
227
443
  async startIpcServer() {
228
444
  return new Promise((resolve, reject) => {
229
445
  const ipcPath = this.getIpcPath();
@@ -270,22 +486,21 @@ class StreamServerStandalone {
270
486
  if (s === socket) {
271
487
  console.log(`[StreamServer] Daemon disconnected for session: ${session}`);
272
488
  this.daemonSockets.delete(session);
273
- this.sessions.delete(session);
274
489
  this.broadcastStatus(session, false);
275
490
  break;
276
491
  }
277
492
  }
278
493
  });
279
494
  }
280
- handleIpcMessage(socket, message) {
495
+ async handleIpcMessage(socket, message) {
281
496
  switch (message.type) {
282
497
  case 'register':
283
498
  if (message.session && message.socketPath && message.instanceId) {
284
- console.log(`[StreamServer] Session registered: ${message.session}, instanceId: ${message.instanceId}`);
499
+ logDiag('[REGISTER] session=' + message.session + ' instanceId=' + message.instanceId);
285
500
  this.sessions.set(message.session, {
286
501
  socketPath: message.socketPath,
287
502
  lastSeen: Date.now(),
288
- instanceId: message.instanceId
503
+ instanceId: message.instanceId,
289
504
  });
290
505
  this.instanceIdToSession.set(message.instanceId, message.session);
291
506
  this.daemonSockets.set(message.session, socket);
@@ -316,78 +531,320 @@ class StreamServerStandalone {
316
531
  case 'frame':
317
532
  if (message.session) {
318
533
  this.sessions.get(message.session).lastSeen = Date.now();
319
- this.broadcastFrame(message);
534
+ await this.broadcastFrame(message);
535
+ }
536
+ break;
537
+ case 'selector_element':
538
+ if (message.session && message.selector) {
539
+ console.log(`[StreamServer] Received selector_element: session=${message.session} selector=${message.selector} box=${message.elementBox ? JSON.stringify(message.elementBox) : 'null'}`);
540
+ const clients = this.clients.get(message.session);
541
+ if (clients) {
542
+ for (const client of clients) {
543
+ const state = this.clientStates.get(client);
544
+ if (state?.selector === message.selector) {
545
+ if (message.elementBox) {
546
+ state.elementBox = message.elementBox;
547
+ state.degraded = false;
548
+ const latestFrameForElem = this.latestFrames.get(message.session);
549
+ if (latestFrameForElem) {
550
+ this.sendCroppedFrame(client, latestFrameForElem, state);
551
+ }
552
+ }
553
+ else if (!state.degraded) {
554
+ state.elementBox = undefined;
555
+ state.degraded = true;
556
+ }
557
+ this.sendStatus(client, message.session, state);
558
+ }
559
+ else {
560
+ console.log(`[StreamServer] selector_element mismatch: state.selector="${state?.selector}" vs message.selector="${message.selector}"`);
561
+ }
562
+ }
563
+ }
564
+ }
565
+ break;
566
+ case 'input_focused':
567
+ case 'input_value':
568
+ case 'input_blur':
569
+ logDiag('[IPC] ' + String(message.type) + ' clients=' + this.clients.size);
570
+ for (const [, clients] of this.clients) {
571
+ for (const client of clients) {
572
+ if (client.readyState === WebSocket.OPEN) {
573
+ try {
574
+ client.send(JSON.stringify(message));
575
+ }
576
+ catch (_) { }
577
+ }
578
+ }
320
579
  }
321
580
  break;
322
581
  }
323
582
  }
324
583
  connectToDaemon(session) {
584
+ if (this.outboundSockets.has(session))
585
+ return;
325
586
  const sessionInfo = this.sessions.get(session);
326
587
  if (!sessionInfo)
327
588
  return;
328
589
  const socketPath = sessionInfo.socketPath;
329
- const socket = net.createConnection({ path: socketPath }, () => {
590
+ const socket = net.createConnection({ path: socketPath }, async () => {
330
591
  console.log(`[StreamServer] Connected to daemon for session: ${session}`);
331
- this.daemonSockets.set(session, socket);
592
+ const sessionClients = this.clients.get(session);
593
+ if (sessionClients) {
594
+ for (const client of sessionClients) {
595
+ const state = this.clientStates.get(client);
596
+ if (state?.selector) {
597
+ this.requestElementBox(session, state.selector);
598
+ }
599
+ }
600
+ }
332
601
  });
333
602
  socket.on('error', (error) => {
334
603
  console.error(`[StreamServer] Failed to connect to daemon for session ${session}:`, error);
604
+ this.outboundSockets.delete(session);
335
605
  });
336
606
  socket.on('close', () => {
337
- this.daemonSockets.delete(session);
607
+ logDiag('[CTD] socket close session=' + session);
608
+ this.outboundSockets.delete(session);
609
+ });
610
+ this.outboundSockets.set(session, socket);
611
+ // Send inject_focus_listener command to daemon via this outbound connection
612
+ logDiag('[CTD] sending inject_focus_listener to daemon for session=' + session);
613
+ try {
614
+ socket.write(JSON.stringify({ id: 'inject-fl-' + Date.now(), action: 'inject_focus_listener' }) + '\n');
615
+ }
616
+ catch (e) {
617
+ console.error('[StreamServer] Failed to send inject_focus_listener:', e);
618
+ }
619
+ // Data handler: receive focus events from daemon's injectFocusListener callback
620
+ socket.on('data', (data) => {
621
+ const raw = data.toString();
622
+ logDiag('[CTD DATA] session=' +
623
+ session +
624
+ ' rawLen=' +
625
+ raw.length +
626
+ ' firstLine=' +
627
+ raw.substring(0, 100).replace(/\n/g, '|'));
628
+ const lines = raw.split('\n');
629
+ for (const line of lines) {
630
+ if (!line.trim())
631
+ continue;
632
+ try {
633
+ const msg = JSON.parse(line);
634
+ // Handle inject_focus_listener response — retry on "Browser not launched"
635
+ if (msg.id && String(msg.id).startsWith('inject-fl-')) {
636
+ if (msg.success === false && msg.error && msg.error.includes('Browser not launched')) {
637
+ logDiag('[CTD] inject_focus_listener failed: ' + msg.error + ' — retrying in 2s');
638
+ setTimeout(() => {
639
+ try {
640
+ socket.write(JSON.stringify({
641
+ id: 'inject-fl-retry-' + Date.now(),
642
+ action: 'inject_focus_listener',
643
+ }) + '\n');
644
+ }
645
+ catch (_) { }
646
+ }, 2000);
647
+ }
648
+ continue;
649
+ }
650
+ if (msg.type === 'input_focused' ||
651
+ msg.type === 'input_value' ||
652
+ msg.type === 'input_blur') {
653
+ const clients = this.clients.get(session);
654
+ logDiag('[CTD DATA] broadcasting ' +
655
+ msg.type +
656
+ ' to ' +
657
+ (clients?.size || 0) +
658
+ ' viewer clients');
659
+ if (clients) {
660
+ for (const client of clients) {
661
+ if (client.readyState === WebSocket.OPEN) {
662
+ try {
663
+ client.send(JSON.stringify(msg));
664
+ }
665
+ catch (_) { }
666
+ }
667
+ }
668
+ }
669
+ }
670
+ }
671
+ catch (_) {
672
+ // Ignore parse errors (might be partial data or non-JSON responses)
673
+ }
674
+ }
338
675
  });
339
676
  }
340
- broadcastFrame(message) {
677
+ async broadcastFrame(message) {
341
678
  const session = message.session;
342
679
  const clients = this.clients.get(session);
343
680
  if (!clients || clients.size === 0)
344
681
  return;
345
- const headerMessage = {
346
- type: 'frame',
347
- metadata: message.metadata,
348
- format: message.format,
349
- fps: message.fps,
350
- state: message.state
351
- };
352
- // 保存最新帧
353
- if (message.data) {
354
- this.latestFrames.set(session, {
355
- header: JSON.stringify(headerMessage),
356
- data: Buffer.from(message.data, 'base64')
357
- });
358
- }
682
+ const frameData = message.data ? Buffer.from(message.data, 'base64') : null;
359
683
  for (const client of clients) {
360
- if (client.readyState === WebSocket.OPEN) {
361
- client.send(JSON.stringify(headerMessage));
362
- if (message.data) {
363
- client.send(Buffer.from(message.data, 'base64'));
684
+ if (client.readyState !== WebSocket.OPEN)
685
+ continue;
686
+ const clientState = this.clientStates.get(client);
687
+ let metadata = message.metadata;
688
+ let dataToSend = frameData;
689
+ const hasSelector = !!clientState?.selector;
690
+ const hasBox = !!clientState?.elementBox;
691
+ const hasFrame = !!frameData;
692
+ if (hasSelector && hasBox && hasFrame) {
693
+ try {
694
+ const box = clientState.elementBox;
695
+ const meta = message.metadata;
696
+ let left = Math.round(box.x);
697
+ let top = Math.round(box.y);
698
+ let w = Math.round(box.width);
699
+ let h = Math.round(box.height);
700
+ if (meta?.deviceWidth && meta?.deviceHeight) {
701
+ const imgInfo = await sharp(frameData).metadata();
702
+ const actualW = imgInfo.width || meta.deviceWidth;
703
+ const actualH = imgInfo.height || meta.deviceHeight;
704
+ const scaleX = actualW / meta.deviceWidth;
705
+ const scaleY = actualH / meta.deviceHeight;
706
+ if (scaleX !== 1 || scaleY !== 1) {
707
+ left = Math.round(box.x * scaleX);
708
+ top = Math.round(box.y * scaleY);
709
+ w = Math.round(box.width * scaleX);
710
+ h = Math.round(box.height * scaleY);
711
+ }
712
+ left = Math.max(0, Math.min(left, actualW - 1));
713
+ top = Math.max(0, Math.min(top, actualH - 1));
714
+ w = Math.min(w, actualW - left);
715
+ h = Math.min(h, actualH - top);
716
+ }
717
+ if (w <= 0 || h <= 0) {
718
+ dataToSend = frameData;
719
+ }
720
+ else {
721
+ const cropped = await sharp(frameData)
722
+ .extract({ left, top, width: w, height: h })
723
+ .resize(box.width, box.height)
724
+ .jpeg({ quality: 80 })
725
+ .toBuffer();
726
+ dataToSend = Buffer.from(cropped);
727
+ if (metadata) {
728
+ metadata = {
729
+ ...metadata,
730
+ deviceWidth: box.width,
731
+ deviceHeight: box.height,
732
+ element: {
733
+ selector: clientState.selector,
734
+ x: box.x,
735
+ y: box.y,
736
+ width: box.width,
737
+ height: box.height,
738
+ },
739
+ };
740
+ }
741
+ }
742
+ }
743
+ catch (error) {
744
+ const errMsg = error instanceof Error ? error.message : String(error);
745
+ const box = clientState.elementBox;
746
+ metadata = {
747
+ ...(metadata || {}),
748
+ _cropError: errMsg,
749
+ _cropBox: {
750
+ left: Math.round(box.x),
751
+ top: Math.round(box.y),
752
+ width: Math.round(box.width),
753
+ height: Math.round(box.height),
754
+ },
755
+ _selector: clientState?.selector,
756
+ };
757
+ dataToSend = frameData;
364
758
  }
365
759
  }
760
+ else if (hasSelector) {
761
+ metadata = {
762
+ ...(metadata || {}),
763
+ _skipCrop: `hasBox=${hasBox} hasFrame=${hasFrame}`,
764
+ _selector: clientState?.selector,
765
+ };
766
+ }
767
+ const headerMessage = {
768
+ type: 'frame',
769
+ metadata,
770
+ format: message.format,
771
+ fps: message.fps,
772
+ state: message.state,
773
+ };
774
+ client.send(JSON.stringify(headerMessage));
775
+ if (dataToSend) {
776
+ client.send(dataToSend);
777
+ }
778
+ }
779
+ // 保存最新帧(原始)
780
+ if (frameData) {
781
+ this.latestFrames.set(session, {
782
+ header: JSON.stringify({
783
+ type: 'frame',
784
+ metadata: message.metadata,
785
+ format: message.format,
786
+ fps: message.fps,
787
+ state: message.state,
788
+ }),
789
+ data: frameData,
790
+ });
366
791
  }
367
792
  }
368
793
  broadcastStatus(session, connected) {
369
794
  const clients = this.clients.get(session);
370
795
  if (!clients)
371
796
  return;
372
- const message = {
373
- type: 'status',
374
- connected,
375
- screencasting: connected
376
- };
377
797
  for (const client of clients) {
378
798
  if (client.readyState === WebSocket.OPEN) {
379
- client.send(JSON.stringify(message));
799
+ const state = this.clientStates.get(client);
800
+ const msg = {
801
+ type: 'status',
802
+ connected,
803
+ screencasting: connected,
804
+ session,
805
+ version: '0.10.0',
806
+ };
807
+ if (state?.selector && state.elementBox) {
808
+ msg.element = {
809
+ selector: state.selector,
810
+ x: state.elementBox.x,
811
+ y: state.elementBox.y,
812
+ width: state.elementBox.width,
813
+ height: state.elementBox.height,
814
+ };
815
+ msg.viewportWidth = state.elementBox.width;
816
+ msg.viewportHeight = state.elementBox.height;
817
+ }
818
+ if (state?.degraded) {
819
+ msg.degraded = true;
820
+ }
821
+ client.send(JSON.stringify(msg));
380
822
  }
381
823
  }
382
824
  }
383
- sendStatus(ws, session) {
825
+ sendStatus(ws, session, clientState) {
384
826
  const connected = this.sessions.has(session);
385
827
  const message = {
386
828
  type: 'status',
387
829
  connected,
388
830
  screencasting: connected,
389
- session
831
+ session,
832
+ version: '0.10.0',
390
833
  };
834
+ if (clientState?.selector && clientState?.elementBox) {
835
+ message.element = {
836
+ selector: clientState.selector,
837
+ x: clientState.elementBox.x,
838
+ y: clientState.elementBox.y,
839
+ width: clientState.elementBox.width,
840
+ height: clientState.elementBox.height,
841
+ };
842
+ message.viewportWidth = clientState.elementBox.width;
843
+ message.viewportHeight = clientState.elementBox.height;
844
+ }
845
+ if (clientState?.degraded) {
846
+ message.degraded = true;
847
+ }
391
848
  if (ws.readyState === WebSocket.OPEN) {
392
849
  ws.send(JSON.stringify(message));
393
850
  }
@@ -399,6 +856,68 @@ class StreamServerStandalone {
399
856
  }
400
857
  return total;
401
858
  }
859
+ /**
860
+ * Send a command to the daemon via Unix socket and return the response
861
+ */
862
+ async sendCommandToDaemon(commandJson) {
863
+ return new Promise((resolve, reject) => {
864
+ // Get the daemon socket path from the first available session
865
+ // or use the default socket path
866
+ let socketPath;
867
+ // Try to find an active session's socket path
868
+ for (const [session, info] of this.sessions) {
869
+ socketPath = info.socketPath;
870
+ break;
871
+ }
872
+ if (!socketPath) {
873
+ // Fallback to default socket path
874
+ socketPath = path.join(getSocketDir(), 'default.sock');
875
+ }
876
+ const socket = net.createConnection({ path: socketPath }, () => {
877
+ socket.write(commandJson + '\n');
878
+ });
879
+ let response = '';
880
+ let resolved = false;
881
+ const timeout = setTimeout(() => {
882
+ if (!resolved) {
883
+ resolved = true;
884
+ socket.destroy();
885
+ reject(new Error('Command timeout'));
886
+ }
887
+ }, 30000); // 30 second timeout
888
+ socket.on('data', (data) => {
889
+ response += data.toString();
890
+ // Check if we have a complete JSON response
891
+ try {
892
+ JSON.parse(response);
893
+ // If we can parse it, we have the complete response
894
+ if (!resolved) {
895
+ resolved = true;
896
+ clearTimeout(timeout);
897
+ resolve(response);
898
+ socket.end();
899
+ }
900
+ }
901
+ catch {
902
+ // Not complete yet, keep reading
903
+ }
904
+ });
905
+ socket.on('end', () => {
906
+ if (!resolved) {
907
+ resolved = true;
908
+ clearTimeout(timeout);
909
+ resolve(response);
910
+ }
911
+ });
912
+ socket.on('error', (err) => {
913
+ if (!resolved) {
914
+ resolved = true;
915
+ clearTimeout(timeout);
916
+ reject(err);
917
+ }
918
+ });
919
+ });
920
+ }
402
921
  getPidFile() {
403
922
  return path.join(getSocketDir(), STREAM_SERVER_PID_FILE);
404
923
  }
@@ -484,7 +1003,8 @@ export function getStreamServerIpcPath() {
484
1003
  return path.join(getSocketDir(), STREAM_SERVER_IPC_FILE);
485
1004
  }
486
1005
  export { StreamServerStandalone };
487
- if (process.argv[1]?.endsWith('stream-server-standalone.js') || process.env.AGENT_BROWSER_STREAM_SERVER === '1') {
1006
+ if (process.argv[1]?.endsWith('stream-server-standalone.js') ||
1007
+ process.env.AGENT_BROWSER_STREAM_SERVER === '1') {
488
1008
  const server = new StreamServerStandalone();
489
1009
  server.start().catch((err) => {
490
1010
  console.error('[StreamServer] Failed to start:', err);