neoagent 2.4.0 → 2.4.1-beta.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/LICENSE +619 -21
  2. package/README.md +1 -1
  3. package/extensions/chrome-browser/background.mjs +19 -7
  4. package/extensions/chrome-browser/icons/icon128.png +0 -0
  5. package/extensions/chrome-browser/icons/icon16.png +0 -0
  6. package/extensions/chrome-browser/icons/icon48.png +0 -0
  7. package/extensions/chrome-browser/icons/logo.svg +12 -0
  8. package/extensions/chrome-browser/manifest.json +13 -2
  9. package/extensions/chrome-browser/popup.css +5 -0
  10. package/extensions/chrome-browser/popup.html +7 -5
  11. package/extensions/chrome-browser/popup.js +16 -7
  12. package/flutter_app/lib/features/onboarding/onboarding_companion_step.dart +721 -0
  13. package/flutter_app/lib/features/onboarding/onboarding_shell.dart +6 -0
  14. package/flutter_app/lib/features/onboarding/onboarding_welcome_step.dart +1 -1
  15. package/flutter_app/lib/main.dart +1 -0
  16. package/flutter_app/lib/main_controller.dart +156 -3
  17. package/flutter_app/lib/main_devices.dart +485 -119
  18. package/flutter_app/lib/main_settings.dart +289 -30
  19. package/flutter_app/lib/src/backend_client.dart +89 -0
  20. package/flutter_app/lib/src/desktop_companion_actions.dart +153 -3
  21. package/flutter_app/lib/src/desktop_companion_io.dart +145 -4
  22. package/flutter_app/lib/src/desktop_native_bridge.dart +13 -0
  23. package/flutter_app/lib/src/stream_renderer.dart +286 -0
  24. package/flutter_app/macos/Runner/AppDelegate.swift +56 -1
  25. package/package.json +2 -2
  26. package/server/guest_agent.js +19 -1
  27. package/server/http/routes.js +191 -0
  28. package/server/http/socket.js +1 -1
  29. package/server/index.js +4 -1
  30. package/server/public/.last_build_id +1 -1
  31. package/server/public/assets/fonts/MaterialIcons-Regular.otf +0 -0
  32. package/server/public/flutter_bootstrap.js +1 -1
  33. package/server/public/main.dart.js +75438 -74005
  34. package/server/routes/browser.js +14 -0
  35. package/server/routes/browser_extension.js +21 -4
  36. package/server/routes/desktop.js +10 -0
  37. package/server/routes/settings.js +4 -0
  38. package/server/routes/stream.js +187 -0
  39. package/server/services/ai/tools.js +40 -29
  40. package/server/services/android/controller.js +41 -2
  41. package/server/services/browser/controller.js +34 -0
  42. package/server/services/browser/extension/manifest.js +33 -0
  43. package/server/services/browser/extension/provider.js +12 -6
  44. package/server/services/browser/extension/registry.js +188 -18
  45. package/server/services/desktop/gateway.js +28 -3
  46. package/server/services/desktop/protocol.js +34 -0
  47. package/server/services/desktop/provider.js +25 -0
  48. package/server/services/desktop/registry.js +92 -10
  49. package/server/services/manager.js +19 -2
  50. package/server/services/runtime/backends/local-vm.js +6 -0
  51. package/server/services/runtime/docker-vm-manager.js +26 -3
  52. package/server/services/runtime/manager.js +36 -5
  53. package/server/services/runtime/settings.js +17 -0
  54. package/server/services/streaming/android-stream.js +298 -0
  55. package/server/services/streaming/browser-stream.js +87 -0
  56. package/server/services/streaming/stream-hub.js +231 -0
  57. package/server/services/websocket.js +73 -0
@@ -1,3 +1,4 @@
1
+ import 'dart:async';
1
2
  import 'dart:convert';
2
3
  import 'dart:io';
3
4
 
@@ -8,6 +9,20 @@ import 'package:package_info_plus/package_info_plus.dart';
8
9
  import 'desktop_native_bridge.dart';
9
10
  import 'desktop_screen_capture.dart';
10
11
 
12
+ // ─── Isolate helpers for JPEG compression ────────────────────────────────────
13
+ // `compressToJpeg` offloads the CPU-intensive pure-Dart PNG→JPEG conversion
14
+ // to a background isolate via `compute()` so the main isolate's event loop
15
+ // stays free to process incoming WebSocket commands (click, drag, etc.)
16
+ // immediately, rather than queuing behind a 300–600 ms compression job.
17
+
18
+ typedef _JpegArgs = ({Uint8List bytes, int quality});
19
+
20
/// Re-encodes the image in `args.bytes` as a JPEG at `args.quality`.
///
/// This is the callback handed to `compute()` by `compressToJpeg`. When the
/// `image` package cannot decode the input, the original bytes are returned
/// unchanged instead of failing.
Uint8List _compressJpegInIsolate(_JpegArgs args) {
  final image = img.decodeImage(args.bytes);
  return image == null
      ? args.bytes
      : Uint8List.fromList(img.encodeJpg(image, quality: args.quality));
}
25
+
11
26
  class DesktopCompanionSnapshot {
12
27
  const DesktopCompanionSnapshot({
13
28
  required this.screenshotBase64,
@@ -85,9 +100,15 @@ class DesktopCompanionActions {
85
100
  if (bytes is! Uint8List || bytes.isEmpty) {
86
101
  return null;
87
102
  }
88
- final decoded = img.decodeImage(bytes);
89
- final width = (frame['width'] as num?)?.round() ?? decoded?.width ?? 0;
90
- final height = (frame['height'] as num?)?.round() ?? decoded?.height ?? 0;
103
+ // Prefer dimensions reported by the native bridge; only fall back to a
104
+ // pure-Dart image decode (which is slow) when the bridge omits them.
105
+ final nativeWidth = (frame['width'] as num?)?.round();
106
+ final nativeHeight = (frame['height'] as num?)?.round();
107
+ final decoded = (nativeWidth == null || nativeHeight == null)
108
+ ? img.decodeImage(bytes)
109
+ : null;
110
+ final width = nativeWidth ?? decoded?.width ?? 0;
111
+ final height = nativeHeight ?? decoded?.height ?? 0;
91
112
  final displays = _normalizeDisplays(
92
113
  frame['displays'],
93
114
  fallbackDisplayId:
@@ -176,6 +197,21 @@ class DesktopCompanionActions {
176
197
  };
177
198
  }
178
199
 
200
/// Returns the screenshot carried by [snapshot] as JPEG bytes.
///
/// A payload that already looks like a JPEG is passed through untouched, so
/// no heavy work happens on the calling isolate. Anything else is decoded and
/// re-encoded through `compute()`, with [quality] clamped to 30–95.
Future<Uint8List> compressToJpeg(
  DesktopCompanionSnapshot snapshot,
  int quality,
) async {
  final bytes = _decodeScreenshotBytes(snapshot.screenshotBase64);
  if (!_looksLikeJpeg(bytes)) {
    // Pure-Dart decode + encode is slow; keep it off this isolate so incoming
    // commands are not queued behind the compression job.
    final _JpegArgs job = (bytes: bytes, quality: quality.clamp(30, 95));
    return compute(_compressJpegInIsolate, job);
  }
  return bytes;
}
214
+
179
215
  Future<Map<String, Object?>> observe({
180
216
  bool includeTree = false,
181
217
  String? activeDisplayId,
@@ -222,6 +258,32 @@ class DesktopCompanionActions {
222
258
  return <String, Object?>{'success': true, 'x': x, 'y': y, 'button': button};
223
259
  }
224
260
 
261
/// Moves the pointer to ([x], [y]) and reports the coordinates back.
///
/// The native desktop bridge is used when available; otherwise Linux falls
/// back to `xdotool mousemove`. Any other platform throws. [displayId] is
/// forwarded to the native bridge only.
Future<Map<String, Object?>> mouseMove({
  required int x,
  required int y,
  String? displayId,
}) async {
  await _assertInputSupported('mouseMove');

  if (_usesNativeDesktopBridge) {
    await _nativeBridge.mouseMove(x: x, y: y, displayId: displayId);
    return <String, Object?>{'success': true, 'x': x, 'y': y};
  }

  if (defaultTargetPlatform != TargetPlatform.linux) {
    throw Exception('mouseMove is not supported on this platform.');
  }

  final xdotoolArgs = <String>['mousemove', '$x', '$y'];
  await _run(_ShellCommand('xdotool', xdotoolArgs));
  return <String, Object?>{'success': true, 'x': x, 'y': y};
}
286
+
225
287
  Future<Map<String, Object?>> drag({
226
288
  required int x1,
227
289
  required int y1,
@@ -396,6 +458,77 @@ class DesktopCompanionActions {
396
458
  };
397
459
  }
398
460
 
461
/// Runs [command] through the user's shell and returns its captured output.
///
/// The command is launched with `cmd.exe /c` on Windows and with
/// `$SHELL -lc` (falling back to `/bin/sh`) elsewhere. [cwd] defaults to the
/// user's home directory when null or blank. [timeoutMs] defaults to 15
/// minutes when null or non-positive. A non-empty [stdinInput] is written to
/// the process's stdin, which is then closed.
///
/// The returned map contains `exitCode` (null when the command timed out),
/// `stdout` / `stderr` (trimmed, each truncated to 50 000 characters),
/// `timedOut`, `killed`, `durationMs`, `command`, `cwd` and `backend`.
///
/// Propagates whatever [Process.start] throws when the shell cannot be
/// launched (for example a non-existent working directory).
///
/// SECURITY: [command] is handed to a shell verbatim. It must only ever come
/// from a trusted, authorised source.
Future<Map<String, Object?>> executeShellCommand({
  required String command,
  String? cwd,
  int? timeoutMs,
  String? stdinInput,
}) async {
  const maxChars = 50000;
  const defaultTimeout = Duration(minutes: 15);
  // Grace period between SIGTERM and SIGKILL once the timeout has fired.
  const killGrace = Duration(seconds: 2);
  // Upper bound on waiting for stdout/stderr to reach EOF after the process
  // is gone; a backgrounded child can keep the pipes open indefinitely.
  const drainGrace = Duration(seconds: 2);

  final env = Platform.environment;
  final shell = Platform.isWindows ? 'cmd.exe' : (env['SHELL'] ?? '/bin/sh');
  final args = <String>[Platform.isWindows ? '/c' : '-lc', command];
  // HOME is normally unset on Windows; USERPROFILE is the equivalent there.
  final homeDir =
      env['HOME'] ?? (Platform.isWindows ? env['USERPROFILE'] : null);
  final requestedCwd = cwd?.trim() ?? '';
  final workingDir = requestedCwd.isNotEmpty ? requestedCwd : homeDir;
  final startedAt = DateTime.now();

  final process = await Process.start(
    shell,
    args,
    workingDirectory: workingDir,
    runInShell: false,
  );

  // Start draining output BEFORE feeding stdin. Otherwise a command that
  // fills its stdout pipe while we are still writing stdin blocks forever.
  // Malformed UTF-8 is replaced rather than thrown, so arbitrary binary
  // output cannot surface as an uncaught stream error.
  const decoder = Utf8Decoder(allowMalformed: true);
  final stdoutBuf = StringBuffer();
  final stderrBuf = StringBuffer();
  final stdoutSub = process.stdout.transform(decoder).listen(stdoutBuf.write);
  final stderrSub = process.stderr.transform(decoder).listen(stderrBuf.write);
  final stdoutDone = stdoutSub.asFuture<void>().catchError((Object _) {});
  final stderrDone = stderrSub.asFuture<void>().catchError((Object _) {});

  if (stdinInput != null && stdinInput.isNotEmpty) {
    process.stdin.write(stdinInput);
  }
  // Not awaited so the timeout below also covers a process that never reads
  // its stdin. A broken pipe (process exited early) is expected and ignored.
  unawaited(process.stdin.close().catchError((Object _) {}));

  final effectiveTimeout = (timeoutMs != null && timeoutMs > 0)
      ? Duration(milliseconds: timeoutMs)
      : defaultTimeout;

  var timedOut = false;
  int? exitCode;
  try {
    exitCode = await process.exitCode.timeout(effectiveTimeout);
  } on TimeoutException {
    timedOut = true;
    process.kill(ProcessSignal.sigterm);
    try {
      // Give the process a moment to exit cleanly, then force it.
      await process.exitCode.timeout(killGrace);
    } on TimeoutException {
      process.kill(ProcessSignal.sigkill);
    }
  }

  // The exit code can arrive before the pipes are fully read; wait (bounded)
  // for EOF so trailing output is not dropped.
  try {
    await Future.wait(<Future<void>>[stdoutDone, stderrDone])
        .timeout(drainGrace);
  } on TimeoutException {
    // Pipes are still held open by something else; return what we have.
  } finally {
    await stdoutSub.cancel();
    await stderrSub.cancel();
  }

  String trimOutput(StringBuffer buf) {
    final s = buf.toString().trim();
    return s.length > maxChars
        ? '${s.substring(0, maxChars)}\n...[truncated, ${s.length} total chars]'
        : s;
  }

  return <String, Object?>{
    'exitCode': exitCode,
    'stdout': trimOutput(stdoutBuf),
    'stderr': trimOutput(stderrBuf),
    'timedOut': timedOut,
    'killed': timedOut,
    'durationMs': DateTime.now().difference(startedAt).inMilliseconds,
    'command': command,
    'cwd': workingDir,
    'backend': 'desktop-companion',
  };
}
531
+
399
532
  Future<Map<String, Object?>> _capabilities({
400
533
  Map<String, Object?>? platformStatus,
401
534
  }) async {
@@ -516,6 +649,23 @@ class DesktopCompanionActions {
516
649
  }
517
650
  }
518
651
 
652
/// Decodes [screenshotBase64] into raw image bytes.
///
/// Accepts either bare base64 or a `data:image/...,<payload>` URI; for the
/// latter, everything up to and including the first comma is discarded.
/// Surrounding whitespace is ignored. Throws a [FormatException] when the
/// payload is not valid base64.
Uint8List _decodeScreenshotBytes(String screenshotBase64) {
  final trimmed = screenshotBase64.trim();
  final commaIndex = trimmed.indexOf(',');
  final isDataUri = trimmed.startsWith('data:image/') && commaIndex >= 0;
  final encoded = isDataUri ? trimmed.substring(commaIndex + 1) : trimmed;
  // base64Decode already returns a Uint8List; wrapping it in
  // Uint8List.fromList would only copy every frame a second time.
  return base64Decode(encoded);
}
660
+
661
/// Whether [bytes] begin with the JPEG start-of-image marker (FF D8) and end
/// with the end-of-image marker (FF D9).
///
/// Buffers shorter than four bytes are never treated as JPEG.
bool _looksLikeJpeg(Uint8List bytes) {
  final n = bytes.length;
  if (n < 4) return false;
  final startsWithSoi = bytes[0] == 0xff && bytes[1] == 0xd8;
  final endsWithEoi = bytes[n - 2] == 0xff && bytes[n - 1] == 0xd9;
  return startsWithSoi && endsWithEoi;
}
668
+
519
669
  String _normalizeMouseButton(String button) {
520
670
  final value = button.trim().toLowerCase();
521
671
  if (value == 'left' || value == 'right' || value == 'middle') {
@@ -25,6 +25,18 @@ class DesktopCompanionManager extends ChangeNotifier {
25
25
  final DesktopCompanionActions _actions;
26
26
  WebSocket? _socket;
27
27
  Timer? _reconnectTimer;
28
+ Timer? _streamTimer;
29
+ bool _streamCaptureInFlight = false;
30
+ // Set true while a click / mouseMove / drag / scroll / typeText / pressKey command is
31
+ // being executed. _captureAndSendBinaryFrame respects this flag so it does
32
+ // not compete with the input command for the native bridge or the WebSocket
33
+ // send buffer, and a fresh frame is forced immediately after the action.
34
+ bool _inputCommandInFlight = false;
35
+ int _frameSeq = 0;
36
+ int _streamGeneration = 0;
37
+ // Tracks the current stream quality so the forced post-input capture can use
38
+ // the same setting without re-parsing the original startStream payload.
39
+ int _currentStreamQuality = 80;
28
40
 
29
41
  String _backendUrl = '';
30
42
  String _sessionCookie = '';
@@ -52,7 +64,8 @@ class DesktopCompanionManager extends ChangeNotifier {
52
64
 
53
65
  Future<void> bootstrap(SharedPreferences prefs) async {
54
66
  _enabled = prefs.getBool(desktopCompanionEnabledPrefsKey) ?? false;
55
- _paused = prefs.getBool(desktopCompanionPausedPrefsKey) ?? false;
67
+ // Always start unpaused — paused state must not carry over across restarts.
68
+ _paused = false;
56
69
  _label =
57
70
  prefs.getString(desktopCompanionLabelPrefsKey)?.trim() ??
58
71
  _defaultLabel();
@@ -116,7 +129,6 @@ class DesktopCompanionManager extends ChangeNotifier {
116
129
 
117
130
  Future<void> setPaused(bool value, SharedPreferences prefs) async {
118
131
  _paused = value;
119
- await prefs.setBool(desktopCompanionPausedPrefsKey, value);
120
132
  notifyListeners();
121
133
  if (_connected) {
122
134
  await _sendEvent('statusChanged', <String, Object?>{'paused': value});
@@ -126,6 +138,7 @@ class DesktopCompanionManager extends ChangeNotifier {
126
138
  Future<void> disconnect() async {
127
139
  _reconnectTimer?.cancel();
128
140
  _reconnectTimer = null;
141
+ _stopStreaming();
129
142
  _connecting = false;
130
143
  _connected = false;
131
144
  final socket = _socket;
@@ -281,6 +294,19 @@ class DesktopCompanionManager extends ChangeNotifier {
281
294
  }
282
295
  }
283
296
 
297
+ // Commands that interact with the remote machine's input system. While one
298
+ // of these is executing we pause frame captures so the WebSocket send buffer
299
+ // is clear for the result message, and to avoid the native bridge being busy
300
+ // with a screenshot when the click/drag/etc. needs to run.
301
+ static const _inputCommands = <String>{
302
+ 'click',
303
+ 'mouseMove',
304
+ 'drag',
305
+ 'scroll',
306
+ 'typeText',
307
+ 'pressKey',
308
+ };
309
+
284
310
  Future<void> _handleCommand(Map<String, Object?> message) async {
285
311
  final id = message['id']?.toString() ?? '';
286
312
  final command = message['command']?.toString() ?? '';
@@ -289,6 +315,10 @@ class DesktopCompanionManager extends ChangeNotifier {
289
315
  (key, value) => MapEntry(key.toString(), value),
290
316
  )
291
317
  : const <String, Object?>{};
318
+
319
+ final isInput = _inputCommands.contains(command);
320
+ if (isInput) _inputCommandInFlight = true;
321
+
292
322
  try {
293
323
  final response = await _dispatchCommand(command, payload);
294
324
  _socket?.add(
@@ -299,6 +329,18 @@ class DesktopCompanionManager extends ChangeNotifier {
299
329
  'payload': response,
300
330
  }),
301
331
  );
332
+ // Immediately capture a fresh frame after an input action so the user
333
+ // sees the result of their interaction without waiting for the next
334
+ // timer tick.
335
+ if (isInput && _streamTimer != null && _connected) {
336
+ unawaited(
337
+ _captureAndSendBinaryFrame(
338
+ _currentStreamQuality,
339
+ _streamGeneration,
340
+ forced: true,
341
+ ),
342
+ );
343
+ }
302
344
  } catch (error) {
303
345
  _socket?.add(
304
346
  jsonEncode(<String, Object?>{
@@ -308,6 +350,8 @@ class DesktopCompanionManager extends ChangeNotifier {
308
350
  'error': '$error',
309
351
  }),
310
352
  );
353
+ } finally {
354
+ if (isInput) _inputCommandInFlight = false;
311
355
  }
312
356
  }
313
357
 
@@ -327,6 +371,10 @@ class DesktopCompanionManager extends ChangeNotifier {
327
371
  );
328
372
  case 'captureFrame':
329
373
  return _actions.captureFrame(activeDisplayId: _activeDisplayId);
374
+ case 'startStream':
375
+ return _startStreaming(payload);
376
+ case 'stopStream':
377
+ return _stopStreaming();
330
378
  case 'observe':
331
379
  return _actions.observe(
332
380
  includeTree: payload['includeTree'] == true,
@@ -339,6 +387,12 @@ class DesktopCompanionManager extends ChangeNotifier {
339
387
  button: payload['button']?.toString() ?? 'left',
340
388
  displayId: _activeDisplayId,
341
389
  );
390
+ case 'mouseMove':
391
+ return _actions.mouseMove(
392
+ x: (payload['x'] as num?)?.round() ?? 0,
393
+ y: (payload['y'] as num?)?.round() ?? 0,
394
+ displayId: _activeDisplayId,
395
+ );
342
396
  case 'drag':
343
397
  return _actions.drag(
344
398
  x1: (payload['x1'] as num?)?.round() ?? 0,
@@ -387,10 +441,15 @@ class DesktopCompanionManager extends ChangeNotifier {
387
441
  case 'pauseControl':
388
442
  final paused = payload['paused'] != false;
389
443
  _paused = paused;
390
- final prefs = await SharedPreferences.getInstance();
391
- await prefs.setBool(desktopCompanionPausedPrefsKey, paused);
392
444
  notifyListeners();
393
445
  return <String, Object?>{'success': true, 'paused': _paused};
446
+ case 'executeCommand':
447
+ return _actions.executeShellCommand(
448
+ command: payload['command']?.toString() ?? '',
449
+ cwd: payload['cwd']?.toString(),
450
+ timeoutMs: (payload['timeout'] as num?)?.toInt(),
451
+ stdinInput: payload['stdin_input']?.toString(),
452
+ );
394
453
  case 'ping':
395
454
  return <String, Object?>{'pong': true};
396
455
  default:
@@ -399,6 +458,7 @@ class DesktopCompanionManager extends ChangeNotifier {
399
458
  }
400
459
 
401
460
  void _handleSocketClosed() {
461
+ _stopStreaming();
402
462
  _socket = null;
403
463
  _connecting = false;
404
464
  _connected = false;
@@ -410,6 +470,7 @@ class DesktopCompanionManager extends ChangeNotifier {
410
470
  void dispose() {
411
471
  _reconnectTimer?.cancel();
412
472
  _reconnectTimer = null;
473
+ _stopStreaming();
413
474
  _connecting = false;
414
475
  _connected = false;
415
476
  _enabled = false;
@@ -443,6 +504,86 @@ class DesktopCompanionManager extends ChangeNotifier {
443
504
  );
444
505
  }
445
506
 
507
/// Starts (or restarts) the periodic binary frame stream.
///
/// Reads `fps` (default 15, clamped to 1–20), `quality` (default 80, clamped
/// to 30–95) and an optional `displayId` from [payload]. Any previous timer
/// is cancelled and the stream generation is bumped so captures scheduled by
/// an earlier stream are ignored. The frame sequence restarts at zero and one
/// capture is kicked off immediately instead of waiting for the first tick.
///
/// Returns the effective `fps`, `quality` and active `displayId`.
Future<Map<String, Object?>> _startStreaming(
  Map<String, Object?> payload,
) async {
  _streamTimer?.cancel();
  final generation = ++_streamGeneration;

  final requestedFps = (payload['fps'] as num?)?.round() ?? 15;
  final requestedQuality = (payload['quality'] as num?)?.round() ?? 80;
  final fps = requestedFps.clamp(1, 20);
  final quality = requestedQuality.clamp(30, 95);

  // A blank or missing displayId keeps whatever display is already active.
  final requestedDisplay = payload['displayId']?.toString().trim() ?? '';
  if (requestedDisplay.isNotEmpty) {
    _activeDisplayId = requestedDisplay;
  }

  _frameSeq = 0;
  _currentStreamQuality = quality;

  void captureOnce() =>
      unawaited(_captureAndSendBinaryFrame(quality, generation));

  final tick = Duration(milliseconds: max(1, 1000 ~/ fps));
  _streamTimer = Timer.periodic(tick, (_) => captureOnce());
  captureOnce();

  return <String, Object?>{
    'success': true,
    'fps': fps,
    'quality': quality,
    'displayId': _activeDisplayId,
  };
}
532
+
533
/// Stops the frame stream.
///
/// Cancels and clears the timer, advances the stream generation so a capture
/// that is still running discards its frame, and clears the in-flight flag.
/// Always reports success.
Map<String, Object?> _stopStreaming() {
  final timer = _streamTimer;
  _streamTimer = null;
  timer?.cancel();
  _streamGeneration += 1;
  _streamCaptureInFlight = false;
  return <String, Object?>{'success': true};
}
540
+
541
/// Captures one screenshot, converts it to JPEG and sends it as a binary
/// WebSocket frame.
///
/// The call is a no-op when there is no connected socket, another capture is
/// already running, or [generation] no longer matches the current stream.
/// While an input command is executing the frame is skipped unless [forced]
/// is true (the post-input refresh).
///
/// Wire format: a 10-byte header followed by the JPEG bytes.
///   byte 0      0x01 (presumably a message-type tag; confirm against server)
///   bytes 1–4   frame sequence number, big-endian
///   bytes 5–8   low 32 bits of the epoch timestamp in ms, big-endian
///   byte 9      0x01 (presumably a codec/format tag; confirm against server)
///
/// Failures are recorded in `_errorMessage` and listeners are notified; the
/// error is not rethrown.
Future<void> _captureAndSendBinaryFrame(
  int quality,
  int generation, {
  bool forced = false,
}) async {
  final socket = _socket;
  if (socket == null || !_connected) return;
  if (_streamCaptureInFlight || generation != _streamGeneration) return;
  if (_inputCommandInFlight && !forced) return;

  _streamCaptureInFlight = true;
  try {
    final snapshot = await _actions.captureSnapshot(
      activeDisplayId: _activeDisplayId,
    );
    if (snapshot == null) return;

    final jpeg = await _actions.compressToJpeg(snapshot, quality);
    if (jpeg.isEmpty) return;

    // The awaits above may have outlived the stream or the connection.
    final stillCurrent =
        _connected && generation == _streamGeneration && _socket == socket;
    if (!stillCurrent) return;

    const headerLength = 10;
    final frame = Uint8List(headerLength + jpeg.length);
    ByteData.sublistView(frame, 0, headerLength)
      ..setUint8(0, 0x01)
      ..setUint32(1, _frameSeq++ & 0xffffffff, Endian.big)
      ..setUint32(
        5,
        DateTime.now().millisecondsSinceEpoch & 0xffffffff,
        Endian.big,
      )
      ..setUint8(9, 0x01);
    frame.setRange(headerLength, frame.length, jpeg);
    socket.add(frame);
  } catch (error) {
    _errorMessage = 'Desktop stream capture failed: $error';
    notifyListeners();
  } finally {
    _streamCaptureInFlight = false;
  }
}
586
+
446
587
  Future<void> _openMacPermissionSettings(String key) async {
447
588
  final uri = switch (key) {
448
589
  'screencapture' =>
@@ -44,6 +44,19 @@ class DesktopNativeBridge {
44
44
  });
45
45
  }
46
46
 
47
/// Invokes the platform channel's `mouseMove` method with [x] and [y].
///
/// A `displayId` argument is included only when [displayId] is non-null and
/// not blank; it is sent trimmed.
Future<void> mouseMove({
  required int x,
  required int y,
  String? displayId,
}) {
  final display = displayId?.trim() ?? '';
  final arguments = <String, Object?>{
    'x': x,
    'y': y,
    if (display.isNotEmpty) 'displayId': display,
  };
  return _channel.invokeMethod<void>('mouseMove', arguments);
}
59
+
47
60
  Future<void> drag({
48
61
  required int x1,
49
62
  required int y1,