@leg3ndy/otto-bridge 0.4.2 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,6 +4,7 @@ import os from "node:os";
4
4
  import path from "node:path";
5
5
  import process from "node:process";
6
6
  import { JobCancelledError } from "./shared.js";
7
+ import { postDeviceJson, uploadDeviceJobArtifact } from "../http.js";
7
8
  const KNOWN_APPS = [
8
9
  { canonical: "Safari", patterns: [/\bsafari\b/i] },
9
10
  { canonical: "Google Chrome", patterns: [/\bgoogle chrome\b/i, /\bchrome\b/i] },
@@ -87,6 +88,22 @@ function humanizeUrl(url) {
87
88
  return normalized;
88
89
  }
89
90
  }
91
/**
 * Maps a file path to a MIME type based solely on its extension.
 *
 * The extension is compared case-insensitively. Any extension without an
 * explicit entry falls back to the generic binary type.
 *
 * @param {string} filePath - Path or bare file name to inspect.
 * @returns {string} MIME type for the extension, or "application/octet-stream".
 */
function mimeTypeFromPath(filePath) {
    const extension = path.extname(filePath).toLowerCase();
    switch (extension) {
        case ".png":
            return "image/png";
        case ".jpg":
        case ".jpeg":
            return "image/jpeg";
        case ".webp":
            return "image/webp";
        case ".gif":
            return "image/gif";
        case ".txt":
        case ".md":
            return "text/plain";
        case ".json":
            return "application/json";
        default:
            return "application/octet-stream";
    }
}
90
107
  function expandUserPath(value) {
91
108
  const trimmed = value.trim();
92
109
  if (!trimmed) {
@@ -109,6 +126,9 @@ function clipText(value, maxLength) {
109
126
  }
110
127
  return `${value.slice(0, maxLength)}...`;
111
128
  }
129
/**
 * Returns a promise that settles after roughly `ms` milliseconds.
 *
 * @param {number} ms - Time to wait, handed straight to `setTimeout`.
 * @returns {Promise<void>} Resolves with no value once the timer fires.
 */
function delay(ms) {
    return new Promise((wake) => {
        setTimeout(wake, ms);
    });
}
112
132
  function escapeHtml(value) {
113
133
  return value
114
134
  .replace(/&/g, "&")
@@ -307,6 +327,13 @@ function parseStructuredActions(job) {
307
327
  actions.push({ type: "take_screenshot", path: savePath || undefined });
308
328
  continue;
309
329
  }
330
+ if (type === "read_frontmost_page" || type === "read_page" || type === "read_webpage") {
331
+ actions.push({
332
+ type: "read_frontmost_page",
333
+ app: asString(action.app) || asString(action.application) || "Safari",
334
+ });
335
+ continue;
336
+ }
310
337
  if (type === "read_file" || type === "read_local_file") {
311
338
  const filePath = asString(action.path);
312
339
  if (filePath) {
@@ -327,6 +354,25 @@ function parseStructuredActions(job) {
327
354
  if (command) {
328
355
  actions.push({ type: "run_shell", command, cwd: cwd || undefined });
329
356
  }
357
+ continue;
358
+ }
359
+ if (type === "set_volume" || type === "volume") {
360
+ const rawLevel = Number(action.level);
361
+ if (Number.isFinite(rawLevel)) {
362
+ actions.push({ type: "set_volume", level: Math.max(0, Math.min(Math.round(rawLevel), 100)) });
363
+ }
364
+ continue;
365
+ }
366
+ if (type === "click_visual_target" || type === "click_target") {
367
+ const description = asString(action.description) || asString(action.target);
368
+ if (description) {
369
+ actions.push({
370
+ type: "click_visual_target",
371
+ description,
372
+ app: asString(action.app) || undefined,
373
+ });
374
+ }
375
+ continue;
330
376
  }
331
377
  }
332
378
  return actions;
@@ -361,6 +407,30 @@ function deriveActionsFromText(job) {
361
407
  const task = extractTaskText(job);
362
408
  const detectedApp = detectKnownApp(task);
363
409
  const detectedUrl = detectUrl(task);
410
+ const normalizedTask = normalizeText(task);
411
+ if (/\b(volume|som|audio)\b/i.test(task)) {
412
+ const percentMatch = task.match(/(\d{1,3})\s*%/);
413
+ let level = 50;
414
+ if (percentMatch?.[1]) {
415
+ level = Math.max(0, Math.min(Number(percentMatch[1]), 100));
416
+ }
417
+ else if (/\b(mudo|mute|silencia)\b/i.test(task)) {
418
+ level = 0;
419
+ }
420
+ else if (/\b(aumenta|aumente|mais alto)\b/i.test(task)) {
421
+ level = 80;
422
+ }
423
+ else if (/\b(diminui|abaixa|mais baixo)\b/i.test(task)) {
424
+ level = 25;
425
+ }
426
+ return [{ type: "set_volume", level }];
427
+ }
428
+ if ((normalizedTask.includes("leia") || normalizedTask.includes("ler")) && detectedUrl) {
429
+ return [
430
+ { type: "open_url", url: detectedUrl, app: detectedApp || "Safari" },
431
+ { type: "read_frontmost_page", app: detectedApp || "Safari" },
432
+ ];
433
+ }
364
434
  if (detectedUrl) {
365
435
  return [{
366
436
  type: "open_url",
@@ -384,8 +454,12 @@ function extractActions(job) {
384
454
  return deriveActionsFromText(job);
385
455
  }
386
456
  export class NativeMacOSJobExecutor {
457
+ bridgeConfig;
387
458
  cancelledJobs = new Set();
388
459
  activeChild = null;
460
+ constructor(bridgeConfig) {
461
+ this.bridgeConfig = bridgeConfig;
462
+ }
389
463
  async run(job, reporter) {
390
464
  if (process.platform !== "darwin") {
391
465
  throw new Error("The native-macos executor only runs on macOS");
@@ -407,6 +481,13 @@ export class NativeMacOSJobExecutor {
407
481
  }
408
482
  try {
409
483
  const completionNotes = [];
484
+ const artifacts = [];
485
+ const resultPayload = {
486
+ executor: "native-macos",
487
+ actions,
488
+ artifacts,
489
+ action_summaries: completionNotes,
490
+ };
410
491
  for (let index = 0; index < actions.length; index += 1) {
411
492
  this.assertNotCancelled(job.job_id);
412
493
  const action = actions[index];
@@ -440,7 +521,24 @@ export class NativeMacOSJobExecutor {
440
521
  if (action.type === "take_screenshot") {
441
522
  await reporter.progress(progressPercent, "Capturando screenshot do Mac");
442
523
  const screenshotPath = await this.takeScreenshot(action.path);
443
- completionNotes.push(`Screenshot salvo em ${screenshotPath}`);
524
+ const screenshotArtifact = await this.uploadArtifactForJob(job.job_id, screenshotPath, {
525
+ kind: "screenshot",
526
+ });
527
+ if (screenshotArtifact) {
528
+ artifacts.push(screenshotArtifact);
529
+ completionNotes.push("Capturei a tela do Mac e anexei a imagem aqui no chat.");
530
+ }
531
+ else {
532
+ completionNotes.push(`Screenshot salvo em ${screenshotPath}`);
533
+ }
534
+ resultPayload.screenshot_path = screenshotPath;
535
+ continue;
536
+ }
537
+ if (action.type === "read_frontmost_page") {
538
+ await reporter.progress(progressPercent, `Lendo a pagina ativa em ${action.app || "Safari"}`);
539
+ const page = await this.readFrontmostPage(action.app || "Safari");
540
+ resultPayload.page = page;
541
+ completionNotes.push(`Li a pagina ${page.title || page.url || "ativa"} no navegador.`);
444
542
  continue;
445
543
  }
446
544
  if (action.type === "read_file") {
@@ -461,19 +559,51 @@ export class NativeMacOSJobExecutor {
461
559
  completionNotes.push(`Saida de \`${action.command}\`:\n${shellOutput}`);
462
560
  continue;
463
561
  }
562
+ if (action.type === "set_volume") {
563
+ await reporter.progress(progressPercent, `Ajustando volume para ${action.level}%`);
564
+ await this.setVolume(action.level);
565
+ completionNotes.push(`Volume ajustado para ${action.level}% no macOS.`);
566
+ continue;
567
+ }
568
+ if (action.type === "click_visual_target") {
569
+ if (action.app) {
570
+ await reporter.progress(progressPercent, `Trazendo ${action.app} para frente antes do clique`);
571
+ await this.focusApp(action.app);
572
+ }
573
+ await reporter.progress(progressPercent, `Capturando a tela para localizar ${action.description}`);
574
+ const screenshotPath = await this.takeScreenshot();
575
+ const artifact = await this.uploadArtifactForJob(job.job_id, screenshotPath, {
576
+ kind: "screenshot",
577
+ metadata: { purpose: "visual_click", target: action.description },
578
+ });
579
+ if (!artifact?.storage_path) {
580
+ throw new Error("Otto Bridge nao conseguiu enviar a screenshot necessaria para localizar o alvo visual.");
581
+ }
582
+ artifacts.push(artifact);
583
+ const artifactMetadata = artifact.metadata || {};
584
+ const width = Number(artifactMetadata.width || 0);
585
+ const height = Number(artifactMetadata.height || 0);
586
+ const location = await this.locateVisualTarget(job.job_id, artifact.storage_path, action.description, width, height, artifact.mime_type);
587
+ if (!location?.found || typeof location.x !== "number" || typeof location.y !== "number") {
588
+ throw new Error(`Nao consegui localizar ${action.description} com confianca suficiente na tela.`);
589
+ }
590
+ await reporter.progress(progressPercent, `Clicando em ${action.description}`);
591
+ await this.clickPoint(location.x, location.y);
592
+ completionNotes.push(`Localizei e cliquei em ${action.description}.`);
593
+ resultPayload.last_click = location;
594
+ continue;
595
+ }
464
596
  await reporter.progress(progressPercent, `Abrindo ${action.url}${action.app ? ` em ${action.app}` : ""}`);
465
597
  await this.openUrl(action.url, action.app);
598
+ await delay(1200);
466
599
  }
467
600
  const summary = completionNotes.length > 0
468
601
  ? completionNotes.join("\n\n")
469
602
  : (actions.length === 1
470
603
  ? this.describeAction(actions[0])
471
604
  : `${actions.length} ações executadas no macOS`);
472
- await reporter.completed({
473
- executor: "native-macos",
474
- summary,
475
- actions,
476
- });
605
+ resultPayload.summary = summary;
606
+ await reporter.completed(resultPayload);
477
607
  }
478
608
  finally {
479
609
  this.cancelledJobs.delete(job.job_id);
@@ -578,6 +708,108 @@ end tell
578
708
  await this.runCommand("screencapture", ["-x", screenshotPath]);
579
709
  return screenshotPath;
580
710
  }
711
+ async uploadArtifactForJob(jobId, localPath, options) {
712
+ if (!this.bridgeConfig?.apiBaseUrl || !this.bridgeConfig?.deviceToken) {
713
+ return null;
714
+ }
715
+ const bytes = await readFile(localPath);
716
+ const fileName = path.basename(localPath);
717
+ const mimeType = mimeTypeFromPath(fileName);
718
+ const dimensions = mimeType.startsWith("image/") ? await this.getImageDimensions(localPath) : null;
719
+ const metadata = {
720
+ ...(options?.metadata || {}),
721
+ ...(dimensions || {}),
722
+ };
723
+ const response = await uploadDeviceJobArtifact(this.bridgeConfig.apiBaseUrl, this.bridgeConfig.deviceToken, jobId, {
724
+ filename: fileName,
725
+ contentType: mimeType,
726
+ bytes,
727
+ kind: options?.kind || "file",
728
+ metadata,
729
+ });
730
+ return response.artifact || null;
731
+ }
732
+ async readFrontmostPage(app) {
733
+ const targetApp = app || "Safari";
734
+ if (targetApp !== "Safari") {
735
+ throw new Error("Leitura de pagina frontmost esta disponivel apenas para Safari no momento.");
736
+ }
737
+ const script = `
738
+ tell application "Safari"
739
+ activate
740
+ if (count of windows) = 0 then error "Safari nao possui janelas abertas."
741
+ delay 1
742
+ set pageJson to do JavaScript "(function(){const title=document.title||''; const url=location.href||''; const text=((document.body&&document.body.innerText)||'').trim().slice(0, 12000); return JSON.stringify({title:title,url:url,text:text});})();" in current tab of front window
743
+ end tell
744
+ return pageJson
745
+ `;
746
+ const { stdout } = await this.runCommandCapture("osascript", ["-e", script]);
747
+ const parsed = JSON.parse(stdout.trim() || "{}");
748
+ return {
749
+ title: asString(parsed.title) || "",
750
+ url: asString(parsed.url) || "",
751
+ text: asString(parsed.text) || "",
752
+ };
753
+ }
754
+ async setVolume(level) {
755
+ const bounded = Math.max(0, Math.min(Math.round(level), 100));
756
+ await this.runCommand("osascript", ["-e", `set volume output volume ${bounded}`]);
757
+ }
758
+ async locateVisualTarget(jobId, storagePath, target, width, height, mimeType) {
759
+ if (!this.bridgeConfig?.apiBaseUrl || !this.bridgeConfig?.deviceToken) {
760
+ throw new Error("Otto Bridge nao possui configuracao para usar visao no backend.");
761
+ }
762
+ const response = await postDeviceJson(this.bridgeConfig.apiBaseUrl, this.bridgeConfig.deviceToken, `/v1/devices/jobs/${encodeURIComponent(jobId)}/vision/locate`, {
763
+ storage_path: storagePath,
764
+ target,
765
+ image_width: Math.max(1, width),
766
+ image_height: Math.max(1, height),
767
+ mime_type: mimeType || "image/png",
768
+ });
769
+ return response.location || {};
770
+ }
771
+ async clickPoint(x, y) {
772
+ const script = `
773
+ import Cocoa
774
+ import ApplicationServices
775
+
776
+ let x = Double(CommandLine.arguments[1]) ?? 0
777
+ let y = Double(CommandLine.arguments[2]) ?? 0
778
+ let point = CGPoint(x: x, y: y)
779
+
780
+ func post(_ type: CGEventType) {
781
+ guard let event = CGEvent(mouseEventSource: nil, mouseType: type, mouseCursorPosition: point, mouseButton: .left) else {
782
+ fputs("failed to create mouse event\\n", stderr)
783
+ exit(1)
784
+ }
785
+ event.post(tap: .cghidEventTap)
786
+ }
787
+
788
+ post(.mouseMoved)
789
+ usleep(120000)
790
+ post(.leftMouseDown)
791
+ usleep(80000)
792
+ post(.leftMouseUp)
793
+ `;
794
+ await this.runCommand("swift", ["-e", script, String(Math.round(x)), String(Math.round(y))]);
795
+ }
796
+ async getImageDimensions(filePath) {
797
+ try {
798
+ const { stdout } = await this.runCommandCapture("sips", ["-g", "pixelWidth", "-g", "pixelHeight", filePath]);
799
+ const widthMatch = stdout.match(/pixelWidth:\s*(\d+)/i);
800
+ const heightMatch = stdout.match(/pixelHeight:\s*(\d+)/i);
801
+ if (!widthMatch || !heightMatch) {
802
+ return null;
803
+ }
804
+ return {
805
+ width: Number(widthMatch[1]),
806
+ height: Number(heightMatch[1]),
807
+ };
808
+ }
809
+ catch {
810
+ return null;
811
+ }
812
+ }
581
813
  async readLocalFile(filePath, maxChars = 4000) {
582
814
  const resolved = expandUserPath(filePath);
583
815
  const content = await readFile(resolved, "utf8");
@@ -642,6 +874,9 @@ end tell
642
874
  if (action.type === "take_screenshot") {
643
875
  return "Screenshot capturado no macOS";
644
876
  }
877
+ if (action.type === "read_frontmost_page") {
878
+ return `Pagina ativa lida em ${action.app || "Safari"}`;
879
+ }
645
880
  if (action.type === "read_file") {
646
881
  return `${action.path} foi lido no macOS`;
647
882
  }
@@ -651,6 +886,12 @@ end tell
651
886
  if (action.type === "run_shell") {
652
887
  return `Comando ${action.command} executado no macOS`;
653
888
  }
889
+ if (action.type === "set_volume") {
890
+ return `Volume ajustado para ${action.level}% no macOS`;
891
+ }
892
+ if (action.type === "click_visual_target") {
893
+ return `Clique guiado executado para ${action.description}`;
894
+ }
654
895
  const target = humanizeUrl(action.url);
655
896
  return `${target} foi aberto${action.app ? ` em ${action.app}` : ""}`;
656
897
  }
package/dist/http.js CHANGED
@@ -38,3 +38,32 @@ export async function postJson(apiBaseUrl, pathname, body) {
38
38
  body: JSON.stringify(body),
39
39
  });
40
40
  }
41
/**
 * Builds a `Headers` object from optional base headers and, when a device
 * token is given, adds a bearer `Authorization` header.
 *
 * @param {string} [deviceToken] - Device token; skipped when falsy.
 * @param {HeadersInit} [headers] - Base headers to copy.
 * @returns {Headers} A new `Headers` instance.
 */
function buildDeviceAuthHeaders(deviceToken, headers) {
    const merged = new Headers(headers ? headers : {});
    if (!deviceToken) {
        return merged;
    }
    merged.set("Authorization", `Bearer ${deviceToken}`);
    return merged;
}
48
/**
 * POSTs a JSON body to the API, authenticated with the device token.
 *
 * @param {string} apiBaseUrl - Base URL of the API.
 * @param {string} deviceToken - Device token sent as a bearer credential.
 * @param {string} pathname - Request path.
 * @param {*} body - Value serialized with `JSON.stringify`.
 * @returns {Promise<*>} Whatever `requestJson` resolves to.
 */
export async function postDeviceJson(apiBaseUrl, deviceToken, pathname, body) {
    const headers = buildDeviceAuthHeaders(deviceToken, {
        "Content-Type": "application/json",
    });
    const payload = JSON.stringify(body);
    const result = await requestJson(apiBaseUrl, pathname, {
        method: "POST",
        headers,
        body: payload,
    });
    return result;
}
57
/**
 * Uploads a job artifact as multipart form data, authenticated with the
 * device token.
 *
 * The form carries the file, its `kind` (default "file") and, when non-empty,
 * the metadata serialized as JSON.
 *
 * @param {string} apiBaseUrl - Base URL of the API.
 * @param {string} deviceToken - Device token sent as a bearer credential.
 * @param {string} jobId - Job identifier (URL-encoded into the path).
 * @param {{ bytes: *, filename: string, contentType?: string, kind?: string, metadata?: object }} params
 *   File content and descriptors; `contentType` defaults to
 *   "application/octet-stream".
 * @returns {Promise<*>} Whatever `requestJson` resolves to.
 */
export async function uploadDeviceJobArtifact(apiBaseUrl, deviceToken, jobId, params) {
    const { bytes, contentType, filename, kind, metadata } = params;
    const fileBlob = new Blob([Buffer.from(bytes)], { type: contentType || "application/octet-stream" });
    const form = new FormData();
    form.append("file", fileBlob, filename);
    form.append("kind", String(kind || "file"));
    const hasMetadata = Boolean(metadata) && Object.keys(metadata).length > 0;
    if (hasMetadata) {
        form.append("metadata", JSON.stringify(metadata));
    }
    const endpoint = `/v1/devices/jobs/${encodeURIComponent(jobId)}/artifacts`;
    const response = await requestJson(apiBaseUrl, endpoint, {
        method: "POST",
        headers: buildDeviceAuthHeaders(deviceToken),
        body: form,
    });
    return response;
}
package/dist/runtime.js CHANGED
@@ -280,7 +280,7 @@ export class BridgeRuntime {
280
280
  return new ClawdCursorJobExecutor(config.executor);
281
281
  }
282
282
  if (config.executor.type === "native-macos") {
283
- return new NativeMacOSJobExecutor();
283
+ return new NativeMacOSJobExecutor(config);
284
284
  }
285
285
  return new MockJobExecutor();
286
286
  }
package/dist/types.js CHANGED
@@ -1,5 +1,5 @@
1
1
  export const BRIDGE_CONFIG_VERSION = 1;
2
- export const BRIDGE_VERSION = "0.4.2";
2
+ export const BRIDGE_VERSION = "0.5.0";
3
3
  export const BRIDGE_PACKAGE_NAME = "@leg3ndy/otto-bridge";
4
4
  export const DEFAULT_API_BASE_URL = "http://localhost:8000";
5
5
  export const DEFAULT_POLL_INTERVAL_MS = 3000;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@leg3ndy/otto-bridge",
3
- "version": "0.4.2",
3
+ "version": "0.5.0",
4
4
  "private": false,
5
5
  "type": "module",
6
6
  "description": "Local companion for Otto Bridge device pairing and WebSocket runtime.",