@leg3ndy/otto-bridge 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -11,6 +11,8 @@ Companion local do Otto para:
11
11
 
12
12
  Para um passo a passo de instalacao, pareamento, uso, desconexao e desinstalacao, veja [USER_GUIDE.md](https://github.com/LGCYYL/ottoai/blob/main/otto-bridge/USER_GUIDE.md).
13
13
 
14
+ Para o estado atual da arquitetura, capacidades entregues, limitacoes e roadmap do Otto Bridge, veja [`leg3ndy-ai-backend/docs/OTTO_BRIDGE_ARCHITECTURE.md`](../leg3ndy-ai-backend/docs/OTTO_BRIDGE_ARCHITECTURE.md).
15
+
14
16
  ## Distribuicao
15
17
 
16
18
  Fluxo recomendado agora:
@@ -88,6 +88,39 @@ function humanizeUrl(url) {
88
88
  return normalized;
89
89
  }
90
90
  }
91
+ function uniqueStrings(values) {
92
+ const seen = new Set();
93
+ const result = [];
94
+ for (const value of values) {
95
+ const text = String(value || "").trim();
96
+ if (!text)
97
+ continue;
98
+ const key = normalizeText(text);
99
+ if (seen.has(key))
100
+ continue;
101
+ seen.add(key);
102
+ result.push(text);
103
+ }
104
+ return result;
105
+ }
106
+ function looksLikeAffirmativeVisualVerification(answer) {
107
+ const normalized = normalizeText(answer || "");
108
+ if (!normalized)
109
+ return false;
110
+ if (normalized.startsWith("sim"))
111
+ return true;
112
+ if (normalized.startsWith("nao") || normalized.startsWith("não"))
113
+ return false;
114
+ return (normalized.includes("tocando")
115
+ || normalized.includes("reproduzindo")
116
+ || normalized.includes("em reproducao")
117
+ || normalized.includes("em reprodução")
118
+ || normalized.includes("botao de pausa")
119
+ || normalized.includes("botão de pausa")
120
+ || normalized.includes("faixa ativa")
121
+ || normalized.includes("resultado selecionado")
122
+ || normalized.includes("foi acionado"));
123
+ }
91
124
  function mimeTypeFromPath(filePath) {
92
125
  const ext = path.extname(filePath).toLowerCase();
93
126
  if (ext === ".png")
@@ -175,50 +208,7 @@ function isSafeShellCommand(command) {
175
208
  if (!trimmed) {
176
209
  return false;
177
210
  }
178
- const forbiddenPatterns = [
179
- /(^|[;&|])\s*sudo\b/i,
180
- /\brm\b/i,
181
- /\bmv\b/i,
182
- /\bcp\b/i,
183
- /\bchmod\b/i,
184
- /\bchown\b/i,
185
- /\bshutdown\b/i,
186
- /\breboot\b/i,
187
- /\bmkfs\b/i,
188
- /\bdd\b/i,
189
- /\bkill(?:all)?\b/i,
190
- />/,
191
- />>/,
192
- ];
193
- if (forbiddenPatterns.some((pattern) => pattern.test(trimmed))) {
194
- return false;
195
- }
196
- const normalized = trimmed.replace(/\s+/g, " ");
197
- const allowedPrefixes = [
198
- "pwd",
199
- "ls",
200
- "cat ",
201
- "cat",
202
- "sed ",
203
- "rg ",
204
- "find ",
205
- "git status",
206
- "git log",
207
- "git diff",
208
- "head ",
209
- "tail ",
210
- "wc ",
211
- "stat ",
212
- "file ",
213
- "mdls ",
214
- "whoami",
215
- "date",
216
- "uname",
217
- "python3 --version",
218
- "node -v",
219
- "npm -v",
220
- ];
221
- return allowedPrefixes.some((prefix) => normalized === prefix || normalized.startsWith(`${prefix} `));
211
+ return true;
222
212
  }
223
213
  function extractConfirmationOptions(job, actions) {
224
214
  const payload = asRecord(job.payload);
@@ -370,6 +360,10 @@ function parseStructuredActions(job) {
370
360
  type: "click_visual_target",
371
361
  description,
372
362
  app: asString(action.app) || undefined,
363
+ verification_prompt: asString(action.verification_prompt) || undefined,
364
+ retry_descriptions: Array.isArray(action.retry_descriptions)
365
+ ? action.retry_descriptions.map((item) => asString(item)).filter(Boolean)
366
+ : undefined,
373
367
  });
374
368
  }
375
369
  continue;
@@ -495,11 +489,13 @@ export class NativeMacOSJobExecutor {
495
489
  if (action.type === "open_app") {
496
490
  await reporter.progress(progressPercent, `Abrindo ${action.app} no macOS`);
497
491
  await this.openApp(action.app);
492
+ completionNotes.push(`${action.app} foi aberto no macOS.`);
498
493
  continue;
499
494
  }
500
495
  if (action.type === "focus_app") {
501
496
  await reporter.progress(progressPercent, `Trazendo ${action.app} para frente`);
502
497
  await this.focusApp(action.app);
498
+ completionNotes.push(`${action.app} ficou em foco no macOS.`);
503
499
  continue;
504
500
  }
505
501
  if (action.type === "press_shortcut") {
@@ -521,8 +517,19 @@ export class NativeMacOSJobExecutor {
521
517
  if (action.type === "take_screenshot") {
522
518
  await reporter.progress(progressPercent, "Capturando screenshot do Mac");
523
519
  const screenshotPath = await this.takeScreenshot(action.path);
524
- const screenshotArtifact = await this.uploadArtifactForJob(job.job_id, screenshotPath, {
520
+ const uploadable = await this.buildUploadableImage(screenshotPath);
521
+ const screenshotArtifact = await this.uploadArtifactForJob(job.job_id, uploadable.path, {
525
522
  kind: "screenshot",
523
+ mimeTypeOverride: uploadable.mimeType,
524
+ fileNameOverride: uploadable.filename,
525
+ metadata: {
526
+ visible_in_chat: true,
527
+ width: uploadable.dimensions?.width || undefined,
528
+ height: uploadable.dimensions?.height || undefined,
529
+ original_width: uploadable.originalDimensions?.width || undefined,
530
+ original_height: uploadable.originalDimensions?.height || undefined,
531
+ resized_for_upload: uploadable.resized,
532
+ },
526
533
  });
527
534
  if (screenshotArtifact) {
528
535
  artifacts.push(screenshotArtifact);
@@ -537,6 +544,30 @@ export class NativeMacOSJobExecutor {
537
544
  if (action.type === "read_frontmost_page") {
538
545
  await reporter.progress(progressPercent, `Lendo a pagina ativa em ${action.app || "Safari"}`);
539
546
  const page = await this.readFrontmostPage(action.app || "Safari");
547
+ if (!page.text && this.bridgeConfig?.apiBaseUrl && this.bridgeConfig?.deviceToken) {
548
+ await reporter.progress(progressPercent, "Safari bloqueou leitura direta; vou analisar a pagina pela tela");
549
+ const screenshotPath = await this.takeScreenshot();
550
+ const uploadable = await this.buildUploadableImage(screenshotPath);
551
+ const artifact = await this.uploadArtifactForJob(job.job_id, uploadable.path, {
552
+ kind: "screenshot",
553
+ mimeTypeOverride: uploadable.mimeType,
554
+ fileNameOverride: uploadable.filename,
555
+ metadata: {
556
+ purpose: "page_read_fallback",
557
+ visible_in_chat: false,
558
+ width: uploadable.dimensions?.width || undefined,
559
+ height: uploadable.dimensions?.height || undefined,
560
+ original_width: uploadable.originalDimensions?.width || undefined,
561
+ original_height: uploadable.originalDimensions?.height || undefined,
562
+ resized_for_upload: uploadable.resized,
563
+ },
564
+ });
565
+ if (artifact?.storage_path) {
566
+ artifacts.push(artifact);
567
+ const answer = await this.analyzeUploadedArtifact(job.job_id, artifact.storage_path, "Leia o que esta visivel nesta pagina da web e resuma em portugues brasileiro o conteudo principal. Inclua titulos, chamadas e o que parecer mais importante na tela.", artifact.mime_type);
568
+ page.text = answer || page.text;
569
+ }
570
+ }
540
571
  resultPayload.page = page;
541
572
  completionNotes.push(`Li a pagina ${page.title || page.url || "ativa"} no navegador.`);
542
573
  continue;
@@ -570,32 +601,94 @@ export class NativeMacOSJobExecutor {
570
601
  await reporter.progress(progressPercent, `Trazendo ${action.app} para frente antes do clique`);
571
602
  await this.focusApp(action.app);
572
603
  }
573
- await reporter.progress(progressPercent, `Capturando a tela para localizar ${action.description}`);
574
- const screenshotPath = await this.takeScreenshot();
575
- const artifact = await this.uploadArtifactForJob(job.job_id, screenshotPath, {
576
- kind: "screenshot",
577
- metadata: { purpose: "visual_click", target: action.description },
578
- });
579
- if (!artifact?.storage_path) {
580
- throw new Error("Otto Bridge nao conseguiu enviar a screenshot necessaria para localizar o alvo visual.");
604
+ const targetDescriptions = uniqueStrings([action.description, ...(action.retry_descriptions || [])]);
605
+ let clickSucceeded = false;
606
+ let lastFailureReason = "";
607
+ for (let attempt = 0; attempt < targetDescriptions.length; attempt += 1) {
608
+ const targetDescription = targetDescriptions[attempt];
609
+ await reporter.progress(progressPercent, `Capturando a tela para localizar ${targetDescription}`);
610
+ const screenshotPath = await this.takeScreenshot();
611
+ const uploadable = await this.buildUploadableImage(screenshotPath);
612
+ const artifact = await this.uploadArtifactForJob(job.job_id, uploadable.path, {
613
+ kind: "screenshot",
614
+ mimeTypeOverride: uploadable.mimeType,
615
+ fileNameOverride: uploadable.filename,
616
+ metadata: {
617
+ purpose: "visual_click",
618
+ visible_in_chat: false,
619
+ target: targetDescription,
620
+ width: uploadable.dimensions?.width || undefined,
621
+ height: uploadable.dimensions?.height || undefined,
622
+ original_width: uploadable.originalDimensions?.width || undefined,
623
+ original_height: uploadable.originalDimensions?.height || undefined,
624
+ resized_for_upload: uploadable.resized,
625
+ },
626
+ });
627
+ if (!artifact?.storage_path) {
628
+ throw new Error("Otto Bridge nao conseguiu enviar a screenshot necessaria para localizar o alvo visual.");
629
+ }
630
+ artifacts.push(artifact);
631
+ const artifactMetadata = artifact.metadata || {};
632
+ const width = Number(artifactMetadata.width || 0);
633
+ const height = Number(artifactMetadata.height || 0);
634
+ const originalWidth = Number(artifactMetadata.original_width || width || 0);
635
+ const originalHeight = Number(artifactMetadata.original_height || height || 0);
636
+ const location = await this.locateVisualTarget(job.job_id, artifact.storage_path, targetDescription, width, height, artifact.mime_type);
637
+ if (!location?.found || typeof location.x !== "number" || typeof location.y !== "number") {
638
+ lastFailureReason = `Nao consegui localizar ${targetDescription} com confianca suficiente na tela.`;
639
+ continue;
640
+ }
641
+ await reporter.progress(progressPercent, `Clicando em ${targetDescription}`);
642
+ const scaledX = width > 0 && originalWidth > 0 ? (location.x / width) * originalWidth : location.x;
643
+ const scaledY = height > 0 && originalHeight > 0 ? (location.y / height) * originalHeight : location.y;
644
+ await this.clickPoint(scaledX, scaledY);
645
+ resultPayload.last_click = {
646
+ ...location,
647
+ x: scaledX,
648
+ y: scaledY,
649
+ };
650
+ if (action.verification_prompt) {
651
+ await delay(1600);
652
+ await reporter.progress(progressPercent, "Validando visualmente se a ação funcionou");
653
+ const afterClickPath = await this.takeScreenshot();
654
+ const afterClickUpload = await this.buildUploadableImage(afterClickPath);
655
+ const afterClickArtifact = await this.uploadArtifactForJob(job.job_id, afterClickUpload.path, {
656
+ kind: "screenshot",
657
+ mimeTypeOverride: afterClickUpload.mimeType,
658
+ fileNameOverride: afterClickUpload.filename,
659
+ metadata: {
660
+ purpose: "visual_click_result",
661
+ visible_in_chat: true,
662
+ target: targetDescription,
663
+ width: afterClickUpload.dimensions?.width || undefined,
664
+ height: afterClickUpload.dimensions?.height || undefined,
665
+ original_width: afterClickUpload.originalDimensions?.width || undefined,
666
+ original_height: afterClickUpload.originalDimensions?.height || undefined,
667
+ resized_for_upload: afterClickUpload.resized,
668
+ },
669
+ });
670
+ if (afterClickArtifact?.storage_path) {
671
+ artifacts.push(afterClickArtifact);
672
+ const verificationAnswer = await this.analyzeUploadedArtifact(job.job_id, afterClickArtifact.storage_path, action.verification_prompt, afterClickArtifact.mime_type);
673
+ if (!looksLikeAffirmativeVisualVerification(verificationAnswer)) {
674
+ lastFailureReason = verificationAnswer || `Nao consegui validar visualmente se ${targetDescription} foi acionado.`;
675
+ continue;
676
+ }
677
+ }
678
+ }
679
+ completionNotes.push(`Localizei e cliquei em ${targetDescription}.`);
680
+ clickSucceeded = true;
681
+ break;
581
682
  }
582
- artifacts.push(artifact);
583
- const artifactMetadata = artifact.metadata || {};
584
- const width = Number(artifactMetadata.width || 0);
585
- const height = Number(artifactMetadata.height || 0);
586
- const location = await this.locateVisualTarget(job.job_id, artifact.storage_path, action.description, width, height, artifact.mime_type);
587
- if (!location?.found || typeof location.x !== "number" || typeof location.y !== "number") {
588
- throw new Error(`Nao consegui localizar ${action.description} com confianca suficiente na tela.`);
683
+ if (!clickSucceeded) {
684
+ throw new Error(lastFailureReason || `Nao consegui concluir o clique visual para ${action.description}.`);
589
685
  }
590
- await reporter.progress(progressPercent, `Clicando em ${action.description}`);
591
- await this.clickPoint(location.x, location.y);
592
- completionNotes.push(`Localizei e cliquei em ${action.description}.`);
593
- resultPayload.last_click = location;
594
686
  continue;
595
687
  }
596
688
  await reporter.progress(progressPercent, `Abrindo ${action.url}${action.app ? ` em ${action.app}` : ""}`);
597
689
  await this.openUrl(action.url, action.app);
598
690
  await delay(1200);
691
+ completionNotes.push(`${humanizeUrl(action.url)} foi aberto${action.app ? ` em ${action.app}` : ""}.`);
599
692
  }
600
693
  const summary = completionNotes.length > 0
601
694
  ? completionNotes.join("\n\n")
@@ -713,8 +806,8 @@ end tell
713
806
  return null;
714
807
  }
715
808
  const bytes = await readFile(localPath);
716
- const fileName = path.basename(localPath);
717
- const mimeType = mimeTypeFromPath(fileName);
809
+ const fileName = options?.fileNameOverride || path.basename(localPath);
810
+ const mimeType = options?.mimeTypeOverride || mimeTypeFromPath(fileName);
718
811
  const dimensions = mimeType.startsWith("image/") ? await this.getImageDimensions(localPath) : null;
719
812
  const metadata = {
720
813
  ...(options?.metadata || {}),
@@ -729,6 +822,17 @@ end tell
729
822
  });
730
823
  return response.artifact || null;
731
824
  }
825
+ async analyzeUploadedArtifact(jobId, storagePath, question, mimeType) {
826
+ if (!this.bridgeConfig?.apiBaseUrl || !this.bridgeConfig?.deviceToken) {
827
+ return "";
828
+ }
829
+ const response = await postDeviceJson(this.bridgeConfig.apiBaseUrl, this.bridgeConfig.deviceToken, `/v1/devices/jobs/${encodeURIComponent(jobId)}/vision/analyze`, {
830
+ storage_path: storagePath,
831
+ question,
832
+ mime_type: mimeType || "image/jpeg",
833
+ });
834
+ return String(response.answer || "").trim();
835
+ }
732
836
  async readFrontmostPage(app) {
733
837
  const targetApp = app || "Safari";
734
838
  if (targetApp !== "Safari") {
@@ -743,13 +847,37 @@ tell application "Safari"
743
847
  end tell
744
848
  return pageJson
745
849
  `;
746
- const { stdout } = await this.runCommandCapture("osascript", ["-e", script]);
747
- const parsed = JSON.parse(stdout.trim() || "{}");
748
- return {
749
- title: asString(parsed.title) || "",
750
- url: asString(parsed.url) || "",
751
- text: asString(parsed.text) || "",
752
- };
850
+ try {
851
+ const { stdout } = await this.runCommandCapture("osascript", ["-e", script]);
852
+ const parsed = JSON.parse(stdout.trim() || "{}");
853
+ return {
854
+ title: asString(parsed.title) || "",
855
+ url: asString(parsed.url) || "",
856
+ text: asString(parsed.text) || "",
857
+ };
858
+ }
859
+ catch (error) {
860
+ const detail = error instanceof Error ? error.message : String(error);
861
+ if (!detail.toLowerCase().includes("allow javascript from apple events")) {
862
+ throw error;
863
+ }
864
+ const metadataScript = `
865
+ tell application "Safari"
866
+ activate
867
+ if (count of windows) = 0 then error "Safari nao possui janelas abertas."
868
+ set pageTitle to name of current tab of front window
869
+ set pageUrl to URL of current tab of front window
870
+ end tell
871
+ return pageTitle & linefeed & pageUrl
872
+ `;
873
+ const { stdout } = await this.runCommandCapture("osascript", ["-e", metadataScript]);
874
+ const [title, url] = stdout.split("\n");
875
+ return {
876
+ title: String(title || "").trim(),
877
+ url: String(url || "").trim(),
878
+ text: "",
879
+ };
880
+ }
753
881
  }
754
882
  async setVolume(level) {
755
883
  const bounded = Math.max(0, Math.min(Math.round(level), 100));
@@ -810,6 +938,59 @@ post(.leftMouseUp)
810
938
  return null;
811
939
  }
812
940
  }
941
+ async buildUploadableImage(localPath) {
942
+ const originalDimensions = await this.getImageDimensions(localPath);
943
+ const artifactsDir = path.join(os.homedir(), ".otto-bridge", "artifacts");
944
+ await mkdir(artifactsDir, { recursive: true });
945
+ let sourcePath = localPath;
946
+ let mimeType = mimeTypeFromPath(localPath);
947
+ let filename = path.basename(localPath);
948
+ let resized = false;
949
+ const conversionSteps = [
950
+ { width: 1280, quality: 42 },
951
+ { width: 1024, quality: 35 },
952
+ { width: 900, quality: 30 },
953
+ { width: 768, quality: 26 },
954
+ { width: 640, quality: 22 },
955
+ { width: 540, quality: 18 },
956
+ { width: 480, quality: 16 },
957
+ { width: 420, quality: 14 },
958
+ { width: 360, quality: 12 },
959
+ { width: 320, quality: 10 },
960
+ ];
961
+ for (const step of conversionSteps) {
962
+ const candidatePath = path.join(artifactsDir, `${path.basename(localPath, path.extname(localPath))}-${step.width}w-q${step.quality}.jpg`);
963
+ await this.runCommand("sips", [
964
+ "-s",
965
+ "format",
966
+ "jpeg",
967
+ "-s",
968
+ "formatOptions",
969
+ String(step.quality),
970
+ "--resampleWidth",
971
+ String(step.width),
972
+ localPath,
973
+ "--out",
974
+ candidatePath,
975
+ ]);
976
+ const candidateStat = await stat(candidatePath);
977
+ sourcePath = candidatePath;
978
+ mimeType = "image/jpeg";
979
+ filename = path.basename(candidatePath);
980
+ resized = true;
981
+ if (candidateStat.size <= 120_000) {
982
+ break;
983
+ }
984
+ }
985
+ return {
986
+ path: sourcePath,
987
+ mimeType,
988
+ filename,
989
+ dimensions: await this.getImageDimensions(sourcePath),
990
+ originalDimensions,
991
+ resized,
992
+ };
993
+ }
813
994
  async readLocalFile(filePath, maxChars = 4000) {
814
995
  const resolved = expandUserPath(filePath);
815
996
  const content = await readFile(resolved, "utf8");
@@ -834,7 +1015,7 @@ post(.leftMouseUp)
834
1015
  }
835
1016
  async runShellCommand(command, cwd) {
836
1017
  if (!isSafeShellCommand(command)) {
837
- throw new Error("Otto Bridge permite apenas shell de consulta no momento. Use comandos de leitura como pwd, ls, cat, rg, find ou git status.");
1018
+ throw new Error("Nenhum comando shell foi informado para execucao local.");
838
1019
  }
839
1020
  const resolvedCwd = cwd ? expandUserPath(cwd) : process.cwd();
840
1021
  const { stdout, stderr } = await this.runCommandCapture("/bin/zsh", ["-lc", command], {
@@ -936,10 +1117,6 @@ post(.leftMouseUp)
936
1117
  if (stderrText) {
937
1118
  console.warn(`[otto-bridge] ${command} stderr=${stderrText}`);
938
1119
  }
939
- const stdoutText = stdout.trim();
940
- if (stdoutText) {
941
- console.log(`[otto-bridge] ${command} stdout=${stdoutText}`);
942
- }
943
1120
  return { stdout, stderr };
944
1121
  }
945
1122
  catch (error) {
package/dist/main.js CHANGED
File without changes
package/dist/types.js CHANGED
@@ -1,5 +1,5 @@
1
1
  export const BRIDGE_CONFIG_VERSION = 1;
2
- export const BRIDGE_VERSION = "0.5.0";
2
+ export const BRIDGE_VERSION = "0.5.2";
3
3
  export const BRIDGE_PACKAGE_NAME = "@leg3ndy/otto-bridge";
4
4
  export const DEFAULT_API_BASE_URL = "http://localhost:8000";
5
5
  export const DEFAULT_POLL_INTERVAL_MS = 3000;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@leg3ndy/otto-bridge",
3
- "version": "0.5.0",
3
+ "version": "0.5.2",
4
4
  "private": false,
5
5
  "type": "module",
6
6
  "description": "Local companion for Otto Bridge device pairing and WebSocket runtime.",