@leg3ndy/otto-bridge 0.5.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -175,50 +175,7 @@ function isSafeShellCommand(command) {
175
175
  if (!trimmed) {
176
176
  return false;
177
177
  }
178
- const forbiddenPatterns = [
179
- /(^|[;&|])\s*sudo\b/i,
180
- /\brm\b/i,
181
- /\bmv\b/i,
182
- /\bcp\b/i,
183
- /\bchmod\b/i,
184
- /\bchown\b/i,
185
- /\bshutdown\b/i,
186
- /\breboot\b/i,
187
- /\bmkfs\b/i,
188
- /\bdd\b/i,
189
- /\bkill(?:all)?\b/i,
190
- />/,
191
- />>/,
192
- ];
193
- if (forbiddenPatterns.some((pattern) => pattern.test(trimmed))) {
194
- return false;
195
- }
196
- const normalized = trimmed.replace(/\s+/g, " ");
197
- const allowedPrefixes = [
198
- "pwd",
199
- "ls",
200
- "cat ",
201
- "cat",
202
- "sed ",
203
- "rg ",
204
- "find ",
205
- "git status",
206
- "git log",
207
- "git diff",
208
- "head ",
209
- "tail ",
210
- "wc ",
211
- "stat ",
212
- "file ",
213
- "mdls ",
214
- "whoami",
215
- "date",
216
- "uname",
217
- "python3 --version",
218
- "node -v",
219
- "npm -v",
220
- ];
221
- return allowedPrefixes.some((prefix) => normalized === prefix || normalized.startsWith(`${prefix} `));
178
+ return true;
222
179
  }
223
180
  function extractConfirmationOptions(job, actions) {
224
181
  const payload = asRecord(job.payload);
@@ -495,11 +452,13 @@ export class NativeMacOSJobExecutor {
495
452
  if (action.type === "open_app") {
496
453
  await reporter.progress(progressPercent, `Abrindo ${action.app} no macOS`);
497
454
  await this.openApp(action.app);
455
+ completionNotes.push(`${action.app} foi aberto no macOS.`);
498
456
  continue;
499
457
  }
500
458
  if (action.type === "focus_app") {
501
459
  await reporter.progress(progressPercent, `Trazendo ${action.app} para frente`);
502
460
  await this.focusApp(action.app);
461
+ completionNotes.push(`${action.app} ficou em foco no macOS.`);
503
462
  continue;
504
463
  }
505
464
  if (action.type === "press_shortcut") {
@@ -521,8 +480,18 @@ export class NativeMacOSJobExecutor {
521
480
  if (action.type === "take_screenshot") {
522
481
  await reporter.progress(progressPercent, "Capturando screenshot do Mac");
523
482
  const screenshotPath = await this.takeScreenshot(action.path);
524
- const screenshotArtifact = await this.uploadArtifactForJob(job.job_id, screenshotPath, {
483
+ const uploadable = await this.buildUploadableImage(screenshotPath);
484
+ const screenshotArtifact = await this.uploadArtifactForJob(job.job_id, uploadable.path, {
525
485
  kind: "screenshot",
486
+ mimeTypeOverride: uploadable.mimeType,
487
+ fileNameOverride: uploadable.filename,
488
+ metadata: {
489
+ width: uploadable.dimensions?.width || undefined,
490
+ height: uploadable.dimensions?.height || undefined,
491
+ original_width: uploadable.originalDimensions?.width || undefined,
492
+ original_height: uploadable.originalDimensions?.height || undefined,
493
+ resized_for_upload: uploadable.resized,
494
+ },
526
495
  });
527
496
  if (screenshotArtifact) {
528
497
  artifacts.push(screenshotArtifact);
@@ -537,6 +506,29 @@ export class NativeMacOSJobExecutor {
537
506
  if (action.type === "read_frontmost_page") {
538
507
  await reporter.progress(progressPercent, `Lendo a pagina ativa em ${action.app || "Safari"}`);
539
508
  const page = await this.readFrontmostPage(action.app || "Safari");
509
+ if (!page.text && this.bridgeConfig?.apiBaseUrl && this.bridgeConfig?.deviceToken) {
510
+ await reporter.progress(progressPercent, "Safari bloqueou leitura direta; vou analisar a pagina pela tela");
511
+ const screenshotPath = await this.takeScreenshot();
512
+ const uploadable = await this.buildUploadableImage(screenshotPath);
513
+ const artifact = await this.uploadArtifactForJob(job.job_id, uploadable.path, {
514
+ kind: "screenshot",
515
+ mimeTypeOverride: uploadable.mimeType,
516
+ fileNameOverride: uploadable.filename,
517
+ metadata: {
518
+ purpose: "page_read_fallback",
519
+ width: uploadable.dimensions?.width || undefined,
520
+ height: uploadable.dimensions?.height || undefined,
521
+ original_width: uploadable.originalDimensions?.width || undefined,
522
+ original_height: uploadable.originalDimensions?.height || undefined,
523
+ resized_for_upload: uploadable.resized,
524
+ },
525
+ });
526
+ if (artifact?.storage_path) {
527
+ artifacts.push(artifact);
528
+ const answer = await this.analyzeUploadedArtifact(job.job_id, artifact.storage_path, "Leia o que esta visivel nesta pagina da web e resuma em portugues brasileiro o conteudo principal. Inclua titulos, chamadas e o que parecer mais importante na tela.", artifact.mime_type);
529
+ page.text = answer || page.text;
530
+ }
531
+ }
540
532
  resultPayload.page = page;
541
533
  completionNotes.push(`Li a pagina ${page.title || page.url || "ativa"} no navegador.`);
542
534
  continue;
@@ -572,9 +564,20 @@ export class NativeMacOSJobExecutor {
572
564
  }
573
565
  await reporter.progress(progressPercent, `Capturando a tela para localizar ${action.description}`);
574
566
  const screenshotPath = await this.takeScreenshot();
575
- const artifact = await this.uploadArtifactForJob(job.job_id, screenshotPath, {
567
+ const uploadable = await this.buildUploadableImage(screenshotPath);
568
+ const artifact = await this.uploadArtifactForJob(job.job_id, uploadable.path, {
576
569
  kind: "screenshot",
577
- metadata: { purpose: "visual_click", target: action.description },
570
+ mimeTypeOverride: uploadable.mimeType,
571
+ fileNameOverride: uploadable.filename,
572
+ metadata: {
573
+ purpose: "visual_click",
574
+ target: action.description,
575
+ width: uploadable.dimensions?.width || undefined,
576
+ height: uploadable.dimensions?.height || undefined,
577
+ original_width: uploadable.originalDimensions?.width || undefined,
578
+ original_height: uploadable.originalDimensions?.height || undefined,
579
+ resized_for_upload: uploadable.resized,
580
+ },
578
581
  });
579
582
  if (!artifact?.storage_path) {
580
583
  throw new Error("Otto Bridge nao conseguiu enviar a screenshot necessaria para localizar o alvo visual.");
@@ -583,19 +586,28 @@ export class NativeMacOSJobExecutor {
583
586
  const artifactMetadata = artifact.metadata || {};
584
587
  const width = Number(artifactMetadata.width || 0);
585
588
  const height = Number(artifactMetadata.height || 0);
589
+ const originalWidth = Number(artifactMetadata.original_width || width || 0);
590
+ const originalHeight = Number(artifactMetadata.original_height || height || 0);
586
591
  const location = await this.locateVisualTarget(job.job_id, artifact.storage_path, action.description, width, height, artifact.mime_type);
587
592
  if (!location?.found || typeof location.x !== "number" || typeof location.y !== "number") {
588
593
  throw new Error(`Nao consegui localizar ${action.description} com confianca suficiente na tela.`);
589
594
  }
590
595
  await reporter.progress(progressPercent, `Clicando em ${action.description}`);
591
- await this.clickPoint(location.x, location.y);
596
+ const scaledX = width > 0 && originalWidth > 0 ? (location.x / width) * originalWidth : location.x;
597
+ const scaledY = height > 0 && originalHeight > 0 ? (location.y / height) * originalHeight : location.y;
598
+ await this.clickPoint(scaledX, scaledY);
592
599
  completionNotes.push(`Localizei e cliquei em ${action.description}.`);
593
- resultPayload.last_click = location;
600
+ resultPayload.last_click = {
601
+ ...location,
602
+ x: scaledX,
603
+ y: scaledY,
604
+ };
594
605
  continue;
595
606
  }
596
607
  await reporter.progress(progressPercent, `Abrindo ${action.url}${action.app ? ` em ${action.app}` : ""}`);
597
608
  await this.openUrl(action.url, action.app);
598
609
  await delay(1200);
610
+ completionNotes.push(`${humanizeUrl(action.url)} foi aberto${action.app ? ` em ${action.app}` : ""}.`);
599
611
  }
600
612
  const summary = completionNotes.length > 0
601
613
  ? completionNotes.join("\n\n")
@@ -713,8 +725,8 @@ end tell
713
725
  return null;
714
726
  }
715
727
  const bytes = await readFile(localPath);
716
- const fileName = path.basename(localPath);
717
- const mimeType = mimeTypeFromPath(fileName);
728
+ const fileName = options?.fileNameOverride || path.basename(localPath);
729
+ const mimeType = options?.mimeTypeOverride || mimeTypeFromPath(fileName);
718
730
  const dimensions = mimeType.startsWith("image/") ? await this.getImageDimensions(localPath) : null;
719
731
  const metadata = {
720
732
  ...(options?.metadata || {}),
@@ -729,6 +741,17 @@ end tell
729
741
  });
730
742
  return response.artifact || null;
731
743
  }
744
+ async analyzeUploadedArtifact(jobId, storagePath, question, mimeType) {
745
+ if (!this.bridgeConfig?.apiBaseUrl || !this.bridgeConfig?.deviceToken) {
746
+ return "";
747
+ }
748
+ const response = await postDeviceJson(this.bridgeConfig.apiBaseUrl, this.bridgeConfig.deviceToken, `/v1/devices/jobs/${encodeURIComponent(jobId)}/vision/analyze`, {
749
+ storage_path: storagePath,
750
+ question,
751
+ mime_type: mimeType || "image/jpeg",
752
+ });
753
+ return String(response.answer || "").trim();
754
+ }
732
755
  async readFrontmostPage(app) {
733
756
  const targetApp = app || "Safari";
734
757
  if (targetApp !== "Safari") {
@@ -743,13 +766,37 @@ tell application "Safari"
743
766
  end tell
744
767
  return pageJson
745
768
  `;
746
- const { stdout } = await this.runCommandCapture("osascript", ["-e", script]);
747
- const parsed = JSON.parse(stdout.trim() || "{}");
748
- return {
749
- title: asString(parsed.title) || "",
750
- url: asString(parsed.url) || "",
751
- text: asString(parsed.text) || "",
752
- };
769
+ try {
770
+ const { stdout } = await this.runCommandCapture("osascript", ["-e", script]);
771
+ const parsed = JSON.parse(stdout.trim() || "{}");
772
+ return {
773
+ title: asString(parsed.title) || "",
774
+ url: asString(parsed.url) || "",
775
+ text: asString(parsed.text) || "",
776
+ };
777
+ }
778
+ catch (error) {
779
+ const detail = error instanceof Error ? error.message : String(error);
780
+ if (!detail.toLowerCase().includes("allow javascript from apple events")) {
781
+ throw error;
782
+ }
783
+ const metadataScript = `
784
+ tell application "Safari"
785
+ activate
786
+ if (count of windows) = 0 then error "Safari nao possui janelas abertas."
787
+ set pageTitle to name of current tab of front window
788
+ set pageUrl to URL of current tab of front window
789
+ end tell
790
+ return pageTitle & linefeed & pageUrl
791
+ `;
792
+ const { stdout } = await this.runCommandCapture("osascript", ["-e", metadataScript]);
793
+ const [title, url] = stdout.split("\n");
794
+ return {
795
+ title: String(title || "").trim(),
796
+ url: String(url || "").trim(),
797
+ text: "",
798
+ };
799
+ }
753
800
  }
754
801
  async setVolume(level) {
755
802
  const bounded = Math.max(0, Math.min(Math.round(level), 100));
@@ -810,6 +857,56 @@ post(.leftMouseUp)
810
857
  return null;
811
858
  }
812
859
  }
860
+ async buildUploadableImage(localPath) {
861
+ const originalDimensions = await this.getImageDimensions(localPath);
862
+ const artifactsDir = path.join(os.homedir(), ".otto-bridge", "artifacts");
863
+ await mkdir(artifactsDir, { recursive: true });
864
+ let sourcePath = localPath;
865
+ let mimeType = mimeTypeFromPath(localPath);
866
+ let filename = path.basename(localPath);
867
+ let resized = false;
868
+ const conversionSteps = [
869
+ { width: 1280, quality: 42 },
870
+ { width: 1024, quality: 35 },
871
+ { width: 900, quality: 30 },
872
+ { width: 768, quality: 26 },
873
+ { width: 640, quality: 22 },
874
+ { width: 540, quality: 18 },
875
+ { width: 480, quality: 16 },
876
+ ];
877
+ for (const step of conversionSteps) {
878
+ const candidatePath = path.join(artifactsDir, `${path.basename(localPath, path.extname(localPath))}-${step.width}w-q${step.quality}.jpg`);
879
+ await this.runCommand("sips", [
880
+ "-s",
881
+ "format",
882
+ "jpeg",
883
+ "-s",
884
+ "formatOptions",
885
+ String(step.quality),
886
+ "--resampleWidth",
887
+ String(step.width),
888
+ localPath,
889
+ "--out",
890
+ candidatePath,
891
+ ]);
892
+ const candidateStat = await stat(candidatePath);
893
+ sourcePath = candidatePath;
894
+ mimeType = "image/jpeg";
895
+ filename = path.basename(candidatePath);
896
+ resized = true;
897
+ if (candidateStat.size <= 220_000) {
898
+ break;
899
+ }
900
+ }
901
+ return {
902
+ path: sourcePath,
903
+ mimeType,
904
+ filename,
905
+ dimensions: await this.getImageDimensions(sourcePath),
906
+ originalDimensions,
907
+ resized,
908
+ };
909
+ }
813
910
  async readLocalFile(filePath, maxChars = 4000) {
814
911
  const resolved = expandUserPath(filePath);
815
912
  const content = await readFile(resolved, "utf8");
@@ -834,7 +931,7 @@ post(.leftMouseUp)
834
931
  }
835
932
  async runShellCommand(command, cwd) {
836
933
  if (!isSafeShellCommand(command)) {
837
- throw new Error("Otto Bridge permite apenas shell de consulta no momento. Use comandos de leitura como pwd, ls, cat, rg, find ou git status.");
934
+ throw new Error("Nenhum comando shell foi informado para execucao local.");
838
935
  }
839
936
  const resolvedCwd = cwd ? expandUserPath(cwd) : process.cwd();
840
937
  const { stdout, stderr } = await this.runCommandCapture("/bin/zsh", ["-lc", command], {
@@ -936,10 +1033,6 @@ post(.leftMouseUp)
936
1033
  if (stderrText) {
937
1034
  console.warn(`[otto-bridge] ${command} stderr=${stderrText}`);
938
1035
  }
939
- const stdoutText = stdout.trim();
940
- if (stdoutText) {
941
- console.log(`[otto-bridge] ${command} stdout=${stdoutText}`);
942
- }
943
1036
  return { stdout, stderr };
944
1037
  }
945
1038
  catch (error) {
package/dist/main.js CHANGED
File without changes
package/dist/types.js CHANGED
@@ -1,5 +1,5 @@
1
1
  export const BRIDGE_CONFIG_VERSION = 1;
2
- export const BRIDGE_VERSION = "0.5.0";
2
+ export const BRIDGE_VERSION = "0.5.1";
3
3
  export const BRIDGE_PACKAGE_NAME = "@leg3ndy/otto-bridge";
4
4
  export const DEFAULT_API_BASE_URL = "http://localhost:8000";
5
5
  export const DEFAULT_POLL_INTERVAL_MS = 3000;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@leg3ndy/otto-bridge",
3
- "version": "0.5.0",
3
+ "version": "0.5.1",
4
4
  "private": false,
5
5
  "type": "module",
6
6
  "description": "Local companion for Otto Bridge device pairing and WebSocket runtime.",