@leg3ndy/otto-bridge 0.5.1 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -11,6 +11,8 @@ Companion local do Otto para:
11
11
 
12
12
  Para um passo a passo de instalacao, pareamento, uso, desconexao e desinstalacao, veja [USER_GUIDE.md](https://github.com/LGCYYL/ottoai/blob/main/otto-bridge/USER_GUIDE.md).
13
13
 
14
+ Para o estado atual da arquitetura, capacidades entregues, limitacoes e roadmap do Otto Bridge, veja [`leg3ndy-ai-backend/docs/OTTO_BRIDGE_ARCHITECTURE.md`](../leg3ndy-ai-backend/docs/OTTO_BRIDGE_ARCHITECTURE.md).
15
+
14
16
  ## Distribuicao
15
17
 
16
18
  Fluxo recomendado agora:
@@ -88,6 +88,39 @@ function humanizeUrl(url) {
88
88
  return normalized;
89
89
  }
90
90
  }
91
/**
 * Collects the distinct, non-empty strings from an iterable, preserving
 * first-seen order.
 *
 * Each entry is coerced with `String(entry || "")` and trimmed; entries that
 * end up empty are dropped. Two entries count as duplicates when
 * `normalizeText` returns the same key for both, and only the first (trimmed)
 * spelling is kept.
 *
 * @param {Iterable<unknown>} values - Candidate values to deduplicate.
 * @returns {string[]} Trimmed, deduplicated strings in original order.
 */
function uniqueStrings(values) {
    const seenKeys = new Set();
    const kept = [];
    for (const raw of values) {
        const trimmed = String(raw || "").trim();
        if (trimmed) {
            // Deduplicate on the normalized form, but return the trimmed original.
            const dedupeKey = normalizeText(trimmed);
            if (!seenKeys.has(dedupeKey)) {
                seenKeys.add(dedupeKey);
                kept.push(trimmed);
            }
        }
    }
    return kept;
}
106
/**
 * Heuristically decides whether a free-text (Portuguese) answer to a visual
 * verification prompt confirms that the action worked.
 *
 * Decision order:
 *   1. Empty/blank answer            -> false.
 *   2. Answer opens with the word "sim"        -> true.
 *   3. Answer opens with the word "nao"/"não"  -> false.
 *   4. Otherwise true only if the answer contains one of the known
 *      "it is playing / it was triggered" phrases.
 *
 * The leading "sim"/"nao" must be a whole word: a plain prefix test would
 * treat answers such as "Similar ao esperado..." or "Simplesmente nada mudou"
 * as a "yes".
 *
 * Both accented and unaccented spellings are listed because it is not visible
 * here whether `normalizeText` strips diacritics (assumed to at least
 * lowercase and trim — TODO confirm).
 *
 * NOTE(review): a negation in the middle of the sentence ("a musica nao esta
 * tocando") still matches the phrase list in step 4; callers should keep
 * verification prompts phrased so the model answers with a leading sim/nao.
 *
 * @param {string | null | undefined} answer - Raw answer text to classify.
 * @returns {boolean} True when the answer reads as an affirmative confirmation.
 */
function looksLikeAffirmativeVisualVerification(answer) {
    const normalized = normalizeText(answer || "");
    if (!normalized) {
        return false;
    }
    // Whole-word match: "sim" must not be followed by another letter or digit.
    if (/^sim(?![\p{L}\p{N}])/u.test(normalized)) {
        return true;
    }
    if (/^(?:nao|não)(?![\p{L}\p{N}])/u.test(normalized)) {
        return false;
    }
    const affirmativeMarkers = [
        "tocando",
        "reproduzindo",
        "em reproducao",
        "em reprodução",
        "botao de pausa",
        "botão de pausa",
        "faixa ativa",
        "resultado selecionado",
        "foi acionado",
    ];
    return affirmativeMarkers.some((marker) => normalized.includes(marker));
}
91
124
  function mimeTypeFromPath(filePath) {
92
125
  const ext = path.extname(filePath).toLowerCase();
93
126
  if (ext === ".png")
@@ -327,6 +360,10 @@ function parseStructuredActions(job) {
327
360
  type: "click_visual_target",
328
361
  description,
329
362
  app: asString(action.app) || undefined,
363
+ verification_prompt: asString(action.verification_prompt) || undefined,
364
+ retry_descriptions: Array.isArray(action.retry_descriptions)
365
+ ? action.retry_descriptions.map((item) => asString(item)).filter(Boolean)
366
+ : undefined,
330
367
  });
331
368
  }
332
369
  continue;
@@ -486,6 +523,7 @@ export class NativeMacOSJobExecutor {
486
523
  mimeTypeOverride: uploadable.mimeType,
487
524
  fileNameOverride: uploadable.filename,
488
525
  metadata: {
526
+ visible_in_chat: true,
489
527
  width: uploadable.dimensions?.width || undefined,
490
528
  height: uploadable.dimensions?.height || undefined,
491
529
  original_width: uploadable.originalDimensions?.width || undefined,
@@ -516,6 +554,7 @@ export class NativeMacOSJobExecutor {
516
554
  fileNameOverride: uploadable.filename,
517
555
  metadata: {
518
556
  purpose: "page_read_fallback",
557
+ visible_in_chat: false,
519
558
  width: uploadable.dimensions?.width || undefined,
520
559
  height: uploadable.dimensions?.height || undefined,
521
560
  original_width: uploadable.originalDimensions?.width || undefined,
@@ -562,46 +601,88 @@ export class NativeMacOSJobExecutor {
562
601
  await reporter.progress(progressPercent, `Trazendo ${action.app} para frente antes do clique`);
563
602
  await this.focusApp(action.app);
564
603
  }
565
- await reporter.progress(progressPercent, `Capturando a tela para localizar ${action.description}`);
566
- const screenshotPath = await this.takeScreenshot();
567
- const uploadable = await this.buildUploadableImage(screenshotPath);
568
- const artifact = await this.uploadArtifactForJob(job.job_id, uploadable.path, {
569
- kind: "screenshot",
570
- mimeTypeOverride: uploadable.mimeType,
571
- fileNameOverride: uploadable.filename,
572
- metadata: {
573
- purpose: "visual_click",
574
- target: action.description,
575
- width: uploadable.dimensions?.width || undefined,
576
- height: uploadable.dimensions?.height || undefined,
577
- original_width: uploadable.originalDimensions?.width || undefined,
578
- original_height: uploadable.originalDimensions?.height || undefined,
579
- resized_for_upload: uploadable.resized,
580
- },
581
- });
582
- if (!artifact?.storage_path) {
583
- throw new Error("Otto Bridge nao conseguiu enviar a screenshot necessaria para localizar o alvo visual.");
604
+ const targetDescriptions = uniqueStrings([action.description, ...(action.retry_descriptions || [])]);
605
+ let clickSucceeded = false;
606
+ let lastFailureReason = "";
607
+ for (let attempt = 0; attempt < targetDescriptions.length; attempt += 1) {
608
+ const targetDescription = targetDescriptions[attempt];
609
+ await reporter.progress(progressPercent, `Capturando a tela para localizar ${targetDescription}`);
610
+ const screenshotPath = await this.takeScreenshot();
611
+ const uploadable = await this.buildUploadableImage(screenshotPath);
612
+ const artifact = await this.uploadArtifactForJob(job.job_id, uploadable.path, {
613
+ kind: "screenshot",
614
+ mimeTypeOverride: uploadable.mimeType,
615
+ fileNameOverride: uploadable.filename,
616
+ metadata: {
617
+ purpose: "visual_click",
618
+ visible_in_chat: false,
619
+ target: targetDescription,
620
+ width: uploadable.dimensions?.width || undefined,
621
+ height: uploadable.dimensions?.height || undefined,
622
+ original_width: uploadable.originalDimensions?.width || undefined,
623
+ original_height: uploadable.originalDimensions?.height || undefined,
624
+ resized_for_upload: uploadable.resized,
625
+ },
626
+ });
627
+ if (!artifact?.storage_path) {
628
+ throw new Error("Otto Bridge nao conseguiu enviar a screenshot necessaria para localizar o alvo visual.");
629
+ }
630
+ artifacts.push(artifact);
631
+ const artifactMetadata = artifact.metadata || {};
632
+ const width = Number(artifactMetadata.width || 0);
633
+ const height = Number(artifactMetadata.height || 0);
634
+ const originalWidth = Number(artifactMetadata.original_width || width || 0);
635
+ const originalHeight = Number(artifactMetadata.original_height || height || 0);
636
+ const location = await this.locateVisualTarget(job.job_id, artifact.storage_path, targetDescription, width, height, artifact.mime_type);
637
+ if (!location?.found || typeof location.x !== "number" || typeof location.y !== "number") {
638
+ lastFailureReason = `Nao consegui localizar ${targetDescription} com confianca suficiente na tela.`;
639
+ continue;
640
+ }
641
+ await reporter.progress(progressPercent, `Clicando em ${targetDescription}`);
642
+ const scaledX = width > 0 && originalWidth > 0 ? (location.x / width) * originalWidth : location.x;
643
+ const scaledY = height > 0 && originalHeight > 0 ? (location.y / height) * originalHeight : location.y;
644
+ await this.clickPoint(scaledX, scaledY);
645
+ resultPayload.last_click = {
646
+ ...location,
647
+ x: scaledX,
648
+ y: scaledY,
649
+ };
650
+ if (action.verification_prompt) {
651
+ await delay(1600);
652
+ await reporter.progress(progressPercent, "Validando visualmente se a ação funcionou");
653
+ const afterClickPath = await this.takeScreenshot();
654
+ const afterClickUpload = await this.buildUploadableImage(afterClickPath);
655
+ const afterClickArtifact = await this.uploadArtifactForJob(job.job_id, afterClickUpload.path, {
656
+ kind: "screenshot",
657
+ mimeTypeOverride: afterClickUpload.mimeType,
658
+ fileNameOverride: afterClickUpload.filename,
659
+ metadata: {
660
+ purpose: "visual_click_result",
661
+ visible_in_chat: true,
662
+ target: targetDescription,
663
+ width: afterClickUpload.dimensions?.width || undefined,
664
+ height: afterClickUpload.dimensions?.height || undefined,
665
+ original_width: afterClickUpload.originalDimensions?.width || undefined,
666
+ original_height: afterClickUpload.originalDimensions?.height || undefined,
667
+ resized_for_upload: afterClickUpload.resized,
668
+ },
669
+ });
670
+ if (afterClickArtifact?.storage_path) {
671
+ artifacts.push(afterClickArtifact);
672
+ const verificationAnswer = await this.analyzeUploadedArtifact(job.job_id, afterClickArtifact.storage_path, action.verification_prompt, afterClickArtifact.mime_type);
673
+ if (!looksLikeAffirmativeVisualVerification(verificationAnswer)) {
674
+ lastFailureReason = verificationAnswer || `Nao consegui validar visualmente se ${targetDescription} foi acionado.`;
675
+ continue;
676
+ }
677
+ }
678
+ }
679
+ completionNotes.push(`Localizei e cliquei em ${targetDescription}.`);
680
+ clickSucceeded = true;
681
+ break;
584
682
  }
585
- artifacts.push(artifact);
586
- const artifactMetadata = artifact.metadata || {};
587
- const width = Number(artifactMetadata.width || 0);
588
- const height = Number(artifactMetadata.height || 0);
589
- const originalWidth = Number(artifactMetadata.original_width || width || 0);
590
- const originalHeight = Number(artifactMetadata.original_height || height || 0);
591
- const location = await this.locateVisualTarget(job.job_id, artifact.storage_path, action.description, width, height, artifact.mime_type);
592
- if (!location?.found || typeof location.x !== "number" || typeof location.y !== "number") {
593
- throw new Error(`Nao consegui localizar ${action.description} com confianca suficiente na tela.`);
683
+ if (!clickSucceeded) {
684
+ throw new Error(lastFailureReason || `Nao consegui concluir o clique visual para ${action.description}.`);
594
685
  }
595
- await reporter.progress(progressPercent, `Clicando em ${action.description}`);
596
- const scaledX = width > 0 && originalWidth > 0 ? (location.x / width) * originalWidth : location.x;
597
- const scaledY = height > 0 && originalHeight > 0 ? (location.y / height) * originalHeight : location.y;
598
- await this.clickPoint(scaledX, scaledY);
599
- completionNotes.push(`Localizei e cliquei em ${action.description}.`);
600
- resultPayload.last_click = {
601
- ...location,
602
- x: scaledX,
603
- y: scaledY,
604
- };
605
686
  continue;
606
687
  }
607
688
  await reporter.progress(progressPercent, `Abrindo ${action.url}${action.app ? ` em ${action.app}` : ""}`);
@@ -873,6 +954,9 @@ post(.leftMouseUp)
873
954
  { width: 640, quality: 22 },
874
955
  { width: 540, quality: 18 },
875
956
  { width: 480, quality: 16 },
957
+ { width: 420, quality: 14 },
958
+ { width: 360, quality: 12 },
959
+ { width: 320, quality: 10 },
876
960
  ];
877
961
  for (const step of conversionSteps) {
878
962
  const candidatePath = path.join(artifactsDir, `${path.basename(localPath, path.extname(localPath))}-${step.width}w-q${step.quality}.jpg`);
@@ -894,7 +978,7 @@ post(.leftMouseUp)
894
978
  mimeType = "image/jpeg";
895
979
  filename = path.basename(candidatePath);
896
980
  resized = true;
897
- if (candidateStat.size <= 220_000) {
981
+ if (candidateStat.size <= 120_000) {
898
982
  break;
899
983
  }
900
984
  }
package/dist/types.js CHANGED
@@ -1,5 +1,5 @@
1
1
  export const BRIDGE_CONFIG_VERSION = 1;
2
- export const BRIDGE_VERSION = "0.5.1";
2
+ export const BRIDGE_VERSION = "0.5.2";
3
3
  export const BRIDGE_PACKAGE_NAME = "@leg3ndy/otto-bridge";
4
4
  export const DEFAULT_API_BASE_URL = "http://localhost:8000";
5
5
  export const DEFAULT_POLL_INTERVAL_MS = 3000;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@leg3ndy/otto-bridge",
3
- "version": "0.5.1",
3
+ "version": "0.5.2",
4
4
  "private": false,
5
5
  "type": "module",
6
6
  "description": "Local companion for Otto Bridge device pairing and WebSocket runtime.",