@leg3ndy/otto-bridge 0.5.1 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/dist/executors/native_macos.js +867 -42
- package/dist/types.js +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -11,6 +11,8 @@ Companion local do Otto para:
|
|
|
11
11
|
|
|
12
12
|
Para um passo a passo de instalacao, pareamento, uso, desconexao e desinstalacao, veja [USER_GUIDE.md](https://github.com/LGCYYL/ottoai/blob/main/otto-bridge/USER_GUIDE.md).
|
|
13
13
|
|
|
14
|
+
Para o estado atual da arquitetura, capacidades entregues, limitacoes e roadmap do Otto Bridge, veja [`leg3ndy-ai-backend/docs/OTTO_BRIDGE_ARCHITECTURE.md`](../leg3ndy-ai-backend/docs/OTTO_BRIDGE_ARCHITECTURE.md).
|
|
15
|
+
|
|
14
16
|
## Distribuicao
|
|
15
17
|
|
|
16
18
|
Fluxo recomendado agora:
|
|
@@ -30,6 +30,61 @@ const KNOWN_SITES = [
|
|
|
30
30
|
{ label: "WhatsApp Web", url: "https://web.whatsapp.com", patterns: [/\bwhatsapp\b/i] },
|
|
31
31
|
{ label: "X", url: "https://x.com", patterns: [/\bx\.com\b/i, /\btwitter\b/i, /\bxis\b/i] },
|
|
32
32
|
];
|
|
33
|
+
/**
 * Words that are discarded by extractMeaningfulDescriptionTokens when a visual
 * target description is tokenized. Entries are written lowercase and without
 * accents; they are grouped below by the role they play in a description.
 */
const GENERIC_VISUAL_STOP_WORDS = new Set([
    // Articles.
    "o", "a", "os", "as", "um", "uma", "uns", "umas",
    // Prepositions, contractions and connectives.
    "de", "da", "do", "das", "dos", "em", "no", "na", "nos", "nas",
    "por", "para", "com", "sem", "que",
    // Generic screen / UI vocabulary.
    "visivel", "visiveis", "tela", "pagina", "site", "app", "janela", "aba",
    "botao", "botoes", "link", "item", "resultado", "resultados",
    // Ordinals.
    "primeiro", "primeira", "segundo", "segunda", "terceiro", "terceira",
    // Media nouns.
    "video", "videos", "musica", "faixa",
    // Action verbs.
    "clicar", "clique", "seleciona", "selecionar", "abre", "abrir",
]);
|
|
33
88
|
/**
 * Returns `value` unchanged when it is a non-null object (arrays included),
 * otherwise a fresh empty object, so callers can read properties safely.
 *
 * @param {unknown} value - Anything, typically a parsed payload.
 * @returns {object} The same object reference, or `{}`.
 */
function asRecord(value) {
    if (value === null || typeof value !== "object") {
        return {};
    }
    return value;
}
|
|
@@ -49,6 +104,80 @@ function normalizeText(value) {
|
|
|
49
104
|
/**
 * Escapes a string so it can be placed between double quotes in AppleScript
 * source: every backslash and every double quote gets a leading backslash.
 *
 * @param {string} value - Raw text.
 * @returns {string} Text safe to embed in a double-quoted AppleScript literal.
 */
function escapeAppleScript(value) {
    // One pass is enough: each special character is simply prefixed.
    return value.replace(/[\\"]/g, (special) => `\\${special}`);
}
|
|
107
|
+
/**
 * Produces a canonical form of a URL for equality / prefix comparisons.
 * The input is trimmed, parsed, stripped of its fragment (`#...`) and
 * re-serialized. Text that does not parse as an absolute URL is returned
 * trimmed but otherwise untouched.
 *
 * @param {unknown} raw - URL-like value; falsy values yield "".
 * @returns {string} Canonical URL string, the trimmed input, or "".
 */
function normalizeComparableUrl(raw) {
    const trimmed = String(raw || "").trim();
    if (trimmed === "") {
        return "";
    }
    let url;
    try {
        url = new URL(trimmed);
    }
    catch {
        // Not an absolute URL: compare it as plain text.
        return trimmed;
    }
    url.hash = "";
    return url.href;
}
|
|
121
|
+
/**
 * Collects the phrases a description places between quotation marks
 * (straight or curly, single or double). Each phrase must be 2-80 characters
 * long; it is passed through normalizeText and trimmed, and empty results
 * are dropped.
 *
 * @param {unknown} value - Free-form description text.
 * @returns {string[]} Normalized quoted phrases in order of appearance.
 */
function extractQuotedPhrases(value) {
    const quotedSpan = /["'“”‘’]([^"'“”‘’]{2,80})["'“”‘’]/g;
    const phrases = [];
    for (const match of String(value || "").matchAll(quotedSpan)) {
        const phrase = normalizeText(match[1] || "").trim();
        if (phrase) {
            phrases.push(phrase);
        }
    }
    return phrases;
}
|
|
126
|
+
/**
 * Breaks a description into distinct keyword tokens. The text is run through
 * normalizeText, split on every run of characters outside `a-z0-9`, and a
 * token is kept only when it has at least three characters and is not listed
 * in GENERIC_VISUAL_STOP_WORDS.
 *
 * @param {unknown} value - Free-form description text.
 * @returns {string[]} Unique tokens in order of first appearance.
 */
function extractMeaningfulDescriptionTokens(value) {
    const distinct = new Set();
    // Pieces contain only [a-z0-9], so no whitespace trimming is required.
    for (const piece of normalizeText(value || "").split(/[^a-z0-9]+/)) {
        if (piece.length < 3 || GENERIC_VISUAL_STOP_WORDS.has(piece)) {
            continue;
        }
        distinct.add(piece);
    }
    return [...distinct];
}
|
|
132
|
+
/**
 * Tells whether a description contains any text that could be searched for:
 * either a quoted phrase or at least one meaningful keyword token.
 *
 * @param {unknown} description - Free-form target description.
 * @returns {boolean} True when a quoted phrase or keyword token is present.
 */
function descriptionLikelyHasTextAnchor(description) {
    if (extractQuotedPhrases(description).length > 0) {
        return true;
    }
    return extractMeaningfulDescriptionTokens(description).length > 0;
}
|
|
135
|
+
/**
 * Chooses the OCR candidate whose recognized text best matches a target
 * description.
 *
 * Scoring per candidate:
 *   - +120 for every quoted phrase of the description found in its text;
 *   - +18 for every meaningful description token found in its text;
 *   - when the description asks for the "first" item (primeiro/primeira/first),
 *     a position bonus favouring candidates higher on screen and earlier in
 *     the list;
 *   - a confidence bonus of round(confidence * 20).
 *
 * The position and confidence bonuses only rank candidates that already
 * matched some text. A candidate with no phrase or token hit is never
 * returned; previously the bonuses alone produced a positive score, so an
 * unrelated but confidently recognized text could be reported as the match.
 *
 * Ties are broken by smaller `y`, then smaller `x`.
 *
 * @param {Array<{text?: string, x: number, y: number, confidence?: number}>} candidates
 *   OCR results to search.
 * @param {unknown} description - Free-form target description.
 * @returns {{candidate: object, score: number} | null} Best match, or null when
 *   the description has no searchable text or nothing matched.
 */
function findOcrTextMatch(candidates, description) {
    const phrases = extractQuotedPhrases(description);
    const tokens = extractMeaningfulDescriptionTokens(description);
    if (!phrases.length && !tokens.length) {
        return null;
    }
    const wantsFirst = /\b(primeir[ao]?|first)\b/.test(normalizeText(description || ""));
    const scored = [];
    candidates.forEach((candidate, index) => {
        const normalizedText = normalizeText(candidate.text || "");
        let score = 0;
        for (const phrase of phrases) {
            if (normalizedText.includes(phrase)) {
                score += 120;
            }
        }
        for (const token of tokens) {
            if (normalizedText.includes(token)) {
                score += 18;
            }
        }
        if (score === 0) {
            // No textual evidence at all: bonuses must not promote this candidate.
            return;
        }
        if (wantsFirst) {
            score += Math.max(0, 24 - Math.round(candidate.y / 60));
            score += Math.max(0, 12 - index);
        }
        if (candidate.confidence) {
            score += Math.round(candidate.confidence * 20);
        }
        // `score > 0` also rejects NaN produced by a missing coordinate.
        if (score > 0) {
            scored.push({ candidate, score });
        }
    });
    scored.sort((left, right) => {
        if (right.score !== left.score) {
            return right.score - left.score;
        }
        if (left.candidate.y !== right.candidate.y) {
            return left.candidate.y - right.candidate.y;
        }
        return left.candidate.x - right.candidate.x;
    });
    return scored[0] || null;
}
|
|
52
181
|
function extractTaskText(job) {
|
|
53
182
|
const payload = asRecord(job.payload);
|
|
54
183
|
const candidates = [
|
|
@@ -88,6 +217,39 @@ function humanizeUrl(url) {
|
|
|
88
217
|
return normalized;
|
|
89
218
|
}
|
|
90
219
|
}
|
|
220
|
+
/**
 * De-duplicates a list of strings while keeping the original order.
 * Every value is stringified (falsy values become "") and trimmed; blanks are
 * skipped. Two entries count as duplicates when normalizeText gives the same
 * result for both, and the first spelling encountered is the one returned.
 *
 * @param {Iterable<unknown>} values - Candidate strings.
 * @returns {string[]} Trimmed, unique, non-empty strings.
 */
function uniqueStrings(values) {
    const firstSpellingByKey = new Map();
    for (const value of values) {
        const text = String(value || "").trim();
        if (text === "") {
            continue;
        }
        const key = normalizeText(text);
        if (!firstSpellingByKey.has(key)) {
            firstSpellingByKey.set(key, text);
        }
    }
    return [...firstSpellingByKey.values()];
}
|
|
235
|
+
/**
 * Interprets a free-text answer from the vision model as a yes/no verdict on
 * whether a click had the intended effect.
 *
 * Decision order:
 *   1. empty answer -> false;
 *   2. answer begins with the word "sim" -> true;
 *   3. answer begins with the word "nao"/"não" -> false;
 *   4. otherwise true only if the answer contains one of the known
 *      "it is playing / it was triggered" hints.
 *
 * The leading yes/no is matched as a whole word. A plain prefix test treated
 * answers such as "Simplesmente nada mudou" or "Similar a antes" as "sim".
 *
 * @param {unknown} answer - Raw model answer.
 * @returns {boolean} True when the answer reads as a confirmation.
 */
function looksLikeAffirmativeVisualVerification(answer) {
    const normalized = normalizeText(answer || "").trim();
    if (!normalized) {
        return false;
    }
    if (/^sim\b/.test(normalized)) {
        return true;
    }
    if (/^n[aã]o\b/.test(normalized)) {
        return false;
    }
    // Accented and unaccented spellings are both listed because this function
    // does not rely on normalizeText having removed diacritics.
    const affirmativeHints = [
        "tocando",
        "reproduzindo",
        "em reproducao",
        "em reprodução",
        "botao de pausa",
        "botão de pausa",
        "faixa ativa",
        "resultado selecionado",
        "foi acionado",
    ];
    return affirmativeHints.some((hint) => normalized.includes(hint));
}
|
|
91
253
|
function mimeTypeFromPath(filePath) {
|
|
92
254
|
const ext = path.extname(filePath).toLowerCase();
|
|
93
255
|
if (ext === ".png")
|
|
@@ -126,6 +288,38 @@ function clipText(value, maxLength) {
|
|
|
126
288
|
}
|
|
127
289
|
return `${value.slice(0, maxLength)}...`;
|
|
128
290
|
}
|
|
291
|
+
// Lowercase, dot-prefixed file extensions collected under the "textutil readable" name.
// NOTE(review): presumably these are the formats handed to macOS `textutil` for
// text extraction - the consumer is outside this view, confirm there.
const TEXTUTIL_READABLE_EXTENSIONS = new Set(
    [".doc", ".docx", ".odt", ".pages", ".rtf", ".rtfd", ".webarchive"],
);
|
|
300
|
+
/**
 * Cleans text before it is shipped inside a JSON payload: CRLF pairs become
 * LF, control characters are removed (tab, line feed and carriage return are
 * kept), and surrounding whitespace is trimmed.
 *
 * @param {string} value - Raw text.
 * @returns {string} Cleaned text.
 */
function sanitizeTextForJsonTransport(value) {
    const withUnixNewlines = value.replace(/\r\n/g, "\n");
    const withoutControlChars = withUnixNewlines.replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F]/g, "");
    return withoutControlChars.trim();
}
|
|
306
|
+
/**
 * Heuristically decides whether a byte buffer holds binary rather than text
 * data by inspecting at most its first 4096 bytes.
 *
 * A NUL byte anywhere in the sample means "binary" immediately. Otherwise the
 * buffer is binary when more than 10% of the sampled bytes are control bytes
 * other than 7-13 (i.e. bytes 1-6, 14-31 or 127).
 *
 * @param {{length: number, [index: number]: number}} buffer - Bytes to inspect.
 * @returns {boolean} True when the sample looks binary; false for empty input.
 */
function isLikelyBinaryBuffer(buffer) {
    if (buffer.length === 0) {
        return false;
    }
    const sampleSize = Math.min(buffer.length, 4096);
    let suspiciousBytes = 0;
    let position = 0;
    while (position < sampleSize) {
        const byte = buffer[position];
        position += 1;
        if (byte === 0) {
            return true;
        }
        const isOddControlByte = byte < 7 || byte === 127 || (byte > 13 && byte < 32);
        if (isOddControlByte) {
            suspiciousBytes += 1;
        }
    }
    return (suspiciousBytes / sampleSize) > 0.1;
}
|
|
129
323
|
/**
 * Returns a promise that resolves (with no value) after `ms` milliseconds.
 *
 * @param {number} ms - Time to wait, in milliseconds.
 * @returns {Promise<void>}
 */
function delay(ms) {
    return new Promise((done) => {
        setTimeout(done, ms);
    });
}
|
|
@@ -327,6 +521,10 @@ function parseStructuredActions(job) {
|
|
|
327
521
|
type: "click_visual_target",
|
|
328
522
|
description,
|
|
329
523
|
app: asString(action.app) || undefined,
|
|
524
|
+
verification_prompt: asString(action.verification_prompt) || undefined,
|
|
525
|
+
retry_descriptions: Array.isArray(action.retry_descriptions)
|
|
526
|
+
? action.retry_descriptions.map((item) => asString(item)).filter(Boolean)
|
|
527
|
+
: undefined,
|
|
330
528
|
});
|
|
331
529
|
}
|
|
332
530
|
continue;
|
|
@@ -414,6 +612,7 @@ export class NativeMacOSJobExecutor {
|
|
|
414
612
|
bridgeConfig;
|
|
415
613
|
cancelledJobs = new Set();
|
|
416
614
|
activeChild = null;
|
|
615
|
+
lastActiveApp = null;
|
|
417
616
|
constructor(bridgeConfig) {
|
|
418
617
|
this.bridgeConfig = bridgeConfig;
|
|
419
618
|
}
|
|
@@ -486,6 +685,7 @@ export class NativeMacOSJobExecutor {
|
|
|
486
685
|
mimeTypeOverride: uploadable.mimeType,
|
|
487
686
|
fileNameOverride: uploadable.filename,
|
|
488
687
|
metadata: {
|
|
688
|
+
visible_in_chat: true,
|
|
489
689
|
width: uploadable.dimensions?.width || undefined,
|
|
490
690
|
height: uploadable.dimensions?.height || undefined,
|
|
491
691
|
original_width: uploadable.originalDimensions?.width || undefined,
|
|
@@ -516,6 +716,7 @@ export class NativeMacOSJobExecutor {
|
|
|
516
716
|
fileNameOverride: uploadable.filename,
|
|
517
717
|
metadata: {
|
|
518
718
|
purpose: "page_read_fallback",
|
|
719
|
+
visible_in_chat: false,
|
|
519
720
|
width: uploadable.dimensions?.width || undefined,
|
|
520
721
|
height: uploadable.dimensions?.height || undefined,
|
|
521
722
|
original_width: uploadable.originalDimensions?.width || undefined,
|
|
@@ -558,50 +759,162 @@ export class NativeMacOSJobExecutor {
|
|
|
558
759
|
continue;
|
|
559
760
|
}
|
|
560
761
|
if (action.type === "click_visual_target") {
|
|
561
|
-
|
|
762
|
+
const browserApp = await this.resolveLikelyBrowserApp(action.app);
|
|
763
|
+
if (browserApp) {
|
|
764
|
+
await reporter.progress(progressPercent, `Trazendo ${browserApp} para frente antes do clique`);
|
|
765
|
+
await this.focusApp(browserApp);
|
|
766
|
+
}
|
|
767
|
+
else if (action.app) {
|
|
562
768
|
await reporter.progress(progressPercent, `Trazendo ${action.app} para frente antes do clique`);
|
|
563
769
|
await this.focusApp(action.app);
|
|
564
770
|
}
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
771
|
+
const targetDescriptions = uniqueStrings([action.description, ...(action.retry_descriptions || [])]);
|
|
772
|
+
let clickSucceeded = false;
|
|
773
|
+
let lastFailureReason = "";
|
|
774
|
+
for (let attempt = 0; attempt < targetDescriptions.length; attempt += 1) {
|
|
775
|
+
const targetDescription = targetDescriptions[attempt];
|
|
776
|
+
const initialBrowserState = browserApp
|
|
777
|
+
? await this.captureBrowserPageState(browserApp).catch(() => null)
|
|
778
|
+
: null;
|
|
779
|
+
if (browserApp === "Safari") {
|
|
780
|
+
await reporter.progress(progressPercent, `Tentando localizar ${targetDescription} diretamente no Safari`);
|
|
781
|
+
const domClick = await this.trySafariDomClick(targetDescription);
|
|
782
|
+
if (domClick?.clicked) {
|
|
783
|
+
let validated = false;
|
|
784
|
+
let validationReason = "";
|
|
785
|
+
if (action.verification_prompt) {
|
|
786
|
+
const verification = await this.validateVisualClickWithVision(job.job_id, targetDescription, action.verification_prompt, progressPercent, reporter, artifacts, "dom_click_result");
|
|
787
|
+
validated = verification.ok;
|
|
788
|
+
validationReason = verification.reason;
|
|
789
|
+
}
|
|
790
|
+
else {
|
|
791
|
+
const browserValidation = await this.confirmBrowserClick(browserApp, initialBrowserState, targetDescription, domClick.matchedHref || null);
|
|
792
|
+
validated = browserValidation.ok;
|
|
793
|
+
validationReason = browserValidation.reason;
|
|
794
|
+
}
|
|
795
|
+
if (validated) {
|
|
796
|
+
resultPayload.last_click = {
|
|
797
|
+
strategy: domClick.strategy || "safari_dom",
|
|
798
|
+
matched_text: domClick.matchedText || null,
|
|
799
|
+
matched_href: domClick.matchedHref || null,
|
|
800
|
+
score: domClick.score || null,
|
|
801
|
+
total_candidates: domClick.totalCandidates || null,
|
|
802
|
+
};
|
|
803
|
+
completionNotes.push(`Localizei e cliquei em ${targetDescription} diretamente no navegador.`);
|
|
804
|
+
clickSucceeded = true;
|
|
805
|
+
break;
|
|
806
|
+
}
|
|
807
|
+
lastFailureReason = validationReason || `Clique DOM em ${targetDescription} nao alterou a pagina como esperado.`;
|
|
808
|
+
}
|
|
809
|
+
else if (domClick?.reason) {
|
|
810
|
+
lastFailureReason = domClick.reason;
|
|
811
|
+
}
|
|
812
|
+
}
|
|
813
|
+
const visualBeforeState = browserApp
|
|
814
|
+
? await this.captureBrowserPageState(browserApp).catch(() => initialBrowserState)
|
|
815
|
+
: initialBrowserState;
|
|
816
|
+
await reporter.progress(progressPercent, `Capturando a tela para localizar ${targetDescription}`);
|
|
817
|
+
let screenshotPath = await this.takeScreenshot();
|
|
818
|
+
const ocrClick = await this.tryLocalOcrClick(screenshotPath, targetDescription);
|
|
819
|
+
if (ocrClick.clicked) {
|
|
820
|
+
let validated = false;
|
|
821
|
+
let validationReason = "";
|
|
822
|
+
if (action.verification_prompt) {
|
|
823
|
+
const verification = await this.validateVisualClickWithVision(job.job_id, targetDescription, action.verification_prompt, progressPercent, reporter, artifacts, "local_ocr_click_result");
|
|
824
|
+
validated = verification.ok;
|
|
825
|
+
validationReason = verification.reason;
|
|
826
|
+
}
|
|
827
|
+
else if (browserApp) {
|
|
828
|
+
const browserValidation = await this.confirmBrowserClick(browserApp, visualBeforeState, targetDescription, null);
|
|
829
|
+
validated = browserValidation.ok;
|
|
830
|
+
validationReason = browserValidation.reason;
|
|
831
|
+
}
|
|
832
|
+
else {
|
|
833
|
+
validated = true;
|
|
834
|
+
}
|
|
835
|
+
if (validated) {
|
|
836
|
+
const candidate = ocrClick.candidate || null;
|
|
837
|
+
resultPayload.last_click = {
|
|
838
|
+
strategy: ocrClick.strategy || "local_ocr",
|
|
839
|
+
score: ocrClick.score || null,
|
|
840
|
+
matched_text: candidate?.text || null,
|
|
841
|
+
x: candidate ? candidate.x + (candidate.width / 2) : null,
|
|
842
|
+
y: candidate ? candidate.y + (candidate.height / 2) : null,
|
|
843
|
+
width: candidate?.width || null,
|
|
844
|
+
height: candidate?.height || null,
|
|
845
|
+
};
|
|
846
|
+
completionNotes.push(`Localizei e cliquei em ${targetDescription} por OCR local.`);
|
|
847
|
+
clickSucceeded = true;
|
|
848
|
+
break;
|
|
849
|
+
}
|
|
850
|
+
lastFailureReason = validationReason || `O clique por OCR local em ${targetDescription} nao teve efeito confirmavel.`;
|
|
851
|
+
await reporter.progress(progressPercent, "OCR local nao confirmou o clique; vou tentar visão remota");
|
|
852
|
+
screenshotPath = await this.takeScreenshot();
|
|
853
|
+
}
|
|
854
|
+
else if (ocrClick.reason) {
|
|
855
|
+
lastFailureReason = ocrClick.reason;
|
|
856
|
+
}
|
|
857
|
+
const uploadable = await this.buildUploadableImage(screenshotPath);
|
|
858
|
+
const artifact = await this.uploadArtifactForJob(job.job_id, uploadable.path, {
|
|
859
|
+
kind: "screenshot",
|
|
860
|
+
mimeTypeOverride: uploadable.mimeType,
|
|
861
|
+
fileNameOverride: uploadable.filename,
|
|
862
|
+
metadata: {
|
|
863
|
+
purpose: "visual_click",
|
|
864
|
+
visible_in_chat: false,
|
|
865
|
+
target: targetDescription,
|
|
866
|
+
width: uploadable.dimensions?.width || undefined,
|
|
867
|
+
height: uploadable.dimensions?.height || undefined,
|
|
868
|
+
original_width: uploadable.originalDimensions?.width || undefined,
|
|
869
|
+
original_height: uploadable.originalDimensions?.height || undefined,
|
|
870
|
+
resized_for_upload: uploadable.resized,
|
|
871
|
+
},
|
|
872
|
+
});
|
|
873
|
+
if (!artifact?.storage_path) {
|
|
874
|
+
throw new Error("Otto Bridge nao conseguiu enviar a screenshot necessaria para localizar o alvo visual.");
|
|
875
|
+
}
|
|
876
|
+
artifacts.push(artifact);
|
|
877
|
+
const artifactMetadata = artifact.metadata || {};
|
|
878
|
+
const width = Number(artifactMetadata.width || 0);
|
|
879
|
+
const height = Number(artifactMetadata.height || 0);
|
|
880
|
+
const originalWidth = Number(artifactMetadata.original_width || width || 0);
|
|
881
|
+
const originalHeight = Number(artifactMetadata.original_height || height || 0);
|
|
882
|
+
const location = await this.locateVisualTarget(job.job_id, artifact.storage_path, targetDescription, width, height, artifact.mime_type);
|
|
883
|
+
if (!location?.found || typeof location.x !== "number" || typeof location.y !== "number") {
|
|
884
|
+
lastFailureReason = `Nao consegui localizar ${targetDescription} com confianca suficiente na tela.`;
|
|
885
|
+
continue;
|
|
886
|
+
}
|
|
887
|
+
await reporter.progress(progressPercent, `Clicando em ${targetDescription}`);
|
|
888
|
+
const scaledX = width > 0 && originalWidth > 0 ? (location.x / width) * originalWidth : location.x;
|
|
889
|
+
const scaledY = height > 0 && originalHeight > 0 ? (location.y / height) * originalHeight : location.y;
|
|
890
|
+
await this.clickPoint(scaledX, scaledY);
|
|
891
|
+
resultPayload.last_click = {
|
|
892
|
+
...location,
|
|
893
|
+
x: scaledX,
|
|
894
|
+
y: scaledY,
|
|
895
|
+
strategy: "visual_locator",
|
|
896
|
+
};
|
|
897
|
+
if (action.verification_prompt) {
|
|
898
|
+
const verification = await this.validateVisualClickWithVision(job.job_id, targetDescription, action.verification_prompt, progressPercent, reporter, artifacts, "visual_click_result");
|
|
899
|
+
if (!verification.ok) {
|
|
900
|
+
lastFailureReason = verification.reason || `Nao consegui validar visualmente se ${targetDescription} foi acionado.`;
|
|
901
|
+
continue;
|
|
902
|
+
}
|
|
903
|
+
}
|
|
904
|
+
else if (browserApp) {
|
|
905
|
+
const browserValidation = await this.confirmBrowserClick(browserApp, visualBeforeState, targetDescription, null);
|
|
906
|
+
if (!browserValidation.ok) {
|
|
907
|
+
lastFailureReason = browserValidation.reason || `O clique em ${targetDescription} nao alterou a pagina como esperado.`;
|
|
908
|
+
continue;
|
|
909
|
+
}
|
|
910
|
+
}
|
|
911
|
+
completionNotes.push(`Localizei e cliquei em ${targetDescription}.`);
|
|
912
|
+
clickSucceeded = true;
|
|
913
|
+
break;
|
|
584
914
|
}
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
const width = Number(artifactMetadata.width || 0);
|
|
588
|
-
const height = Number(artifactMetadata.height || 0);
|
|
589
|
-
const originalWidth = Number(artifactMetadata.original_width || width || 0);
|
|
590
|
-
const originalHeight = Number(artifactMetadata.original_height || height || 0);
|
|
591
|
-
const location = await this.locateVisualTarget(job.job_id, artifact.storage_path, action.description, width, height, artifact.mime_type);
|
|
592
|
-
if (!location?.found || typeof location.x !== "number" || typeof location.y !== "number") {
|
|
593
|
-
throw new Error(`Nao consegui localizar ${action.description} com confianca suficiente na tela.`);
|
|
915
|
+
if (!clickSucceeded) {
|
|
916
|
+
throw new Error(lastFailureReason || `Nao consegui concluir o clique visual para ${action.description}.`);
|
|
594
917
|
}
|
|
595
|
-
await reporter.progress(progressPercent, `Clicando em ${action.description}`);
|
|
596
|
-
const scaledX = width > 0 && originalWidth > 0 ? (location.x / width) * originalWidth : location.x;
|
|
597
|
-
const scaledY = height > 0 && originalHeight > 0 ? (location.y / height) * originalHeight : location.y;
|
|
598
|
-
await this.clickPoint(scaledX, scaledY);
|
|
599
|
-
completionNotes.push(`Localizei e cliquei em ${action.description}.`);
|
|
600
|
-
resultPayload.last_click = {
|
|
601
|
-
...location,
|
|
602
|
-
x: scaledX,
|
|
603
|
-
y: scaledY,
|
|
604
|
-
};
|
|
605
918
|
continue;
|
|
606
919
|
}
|
|
607
920
|
await reporter.progress(progressPercent, `Abrindo ${action.url}${action.app ? ` em ${action.app}` : ""}`);
|
|
@@ -641,6 +954,7 @@ export class NativeMacOSJobExecutor {
|
|
|
641
954
|
if (app) {
|
|
642
955
|
await this.runCommand("open", ["-a", app, url]);
|
|
643
956
|
await this.focusApp(app);
|
|
957
|
+
this.lastActiveApp = app;
|
|
644
958
|
return;
|
|
645
959
|
}
|
|
646
960
|
await this.runCommand("open", [url]);
|
|
@@ -668,6 +982,118 @@ end tell
|
|
|
668
982
|
}
|
|
669
983
|
async focusApp(app) {
|
|
670
984
|
await this.runCommand("osascript", ["-e", `tell application "${escapeAppleScript(app)}" to activate`]);
|
|
985
|
+
this.lastActiveApp = app;
|
|
986
|
+
}
|
|
987
|
+
async getFrontmostAppName() {
|
|
988
|
+
try {
|
|
989
|
+
const { stdout } = await this.runCommandCapture("osascript", [
|
|
990
|
+
"-e",
|
|
991
|
+
'tell application "System Events" to get name of first application process whose frontmost is true',
|
|
992
|
+
]);
|
|
993
|
+
const app = String(stdout || "").trim();
|
|
994
|
+
return app || null;
|
|
995
|
+
}
|
|
996
|
+
catch {
|
|
997
|
+
return null;
|
|
998
|
+
}
|
|
999
|
+
}
|
|
1000
|
+
async resolveLikelyBrowserApp(preferredApp) {
|
|
1001
|
+
const candidates = [
|
|
1002
|
+
preferredApp || null,
|
|
1003
|
+
this.lastActiveApp,
|
|
1004
|
+
await this.getFrontmostAppName(),
|
|
1005
|
+
];
|
|
1006
|
+
for (const candidate of candidates) {
|
|
1007
|
+
if (candidate === "Safari") {
|
|
1008
|
+
return candidate;
|
|
1009
|
+
}
|
|
1010
|
+
}
|
|
1011
|
+
return null;
|
|
1012
|
+
}
|
|
1013
|
+
async captureBrowserPageState(app) {
|
|
1014
|
+
if (app !== "Safari") {
|
|
1015
|
+
return null;
|
|
1016
|
+
}
|
|
1017
|
+
const page = await this.readFrontmostPage(app);
|
|
1018
|
+
return {
|
|
1019
|
+
app,
|
|
1020
|
+
title: page.title,
|
|
1021
|
+
url: page.url,
|
|
1022
|
+
text: page.text,
|
|
1023
|
+
};
|
|
1024
|
+
}
|
|
1025
|
+
resolveExpectedBrowserHref(rawHref, baseUrl) {
|
|
1026
|
+
const href = String(rawHref || "").trim();
|
|
1027
|
+
if (!href) {
|
|
1028
|
+
return null;
|
|
1029
|
+
}
|
|
1030
|
+
try {
|
|
1031
|
+
const absolute = baseUrl ? new URL(href, baseUrl).toString() : new URL(href).toString();
|
|
1032
|
+
return normalizeComparableUrl(absolute);
|
|
1033
|
+
}
|
|
1034
|
+
catch {
|
|
1035
|
+
return normalizeComparableUrl(href);
|
|
1036
|
+
}
|
|
1037
|
+
}
|
|
1038
|
+
didBrowserPageStateChange(before, after, targetDescription, matchedHref) {
|
|
1039
|
+
if (!after) {
|
|
1040
|
+
return false;
|
|
1041
|
+
}
|
|
1042
|
+
const beforeUrl = normalizeComparableUrl(before?.url || "");
|
|
1043
|
+
const afterUrl = normalizeComparableUrl(after.url || "");
|
|
1044
|
+
const expectedHref = this.resolveExpectedBrowserHref(matchedHref || null, before?.url || after.url);
|
|
1045
|
+
if (expectedHref && afterUrl) {
|
|
1046
|
+
if (afterUrl === expectedHref || afterUrl.startsWith(expectedHref) || expectedHref.startsWith(afterUrl)) {
|
|
1047
|
+
return true;
|
|
1048
|
+
}
|
|
1049
|
+
}
|
|
1050
|
+
if (beforeUrl && afterUrl && beforeUrl !== afterUrl) {
|
|
1051
|
+
return true;
|
|
1052
|
+
}
|
|
1053
|
+
const normalizedDescription = normalizeText(targetDescription || "");
|
|
1054
|
+
if (normalizedDescription.includes("youtube")
|
|
1055
|
+
|| normalizedDescription.includes("video")
|
|
1056
|
+
|| normalizedDescription.includes("musica")) {
|
|
1057
|
+
if (afterUrl.includes("youtube.com/watch") || afterUrl.includes("youtube.com/shorts/")) {
|
|
1058
|
+
return true;
|
|
1059
|
+
}
|
|
1060
|
+
}
|
|
1061
|
+
const beforeTitle = normalizeText(before?.title || "");
|
|
1062
|
+
const afterTitle = normalizeText(after.title || "");
|
|
1063
|
+
if (beforeTitle && afterTitle && beforeTitle !== afterTitle) {
|
|
1064
|
+
return true;
|
|
1065
|
+
}
|
|
1066
|
+
const beforeText = normalizeText((before?.text || "").slice(0, 320));
|
|
1067
|
+
const afterText = normalizeText((after.text || "").slice(0, 320));
|
|
1068
|
+
if (beforeText && afterText && beforeText !== afterText) {
|
|
1069
|
+
return true;
|
|
1070
|
+
}
|
|
1071
|
+
return false;
|
|
1072
|
+
}
|
|
1073
|
+
async confirmBrowserClick(app, before, targetDescription, matchedHref) {
|
|
1074
|
+
if (app !== "Safari") {
|
|
1075
|
+
return {
|
|
1076
|
+
ok: true,
|
|
1077
|
+
reason: "",
|
|
1078
|
+
afterState: null,
|
|
1079
|
+
};
|
|
1080
|
+
}
|
|
1081
|
+
for (let attempt = 0; attempt < 4; attempt += 1) {
|
|
1082
|
+
await delay(attempt === 0 ? 900 : 700);
|
|
1083
|
+
const afterState = await this.captureBrowserPageState(app).catch(() => null);
|
|
1084
|
+
if (this.didBrowserPageStateChange(before, afterState, targetDescription, matchedHref)) {
|
|
1085
|
+
return {
|
|
1086
|
+
ok: true,
|
|
1087
|
+
reason: "",
|
|
1088
|
+
afterState,
|
|
1089
|
+
};
|
|
1090
|
+
}
|
|
1091
|
+
}
|
|
1092
|
+
return {
|
|
1093
|
+
ok: false,
|
|
1094
|
+
reason: `O clique em ${targetDescription} nao mudou a pagina do navegador de forma verificavel.`,
|
|
1095
|
+
afterState: null,
|
|
1096
|
+
};
|
|
671
1097
|
}
|
|
672
1098
|
async pressShortcut(shortcut) {
|
|
673
1099
|
const { key, modifiers } = parseShortcut(shortcut);
|
|
@@ -752,6 +1178,263 @@ end tell
|
|
|
752
1178
|
});
|
|
753
1179
|
return String(response.answer || "").trim();
|
|
754
1180
|
}
|
|
1181
|
+
async validateVisualClickWithVision(jobId, targetDescription, verificationPrompt, progressPercent, reporter, artifacts, purpose) {
|
|
1182
|
+
await delay(1600);
|
|
1183
|
+
await reporter.progress(progressPercent, "Validando visualmente se a ação funcionou");
|
|
1184
|
+
const afterClickPath = await this.takeScreenshot();
|
|
1185
|
+
const afterClickUpload = await this.buildUploadableImage(afterClickPath);
|
|
1186
|
+
const afterClickArtifact = await this.uploadArtifactForJob(jobId, afterClickUpload.path, {
|
|
1187
|
+
kind: "screenshot",
|
|
1188
|
+
mimeTypeOverride: afterClickUpload.mimeType,
|
|
1189
|
+
fileNameOverride: afterClickUpload.filename,
|
|
1190
|
+
metadata: {
|
|
1191
|
+
purpose,
|
|
1192
|
+
visible_in_chat: true,
|
|
1193
|
+
target: targetDescription,
|
|
1194
|
+
width: afterClickUpload.dimensions?.width || undefined,
|
|
1195
|
+
height: afterClickUpload.dimensions?.height || undefined,
|
|
1196
|
+
original_width: afterClickUpload.originalDimensions?.width || undefined,
|
|
1197
|
+
original_height: afterClickUpload.originalDimensions?.height || undefined,
|
|
1198
|
+
resized_for_upload: afterClickUpload.resized,
|
|
1199
|
+
},
|
|
1200
|
+
});
|
|
1201
|
+
if (!afterClickArtifact?.storage_path) {
|
|
1202
|
+
return {
|
|
1203
|
+
ok: false,
|
|
1204
|
+
reason: `Nao consegui registrar a tela apos tentar clicar em ${targetDescription}.`,
|
|
1205
|
+
};
|
|
1206
|
+
}
|
|
1207
|
+
artifacts.push(afterClickArtifact);
|
|
1208
|
+
const verificationAnswer = await this.analyzeUploadedArtifact(jobId, afterClickArtifact.storage_path, verificationPrompt, afterClickArtifact.mime_type);
|
|
1209
|
+
if (!looksLikeAffirmativeVisualVerification(verificationAnswer)) {
|
|
1210
|
+
return {
|
|
1211
|
+
ok: false,
|
|
1212
|
+
reason: verificationAnswer || `Nao consegui validar visualmente se ${targetDescription} foi acionado.`,
|
|
1213
|
+
};
|
|
1214
|
+
}
|
|
1215
|
+
return {
|
|
1216
|
+
ok: true,
|
|
1217
|
+
reason: verificationAnswer,
|
|
1218
|
+
};
|
|
1219
|
+
}
|
|
1220
|
+
async runSafariJsonScript(scriptBody, input) {
|
|
1221
|
+
const wrappedScript = `
|
|
1222
|
+
(function(){
|
|
1223
|
+
const __input = ${JSON.stringify(input || null)};
|
|
1224
|
+
try {
|
|
1225
|
+
const __result = (() => {
|
|
1226
|
+
${scriptBody}
|
|
1227
|
+
})();
|
|
1228
|
+
return JSON.stringify({ ok: true, result: __result === undefined ? null : __result });
|
|
1229
|
+
} catch (error) {
|
|
1230
|
+
return JSON.stringify({
|
|
1231
|
+
ok: false,
|
|
1232
|
+
error: String(error && error.message ? error.message : error)
|
|
1233
|
+
});
|
|
1234
|
+
}
|
|
1235
|
+
})()
|
|
1236
|
+
`;
|
|
1237
|
+
const script = `
|
|
1238
|
+
tell application "Safari"
|
|
1239
|
+
activate
|
|
1240
|
+
if (count of windows) = 0 then error "Safari nao possui janelas abertas."
|
|
1241
|
+
delay 0.2
|
|
1242
|
+
set scriptResult to do JavaScript "${escapeAppleScript(wrappedScript)}" in current tab of front window
|
|
1243
|
+
end tell
|
|
1244
|
+
return scriptResult
|
|
1245
|
+
`;
|
|
1246
|
+
const { stdout } = await this.runCommandCapture("osascript", ["-e", script]);
|
|
1247
|
+
const parsed = JSON.parse(stdout.trim() || "{}");
|
|
1248
|
+
if (parsed.ok !== true) {
|
|
1249
|
+
throw new Error(asString(parsed.error) || "Safari JavaScript execution failed");
|
|
1250
|
+
}
|
|
1251
|
+
return parsed.result;
|
|
1252
|
+
}
|
|
1253
|
+
async trySafariDomClick(description) {
|
|
1254
|
+
try {
|
|
1255
|
+
return await this.runSafariJsonScript(`
|
|
1256
|
+
const rawDescription = String(__input?.description || "");
|
|
1257
|
+
const normalize = (value) => String(value || "")
|
|
1258
|
+
.normalize("NFD")
|
|
1259
|
+
.replace(/[\\u0300-\\u036f]/g, "")
|
|
1260
|
+
.toLowerCase();
|
|
1261
|
+
const normalizedDescription = normalize(rawDescription);
|
|
1262
|
+
const wantsFirst = /\\b(primeir[ao]?|first)\\b/.test(normalizedDescription);
|
|
1263
|
+
const wantsVideo = /\\b(video|videos|musica|faixa|youtube|resultado|watch)\\b/.test(normalizedDescription) || location.hostname.includes("youtube");
|
|
1264
|
+
const stopWords = new Set([
|
|
1265
|
+
"o", "a", "os", "as", "um", "uma", "uns", "umas", "de", "da", "do", "das", "dos",
|
|
1266
|
+
"em", "no", "na", "nos", "nas", "para", "por", "com", "que", "visivel", "visiveis",
|
|
1267
|
+
"visivel", "tela", "pagina", "page", "site", "link", "botao", "botao", "clicar",
|
|
1268
|
+
"clique", "seleciona", "selecionar", "resultado", "resultados"
|
|
1269
|
+
]);
|
|
1270
|
+
const quotedPhrases = Array.from(rawDescription.matchAll(/["'“”‘’]([^"'“”‘’]{2,80})["'“”‘’]/g))
|
|
1271
|
+
.map((match) => normalize(match[1]));
|
|
1272
|
+
const tokens = Array.from(new Set(
|
|
1273
|
+
normalizedDescription
|
|
1274
|
+
.split(/[^a-z0-9]+/)
|
|
1275
|
+
.filter((token) => token.length >= 3 && !stopWords.has(token))
|
|
1276
|
+
));
|
|
1277
|
+
|
|
1278
|
+
const candidateSelectors = location.hostname.includes("youtube")
|
|
1279
|
+
? [
|
|
1280
|
+
"ytd-video-renderer a#video-title",
|
|
1281
|
+
"ytd-video-renderer ytd-thumbnail a",
|
|
1282
|
+
"ytd-video-renderer a#thumbnail",
|
|
1283
|
+
"ytd-rich-item-renderer a#video-title-link",
|
|
1284
|
+
"ytd-rich-item-renderer a#video-title",
|
|
1285
|
+
"ytd-rich-grid-media a#video-title-link",
|
|
1286
|
+
"a#video-title",
|
|
1287
|
+
"a[href*='/watch']",
|
|
1288
|
+
"button",
|
|
1289
|
+
"[role='button']",
|
|
1290
|
+
"[role='link']"
|
|
1291
|
+
]
|
|
1292
|
+
: [
|
|
1293
|
+
"a[href]",
|
|
1294
|
+
"button",
|
|
1295
|
+
"[role='button']",
|
|
1296
|
+
"[role='link']",
|
|
1297
|
+
"input[type='button']",
|
|
1298
|
+
"input[type='submit']"
|
|
1299
|
+
];
|
|
1300
|
+
|
|
1301
|
+
const seen = new Set();
|
|
1302
|
+
const candidates = [];
|
|
1303
|
+
|
|
1304
|
+
function isVisible(element) {
|
|
1305
|
+
if (!(element instanceof Element)) return false;
|
|
1306
|
+
const rect = element.getBoundingClientRect();
|
|
1307
|
+
if (rect.width < 4 || rect.height < 4) return false;
|
|
1308
|
+
const style = window.getComputedStyle(element);
|
|
1309
|
+
if (style.visibility === "hidden" || style.display === "none" || Number(style.opacity || "1") === 0) return false;
|
|
1310
|
+
return rect.bottom >= 0 && rect.right >= 0 && rect.top <= window.innerHeight && rect.left <= window.innerWidth;
|
|
1311
|
+
}
|
|
1312
|
+
|
|
1313
|
+
function deriveText(element) {
|
|
1314
|
+
const ownText = [
|
|
1315
|
+
element.innerText,
|
|
1316
|
+
element.textContent,
|
|
1317
|
+
element.getAttribute("aria-label"),
|
|
1318
|
+
element.getAttribute("title"),
|
|
1319
|
+
element.getAttribute("alt"),
|
|
1320
|
+
].find((value) => typeof value === "string" && value.trim());
|
|
1321
|
+
if (ownText && ownText.trim()) return ownText.trim();
|
|
1322
|
+
|
|
1323
|
+
const richVideo = element.closest("ytd-video-renderer, ytd-rich-item-renderer, ytd-rich-grid-media, ytmusic-responsive-list-item-renderer");
|
|
1324
|
+
if (richVideo && richVideo instanceof HTMLElement && richVideo.innerText.trim()) {
|
|
1325
|
+
return richVideo.innerText.trim();
|
|
1326
|
+
}
|
|
1327
|
+
|
|
1328
|
+
return "";
|
|
1329
|
+
}
|
|
1330
|
+
|
|
1331
|
+
function scoreCandidate(element, rank) {
|
|
1332
|
+
const text = deriveText(element);
|
|
1333
|
+
const href = element instanceof HTMLAnchorElement
|
|
1334
|
+
? (element.href || "")
|
|
1335
|
+
: (element.getAttribute("href") || "");
|
|
1336
|
+
const normalizedText = normalize(text);
|
|
1337
|
+
const normalizedHref = normalize(href);
|
|
1338
|
+
let score = 0;
|
|
1339
|
+
|
|
1340
|
+
if (wantsFirst) score += Math.max(0, 40 - rank);
|
|
1341
|
+
if (wantsVideo && normalizedHref.includes("/watch")) score += 30;
|
|
1342
|
+
if (location.hostname.includes("youtube") && element.closest("ytd-video-renderer, ytd-rich-item-renderer, ytd-rich-grid-media")) score += 20;
|
|
1343
|
+
if (element.id === "video-title") score += 12;
|
|
1344
|
+
if (!normalizedText && normalizedHref.includes("/watch")) score += 8;
|
|
1345
|
+
|
|
1346
|
+
for (const phrase of quotedPhrases) {
|
|
1347
|
+
if (!phrase) continue;
|
|
1348
|
+
if (normalizedText.includes(phrase)) score += 120;
|
|
1349
|
+
if (normalizedHref.includes(phrase)) score += 40;
|
|
1350
|
+
}
|
|
1351
|
+
|
|
1352
|
+
for (const token of tokens) {
|
|
1353
|
+
if (normalizedText.includes(token)) score += 18;
|
|
1354
|
+
if (normalizedHref.includes(token)) score += 8;
|
|
1355
|
+
}
|
|
1356
|
+
|
|
1357
|
+
return {
|
|
1358
|
+
element,
|
|
1359
|
+
text,
|
|
1360
|
+
href,
|
|
1361
|
+
score,
|
|
1362
|
+
rank,
|
|
1363
|
+
};
|
|
1364
|
+
}
|
|
1365
|
+
|
|
1366
|
+
for (const selector of candidateSelectors) {
|
|
1367
|
+
const nodes = document.querySelectorAll(selector);
|
|
1368
|
+
for (const node of nodes) {
|
|
1369
|
+
if (!(node instanceof HTMLElement || node instanceof HTMLAnchorElement)) continue;
|
|
1370
|
+
if (!isVisible(node)) continue;
|
|
1371
|
+
const key = [
|
|
1372
|
+
node.tagName,
|
|
1373
|
+
node.id || "",
|
|
1374
|
+
node.getAttribute("href") || "",
|
|
1375
|
+
deriveText(node).slice(0, 120),
|
|
1376
|
+
].join("|");
|
|
1377
|
+
if (seen.has(key)) continue;
|
|
1378
|
+
seen.add(key);
|
|
1379
|
+
candidates.push(scoreCandidate(node, candidates.length));
|
|
1380
|
+
}
|
|
1381
|
+
}
|
|
1382
|
+
|
|
1383
|
+
const ranked = candidates
|
|
1384
|
+
.filter((candidate) => candidate.score > 0 || (wantsFirst && normalize(candidate.href).includes("/watch")))
|
|
1385
|
+
.sort((left, right) => right.score - left.score || left.rank - right.rank);
|
|
1386
|
+
|
|
1387
|
+
if (!ranked.length) {
|
|
1388
|
+
return {
|
|
1389
|
+
clicked: false,
|
|
1390
|
+
reason: "Nenhum elemento clicavel no DOM combinou com a descricao atual.",
|
|
1391
|
+
totalCandidates: candidates.length,
|
|
1392
|
+
strategy: "safari_dom",
|
|
1393
|
+
};
|
|
1394
|
+
}
|
|
1395
|
+
|
|
1396
|
+
const winner = ranked[0];
|
|
1397
|
+
winner.element.scrollIntoView({ block: "center", inline: "center", behavior: "auto" });
|
|
1398
|
+
const rect = winner.element.getBoundingClientRect();
|
|
1399
|
+
for (const eventName of ["mouseover", "mousedown", "mouseup", "click"]) {
|
|
1400
|
+
winner.element.dispatchEvent(new MouseEvent(eventName, {
|
|
1401
|
+
bubbles: true,
|
|
1402
|
+
cancelable: true,
|
|
1403
|
+
view: window,
|
|
1404
|
+
clientX: rect.left + (rect.width / 2),
|
|
1405
|
+
clientY: rect.top + (rect.height / 2),
|
|
1406
|
+
}));
|
|
1407
|
+
}
|
|
1408
|
+
if (typeof winner.element.click === "function") {
|
|
1409
|
+
winner.element.click();
|
|
1410
|
+
}
|
|
1411
|
+
|
|
1412
|
+
return {
|
|
1413
|
+
clicked: true,
|
|
1414
|
+
matchedText: String(winner.text || "").slice(0, 180),
|
|
1415
|
+
matchedHref: winner.href || "",
|
|
1416
|
+
score: winner.score,
|
|
1417
|
+
totalCandidates: candidates.length,
|
|
1418
|
+
strategy: "safari_dom",
|
|
1419
|
+
};
|
|
1420
|
+
`, { description });
|
|
1421
|
+
}
|
|
1422
|
+
catch (error) {
|
|
1423
|
+
const detail = error instanceof Error ? error.message : String(error);
|
|
1424
|
+
if (detail.toLowerCase().includes("allow javascript from apple events")) {
|
|
1425
|
+
return {
|
|
1426
|
+
clicked: false,
|
|
1427
|
+
reason: "Safari ainda bloqueia JavaScript por Apple Events, entao o Otto Bridge caiu para o modo visual.",
|
|
1428
|
+
strategy: "safari_dom_blocked",
|
|
1429
|
+
};
|
|
1430
|
+
}
|
|
1431
|
+
return {
|
|
1432
|
+
clicked: false,
|
|
1433
|
+
reason: detail || "Falha ao tentar clicar via DOM no Safari.",
|
|
1434
|
+
strategy: "safari_dom_failed",
|
|
1435
|
+
};
|
|
1436
|
+
}
|
|
1437
|
+
}
|
|
755
1438
|
async readFrontmostPage(app) {
|
|
756
1439
|
const targetApp = app || "Safari";
|
|
757
1440
|
if (targetApp !== "Safari") {
|
|
@@ -840,6 +1523,128 @@ post(.leftMouseUp)
|
|
|
840
1523
|
`;
|
|
841
1524
|
await this.runCommand("swift", ["-e", script, String(Math.round(x)), String(Math.round(y))]);
|
|
842
1525
|
}
|
|
1526
|
+
async runLocalOcr(filePath) {
|
|
1527
|
+
const script = `
|
|
1528
|
+
import Foundation
|
|
1529
|
+
import Vision
|
|
1530
|
+
import ImageIO
|
|
1531
|
+
import CoreGraphics
|
|
1532
|
+
|
|
1533
|
+
let fileURL = URL(fileURLWithPath: CommandLine.arguments[1])
|
|
1534
|
+
guard let source = CGImageSourceCreateWithURL(fileURL as CFURL, nil),
|
|
1535
|
+
let image = CGImageSourceCreateImageAtIndex(source, 0, nil) else {
|
|
1536
|
+
fputs("failed to load image\\n", stderr)
|
|
1537
|
+
exit(1)
|
|
1538
|
+
}
|
|
1539
|
+
|
|
1540
|
+
let width = CGFloat(image.width)
|
|
1541
|
+
let height = CGFloat(image.height)
|
|
1542
|
+
var items: [[String: Any]] = []
|
|
1543
|
+
|
|
1544
|
+
let request = VNRecognizeTextRequest()
|
|
1545
|
+
request.recognitionLevel = .accurate
|
|
1546
|
+
request.usesLanguageCorrection = true
|
|
1547
|
+
request.recognitionLanguages = ["pt-BR", "en-US"]
|
|
1548
|
+
request.minimumTextHeight = 0.012
|
|
1549
|
+
|
|
1550
|
+
let handler = VNImageRequestHandler(cgImage: image, options: [:])
|
|
1551
|
+
try handler.perform([request])
|
|
1552
|
+
|
|
1553
|
+
let observations = request.results ?? []
|
|
1554
|
+
for observation in observations {
|
|
1555
|
+
guard let candidate = observation.topCandidates(1).first else { continue }
|
|
1556
|
+
let text = candidate.string.trimmingCharacters(in: .whitespacesAndNewlines)
|
|
1557
|
+
if text.isEmpty { continue }
|
|
1558
|
+
|
|
1559
|
+
let box = observation.boundingBox
|
|
1560
|
+
let x = box.origin.x * width
|
|
1561
|
+
let y = (1.0 - box.origin.y - box.size.height) * height
|
|
1562
|
+
let w = box.size.width * width
|
|
1563
|
+
let h = box.size.height * height
|
|
1564
|
+
|
|
1565
|
+
items.append([
|
|
1566
|
+
"text": text,
|
|
1567
|
+
"x": Int(round(x)),
|
|
1568
|
+
"y": Int(round(y)),
|
|
1569
|
+
"width": Int(round(w)),
|
|
1570
|
+
"height": Int(round(h)),
|
|
1571
|
+
"confidence": candidate.confidence
|
|
1572
|
+
])
|
|
1573
|
+
}
|
|
1574
|
+
|
|
1575
|
+
let payload: [String: Any] = ["items": items]
|
|
1576
|
+
let data = try JSONSerialization.data(withJSONObject: payload, options: [])
|
|
1577
|
+
if let output = String(data: data, encoding: .utf8) {
|
|
1578
|
+
print(output)
|
|
1579
|
+
}
|
|
1580
|
+
`;
|
|
1581
|
+
try {
|
|
1582
|
+
const { stdout } = await this.runCommandCapture("swift", ["-e", script, filePath]);
|
|
1583
|
+
const parsed = JSON.parse(stdout.trim() || "{}");
|
|
1584
|
+
const items = Array.isArray(parsed.items) ? parsed.items : [];
|
|
1585
|
+
return items
|
|
1586
|
+
.map((item) => {
|
|
1587
|
+
const row = asRecord(item);
|
|
1588
|
+
const text = asString(row.text);
|
|
1589
|
+
const x = Number(row.x);
|
|
1590
|
+
const y = Number(row.y);
|
|
1591
|
+
const width = Number(row.width);
|
|
1592
|
+
const height = Number(row.height);
|
|
1593
|
+
const confidence = Number(row.confidence);
|
|
1594
|
+
if (!text || !Number.isFinite(x) || !Number.isFinite(y) || !Number.isFinite(width) || !Number.isFinite(height)) {
|
|
1595
|
+
return null;
|
|
1596
|
+
}
|
|
1597
|
+
return {
|
|
1598
|
+
text,
|
|
1599
|
+
x,
|
|
1600
|
+
y,
|
|
1601
|
+
width,
|
|
1602
|
+
height,
|
|
1603
|
+
confidence: Number.isFinite(confidence) ? confidence : undefined,
|
|
1604
|
+
};
|
|
1605
|
+
})
|
|
1606
|
+
.filter(Boolean);
|
|
1607
|
+
}
|
|
1608
|
+
catch (error) {
|
|
1609
|
+
const detail = error instanceof Error ? error.message : String(error);
|
|
1610
|
+
console.warn(`[otto-bridge] local ocr failed=${detail}`);
|
|
1611
|
+
return [];
|
|
1612
|
+
}
|
|
1613
|
+
}
|
|
1614
|
+
async tryLocalOcrClick(screenshotPath, description) {
|
|
1615
|
+
if (!descriptionLikelyHasTextAnchor(description)) {
|
|
1616
|
+
return {
|
|
1617
|
+
clicked: false,
|
|
1618
|
+
reason: "A descricao nao traz ancora textual forte para OCR local.",
|
|
1619
|
+
strategy: "local_ocr_skipped",
|
|
1620
|
+
};
|
|
1621
|
+
}
|
|
1622
|
+
const candidates = await this.runLocalOcr(screenshotPath);
|
|
1623
|
+
if (!candidates.length) {
|
|
1624
|
+
return {
|
|
1625
|
+
clicked: false,
|
|
1626
|
+
reason: "OCR local nao encontrou texto utilizavel na tela.",
|
|
1627
|
+
strategy: "local_ocr_empty",
|
|
1628
|
+
};
|
|
1629
|
+
}
|
|
1630
|
+
const match = findOcrTextMatch(candidates, description);
|
|
1631
|
+
if (!match || match.score < 24) {
|
|
1632
|
+
return {
|
|
1633
|
+
clicked: false,
|
|
1634
|
+
reason: "OCR local nao encontrou texto suficientemente compativel com a descricao.",
|
|
1635
|
+
strategy: "local_ocr_no_match",
|
|
1636
|
+
};
|
|
1637
|
+
}
|
|
1638
|
+
const clickX = match.candidate.x + (match.candidate.width / 2);
|
|
1639
|
+
const clickY = match.candidate.y + (match.candidate.height / 2);
|
|
1640
|
+
await this.clickPoint(clickX, clickY);
|
|
1641
|
+
return {
|
|
1642
|
+
clicked: true,
|
|
1643
|
+
score: match.score,
|
|
1644
|
+
candidate: match.candidate,
|
|
1645
|
+
strategy: "local_ocr",
|
|
1646
|
+
};
|
|
1647
|
+
}
|
|
843
1648
|
async getImageDimensions(filePath) {
|
|
844
1649
|
try {
|
|
845
1650
|
const { stdout } = await this.runCommandCapture("sips", ["-g", "pixelWidth", "-g", "pixelHeight", filePath]);
|
|
@@ -873,6 +1678,9 @@ post(.leftMouseUp)
|
|
|
873
1678
|
{ width: 640, quality: 22 },
|
|
874
1679
|
{ width: 540, quality: 18 },
|
|
875
1680
|
{ width: 480, quality: 16 },
|
|
1681
|
+
{ width: 420, quality: 14 },
|
|
1682
|
+
{ width: 360, quality: 12 },
|
|
1683
|
+
{ width: 320, quality: 10 },
|
|
876
1684
|
];
|
|
877
1685
|
for (const step of conversionSteps) {
|
|
878
1686
|
const candidatePath = path.join(artifactsDir, `${path.basename(localPath, path.extname(localPath))}-${step.width}w-q${step.quality}.jpg`);
|
|
@@ -894,7 +1702,7 @@ post(.leftMouseUp)
|
|
|
894
1702
|
mimeType = "image/jpeg";
|
|
895
1703
|
filename = path.basename(candidatePath);
|
|
896
1704
|
resized = true;
|
|
897
|
-
if (candidateStat.size <=
|
|
1705
|
+
if (candidateStat.size <= 120_000) {
|
|
898
1706
|
break;
|
|
899
1707
|
}
|
|
900
1708
|
}
|
|
@@ -909,8 +1717,25 @@ post(.leftMouseUp)
|
|
|
909
1717
|
}
|
|
910
1718
|
async readLocalFile(filePath, maxChars = 4000) {
|
|
911
1719
|
const resolved = expandUserPath(filePath);
|
|
912
|
-
const
|
|
913
|
-
|
|
1720
|
+
const extension = path.extname(resolved).toLowerCase();
|
|
1721
|
+
if (TEXTUTIL_READABLE_EXTENSIONS.has(extension)) {
|
|
1722
|
+
const { stdout } = await this.runCommandCapture("textutil", [
|
|
1723
|
+
"-convert",
|
|
1724
|
+
"txt",
|
|
1725
|
+
"-stdout",
|
|
1726
|
+
resolved,
|
|
1727
|
+
]);
|
|
1728
|
+
const content = sanitizeTextForJsonTransport(stdout);
|
|
1729
|
+
return clipText(content || "(arquivo sem texto legivel)", maxChars);
|
|
1730
|
+
}
|
|
1731
|
+
const raw = await readFile(resolved);
|
|
1732
|
+
if (isLikelyBinaryBuffer(raw)) {
|
|
1733
|
+
const filename = path.basename(resolved);
|
|
1734
|
+
const detectedType = extension || "binario";
|
|
1735
|
+
return clipText(`O arquivo ${filename} parece ser binario (${detectedType}) e nao pode ser lido como texto puro pelo Otto Bridge ainda.`, maxChars);
|
|
1736
|
+
}
|
|
1737
|
+
const content = sanitizeTextForJsonTransport(raw.toString("utf8"));
|
|
1738
|
+
return clipText(content || "(arquivo vazio)", maxChars);
|
|
914
1739
|
}
|
|
915
1740
|
async listLocalFiles(directoryPath, limit = 40) {
|
|
916
1741
|
const resolved = expandUserPath(directoryPath);
|
package/dist/types.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
export const BRIDGE_CONFIG_VERSION = 1;
|
|
2
|
-
export const BRIDGE_VERSION = "0.5.
|
|
2
|
+
export const BRIDGE_VERSION = "0.5.3";
|
|
3
3
|
export const BRIDGE_PACKAGE_NAME = "@leg3ndy/otto-bridge";
|
|
4
4
|
export const DEFAULT_API_BASE_URL = "http://localhost:8000";
|
|
5
5
|
export const DEFAULT_POLL_INTERVAL_MS = 3000;
|