@leg3ndy/otto-bridge 0.4.2 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/executors/native_macos.js +247 -6
- package/dist/http.js +29 -0
- package/dist/runtime.js +1 -1
- package/dist/types.js +1 -1
- package/package.json +1 -1
|
@@ -4,6 +4,7 @@ import os from "node:os";
|
|
|
4
4
|
import path from "node:path";
|
|
5
5
|
import process from "node:process";
|
|
6
6
|
import { JobCancelledError } from "./shared.js";
|
|
7
|
+
import { postDeviceJson, uploadDeviceJobArtifact } from "../http.js";
|
|
7
8
|
const KNOWN_APPS = [
|
|
8
9
|
{ canonical: "Safari", patterns: [/\bsafari\b/i] },
|
|
9
10
|
{ canonical: "Google Chrome", patterns: [/\bgoogle chrome\b/i, /\bchrome\b/i] },
|
|
@@ -87,6 +88,22 @@ function humanizeUrl(url) {
|
|
|
87
88
|
return normalized;
|
|
88
89
|
}
|
|
89
90
|
}
|
|
91
|
+
/**
 * Picks a MIME type for a file by looking only at its extension.
 *
 * The extension is lower-cased before comparison, so "A.PNG" and "a.png"
 * are treated the same. Extensions not listed here fall back to
 * "application/octet-stream".
 *
 * @param {string} filePath - Path (or bare file name) whose extension is inspected.
 * @returns {string} The MIME type associated with the extension.
 */
function mimeTypeFromPath(filePath) {
    switch (path.extname(filePath).toLowerCase()) {
        case ".png":
            return "image/png";
        case ".jpg":
        case ".jpeg":
            return "image/jpeg";
        case ".webp":
            return "image/webp";
        case ".gif":
            return "image/gif";
        case ".txt":
        case ".md":
            return "text/plain";
        case ".json":
            return "application/json";
        default:
            return "application/octet-stream";
    }
}
|
|
90
107
|
function expandUserPath(value) {
|
|
91
108
|
const trimmed = value.trim();
|
|
92
109
|
if (!trimmed) {
|
|
@@ -109,6 +126,9 @@ function clipText(value, maxLength) {
|
|
|
109
126
|
}
|
|
110
127
|
return `${value.slice(0, maxLength)}...`;
|
|
111
128
|
}
|
|
129
|
+
/**
 * Returns a promise that settles once a timer of `ms` milliseconds fires.
 *
 * @param {number} ms - Timer duration handed to `setTimeout`.
 * @returns {Promise<void>} Resolves (with no value) when the timer fires.
 */
function delay(ms) {
    return new Promise((wake) => {
        setTimeout(wake, ms);
    });
}
|
|
112
132
|
function escapeHtml(value) {
|
|
113
133
|
return value
|
|
114
134
|
.replace(/&/g, "&amp;")
|
|
@@ -307,6 +327,13 @@ function parseStructuredActions(job) {
|
|
|
307
327
|
actions.push({ type: "take_screenshot", path: savePath || undefined });
|
|
308
328
|
continue;
|
|
309
329
|
}
|
|
330
|
+
if (type === "read_frontmost_page" || type === "read_page" || type === "read_webpage") {
|
|
331
|
+
actions.push({
|
|
332
|
+
type: "read_frontmost_page",
|
|
333
|
+
app: asString(action.app) || asString(action.application) || "Safari",
|
|
334
|
+
});
|
|
335
|
+
continue;
|
|
336
|
+
}
|
|
310
337
|
if (type === "read_file" || type === "read_local_file") {
|
|
311
338
|
const filePath = asString(action.path);
|
|
312
339
|
if (filePath) {
|
|
@@ -327,6 +354,25 @@ function parseStructuredActions(job) {
|
|
|
327
354
|
if (command) {
|
|
328
355
|
actions.push({ type: "run_shell", command, cwd: cwd || undefined });
|
|
329
356
|
}
|
|
357
|
+
continue;
|
|
358
|
+
}
|
|
359
|
+
if (type === "set_volume" || type === "volume") {
|
|
360
|
+
const rawLevel = Number(action.level);
|
|
361
|
+
if (Number.isFinite(rawLevel)) {
|
|
362
|
+
actions.push({ type: "set_volume", level: Math.max(0, Math.min(Math.round(rawLevel), 100)) });
|
|
363
|
+
}
|
|
364
|
+
continue;
|
|
365
|
+
}
|
|
366
|
+
if (type === "click_visual_target" || type === "click_target") {
|
|
367
|
+
const description = asString(action.description) || asString(action.target);
|
|
368
|
+
if (description) {
|
|
369
|
+
actions.push({
|
|
370
|
+
type: "click_visual_target",
|
|
371
|
+
description,
|
|
372
|
+
app: asString(action.app) || undefined,
|
|
373
|
+
});
|
|
374
|
+
}
|
|
375
|
+
continue;
|
|
330
376
|
}
|
|
331
377
|
}
|
|
332
378
|
return actions;
|
|
@@ -361,6 +407,30 @@ function deriveActionsFromText(job) {
|
|
|
361
407
|
const task = extractTaskText(job);
|
|
362
408
|
const detectedApp = detectKnownApp(task);
|
|
363
409
|
const detectedUrl = detectUrl(task);
|
|
410
|
+
const normalizedTask = normalizeText(task);
|
|
411
|
+
if (/\b(volume|som|audio)\b/i.test(task)) {
|
|
412
|
+
const percentMatch = task.match(/(\d{1,3})\s*%/);
|
|
413
|
+
let level = 50;
|
|
414
|
+
if (percentMatch?.[1]) {
|
|
415
|
+
level = Math.max(0, Math.min(Number(percentMatch[1]), 100));
|
|
416
|
+
}
|
|
417
|
+
else if (/\b(mudo|mute|silencia)\b/i.test(task)) {
|
|
418
|
+
level = 0;
|
|
419
|
+
}
|
|
420
|
+
else if (/\b(aumenta|aumente|mais alto)\b/i.test(task)) {
|
|
421
|
+
level = 80;
|
|
422
|
+
}
|
|
423
|
+
else if (/\b(diminui|abaixa|mais baixo)\b/i.test(task)) {
|
|
424
|
+
level = 25;
|
|
425
|
+
}
|
|
426
|
+
return [{ type: "set_volume", level }];
|
|
427
|
+
}
|
|
428
|
+
if ((normalizedTask.includes("leia") || normalizedTask.includes("ler")) && detectedUrl) {
|
|
429
|
+
return [
|
|
430
|
+
{ type: "open_url", url: detectedUrl, app: detectedApp || "Safari" },
|
|
431
|
+
{ type: "read_frontmost_page", app: detectedApp || "Safari" },
|
|
432
|
+
];
|
|
433
|
+
}
|
|
364
434
|
if (detectedUrl) {
|
|
365
435
|
return [{
|
|
366
436
|
type: "open_url",
|
|
@@ -384,8 +454,12 @@ function extractActions(job) {
|
|
|
384
454
|
return deriveActionsFromText(job);
|
|
385
455
|
}
|
|
386
456
|
export class NativeMacOSJobExecutor {
|
|
457
|
+
bridgeConfig;
|
|
387
458
|
cancelledJobs = new Set();
|
|
388
459
|
activeChild = null;
|
|
460
|
+
constructor(bridgeConfig) {
|
|
461
|
+
this.bridgeConfig = bridgeConfig;
|
|
462
|
+
}
|
|
389
463
|
async run(job, reporter) {
|
|
390
464
|
if (process.platform !== "darwin") {
|
|
391
465
|
throw new Error("The native-macos executor only runs on macOS");
|
|
@@ -407,6 +481,13 @@ export class NativeMacOSJobExecutor {
|
|
|
407
481
|
}
|
|
408
482
|
try {
|
|
409
483
|
const completionNotes = [];
|
|
484
|
+
const artifacts = [];
|
|
485
|
+
const resultPayload = {
|
|
486
|
+
executor: "native-macos",
|
|
487
|
+
actions,
|
|
488
|
+
artifacts,
|
|
489
|
+
action_summaries: completionNotes,
|
|
490
|
+
};
|
|
410
491
|
for (let index = 0; index < actions.length; index += 1) {
|
|
411
492
|
this.assertNotCancelled(job.job_id);
|
|
412
493
|
const action = actions[index];
|
|
@@ -440,7 +521,24 @@ export class NativeMacOSJobExecutor {
|
|
|
440
521
|
if (action.type === "take_screenshot") {
|
|
441
522
|
await reporter.progress(progressPercent, "Capturando screenshot do Mac");
|
|
442
523
|
const screenshotPath = await this.takeScreenshot(action.path);
|
|
443
|
-
|
|
524
|
+
const screenshotArtifact = await this.uploadArtifactForJob(job.job_id, screenshotPath, {
|
|
525
|
+
kind: "screenshot",
|
|
526
|
+
});
|
|
527
|
+
if (screenshotArtifact) {
|
|
528
|
+
artifacts.push(screenshotArtifact);
|
|
529
|
+
completionNotes.push("Capturei a tela do Mac e anexei a imagem aqui no chat.");
|
|
530
|
+
}
|
|
531
|
+
else {
|
|
532
|
+
completionNotes.push(`Screenshot salvo em ${screenshotPath}`);
|
|
533
|
+
}
|
|
534
|
+
resultPayload.screenshot_path = screenshotPath;
|
|
535
|
+
continue;
|
|
536
|
+
}
|
|
537
|
+
if (action.type === "read_frontmost_page") {
|
|
538
|
+
await reporter.progress(progressPercent, `Lendo a pagina ativa em ${action.app || "Safari"}`);
|
|
539
|
+
const page = await this.readFrontmostPage(action.app || "Safari");
|
|
540
|
+
resultPayload.page = page;
|
|
541
|
+
completionNotes.push(`Li a pagina ${page.title || page.url || "ativa"} no navegador.`);
|
|
444
542
|
continue;
|
|
445
543
|
}
|
|
446
544
|
if (action.type === "read_file") {
|
|
@@ -461,19 +559,51 @@ export class NativeMacOSJobExecutor {
|
|
|
461
559
|
completionNotes.push(`Saida de \`${action.command}\`:\n${shellOutput}`);
|
|
462
560
|
continue;
|
|
463
561
|
}
|
|
562
|
+
if (action.type === "set_volume") {
|
|
563
|
+
await reporter.progress(progressPercent, `Ajustando volume para ${action.level}%`);
|
|
564
|
+
await this.setVolume(action.level);
|
|
565
|
+
completionNotes.push(`Volume ajustado para ${action.level}% no macOS.`);
|
|
566
|
+
continue;
|
|
567
|
+
}
|
|
568
|
+
if (action.type === "click_visual_target") {
|
|
569
|
+
if (action.app) {
|
|
570
|
+
await reporter.progress(progressPercent, `Trazendo ${action.app} para frente antes do clique`);
|
|
571
|
+
await this.focusApp(action.app);
|
|
572
|
+
}
|
|
573
|
+
await reporter.progress(progressPercent, `Capturando a tela para localizar ${action.description}`);
|
|
574
|
+
const screenshotPath = await this.takeScreenshot();
|
|
575
|
+
const artifact = await this.uploadArtifactForJob(job.job_id, screenshotPath, {
|
|
576
|
+
kind: "screenshot",
|
|
577
|
+
metadata: { purpose: "visual_click", target: action.description },
|
|
578
|
+
});
|
|
579
|
+
if (!artifact?.storage_path) {
|
|
580
|
+
throw new Error("Otto Bridge nao conseguiu enviar a screenshot necessaria para localizar o alvo visual.");
|
|
581
|
+
}
|
|
582
|
+
artifacts.push(artifact);
|
|
583
|
+
const artifactMetadata = artifact.metadata || {};
|
|
584
|
+
const width = Number(artifactMetadata.width || 0);
|
|
585
|
+
const height = Number(artifactMetadata.height || 0);
|
|
586
|
+
const location = await this.locateVisualTarget(job.job_id, artifact.storage_path, action.description, width, height, artifact.mime_type);
|
|
587
|
+
if (!location?.found || typeof location.x !== "number" || typeof location.y !== "number") {
|
|
588
|
+
throw new Error(`Nao consegui localizar ${action.description} com confianca suficiente na tela.`);
|
|
589
|
+
}
|
|
590
|
+
await reporter.progress(progressPercent, `Clicando em ${action.description}`);
|
|
591
|
+
await this.clickPoint(location.x, location.y);
|
|
592
|
+
completionNotes.push(`Localizei e cliquei em ${action.description}.`);
|
|
593
|
+
resultPayload.last_click = location;
|
|
594
|
+
continue;
|
|
595
|
+
}
|
|
464
596
|
await reporter.progress(progressPercent, `Abrindo ${action.url}${action.app ? ` em ${action.app}` : ""}`);
|
|
465
597
|
await this.openUrl(action.url, action.app);
|
|
598
|
+
await delay(1200);
|
|
466
599
|
}
|
|
467
600
|
const summary = completionNotes.length > 0
|
|
468
601
|
? completionNotes.join("\n\n")
|
|
469
602
|
: (actions.length === 1
|
|
470
603
|
? this.describeAction(actions[0])
|
|
471
604
|
: `${actions.length} ações executadas no macOS`);
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
summary,
|
|
475
|
-
actions,
|
|
476
|
-
});
|
|
605
|
+
resultPayload.summary = summary;
|
|
606
|
+
await reporter.completed(resultPayload);
|
|
477
607
|
}
|
|
478
608
|
finally {
|
|
479
609
|
this.cancelledJobs.delete(job.job_id);
|
|
@@ -578,6 +708,108 @@ end tell
|
|
|
578
708
|
await this.runCommand("screencapture", ["-x", screenshotPath]);
|
|
579
709
|
return screenshotPath;
|
|
580
710
|
}
|
|
711
|
+
async uploadArtifactForJob(jobId, localPath, options) {
|
|
712
|
+
if (!this.bridgeConfig?.apiBaseUrl || !this.bridgeConfig?.deviceToken) {
|
|
713
|
+
return null;
|
|
714
|
+
}
|
|
715
|
+
const bytes = await readFile(localPath);
|
|
716
|
+
const fileName = path.basename(localPath);
|
|
717
|
+
const mimeType = mimeTypeFromPath(fileName);
|
|
718
|
+
const dimensions = mimeType.startsWith("image/") ? await this.getImageDimensions(localPath) : null;
|
|
719
|
+
const metadata = {
|
|
720
|
+
...(options?.metadata || {}),
|
|
721
|
+
...(dimensions || {}),
|
|
722
|
+
};
|
|
723
|
+
const response = await uploadDeviceJobArtifact(this.bridgeConfig.apiBaseUrl, this.bridgeConfig.deviceToken, jobId, {
|
|
724
|
+
filename: fileName,
|
|
725
|
+
contentType: mimeType,
|
|
726
|
+
bytes,
|
|
727
|
+
kind: options?.kind || "file",
|
|
728
|
+
metadata,
|
|
729
|
+
});
|
|
730
|
+
return response.artifact || null;
|
|
731
|
+
}
|
|
732
|
+
async readFrontmostPage(app) {
|
|
733
|
+
const targetApp = app || "Safari";
|
|
734
|
+
if (targetApp !== "Safari") {
|
|
735
|
+
throw new Error("Leitura de pagina frontmost esta disponivel apenas para Safari no momento.");
|
|
736
|
+
}
|
|
737
|
+
const script = `
|
|
738
|
+
tell application "Safari"
|
|
739
|
+
activate
|
|
740
|
+
if (count of windows) = 0 then error "Safari nao possui janelas abertas."
|
|
741
|
+
delay 1
|
|
742
|
+
set pageJson to do JavaScript "(function(){const title=document.title||''; const url=location.href||''; const text=((document.body&&document.body.innerText)||'').trim().slice(0, 12000); return JSON.stringify({title:title,url:url,text:text});})();" in current tab of front window
|
|
743
|
+
end tell
|
|
744
|
+
return pageJson
|
|
745
|
+
`;
|
|
746
|
+
const { stdout } = await this.runCommandCapture("osascript", ["-e", script]);
|
|
747
|
+
const parsed = JSON.parse(stdout.trim() || "{}");
|
|
748
|
+
return {
|
|
749
|
+
title: asString(parsed.title) || "",
|
|
750
|
+
url: asString(parsed.url) || "",
|
|
751
|
+
text: asString(parsed.text) || "",
|
|
752
|
+
};
|
|
753
|
+
}
|
|
754
|
+
async setVolume(level) {
|
|
755
|
+
const bounded = Math.max(0, Math.min(Math.round(level), 100));
|
|
756
|
+
await this.runCommand("osascript", ["-e", `set volume output volume ${bounded}`]);
|
|
757
|
+
}
|
|
758
|
+
async locateVisualTarget(jobId, storagePath, target, width, height, mimeType) {
|
|
759
|
+
if (!this.bridgeConfig?.apiBaseUrl || !this.bridgeConfig?.deviceToken) {
|
|
760
|
+
throw new Error("Otto Bridge nao possui configuracao para usar visao no backend.");
|
|
761
|
+
}
|
|
762
|
+
const response = await postDeviceJson(this.bridgeConfig.apiBaseUrl, this.bridgeConfig.deviceToken, `/v1/devices/jobs/${encodeURIComponent(jobId)}/vision/locate`, {
|
|
763
|
+
storage_path: storagePath,
|
|
764
|
+
target,
|
|
765
|
+
image_width: Math.max(1, width),
|
|
766
|
+
image_height: Math.max(1, height),
|
|
767
|
+
mime_type: mimeType || "image/png",
|
|
768
|
+
});
|
|
769
|
+
return response.location || {};
|
|
770
|
+
}
|
|
771
|
+
async clickPoint(x, y) {
|
|
772
|
+
const script = `
|
|
773
|
+
import Cocoa
|
|
774
|
+
import ApplicationServices
|
|
775
|
+
|
|
776
|
+
let x = Double(CommandLine.arguments[1]) ?? 0
|
|
777
|
+
let y = Double(CommandLine.arguments[2]) ?? 0
|
|
778
|
+
let point = CGPoint(x: x, y: y)
|
|
779
|
+
|
|
780
|
+
func post(_ type: CGEventType) {
|
|
781
|
+
guard let event = CGEvent(mouseEventSource: nil, mouseType: type, mouseCursorPosition: point, mouseButton: .left) else {
|
|
782
|
+
fputs("failed to create mouse event\\n", stderr)
|
|
783
|
+
exit(1)
|
|
784
|
+
}
|
|
785
|
+
event.post(tap: .cghidEventTap)
|
|
786
|
+
}
|
|
787
|
+
|
|
788
|
+
post(.mouseMoved)
|
|
789
|
+
usleep(120000)
|
|
790
|
+
post(.leftMouseDown)
|
|
791
|
+
usleep(80000)
|
|
792
|
+
post(.leftMouseUp)
|
|
793
|
+
`;
|
|
794
|
+
await this.runCommand("swift", ["-e", script, String(Math.round(x)), String(Math.round(y))]);
|
|
795
|
+
}
|
|
796
|
+
async getImageDimensions(filePath) {
|
|
797
|
+
try {
|
|
798
|
+
const { stdout } = await this.runCommandCapture("sips", ["-g", "pixelWidth", "-g", "pixelHeight", filePath]);
|
|
799
|
+
const widthMatch = stdout.match(/pixelWidth:\s*(\d+)/i);
|
|
800
|
+
const heightMatch = stdout.match(/pixelHeight:\s*(\d+)/i);
|
|
801
|
+
if (!widthMatch || !heightMatch) {
|
|
802
|
+
return null;
|
|
803
|
+
}
|
|
804
|
+
return {
|
|
805
|
+
width: Number(widthMatch[1]),
|
|
806
|
+
height: Number(heightMatch[1]),
|
|
807
|
+
};
|
|
808
|
+
}
|
|
809
|
+
catch {
|
|
810
|
+
return null;
|
|
811
|
+
}
|
|
812
|
+
}
|
|
581
813
|
async readLocalFile(filePath, maxChars = 4000) {
|
|
582
814
|
const resolved = expandUserPath(filePath);
|
|
583
815
|
const content = await readFile(resolved, "utf8");
|
|
@@ -642,6 +874,9 @@ end tell
|
|
|
642
874
|
if (action.type === "take_screenshot") {
|
|
643
875
|
return "Screenshot capturado no macOS";
|
|
644
876
|
}
|
|
877
|
+
if (action.type === "read_frontmost_page") {
|
|
878
|
+
return `Pagina ativa lida em ${action.app || "Safari"}`;
|
|
879
|
+
}
|
|
645
880
|
if (action.type === "read_file") {
|
|
646
881
|
return `${action.path} foi lido no macOS`;
|
|
647
882
|
}
|
|
@@ -651,6 +886,12 @@ end tell
|
|
|
651
886
|
if (action.type === "run_shell") {
|
|
652
887
|
return `Comando ${action.command} executado no macOS`;
|
|
653
888
|
}
|
|
889
|
+
if (action.type === "set_volume") {
|
|
890
|
+
return `Volume ajustado para ${action.level}% no macOS`;
|
|
891
|
+
}
|
|
892
|
+
if (action.type === "click_visual_target") {
|
|
893
|
+
return `Clique guiado executado para ${action.description}`;
|
|
894
|
+
}
|
|
654
895
|
const target = humanizeUrl(action.url);
|
|
655
896
|
return `${target} foi aberto${action.app ? ` em ${action.app}` : ""}`;
|
|
656
897
|
}
|
package/dist/http.js
CHANGED
|
@@ -38,3 +38,32 @@ export async function postJson(apiBaseUrl, pathname, body) {
|
|
|
38
38
|
body: JSON.stringify(body),
|
|
39
39
|
});
|
|
40
40
|
}
|
|
41
|
+
/**
 * Builds a `Headers` object from optional base headers plus device auth.
 *
 * @param {string} deviceToken - Device token; when truthy it is sent as
 *   `Authorization: Bearer <token>`, replacing any Authorization header
 *   present in `headers`.
 * @param {HeadersInit} [headers] - Base headers to copy into the result.
 * @returns {Headers} A new `Headers` instance.
 */
function buildDeviceAuthHeaders(deviceToken, headers) {
    const merged = new Headers(headers || {});
    if (!deviceToken) {
        return merged;
    }
    merged.set("Authorization", `Bearer ${deviceToken}`);
    return merged;
}
|
|
48
|
+
/**
 * POSTs a JSON body to a device-authenticated endpoint.
 *
 * The request goes through `requestJson` with a JSON content type and the
 * headers produced by `buildDeviceAuthHeaders` for `deviceToken`.
 *
 * @param {string} apiBaseUrl - Base URL handed to `requestJson`.
 * @param {string} deviceToken - Token used for the Authorization header.
 * @param {string} pathname - Endpoint path handed to `requestJson`.
 * @param {*} body - Value serialized with `JSON.stringify` as the request body.
 * @returns {Promise<*>} Whatever `requestJson` resolves to.
 */
export async function postDeviceJson(apiBaseUrl, deviceToken, pathname, body) {
    const headers = buildDeviceAuthHeaders(deviceToken, {
        "Content-Type": "application/json",
    });
    const init = {
        method: "POST",
        headers,
        body: JSON.stringify(body),
    };
    return await requestJson(apiBaseUrl, pathname, init);
}
|
|
57
|
+
/**
 * Uploads a job artifact as multipart form data.
 *
 * The form carries a `file` part (the bytes wrapped in a Blob), a `kind`
 * field and, only when `params.metadata` has at least one key, a `metadata`
 * field holding its JSON serialization. It is POSTed through `requestJson`
 * to `/v1/devices/jobs/{jobId}/artifacts` with device auth headers.
 *
 * @param {string} apiBaseUrl - Base URL handed to `requestJson`.
 * @param {string} deviceToken - Token used for the Authorization header.
 * @param {string} jobId - Job identifier, URL-encoded into the request path.
 * @param {object} params - Upload description.
 * @param {*} params.bytes - File content, passed through `Buffer.from`.
 * @param {string} params.filename - File name of the `file` part.
 * @param {string} [params.contentType] - Blob type; "application/octet-stream" when falsy.
 * @param {string} [params.kind] - Artifact kind; "file" when falsy.
 * @param {object} [params.metadata] - Extra metadata to attach.
 * @returns {Promise<*>} Whatever `requestJson` resolves to.
 */
export async function uploadDeviceJobArtifact(apiBaseUrl, deviceToken, jobId, params) {
    const blobType = params.contentType || "application/octet-stream";
    const filePart = new Blob([Buffer.from(params.bytes)], { type: blobType });
    const form = new FormData();
    form.append("file", filePart, params.filename);
    form.append("kind", String(params.kind || "file"));
    const hasMetadata = Boolean(params.metadata) && Object.keys(params.metadata).length > 0;
    if (hasMetadata) {
        form.append("metadata", JSON.stringify(params.metadata));
    }
    const endpoint = `/v1/devices/jobs/${encodeURIComponent(jobId)}/artifacts`;
    return await requestJson(apiBaseUrl, endpoint, {
        method: "POST",
        headers: buildDeviceAuthHeaders(deviceToken),
        body: form,
    });
}
|
package/dist/runtime.js
CHANGED
|
@@ -280,7 +280,7 @@ export class BridgeRuntime {
|
|
|
280
280
|
return new ClawdCursorJobExecutor(config.executor);
|
|
281
281
|
}
|
|
282
282
|
if (config.executor.type === "native-macos") {
|
|
283
|
-
return new NativeMacOSJobExecutor();
|
|
283
|
+
return new NativeMacOSJobExecutor(config);
|
|
284
284
|
}
|
|
285
285
|
return new MockJobExecutor();
|
|
286
286
|
}
|
package/dist/types.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
export const BRIDGE_CONFIG_VERSION = 1;
|
|
2
|
-
export const BRIDGE_VERSION = "0.4.2";
|
|
2
|
+
export const BRIDGE_VERSION = "0.5.0";
|
|
3
3
|
export const BRIDGE_PACKAGE_NAME = "@leg3ndy/otto-bridge";
|
|
4
4
|
export const DEFAULT_API_BASE_URL = "http://localhost:8000";
|
|
5
5
|
export const DEFAULT_POLL_INTERVAL_MS = 3000;
|