promethios-bridge 1.9.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "promethios-bridge",
3
- "version": "1.9.0",
3
+ "version": "2.0.0",
4
4
  "description": "Run Promethios agent frameworks locally on your computer with full file, terminal, browser access, ambient context capture, and the always-on-top floating chat overlay. Native Framework Mode supports OpenClaw and other frameworks via the bridge.",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -53,7 +53,9 @@
53
53
  },
54
54
  "optionalDependencies": {
55
55
  "playwright": "^1.42.0",
56
- "electron": "^29.0.0"
56
+ "electron": "^29.0.0",
57
+ "screenshot-desktop": "^1.12.7",
58
+ "sharp": "^0.33.0"
57
59
  },
58
60
  "engines": {
59
61
  "node": ">=18.0.0"
package/src/bridge.js CHANGED
@@ -22,7 +22,18 @@ const fetch = require('node-fetch');
22
22
  const { executeLocalTool } = require('./executor');
23
23
  const { captureContext } = require('./contextCapture');
24
24
  const { startMcpServer } = require('./mcp-server');
25
- const { setPinnedRegion, setPinnedApps } = require('./tools/desktop');
25
+ const { setPinnedRegion, setPinnedApps, registerBrowserPageAccessor } = require('./tools/desktop');
26
+
27
// Give the browser-dom tools a way to reach the shared Playwright context.
// executor.js creates that context lazily the first time browser_control is used,
// so the tools get an async getter instead of a page captured at load time.
registerBrowserPageAccessor(async () => {
  const context = global.__playwrightContext;
  if (!context) {
    throw new Error('Browser not open. Use the browser_control tool to navigate to a page first, then retry.');
  }

  // Hand back the last page in the context's list, or open one if there is none.
  const openPages = context.pages();
  if (openPages.length === 0) {
    return context.newPage();
  }
  return openPages[openPages.length - 1];
});
26
37
  const { initAndroidTools } = require('./tools/android');
27
38
 
28
39
  // Optional: Electron overlay window (bundled in src/overlay — gracefully skipped if Electron not available)
@@ -0,0 +1,408 @@
1
+ /**
2
+ * accessibility.js — Read live text content from native app windows
3
+ *
4
+ * Uses platform accessibility APIs to extract structured text from any
5
+ * focused or pinned window — without needing a saved file.
6
+ *
7
+ * Windows: PowerShell + UIAutomation (built-in, no install needed)
8
+ * macOS: osascript + AXUIElement accessibility API
9
+ * Linux: xdotool active-window title only (best-effort; AT-SPI text extraction is not implemented)
10
+ *
11
+ * Exported tools:
12
+ * read_window_text — full text content of a window
13
+ * get_active_document — focused app, window title and text content of the focused element
14
+ * get_selected_text — currently selected/highlighted text
15
+ */
16
+
17
+ 'use strict';
18
+
19
+ const { exec, execFile } = require('child_process');
20
+ const os = require('os');
21
+ const fs = require('fs');
22
+ const path = require('path');
23
+
24
+ // ── Windows: UIAutomation via PowerShell ──────────────────────────────────────
25
/**
 * Build the PowerShell (UIAutomation) script that prints the text of one
 * top-level window, one value per line.
 *
 * The window is looked up by exact Name first, then by a `-like "*title*"`
 * partial match over all top-level windows. When nothing matches, the script
 * prints a line starting with "ERROR:" and exits with code 1.
 *
 * @param {string} windowTitle - Window title (or fragment) to look for.
 * @returns {string} PowerShell source text.
 */
const WIN_READ_WINDOW_PS = (windowTitle) => {
  // The title is caller-supplied, so it is embedded exactly once as a
  // single-quoted PowerShell literal: nothing is expanded inside single quotes,
  // which rules out $(...) / backtick injection. PowerShell also accepts the
  // typographic quotes U+2018/2019/201A/201B as single-quote delimiters, so
  // every one of them is doubled, not just the ASCII apostrophe.
  const titleLiteral = `'${String(windowTitle).replace(/['\u2018\u2019\u201A\u201B]/g, '$&$&')}'`;

  return `
Add-Type -AssemblyName UIAutomationClient
Add-Type -AssemblyName UIAutomationTypes

$title = ${titleLiteral}

$root = [System.Windows.Automation.AutomationElement]::RootElement
$condition = New-Object System.Windows.Automation.PropertyCondition(
  [System.Windows.Automation.AutomationElement]::NameProperty, $title
)
$window = $root.FindFirst([System.Windows.Automation.TreeScope]::Children, $condition)

if (-not $window) {
  # Try partial match
  $allWindows = $root.FindAll([System.Windows.Automation.TreeScope]::Children,
    [System.Windows.Automation.Condition]::TrueCondition)
  foreach ($w in $allWindows) {
    if ($w.Current.Name -like ('*' + $title + '*')) {
      $window = $w; break
    }
  }
}

if (-not $window) {
  Write-Output ('ERROR: Window not found: ' + $title)
  exit 1
}

# Collect all text elements
$textCondition = New-Object System.Windows.Automation.PropertyCondition(
  [System.Windows.Automation.AutomationElement]::ControlTypeProperty,
  [System.Windows.Automation.ControlType]::Text
)
$editCondition = New-Object System.Windows.Automation.PropertyCondition(
  [System.Windows.Automation.AutomationElement]::ControlTypeProperty,
  [System.Windows.Automation.ControlType]::Edit
)
$docCondition = New-Object System.Windows.Automation.PropertyCondition(
  [System.Windows.Automation.AutomationElement]::ControlTypeProperty,
  [System.Windows.Automation.ControlType]::Document
)

$orCond = New-Object System.Windows.Automation.OrCondition($textCondition, $editCondition, $docCondition)
$elements = $window.FindAll([System.Windows.Automation.TreeScope]::Descendants, $orCond)

$lines = @()
foreach ($el in $elements) {
  try {
    $valPat = $el.GetCurrentPattern([System.Windows.Automation.ValuePattern]::Pattern)
    $val = $valPat.Current.Value
    if ($val -and $val.Trim()) { $lines += $val }
  } catch {
    $name = $el.Current.Name
    if ($name -and $name.Trim()) { $lines += $name }
  }
}

$lines | ForEach-Object { Write-Output $_ }
`;
};
82
+
83
/**
 * PowerShell script that prints the text currently selected in the focused
 * UI element.
 *
 * It first asks the focused element's UIAutomation TextPattern for its
 * selection. If that throws, it falls back to simulating Ctrl+C and reading
 * the clipboard.
 *
 * In the fallback the clipboard is emptied before the copy, so that "nothing
 * was copied" yields an empty result instead of whatever text was already on
 * the clipboard. Afterwards the previous text is put back, or the clipboard is
 * cleared when there was none, so the selection is not left behind.
 */
const WIN_GET_SELECTED_TEXT_PS = `
Add-Type -AssemblyName UIAutomationClient
Add-Type -AssemblyName UIAutomationTypes
Add-Type -AssemblyName System.Windows.Forms

# Get focused element
$focused = [System.Windows.Automation.AutomationElement]::FocusedElement
if (-not $focused) { Write-Output ""; exit 0 }

try {
  $textPat = $focused.GetCurrentPattern([System.Windows.Automation.TextPattern]::Pattern)
  $sel = $textPat.GetSelection()
  if ($sel -and $sel.Length -gt 0) {
    Write-Output $sel[0].GetText(-1)
  } else {
    Write-Output ""
  }
} catch {
  # Fallback: clipboard method
  $prev = [System.Windows.Forms.Clipboard]::GetText()
  # Empty the clipboard first so stale text is never reported as the selection
  [System.Windows.Forms.Clipboard]::Clear()
  [System.Windows.Forms.SendKeys]::SendWait("^c")
  Start-Sleep -Milliseconds 150
  $sel = [System.Windows.Forms.Clipboard]::GetText()
  # Restore clipboard (SetText rejects empty strings, so clear instead)
  if ($prev) {
    [System.Windows.Forms.Clipboard]::SetText($prev)
  } else {
    [System.Windows.Forms.Clipboard]::Clear()
  }
  Write-Output $sel
}
`;
112
+
113
/**
 * PowerShell script that prints a JSON object describing the focused UI
 * element: its class name, its name, the title of the enclosing window, the
 * owning process (id and name) and the element's text content.
 *
 * Text content comes from the ValuePattern when available, otherwise from the
 * TextPattern document range (capped at 50000 characters), otherwise "".
 * When no element has focus the script prints "{}".
 *
 * `process_name` now carries the actual process name (empty string when it
 * cannot be resolved); the numeric id is reported separately as `process_id`.
 */
const WIN_GET_ACTIVE_DOCUMENT_PS = `
Add-Type -AssemblyName UIAutomationClient
Add-Type -AssemblyName UIAutomationTypes

$focused = [System.Windows.Automation.AutomationElement]::FocusedElement
if (-not $focused) { Write-Output "{}"; exit 0 }

# Walk up to find the window
$walker = [System.Windows.Automation.TreeWalker]::ControlViewWalker
$current = $focused
$window = $null
while ($current -ne $null) {
  if ($current.Current.ControlType -eq [System.Windows.Automation.ControlType]::Window) {
    $window = $current; break
  }
  try { $current = $walker.GetParent($current) } catch { break }
}

# Resolve the owning process; the element itself only exposes the numeric id
$procId = $focused.Current.ProcessId
$procName = ""
try {
  $procName = (Get-Process -Id $procId -ErrorAction Stop).ProcessName
} catch {
  $procName = ""
}

$result = @{
  focused_app = $focused.Current.ClassName
  focused_name = $focused.Current.Name
  window_title = if ($window) { $window.Current.Name } else { "" }
  process_id = $procId
  process_name = $procName
}

# Try to get text content of focused element
try {
  $valPat = $focused.GetCurrentPattern([System.Windows.Automation.ValuePattern]::Pattern)
  $result["text_content"] = $valPat.Current.Value
} catch {
  try {
    $textPat = $focused.GetCurrentPattern([System.Windows.Automation.TextPattern]::Pattern)
    $docRange = $textPat.DocumentRange
    $result["text_content"] = $docRange.GetText(50000)
  } catch {
    $result["text_content"] = ""
  }
}

$result | ConvertTo-Json
`;
154
+
155
+ // ── macOS: osascript AXUIElement ──────────────────────────────────────────────
156
/**
 * Build the AppleScript that returns the text of an app's front window.
 *
 * Through System Events it tries `text area 1 of scroll area 1` of the front
 * window, then `text field 1` of the front window, and returns "" when
 * neither yields a value.
 *
 * @param {string} appName - Process name as System Events knows it.
 * @returns {string} AppleScript source text.
 */
const MAC_READ_WINDOW_SCRIPT = (appName) => {
  // Backslashes must be escaped BEFORE quotes: otherwise an input such as \"
  // becomes \\" — an escaped backslash followed by a quote that closes the
  // AppleScript string and lets the rest of the name run as script.
  const escapedName = String(appName).replace(/\\/g, '\\\\').replace(/"/g, '\\"');

  return `
tell application "System Events"
  tell process "${escapedName}"
    set allText to ""
    try
      set frontWin to front window
      set allText to value of text area 1 of scroll area 1 of frontWin
    end try
    if allText is "" then
      try
        set allText to value of text field 1 of front window
      end try
    end if
    return allText
  end tell
end tell
`;
};
173
+
174
/**
 * AppleScript that returns the text selected in the frontmost application.
 *
 * It asks System Events for the name of the frontmost process and first tries
 * `get selection as text` inside that application. The section after it sends
 * Cmd+C, waits 0.15 s, reads the clipboard, and puts the previous clipboard
 * contents back before returning the copied text.
 */
const MAC_GET_SELECTED_TEXT_SCRIPT = `
tell application "System Events"
set frontApp to name of first process whose frontmost is true
end tell

tell application frontApp
try
return (get selection as text)
end try
end tell

-- Fallback: clipboard
set prevClip to the clipboard
tell application "System Events" to keystroke "c" using command down
delay 0.15
set selText to the clipboard
set the clipboard to prevClip
return selText
`;
193
+
194
+ // ── Helpers ───────────────────────────────────────────────────────────────────
195
/**
 * Run a PowerShell script and resolve with its trimmed stdout.
 *
 * The script is written to a file inside a freshly created private temp
 * directory (so concurrent calls cannot overwrite or delete each other's
 * script, and the path is not predictable), executed with `powershell -File`,
 * and the directory is removed afterwards.
 *
 * @param {string} script - PowerShell source to execute.
 * @param {number} [timeout=10000] - Kill the process after this many milliseconds.
 * @returns {Promise<string>} Trimmed stdout. Resolves even on a non-zero exit
 *   code as long as something was written to stdout (callers rely on this to
 *   read "ERROR: ..." lines).
 * @throws {Error} Rejects with stderr (or the spawn error message) when the
 *   process fails without producing stdout, or when the script file cannot be
 *   created.
 */
function runPowerShell(script, timeout = 10000) {
  return new Promise((resolve, reject) => {
    let workDir = null;
    const removeWorkDir = () => {
      if (workDir === null) return;
      try {
        fs.rmSync(workDir, { recursive: true, force: true });
      } catch {
        // Best-effort cleanup: a leftover temp directory must not mask the script result.
      }
    };

    try {
      workDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pmth_acc_'));
      const scriptPath = path.join(workDir, 'script.ps1');

      // Make stdout UTF-8 (without BOM) so Node's default utf8 decoding matches;
      // wrapped in try/catch because setting it can fail when no console is attached.
      const prelude = 'try { [Console]::OutputEncoding = New-Object System.Text.UTF8Encoding $false } catch {}\n';

      // Leading BOM: Windows PowerShell 5.1 reads BOM-less script files in the
      // ANSI code page, which corrupts non-ASCII window titles embedded in the script.
      fs.writeFileSync(scriptPath, `\uFEFF${prelude}${script}`, 'utf8');

      execFile(
        'powershell',
        // -ExecutionPolicy Bypass applies to this process only; without it the
        // default "Restricted" policy on Windows clients refuses to run -File scripts.
        ['-NoProfile', '-NonInteractive', '-ExecutionPolicy', 'Bypass', '-File', scriptPath],
        { timeout },
        (err, stdout, stderr) => {
          removeWorkDir();
          if (err && !stdout) return reject(new Error(stderr || err.message));
          resolve(stdout.trim());
        }
      );
    } catch (err) {
      removeWorkDir();
      reject(err);
    }
  });
}
209
+
210
/**
 * Run an AppleScript through `osascript` and resolve with its trimmed stdout.
 *
 * The script is written to a file inside a freshly created private temp
 * directory (so concurrent calls cannot overwrite or delete each other's
 * script, and the path is not predictable); the directory is removed once the
 * process has finished.
 *
 * @param {string} script - AppleScript source to execute.
 * @param {number} [timeout=8000] - Kill the process after this many milliseconds.
 * @returns {Promise<string>} Trimmed stdout. Resolves even when osascript
 *   reports an error as long as something was written to stdout.
 * @throws {Error} Rejects with stderr (or the spawn error message) when the
 *   process fails without producing stdout, or when the script file cannot be
 *   created.
 */
function runOsascript(script, timeout = 8000) {
  return new Promise((resolve, reject) => {
    let workDir = null;
    const removeWorkDir = () => {
      if (workDir === null) return;
      try {
        fs.rmSync(workDir, { recursive: true, force: true });
      } catch {
        // Best-effort cleanup: a leftover temp directory must not mask the script result.
      }
    };

    try {
      workDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pmth_acc_'));
      const scriptPath = path.join(workDir, 'script.scpt');
      fs.writeFileSync(scriptPath, script, 'utf8');

      execFile('osascript', [scriptPath], { timeout }, (err, stdout, stderr) => {
        removeWorkDir();
        if (err && !stdout) return reject(new Error(stderr || err.message));
        resolve(stdout.trim());
      });
    } catch (err) {
      removeWorkDir();
      reject(err);
    }
  });
}
221
+
222
+ // ── Tool implementations ──────────────────────────────────────────────────────
223
+
224
/**
 * Read the text content of a window, selected by title or app name.
 *
 * - Windows: runs the UIAutomation PowerShell script for the given title.
 * - macOS: runs the AppleScript for the given process name.
 * - Other platforms: best-effort only — reports the active window's title via
 *   xdotool; the requested title is not used.
 *
 * Never rejects: failures are reported as `{ ok: false, error, ... }`.
 *
 * @param {object} [args]
 * @param {string} [args.window_title] - Window title (partial match on Windows).
 * @param {string} [args.app] - Application name, used when window_title is absent.
 * @param {number} [args.max_chars=50000] - Maximum characters to return;
 *   missing, non-numeric or non-positive values fall back to the default.
 * @returns {Promise<object>} Result with `ok`, `text` and metadata;
 *   `char_count` is the length of the (possibly truncated) returned text.
 */
async function readWindowText(args) {
  const opts = args ?? {};
  const windowTitle = opts.window_title || opts.app || '';

  // Number() keeps numeric strings working; anything unusable (NaN, 0, negative)
  // falls back to the default instead of producing a surprising slice().
  const requestedMax = Number(opts.max_chars);
  const maxChars = Number.isFinite(requestedMax) && requestedMax > 0
    ? Math.floor(requestedMax)
    : 50000;

  const isWindows = process.platform === 'win32';
  const isMac = process.platform === 'darwin';

  // With an empty title the Windows partial match ("**") would pick an arbitrary
  // window, and the macOS script has no process to address — fail explicitly.
  if ((isWindows || isMac) && !windowTitle) {
    return { ok: false, error: 'window_title or app is required', text: null };
  }

  if (isWindows) {
    try {
      const output = await runPowerShell(WIN_READ_WINDOW_PS(windowTitle));
      if (output.startsWith('ERROR:')) {
        return { ok: false, error: output, text: null };
      }
      const text = output.slice(0, maxChars);
      return {
        ok: true,
        text,
        char_count: text.length,
        truncated: output.length > maxChars,
        source: 'UIAutomation',
        window: windowTitle,
        captured_at: new Date().toISOString(),
      };
    } catch (err) {
      return { ok: false, error: err.message, text: null };
    }
  }

  if (isMac) {
    try {
      const fullText = await runOsascript(MAC_READ_WINDOW_SCRIPT(windowTitle));
      const text = fullText.slice(0, maxChars);
      return {
        ok: true,
        text,
        // Length of what is actually returned, consistent with the Windows branch.
        char_count: text.length,
        truncated: fullText.length > maxChars,
        source: 'AppleScript',
        window: windowTitle,
        captured_at: new Date().toISOString(),
      };
    } catch (err) {
      return { ok: false, error: err.message, text: null };
    }
  }

  // Linux: best-effort via xdotool (active window title only)
  return new Promise((resolve) => {
    execFile('xdotool', ['getactivewindow', 'getwindowname'], { timeout: 3000 }, (err, stdout) => {
      const title = typeof stdout === 'string' ? stdout.trim() : '';
      resolve({
        ok: !err,
        text: title || null,
        source: 'xdotool',
        note: 'Linux accessibility support is limited. Full text extraction requires AT-SPI.',
        captured_at: new Date().toISOString(),
        // Surface why the lookup failed (e.g. xdotool not installed).
        ...(err ? { error: err.message } : {}),
      });
    });
  });
}
280
+
281
/**
 * Return the text currently selected in the focused window.
 *
 * Windows and macOS each run their platform script with a 5 second timeout;
 * every other platform reports that the feature is unsupported. Failures are
 * returned as `{ ok: false, error, selected_text: null }` rather than thrown.
 *
 * @returns {Promise<object>} `{ ok, selected_text, captured_at }` on success.
 */
async function getSelectedText() {
  const platform = process.platform;

  if (platform !== 'win32' && platform !== 'darwin') {
    return { ok: false, error: 'Linux selected text not yet supported', selected_text: null };
  }

  try {
    const selection = platform === 'win32'
      ? await runPowerShell(WIN_GET_SELECTED_TEXT_PS, 5000)
      : await runOsascript(MAC_GET_SELECTED_TEXT_SCRIPT, 5000);
    return { ok: true, selected_text: selection, captured_at: new Date().toISOString() };
  } catch (failure) {
    return { ok: false, error: failure.message, selected_text: null };
  }
}
303
+
304
/**
 * Describe what the user is actively working on: focused app, window title
 * and the text content of the focused element/document.
 *
 * - Windows: runs the UIAutomation script and merges its JSON into the result
 *   (unparseable output is returned under `raw`).
 * - macOS: asks System Events for the frontmost process and its front window
 *   title, then reads that app's window text via readWindowText.
 * - Other platforms: reports that the feature is unsupported.
 *
 * Never rejects: failures are reported as `{ ok: false, error }`.
 *
 * @param {object} [args]
 * @param {number} [args.max_chars=50000] - Maximum characters of document text
 *   to return; missing, non-numeric or non-positive values use the default.
 * @returns {Promise<object>} Context object with `ok` and, on success,
 *   `text_content` plus platform-specific metadata.
 */
async function getActiveDocumentContext(args) {
  const requestedMax = Number(args?.max_chars);
  const maxChars = Number.isFinite(requestedMax) && requestedMax > 0
    ? Math.floor(requestedMax)
    : 50000;

  if (process.platform === 'win32') {
    try {
      const json = await runPowerShell(WIN_GET_ACTIVE_DOCUMENT_PS, 10000);
      let parsed;
      try { parsed = JSON.parse(json); } catch { parsed = { raw: json }; }

      // Only merge real objects; copy so the cap below does not touch `parsed`.
      const fields = parsed !== null && typeof parsed === 'object' ? { ...parsed } : {};

      // Honor the max_chars limit advertised in the tool schema.
      if (typeof fields.text_content === 'string' && fields.text_content.length > maxChars) {
        fields.text_content = fields.text_content.slice(0, maxChars);
        fields.truncated = true;
      }

      return {
        ok: true,
        source: 'UIAutomation',
        captured_at: new Date().toISOString(),
        ...fields,
      };
    } catch (err) {
      return { ok: false, error: err.message };
    }
  }

  if (process.platform === 'darwin') {
    try {
      const script = `
tell application "System Events"
  set frontApp to name of first process whose frontmost is true
  set frontTitle to ""
  try
    set frontTitle to name of front window of process frontApp
  end try
  return frontApp & "|" & frontTitle
end tell
`;
      const out = await runOsascript(script, 5000);

      // Split on the FIRST separator only: window titles frequently contain "|"
      // themselves (e.g. "Page | Site"), and split('|') would cut them short.
      const separatorAt = out.indexOf('|');
      const app = (separatorAt === -1 ? out : out.slice(0, separatorAt)).trim();
      const title = separatorAt === -1 ? undefined : out.slice(separatorAt + 1).trim();

      const textResult = await readWindowText({ app, max_chars: maxChars });
      return {
        ok: true,
        focused_app: app,
        window_title: title,
        text_content: textResult.text || '',
        source: 'AppleScript',
        captured_at: new Date().toISOString(),
      };
    } catch (err) {
      return { ok: false, error: err.message };
    }
  }

  return { ok: false, error: 'Linux not yet fully supported' };
}
353
+
354
+ // ── TOOL_MANIFEST entries (added to desktop.js TOOL_MANIFEST) ─────────────────
355
/**
 * Tool manifest for the accessibility tools defined in this file.
 *
 * Each entry carries the tool `name`, a `description`, an `inputSchema`
 * object describing the accepted arguments, and the `handler` function from
 * this file that implements the tool.
 */
const ACCESSIBILITY_TOOLS = [
  {
    // Implemented by readWindowText; window_title takes precedence over app.
    name: 'read_window_text',
    description: 'Read the full text content of any open window or application using the OS accessibility API. Works on unsaved documents (Word, Notepad, VS Code, etc.) — no file path needed. Returns the live text as the user sees it.',
    inputSchema: {
      type: 'object',
      properties: {
        window_title: {
          type: 'string',
          description: 'Title of the window to read (partial match supported). Use list_open_windows to find window titles.',
        },
        app: {
          type: 'string',
          description: 'Application name (e.g. "Word", "Notepad", "Code"). Used if window_title is not provided.',
        },
        max_chars: {
          type: 'number',
          description: 'Maximum characters to return (default: 50000)',
        },
      },
    },
    handler: readWindowText,
  },
  {
    // Implemented by getSelectedText; takes no arguments.
    name: 'get_selected_text',
    description: 'Get the text the user currently has selected/highlighted in any application. Useful for "help me with this specific part" workflows.',
    inputSchema: {
      type: 'object',
      properties: {},
    },
    handler: getSelectedText,
  },
  {
    // Implemented by getActiveDocumentContext.
    name: 'get_active_document',
    description: 'Get the full context of what the user is actively working on: the focused application, window title, and complete text content of the focused document or editor. This is the primary tool for understanding what the user is working on right now.',
    inputSchema: {
      type: 'object',
      properties: {
        max_chars: {
          type: 'number',
          description: 'Maximum characters to return from the document (default: 50000)',
        },
      },
    },
    handler: getActiveDocumentContext,
  },
];
402
+
403
// Public surface of this module: the tool manifest plus the three handlers,
// exported individually so they can also be called directly.
module.exports = {
  ACCESSIBILITY_TOOLS,
  readWindowText,
  getSelectedText,
  getActiveDocumentContext,
};