promethios-bridge 1.2.0 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "promethios-bridge",
3
- "version": "1.2.0",
3
+ "version": "1.4.1",
4
4
  "description": "Run Promethios agent frameworks locally on your computer with full file, terminal, and browser access.",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -43,7 +43,11 @@
43
43
  "express": "^4.18.2",
44
44
  "open": "^8.4.2",
45
45
  "ora": "^5.4.1",
46
- "node-fetch": "^2.7.0"
46
+ "node-fetch": "^2.7.0",
47
+ "playwright": "^1.42.0"
48
+ },
49
+ "optionalDependencies": {
50
+ "playwright": "^1.42.0"
47
51
  },
48
52
  "engines": {
49
53
  "node": ">=18.0.0"
package/src/bridge.js CHANGED
@@ -246,6 +246,15 @@ async function registerBridge({ authToken, apiBase, callbackUrl, port, dev }) {
246
246
  const deviceId = require('crypto').randomBytes(8).toString('hex');
247
247
  const capabilities = getSupportedCapabilities();
248
248
 
249
+ // Detect OS and shell so the cloud can inject OS-aware guidance into the agent
250
+ const osModule = require('os');
251
+ const platform = process.platform; // 'win32' | 'darwin' | 'linux'
252
+ const shell = platform === 'win32'
253
+ ? 'cmd'
254
+ : (process.env.SHELL || '/bin/zsh').split('/').pop();
255
+ const homeDir = osModule.homedir();
256
+ const username = osModule.userInfo().username;
257
+
249
258
  const res = await fetch(`${apiBase}/api/local-bridge/register`, {
250
259
  method: 'POST',
251
260
  headers: {
@@ -257,6 +266,10 @@ async function registerBridge({ authToken, apiBase, callbackUrl, port, dev }) {
257
266
  callbackUrl,
258
267
  capabilities,
259
268
  bridgeVersion: require('../package.json').version,
269
+ os: platform, // 'win32' | 'darwin' | 'linux'
270
+ shell, // 'cmd' | 'zsh' | 'bash' etc.
271
+ homeDir, // e.g. 'C:\\Users\\ted' or '/Users/ted'
272
+ username, // e.g. 'ted'
260
273
  }),
261
274
  });
262
275
 
package/src/executor.js CHANGED
@@ -36,6 +36,15 @@ async function executeLocalTool({ toolName, args, frameworkId, dev }) {
36
36
  if (toolName === 'local_file_write') {
37
37
  return executeLocalTool({ toolName: 'write_file', args: { path: args.path, content: args.content, encoding: args.encoding }, frameworkId, dev });
38
38
  }
39
+ if (toolName === 'local_file_read_binary') {
40
+ return executeLocalTool({ toolName: 'read_file_binary', args: { path: args.path, maxSizeBytes: args.maxSizeBytes }, frameworkId, dev });
41
+ }
42
+ if (toolName === 'local_file_upload_to_thread') {
43
+ return executeLocalTool({ toolName: 'upload_file_to_thread', args: { path: args.path, displayName: args.displayName }, frameworkId, dev });
44
+ }
45
+ if (toolName === 'local_browser_control') {
46
+ return executeLocalTool({ toolName: 'browser_control', args, frameworkId, dev });
47
+ }
39
48
 
40
49
  // ── local_execute is the built-in tool injected by the backend when the bridge
41
50
  // is connected. It uses an `action` field to dispatch to the right handler.
@@ -71,8 +80,17 @@ async function executeLocalTool({ toolName, args, frameworkId, dev }) {
71
80
  const filePath = resolveSafePath(args.path);
72
81
  log('write_file', filePath);
73
82
  const mode = args.append ? 'a' : 'w';
74
- await fs.writeFile(filePath, args.content || '', { flag: mode, encoding: 'utf8' });
75
- return { success: true, path: filePath, bytesWritten: (args.content || '').length };
83
+ // Detect base64-encoded binary content (e.g. images transferred from phone)
84
+ const content = args.content || '';
85
+ const isBase64 = args.encoding === 'base64' || /^[A-Za-z0-9+/]+=*$/.test(content.replace(/\s/g, '')) && content.length > 100 && !content.includes(' ');
86
+ if (isBase64 && args.encoding === 'base64') {
87
+ const buffer = Buffer.from(content, 'base64');
88
+ await fs.writeFile(filePath, buffer, { flag: mode });
89
+ return { success: true, path: filePath, bytesWritten: buffer.length };
90
+ } else {
91
+ await fs.writeFile(filePath, content, { flag: mode, encoding: 'utf8' });
92
+ return { success: true, path: filePath, bytesWritten: Buffer.byteLength(content, 'utf8') };
93
+ }
76
94
  }
77
95
 
78
96
  case 'list_directory': {
@@ -100,6 +118,193 @@ async function executeLocalTool({ toolName, args, frameworkId, dev }) {
100
118
  };
101
119
  }
102
120
 
121
+ // ── Browser Control (Playwright) ──────────────────────────────────────
122
+ case 'browser_control': {
123
+ // Lazy-load playwright — auto-install if missing so users never need
124
+ // to run terminal commands manually. This runs once on first use.
125
+ let playwright;
126
+ try {
127
+ playwright = require('playwright');
128
+ } catch (e) {
129
+ // Playwright not installed — install it automatically
130
+ const chalk = require('chalk');
131
+ console.log(chalk.yellow('\n Playwright not found — installing automatically (one-time setup, ~2 min)...\n'));
132
+ try {
133
+ execSync('npm install -g playwright', { stdio: 'inherit' });
134
+ execSync('npx playwright install chromium', { stdio: 'inherit' });
135
+ playwright = require('playwright');
136
+ console.log(chalk.green('\n Playwright installed. Browser automation is ready.\n'));
137
+ } catch (installErr) {
138
+ throw new Error(
139
+ 'Auto-install of Playwright failed: ' + installErr.message +
140
+ '\nPlease run manually: npm install -g playwright && npx playwright install chromium'
141
+ );
142
+ }
143
+ }
144
+
145
+ const action = args.action;
146
+ if (!action) throw new Error('action is required for browser_control');
147
+ log('browser_control', action, args.url || args.selector || '');
148
+
149
+ // We maintain a single persistent browser context per process so sessions
150
+ // (cookies / localStorage) survive across multiple tool calls.
151
+ if (!global.__playwrightBrowser) {
152
+ // Try to connect to an existing Chrome instance first (user's real profile)
153
+ // Falls back to a fresh Chromium instance if not available.
154
+ try {
155
+ // Launch Chromium with the user's real Chrome profile directory
156
+ const os = require('os');
157
+ const platform = process.platform;
158
+ let userDataDir;
159
+ if (platform === 'win32') {
160
+ userDataDir = path.join(process.env.LOCALAPPDATA || os.homedir(), 'Google', 'Chrome', 'User Data');
161
+ } else if (platform === 'darwin') {
162
+ userDataDir = path.join(os.homedir(), 'Library', 'Application Support', 'Google', 'Chrome');
163
+ } else {
164
+ userDataDir = path.join(os.homedir(), '.config', 'google-chrome');
165
+ }
166
+
167
+ // Use persistent context with real Chrome profile if it exists
168
+ const fsSync = require('fs');
169
+ if (fsSync.existsSync(userDataDir)) {
170
+ global.__playwrightContext = await playwright.chromium.launchPersistentContext(userDataDir, {
171
+ headless: false,
172
+ channel: 'chrome',
173
+ args: ['--no-first-run', '--disable-blink-features=AutomationControlled'],
174
+ });
175
+ } else {
176
+ // Fallback: fresh Chromium (no saved logins)
177
+ global.__playwrightBrowser = await playwright.chromium.launch({ headless: false });
178
+ global.__playwrightContext = await global.__playwrightBrowser.newContext();
179
+ }
180
+ } catch (e) {
181
+ // Final fallback: headless Chromium
182
+ global.__playwrightBrowser = await playwright.chromium.launch({ headless: true });
183
+ global.__playwrightContext = await global.__playwrightBrowser.newContext();
184
+ }
185
+ }
186
+
187
+ const context = global.__playwrightContext;
188
+
189
+ // Get or create a page
190
+ const getPage = async () => {
191
+ const pages = context.pages();
192
+ return pages.length > 0 ? pages[pages.length - 1] : await context.newPage();
193
+ };
194
+
195
+ switch (action) {
196
+ case 'navigate': {
197
+ const page = await getPage();
198
+ await page.goto(args.url, { waitUntil: 'domcontentloaded', timeout: 30000 });
199
+ const title = await page.title();
200
+ const url = page.url();
201
+ return { success: true, title, url };
202
+ }
203
+
204
+ case 'click': {
205
+ const page = await getPage();
206
+ if (args.selector) {
207
+ await page.click(args.selector, { timeout: 10000 });
208
+ } else if (args.text) {
209
+ await page.getByText(args.text).first().click({ timeout: 10000 });
210
+ } else {
211
+ throw new Error('click requires selector or text');
212
+ }
213
+ return { success: true };
214
+ }
215
+
216
+ case 'type': {
217
+ const page = await getPage();
218
+ await page.fill(args.selector, args.text || '', { timeout: 10000 });
219
+ return { success: true };
220
+ }
221
+
222
+ case 'press_key': {
223
+ const page = await getPage();
224
+ await page.keyboard.press(args.key || 'Enter');
225
+ return { success: true };
226
+ }
227
+
228
+ case 'read_page': {
229
+ const page = await getPage();
230
+ // Return page text content and current URL
231
+ const textContent = await page.evaluate(() => document.body.innerText);
232
+ const url = page.url();
233
+ const title = await page.title();
234
+ // Truncate to avoid overwhelming the agent
235
+ const maxChars = args.maxChars || 8000;
236
+ return {
237
+ url,
238
+ title,
239
+ text: textContent.slice(0, maxChars),
240
+ truncated: textContent.length > maxChars,
241
+ totalChars: textContent.length,
242
+ };
243
+ }
244
+
245
+ case 'screenshot': {
246
+ const page = await getPage();
247
+ const screenshotBuffer = await page.screenshot({ fullPage: !!args.fullPage });
248
+ const base64 = screenshotBuffer.toString('base64');
249
+ return {
250
+ base64,
251
+ mimeType: 'image/png',
252
+ url: page.url(),
253
+ title: await page.title(),
254
+ };
255
+ }
256
+
257
+ case 'get_html': {
258
+ const page = await getPage();
259
+ const html = await page.content();
260
+ const maxChars = args.maxChars || 20000;
261
+ return {
262
+ html: html.slice(0, maxChars),
263
+ truncated: html.length > maxChars,
264
+ url: page.url(),
265
+ };
266
+ }
267
+
268
+ case 'wait_for': {
269
+ const page = await getPage();
270
+ if (args.selector) {
271
+ await page.waitForSelector(args.selector, { timeout: args.timeout || 15000 });
272
+ } else if (args.text) {
273
+ await page.waitForFunction(
274
+ (t) => document.body.innerText.includes(t),
275
+ args.text,
276
+ { timeout: args.timeout || 15000 }
277
+ );
278
+ } else {
279
+ await page.waitForLoadState('networkidle', { timeout: args.timeout || 15000 });
280
+ }
281
+ return { success: true };
282
+ }
283
+
284
+ case 'new_tab': {
285
+ const page = await context.newPage();
286
+ if (args.url) await page.goto(args.url, { waitUntil: 'domcontentloaded', timeout: 30000 });
287
+ return { success: true, url: page.url() };
288
+ }
289
+
290
+ case 'close': {
291
+ // Close the browser context and clean up
292
+ if (global.__playwrightContext) {
293
+ await global.__playwrightContext.close();
294
+ delete global.__playwrightContext;
295
+ }
296
+ if (global.__playwrightBrowser) {
297
+ await global.__playwrightBrowser.close();
298
+ delete global.__playwrightBrowser;
299
+ }
300
+ return { success: true };
301
+ }
302
+
303
+ default:
304
+ throw new Error(`Unknown browser_control action: ${action}. Valid actions: navigate, click, type, press_key, read_page, screenshot, get_html, wait_for, new_tab, close`);
305
+ }
306
+ }
307
+
103
308
  // ── Terminal ──────────────────────────────────────────────────────────
104
309
  case 'run_command': {
105
310
  const cmd = args.command;
@@ -118,7 +323,94 @@ async function executeLocalTool({ toolName, args, frameworkId, dev }) {
118
323
  return { stdout: stdout.trim(), stderr: stderr.trim(), exitCode: 0 };
119
324
  }
120
325
 
121
- // ── Browser ───────────────────────────────────────────────────────────
326
+ // ── Binary file read ────────────────────────────────────────────────────────────────────
327
+ case 'read_file_binary': {
328
+ const filePath = resolveSafePath(args.path);
329
+ log('read_file_binary', filePath);
330
+ const maxSize = args.maxSizeBytes || 10 * 1024 * 1024; // 10MB default
331
+ const stat = await fs.stat(filePath);
332
+ if (stat.size > maxSize) {
333
+ throw new Error(`File too large: ${stat.size} bytes exceeds limit of ${maxSize} bytes`);
334
+ }
335
+ const buffer = await fs.readFile(filePath);
336
+ const base64 = buffer.toString('base64');
337
+ // Detect MIME type from extension
338
+ const ext = path.extname(filePath).toLowerCase();
339
+ const mimeTypes = {
340
+ '.pdf': 'application/pdf',
341
+ '.png': 'image/png',
342
+ '.jpg': 'image/jpeg',
343
+ '.jpeg': 'image/jpeg',
344
+ '.gif': 'image/gif',
345
+ '.webp': 'image/webp',
346
+ '.bmp': 'image/bmp',
347
+ '.tiff': 'image/tiff',
348
+ '.tif': 'image/tiff',
349
+ '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
350
+ '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
351
+ '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
352
+ '.doc': 'application/msword',
353
+ '.xls': 'application/vnd.ms-excel',
354
+ '.ppt': 'application/vnd.ms-powerpoint',
355
+ '.zip': 'application/zip',
356
+ '.mp4': 'video/mp4',
357
+ '.mp3': 'audio/mpeg',
358
+ };
359
+ const mimeType = mimeTypes[ext] || 'application/octet-stream';
360
+ return {
361
+ base64,
362
+ mimeType,
363
+ sizeBytes: stat.size,
364
+ fileName: path.basename(filePath),
365
+ path: filePath
366
+ };
367
+ }
368
+
369
+ // ── Upload file to thread (read locally, return base64 for cloud upload) ────────────────────
370
+ case 'upload_file_to_thread': {
371
+ const filePath = resolveSafePath(args.path);
372
+ log('upload_file_to_thread', filePath);
373
+ const maxSize = 50 * 1024 * 1024; // 50MB limit for uploads
374
+ const stat = await fs.stat(filePath);
375
+ if (stat.size > maxSize) {
376
+ throw new Error(`File too large: ${stat.size} bytes exceeds upload limit of 50MB`);
377
+ }
378
+ const buffer = await fs.readFile(filePath);
379
+ const base64 = buffer.toString('base64');
380
+ const ext = path.extname(filePath).toLowerCase();
381
+ const mimeTypes = {
382
+ '.pdf': 'application/pdf',
383
+ '.png': 'image/png',
384
+ '.jpg': 'image/jpeg',
385
+ '.jpeg': 'image/jpeg',
386
+ '.gif': 'image/gif',
387
+ '.webp': 'image/webp',
388
+ '.md': 'text/markdown',
389
+ '.txt': 'text/plain',
390
+ '.csv': 'text/csv',
391
+ '.json': 'application/json',
392
+ '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
393
+ '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
394
+ '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
395
+ '.zip': 'application/zip',
396
+ '.mp4': 'video/mp4',
397
+ '.mp3': 'audio/mpeg',
398
+ };
399
+ const mimeType = mimeTypes[ext] || 'application/octet-stream';
400
+ const fileName = args.displayName || path.basename(filePath);
401
+ // Return the base64 payload — the cloud API will handle the actual upload to Firebase Storage
402
+ // and return the download URL back to the agent
403
+ return {
404
+ __upload_payload: true,
405
+ base64,
406
+ mimeType,
407
+ sizeBytes: stat.size,
408
+ fileName,
409
+ originalPath: filePath
410
+ };
411
+ }
412
+
413
+ // ── Browser ────────────────────────────────────────────────────────────────────
122
414
  case 'open_browser': {
123
415
  const url = args.url;
124
416
  if (!url || !/^https?:\/\//.test(url)) throw new Error('Valid http/https URL required');
@@ -203,9 +495,72 @@ async function executeLocalTool({ toolName, args, frameworkId, dev }) {
203
495
  // Resolve a path safely — expand ~ and normalize
204
496
  // Does NOT restrict to a specific directory (user approved full access)
205
497
  // ─────────────────────────────────────────────────────────────────────────────
498
+
499
+ /**
500
+ * On Windows, the user's Desktop may live under OneDrive sync rather than the
501
+ * local profile directory. This function detects the real Desktop path by
502
+ * querying the Windows Shell folder registry key, falling back to the
503
+ * OneDrive\Desktop path, then the local profile Desktop.
504
+ *
505
+ * On macOS/Linux the standard ~/Desktop is used.
506
+ */
507
+ function resolveDesktopPath() {
508
+ if (process.platform !== 'win32') {
509
+ return path.join(require('os').homedir(), 'Desktop');
510
+ }
511
+ // Try registry first (most reliable — works even with custom Desktop locations)
512
+ try {
513
+ const { execSync } = require('child_process');
514
+ const regOut = execSync(
515
+ 'reg query "HKCU\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\User Shell Folders" /v Desktop',
516
+ { encoding: 'utf8', timeout: 3000 }
517
+ );
518
+ const match = regOut.match(/Desktop\s+REG_(?:SZ|EXPAND_SZ)\s+(.+)/i);
519
+ if (match) {
520
+ // Expand environment variables like %USERPROFILE%
521
+ let desktopPath = match[1].trim();
522
+ desktopPath = desktopPath.replace(/%([^%]+)%/g, (_, varName) => process.env[varName] || `%${varName}%`);
523
+ if (require('fs').existsSync(desktopPath)) return desktopPath;
524
+ }
525
+ } catch { /* registry query failed, fall through */ }
526
+
527
+ // Fallback: check OneDrive Desktop first (most common on Windows 11)
528
+ const userProfile = process.env.USERPROFILE || require('os').homedir();
529
+ const oneDriveDesktop = path.join(userProfile, 'OneDrive', 'Desktop');
530
+ if (require('fs').existsSync(oneDriveDesktop)) return oneDriveDesktop;
531
+
532
+ // Final fallback: local Desktop
533
+ return path.join(userProfile, 'Desktop');
534
+ }
535
+
206
536
  function resolveSafePath(inputPath) {
207
537
  if (!inputPath) throw new Error('Path is required');
208
- const expanded = inputPath.replace(/^~/, process.env.HOME || '/home');
538
+
539
+ // Expand ~ to home directory
540
+ let expanded = inputPath.replace(/^~/, require('os').homedir());
541
+
542
+ // On Windows, expand %DESKTOP% and ~/Desktop shortcuts to the real Desktop path
543
+ // This handles the common case where the agent writes to "C:\Users\user\Desktop"
544
+ // but the actual visible Desktop is under OneDrive.
545
+ if (process.platform === 'win32') {
546
+ // Replace %DESKTOP% placeholder
547
+ expanded = expanded.replace(/%DESKTOP%/gi, resolveDesktopPath());
548
+
549
+ // If path contains \Desktop\ or ends with \Desktop, check if OneDrive Desktop exists
550
+ // and remap the local Desktop path to the OneDrive one.
551
+ const userProfile = process.env.USERPROFILE || require('os').homedir();
552
+ const localDesktop = path.join(userProfile, 'Desktop');
553
+ const realDesktop = resolveDesktopPath();
554
+ if (realDesktop !== localDesktop) {
555
+ // Normalize separators for comparison
556
+ const normalizedExpanded = expanded.replace(/\//g, '\\');
557
+ const normalizedLocal = localDesktop.replace(/\//g, '\\');
558
+ if (normalizedExpanded.toLowerCase().startsWith(normalizedLocal.toLowerCase())) {
559
+ expanded = realDesktop + expanded.slice(localDesktop.length);
560
+ }
561
+ }
562
+ }
563
+
209
564
  return path.resolve(expanded);
210
565
  }
211
566