npm - promethios-bridge - Versions diffs - 1.2.0 → 1.4.1 - Mend

promethios-bridge 1.2.0 → 1.4.1

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (3) hide show

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "promethios-bridge",
-  "version": "1.2.0",
+  "version": "1.4.1",
   "description": "Run Promethios agent frameworks locally on your computer with full file, terminal, and browser access.",
   "main": "src/index.js",
   "bin": {
@@ -43,7 +43,11 @@
     "express": "^4.18.2",
     "open": "^8.4.2",
     "ora": "^5.4.1",
-    "node-fetch": "^2.7.0"
+    "node-fetch": "^2.7.0",
+    "playwright": "^1.42.0"
+  },
+  "optionalDependencies": {
+    "playwright": "^1.42.0"
   },
   "engines": {
     "node": ">=18.0.0"

package/src/bridge.js CHANGED Viewed

@@ -246,6 +246,15 @@ async function registerBridge({ authToken, apiBase, callbackUrl, port, dev }) {
   const deviceId = require('crypto').randomBytes(8).toString('hex');
   const capabilities = getSupportedCapabilities();
+  // Detect OS and shell so the cloud can inject OS-aware guidance into the agent
+  const osModule = require('os');
+  const platform = process.platform; // 'win32' | 'darwin' | 'linux'
+  const shell = platform === 'win32'
+    ? 'cmd'
+    : (process.env.SHELL || '/bin/zsh').split('/').pop();
+  const homeDir = osModule.homedir();
+  const username = osModule.userInfo().username;
   const res = await fetch(`${apiBase}/api/local-bridge/register`, {
     method: 'POST',
     headers: {
@@ -257,6 +266,10 @@ async function registerBridge({ authToken, apiBase, callbackUrl, port, dev }) {
       callbackUrl,
       capabilities,
       bridgeVersion: require('../package.json').version,
+      os: platform,   // 'win32' | 'darwin' | 'linux'
+      shell,          // 'cmd' | 'zsh' | 'bash' etc.
+      homeDir,        // e.g. 'C:\\Users\\ted' or '/Users/ted'
+      username,       // e.g. 'ted'
     }),
   });

package/src/executor.js CHANGED Viewed

@@ -36,6 +36,15 @@ async function executeLocalTool({ toolName, args, frameworkId, dev }) {
   if (toolName === 'local_file_write') {
     return executeLocalTool({ toolName: 'write_file', args: { path: args.path, content: args.content, encoding: args.encoding }, frameworkId, dev });
   }
+  if (toolName === 'local_file_read_binary') {
+    return executeLocalTool({ toolName: 'read_file_binary', args: { path: args.path, maxSizeBytes: args.maxSizeBytes }, frameworkId, dev });
+  }
+  if (toolName === 'local_file_upload_to_thread') {
+    return executeLocalTool({ toolName: 'upload_file_to_thread', args: { path: args.path, displayName: args.displayName }, frameworkId, dev });
+  }
+  if (toolName === 'local_browser_control') {
+    return executeLocalTool({ toolName: 'browser_control', args, frameworkId, dev });
+  }
   // ── local_execute is the built-in tool injected by the backend when the bridge
   //    is connected. It uses an `action` field to dispatch to the right handler.
@@ -71,8 +80,17 @@ async function executeLocalTool({ toolName, args, frameworkId, dev }) {
       const filePath = resolveSafePath(args.path);
       log('write_file', filePath);
       const mode = args.append ? 'a' : 'w';
-      await fs.writeFile(filePath, args.content || '', { flag: mode, encoding: 'utf8' });
-      return { success: true, path: filePath, bytesWritten: (args.content || '').length };
+      // Detect base64-encoded binary content (e.g. images transferred from phone)
+      const content = args.content || '';
+      const isBase64 = args.encoding === 'base64' || /^[A-Za-z0-9+/]+=*$/.test(content.replace(/\s/g, '')) && content.length > 100 && !content.includes(' ');
+      if (isBase64 && args.encoding === 'base64') {
+        const buffer = Buffer.from(content, 'base64');
+        await fs.writeFile(filePath, buffer, { flag: mode });
+        return { success: true, path: filePath, bytesWritten: buffer.length };
+      } else {
+        await fs.writeFile(filePath, content, { flag: mode, encoding: 'utf8' });
+        return { success: true, path: filePath, bytesWritten: Buffer.byteLength(content, 'utf8') };
+      }
     }
     case 'list_directory': {
@@ -100,6 +118,193 @@ async function executeLocalTool({ toolName, args, frameworkId, dev }) {
       };
     }
+    // ── Browser Control (Playwright) ──────────────────────────────────────
+    case 'browser_control': {
+      // Lazy-load playwright — auto-install if missing so users never need
+      // to run terminal commands manually. This runs once on first use.
+      let playwright;
+      try {
+        playwright = require('playwright');
+      } catch (e) {
+        // Playwright not installed — install it automatically
+        const chalk = require('chalk');
+        console.log(chalk.yellow('\n  Playwright not found — installing automatically (one-time setup, ~2 min)...\n'));
+        try {
+          execSync('npm install -g playwright', { stdio: 'inherit' });
+          execSync('npx playwright install chromium', { stdio: 'inherit' });
+          playwright = require('playwright');
+          console.log(chalk.green('\n  Playwright installed. Browser automation is ready.\n'));
+        } catch (installErr) {
+          throw new Error(
+            'Auto-install of Playwright failed: ' + installErr.message +
+            '\nPlease run manually: npm install -g playwright && npx playwright install chromium'
+          );
+        }
+      }
+      const action = args.action;
+      if (!action) throw new Error('action is required for browser_control');
+      log('browser_control', action, args.url || args.selector || '');
+      // We maintain a single persistent browser context per process so sessions
+      // (cookies / localStorage) survive across multiple tool calls.
+      if (!global.__playwrightBrowser) {
+        // Try to connect to an existing Chrome instance first (user's real profile)
+        // Falls back to a fresh Chromium instance if not available.
+        try {
+          // Launch Chromium with the user's real Chrome profile directory
+          const os = require('os');
+          const platform = process.platform;
+          let userDataDir;
+          if (platform === 'win32') {
+            userDataDir = path.join(process.env.LOCALAPPDATA || os.homedir(), 'Google', 'Chrome', 'User Data');
+          } else if (platform === 'darwin') {
+            userDataDir = path.join(os.homedir(), 'Library', 'Application Support', 'Google', 'Chrome');
+          } else {
+            userDataDir = path.join(os.homedir(), '.config', 'google-chrome');
+          }
+          // Use persistent context with real Chrome profile if it exists
+          const fsSync = require('fs');
+          if (fsSync.existsSync(userDataDir)) {
+            global.__playwrightContext = await playwright.chromium.launchPersistentContext(userDataDir, {
+              headless: false,
+              channel: 'chrome',
+              args: ['--no-first-run', '--disable-blink-features=AutomationControlled'],
+            });
+          } else {
+            // Fallback: fresh Chromium (no saved logins)
+            global.__playwrightBrowser = await playwright.chromium.launch({ headless: false });
+            global.__playwrightContext = await global.__playwrightBrowser.newContext();
+          }
+        } catch (e) {
+          // Final fallback: headless Chromium
+          global.__playwrightBrowser = await playwright.chromium.launch({ headless: true });
+          global.__playwrightContext = await global.__playwrightBrowser.newContext();
+        }
+      }
+      const context = global.__playwrightContext;
+      // Get or create a page
+      const getPage = async () => {
+        const pages = context.pages();
+        return pages.length > 0 ? pages[pages.length - 1] : await context.newPage();
+      };
+      switch (action) {
+        case 'navigate': {
+          const page = await getPage();
+          await page.goto(args.url, { waitUntil: 'domcontentloaded', timeout: 30000 });
+          const title = await page.title();
+          const url = page.url();
+          return { success: true, title, url };
+        }
+        case 'click': {
+          const page = await getPage();
+          if (args.selector) {
+            await page.click(args.selector, { timeout: 10000 });
+          } else if (args.text) {
+            await page.getByText(args.text).first().click({ timeout: 10000 });
+          } else {
+            throw new Error('click requires selector or text');
+          }
+          return { success: true };
+        }
+        case 'type': {
+          const page = await getPage();
+          await page.fill(args.selector, args.text || '', { timeout: 10000 });
+          return { success: true };
+        }
+        case 'press_key': {
+          const page = await getPage();
+          await page.keyboard.press(args.key || 'Enter');
+          return { success: true };
+        }
+        case 'read_page': {
+          const page = await getPage();
+          // Return page text content and current URL
+          const textContent = await page.evaluate(() => document.body.innerText);
+          const url = page.url();
+          const title = await page.title();
+          // Truncate to avoid overwhelming the agent
+          const maxChars = args.maxChars || 8000;
+          return {
+            url,
+            title,
+            text: textContent.slice(0, maxChars),
+            truncated: textContent.length > maxChars,
+            totalChars: textContent.length,
+          };
+        }
+        case 'screenshot': {
+          const page = await getPage();
+          const screenshotBuffer = await page.screenshot({ fullPage: !!args.fullPage });
+          const base64 = screenshotBuffer.toString('base64');
+          return {
+            base64,
+            mimeType: 'image/png',
+            url: page.url(),
+            title: await page.title(),
+          };
+        }
+        case 'get_html': {
+          const page = await getPage();
+          const html = await page.content();
+          const maxChars = args.maxChars || 20000;
+          return {
+            html: html.slice(0, maxChars),
+            truncated: html.length > maxChars,
+            url: page.url(),
+          };
+        }
+        case 'wait_for': {
+          const page = await getPage();
+          if (args.selector) {
+            await page.waitForSelector(args.selector, { timeout: args.timeout || 15000 });
+          } else if (args.text) {
+            await page.waitForFunction(
+              (t) => document.body.innerText.includes(t),
+              args.text,
+              { timeout: args.timeout || 15000 }
+            );
+          } else {
+            await page.waitForLoadState('networkidle', { timeout: args.timeout || 15000 });
+          }
+          return { success: true };
+        }
+        case 'new_tab': {
+          const page = await context.newPage();
+          if (args.url) await page.goto(args.url, { waitUntil: 'domcontentloaded', timeout: 30000 });
+          return { success: true, url: page.url() };
+        }
+        case 'close': {
+          // Close the browser context and clean up
+          if (global.__playwrightContext) {
+            await global.__playwrightContext.close();
+            delete global.__playwrightContext;
+          }
+          if (global.__playwrightBrowser) {
+            await global.__playwrightBrowser.close();
+            delete global.__playwrightBrowser;
+          }
+          return { success: true };
+        }
+        default:
+          throw new Error(`Unknown browser_control action: ${action}. Valid actions: navigate, click, type, press_key, read_page, screenshot, get_html, wait_for, new_tab, close`);
+      }
+    }
     // ── Terminal ──────────────────────────────────────────────────────────
     case 'run_command': {
       const cmd = args.command;
@@ -118,7 +323,94 @@ async function executeLocalTool({ toolName, args, frameworkId, dev }) {
       return { stdout: stdout.trim(), stderr: stderr.trim(), exitCode: 0 };
     }
-    // ── Browser ───────────────────────────────────────────────────────────
+    // ── Binary file read ────────────────────────────────────────────────────────────────────
+    case 'read_file_binary': {
+      const filePath = resolveSafePath(args.path);
+      log('read_file_binary', filePath);
+      const maxSize = args.maxSizeBytes || 10 * 1024 * 1024; // 10MB default
+      const stat = await fs.stat(filePath);
+      if (stat.size > maxSize) {
+        throw new Error(`File too large: ${stat.size} bytes exceeds limit of ${maxSize} bytes`);
+      }
+      const buffer = await fs.readFile(filePath);
+      const base64 = buffer.toString('base64');
+      // Detect MIME type from extension
+      const ext = path.extname(filePath).toLowerCase();
+      const mimeTypes = {
+        '.pdf': 'application/pdf',
+        '.png': 'image/png',
+        '.jpg': 'image/jpeg',
+        '.jpeg': 'image/jpeg',
+        '.gif': 'image/gif',
+        '.webp': 'image/webp',
+        '.bmp': 'image/bmp',
+        '.tiff': 'image/tiff',
+        '.tif': 'image/tiff',
+        '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+        '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+        '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
+        '.doc': 'application/msword',
+        '.xls': 'application/vnd.ms-excel',
+        '.ppt': 'application/vnd.ms-powerpoint',
+        '.zip': 'application/zip',
+        '.mp4': 'video/mp4',
+        '.mp3': 'audio/mpeg',
+      };
+      const mimeType = mimeTypes[ext] || 'application/octet-stream';
+      return {
+        base64,
+        mimeType,
+        sizeBytes: stat.size,
+        fileName: path.basename(filePath),
+        path: filePath
+      };
+    }
+    // ── Upload file to thread (read locally, return base64 for cloud upload) ────────────────────
+    case 'upload_file_to_thread': {
+      const filePath = resolveSafePath(args.path);
+      log('upload_file_to_thread', filePath);
+      const maxSize = 50 * 1024 * 1024; // 50MB limit for uploads
+      const stat = await fs.stat(filePath);
+      if (stat.size > maxSize) {
+        throw new Error(`File too large: ${stat.size} bytes exceeds upload limit of 50MB`);
+      }
+      const buffer = await fs.readFile(filePath);
+      const base64 = buffer.toString('base64');
+      const ext = path.extname(filePath).toLowerCase();
+      const mimeTypes = {
+        '.pdf': 'application/pdf',
+        '.png': 'image/png',
+        '.jpg': 'image/jpeg',
+        '.jpeg': 'image/jpeg',
+        '.gif': 'image/gif',
+        '.webp': 'image/webp',
+        '.md': 'text/markdown',
+        '.txt': 'text/plain',
+        '.csv': 'text/csv',
+        '.json': 'application/json',
+        '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+        '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+        '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
+        '.zip': 'application/zip',
+        '.mp4': 'video/mp4',
+        '.mp3': 'audio/mpeg',
+      };
+      const mimeType = mimeTypes[ext] || 'application/octet-stream';
+      const fileName = args.displayName || path.basename(filePath);
+      // Return the base64 payload — the cloud API will handle the actual upload to Firebase Storage
+      // and return the download URL back to the agent
+      return {
+        __upload_payload: true,
+        base64,
+        mimeType,
+        sizeBytes: stat.size,
+        fileName,
+        originalPath: filePath
+      };
+    }
+    // ── Browser ────────────────────────────────────────────────────────────────────
     case 'open_browser': {
       const url = args.url;
       if (!url || !/^https?:\/\//.test(url)) throw new Error('Valid http/https URL required');
@@ -203,9 +495,72 @@ async function executeLocalTool({ toolName, args, frameworkId, dev }) {
 // Resolve a path safely — expand ~ and normalize
 // Does NOT restrict to a specific directory (user approved full access)
 // ─────────────────────────────────────────────────────────────────────────────
+/**
+ * On Windows, the user's Desktop may live under OneDrive sync rather than the
+ * local profile directory. This function detects the real Desktop path by
+ * querying the Windows Shell folder registry key, falling back to the
+ * OneDrive\Desktop path, then the local profile Desktop.
+ *
+ * On macOS/Linux the standard ~/Desktop is used.
+ */
+function resolveDesktopPath() {
+  if (process.platform !== 'win32') {
+    return path.join(require('os').homedir(), 'Desktop');
+  }
+  // Try registry first (most reliable — works even with custom Desktop locations)
+  try {
+    const { execSync } = require('child_process');
+    const regOut = execSync(
+      'reg query "HKCU\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\User Shell Folders" /v Desktop',
+      { encoding: 'utf8', timeout: 3000 }
+    );
+    const match = regOut.match(/Desktop\s+REG_(?:SZ|EXPAND_SZ)\s+(.+)/i);
+    if (match) {
+      // Expand environment variables like %USERPROFILE%
+      let desktopPath = match[1].trim();
+      desktopPath = desktopPath.replace(/%([^%]+)%/g, (_, varName) => process.env[varName] || `%${varName}%`);
+      if (require('fs').existsSync(desktopPath)) return desktopPath;
+    }
+  } catch { /* registry query failed, fall through */ }
+  // Fallback: check OneDrive Desktop first (most common on Windows 11)
+  const userProfile = process.env.USERPROFILE || require('os').homedir();
+  const oneDriveDesktop = path.join(userProfile, 'OneDrive', 'Desktop');
+  if (require('fs').existsSync(oneDriveDesktop)) return oneDriveDesktop;
+  // Final fallback: local Desktop
+  return path.join(userProfile, 'Desktop');
+}
 function resolveSafePath(inputPath) {
   if (!inputPath) throw new Error('Path is required');
-  const expanded = inputPath.replace(/^~/, process.env.HOME || '/home');
+  // Expand ~ to home directory
+  let expanded = inputPath.replace(/^~/, require('os').homedir());
+  // On Windows, expand %DESKTOP% and ~/Desktop shortcuts to the real Desktop path
+  // This handles the common case where the agent writes to "C:\Users\user\Desktop"
+  // but the actual visible Desktop is under OneDrive.
+  if (process.platform === 'win32') {
+    // Replace %DESKTOP% placeholder
+    expanded = expanded.replace(/%DESKTOP%/gi, resolveDesktopPath());
+    // If path contains \Desktop\ or ends with \Desktop, check if OneDrive Desktop exists
+    // and remap the local Desktop path to the OneDrive one.
+    const userProfile = process.env.USERPROFILE || require('os').homedir();
+    const localDesktop = path.join(userProfile, 'Desktop');
+    const realDesktop = resolveDesktopPath();
+    if (realDesktop !== localDesktop) {
+      // Normalize separators for comparison
+      const normalizedExpanded = expanded.replace(/\//g, '\\');
+      const normalizedLocal = localDesktop.replace(/\//g, '\\');
+      if (normalizedExpanded.toLowerCase().startsWith(normalizedLocal.toLowerCase())) {
+        expanded = realDesktop + expanded.slice(localDesktop.length);
+      }
+    }
+  }
   return path.resolve(expanded);
 }