npm - assistme - Versions diffs - 0.1.7 → 0.1.9 - Mend

assistme 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/dist/index.js +117 -79
package/package.json +1 -1
package/src/agent/mcp-servers.ts +17 -54
package/src/agent/processor.ts +18 -41
package/src/index.ts +11 -2
package/src/tools/browser.ts +118 -35
package/src/tools/index.ts +20 -12

package/dist/index.js CHANGED

@@ -432,9 +432,7 @@ var BrowserController = class {
     const available = await this.isAvailable();
     if (!available) {
       throw new Error(
-        `Cannot connect to browser on port ${this.debugPort}. Please start Chrome with: --remote-debugging-port=9222
-macOS: open -a 'Google Chrome' --args --remote-debugging-port=9222
-Linux: google-chrome --remote-debugging-port=9222`
+        `Cannot connect to browser on port ${this.debugPort}. Chrome remote debugging is not reachable. Please ensure Chrome is running with remote debugging enabled.`
       );
     }
     const tabs = await this.getTabs();
@@ -855,14 +853,24 @@ function findChromePath() {
 function isChromeRunning() {
   try {
     if (platform() === "win32") {
-      const out = execSync('tasklist /FI "IMAGENAME eq chrome.exe" /NH', {
+      const out2 = execSync('tasklist /FI "IMAGENAME eq chrome.exe" /NH', {
         encoding: "utf-8",
         stdio: ["pipe", "pipe", "pipe"]
       });
-      return out.includes("chrome.exe");
+      return out2.includes("chrome.exe");
     }
-    execSync("pgrep -f chrome", { stdio: ["pipe", "pipe", "pipe"] });
-    return true;
+    if (platform() === "darwin") {
+      const out2 = execSync('pgrep -f "Google Chrome.app/Contents/MacOS/Google Chrome"', {
+        encoding: "utf-8",
+        stdio: ["pipe", "pipe", "pipe"]
+      });
+      return out2.trim().length > 0;
+    }
+    const out = execSync("pgrep -f '(chrome|chromium)' 2>/dev/null || true", {
+      encoding: "utf-8",
+      stdio: ["pipe", "pipe", "pipe"]
+    });
+    return out.trim().length > 0;
   } catch {
     return false;
   }
@@ -908,52 +916,97 @@ async function killChromeGracefully() {
   await new Promise((r) => setTimeout(r, 1e3));
 }
 function spawnChrome(chromePath, port) {
-  const os = platform();
   const cdpFlag = `--remote-debugging-port=${port}`;
-  if (os === "darwin") {
-    const appName = chromePath.includes("Chromium") ? "Chromium" : chromePath.includes("Canary") ? "Google Chrome Canary" : "Google Chrome";
-    spawn("open", ["-a", appName, "--args", cdpFlag, "--restore-last-session"], {
-      detached: true,
-      stdio: "ignore"
-    }).unref();
-  } else {
-    spawn(chromePath, [cdpFlag, "--restore-last-session"], {
-      detached: true,
-      stdio: "ignore"
-    }).unref();
-  }
+  log.debug(`Spawning Chrome: ${chromePath} ${cdpFlag} --restore-last-session`);
+  const child = spawn(chromePath, [cdpFlag, "--restore-last-session"], {
+    detached: true,
+    stdio: "ignore"
+  });
+  child.on("error", (err) => {
+    log.error(`Chrome spawn error: ${err.message}`);
+  });
+  child.unref();
+  return child;
 }
-async function waitForCDP(browser, timeoutMs = 15e3) {
+async function waitForCDP(browser, timeoutMs = 3e4) {
   const start = Date.now();
+  let attempts = 0;
   while (Date.now() - start < timeoutMs) {
-    if (await browser.isAvailable()) return true;
+    attempts++;
+    if (await browser.isAvailable()) {
+      log.debug(`CDP became reachable after ${attempts} attempts (${Date.now() - start}ms)`);
+      return true;
+    }
     await new Promise((r) => setTimeout(r, 500));
   }
+  log.debug(`CDP not reachable after ${attempts} attempts (${timeoutMs}ms timeout)`);
   return false;
 }
+async function isPortInUse(port) {
+  try {
+    const res = await fetch(`http://127.0.0.1:${port}/json/version`, {
+      signal: AbortSignal.timeout(1e3)
+    });
+    const body = await res.text();
+    return !body.includes("Chrome");
+  } catch {
+    return false;
+  }
+}
 async function ensureBrowserAvailable(port = 9222) {
   const browser = getBrowser(port);
   if (await browser.isAvailable()) {
+    log.debug("CDP already reachable \u2014 no launch needed");
     return { success: true, action: "already_available" };
   }
+  if (await isPortInUse(port)) {
+    log.debug(`Port ${port} is in use by a non-Chrome process`);
+    return {
+      success: false,
+      action: "port_conflict",
+      detail: `Port ${port} is already in use by another process. Try a different port or stop the conflicting process.`
+    };
+  }
   const chromePath = findChromePath();
   if (!chromePath) {
+    log.debug("Chrome binary not found on this system");
     return { success: false, action: "chrome_not_found" };
   }
+  log.debug(`Found Chrome at: ${chromePath}`);
   const running = isChromeRunning();
+  log.debug(`Chrome currently running: ${running}`);
   if (running) {
+    log.debug("Killing Chrome gracefully for restart with CDP...");
     await killChromeGracefully();
     spawnChrome(chromePath, port);
     if (await waitForCDP(browser)) {
       return { success: true, action: "restarted", chromePath };
     }
-    return { success: false, action: "launch_failed", chromePath };
+    log.debug("First CDP wait timed out after restart, retrying...");
+    if (await waitForCDP(browser, 15e3)) {
+      return { success: true, action: "restarted", chromePath };
+    }
+    return {
+      success: false,
+      action: "launch_failed",
+      chromePath,
+      detail: "Chrome was restarted but CDP did not become reachable within timeout."
+    };
   }
   spawnChrome(chromePath, port);
   if (await waitForCDP(browser)) {
     return { success: true, action: "launched", chromePath };
   }
-  return { success: false, action: "launch_failed", chromePath };
+  log.debug("First CDP wait timed out after launch, retrying...");
+  if (await waitForCDP(browser, 15e3)) {
+    return { success: true, action: "launched", chromePath };
+  }
+  return {
+    success: false,
+    action: "launch_failed",
+    chromePath,
+    detail: "Chrome was launched but CDP did not become reachable within timeout."
+  };
 }
 var browserInstance = null;
 function getBrowser(port = 9222) {
@@ -2164,8 +2217,17 @@ async function executeTool(name, input) {
     case "execute_command":
       return executeShell(input.command, input.cwd);
     // ── Browser (CDP) ───────────────────────────────────────
-    case "browser_connect":
+    case "browser_connect": {
+      if (!await browser.isAvailable()) {
+        const result = await ensureBrowserAvailable();
+        if (!result.success) {
+          throw new Error(
+            `Failed to auto-launch Chrome (${result.action}). Please ensure Google Chrome is installed.`
+          );
+        }
+      }
       return browser.connect(input.tab_index);
+    }
     case "browser_navigate":
       if (!browser.isConnected()) await browser.connect();
       return browser.navigate(input.url);
@@ -2341,7 +2403,7 @@ function createBrowserMcpServer() {
     tools: [
       tool(
         "browser_connect",
-        "Connect to the user's real Chrome browser via CDP. The user must have Chrome running with --remote-debugging-port=9222.",
+        "Connect to the user's real Chrome browser via CDP. Chrome will be auto-launched if not already running.",
         { tab_index: z.number().optional().describe("Tab index (default: 0)") },
         async (args) => callTool("browser_connect", args)
       ),
@@ -2459,18 +2521,14 @@ function createAgentToolsServer(deps) {
         "Store a memory about the user that persists across conversations. Use when you learn preferences, habits, or standing instructions.",
         {
           content: z.string().describe("What to remember (concise, factual statement)"),
-          category: z.string().optional().describe(
-            "Category: general, preference, instruction, context, skill_learned, fact"
-          ),
+          category: z.string().optional().describe("Category: general, preference, instruction, context, skill_learned, fact"),
           importance: z.number().optional().describe("Importance 1-10 (default: 5). Use 8+ for instructions"),
           tags: z.array(z.string()).optional().describe("Optional tags for searchability")
         },
         async (args) => {
           if (!memoryManager) {
             return {
-              content: [
-                { type: "text", text: "Memory manager not available." }
-              ]
+              content: [{ type: "text", text: "Memory manager not available." }]
             };
           }
           const mem = await memoryManager.remember(
@@ -2507,11 +2565,7 @@ function createAgentToolsServer(deps) {
               ]
             };
           }
-          const filePath = skillManager.create(
-            args.name,
-            args.description,
-            args.instructions
-          );
+          const filePath = skillManager.create(args.name, args.description, args.instructions);
           if (args.emoji) {
             const skill = skillManager.get(args.name);
             if (skill) {
@@ -2571,9 +2625,7 @@ ${args.instructions}
               args.description || existing.description,
               args.improved_instructions
             );
-            log.success(
-              `Self-improvement: overrode bundled skill "${args.name}"`
-            );
+            log.success(`Self-improvement: overrode bundled skill "${args.name}"`);
             return {
               content: [
                 {
@@ -2670,6 +2722,8 @@ KEY PRINCIPLE: You operate the user's real browser, not a headless sandbox. This
 - When you navigate to amazon.com, you see the user's logged-in Amazon
 - If a site needs login, ask the user to log in using browser_request_user_action
 - You are like a human assistant sitting at the user's computer
+- Chrome is automatically managed \u2014 just call browser_connect and it will auto-launch if needed
+- NEVER ask the user to manually start Chrome or run any terminal commands for browser setup
 Available capabilities:
 1. BROWSER CONTROL (user's real Chrome via CDP):
@@ -2731,10 +2785,7 @@ var TaskProcessor = class {
     let tokenUsage;
     try {
       await emitEvent(task.id, "status_change", { status: "running" });
-      let systemPrompt = BASE_SYSTEM_PROMPT.replace(
-        "{workspace_path}",
-        config.workspacePath
-      );
+      let systemPrompt = BASE_SYSTEM_PROMPT.replace("{workspace_path}", config.workspacePath);
       if (this.memoryManager) {
         try {
           const memoryPrompt = await this.memoryManager.buildMemoryPrompt();
@@ -2769,9 +2820,7 @@ var TaskProcessor = class {
         "Glob",
         "Grep",
         // Browser MCP tools
-        ...BROWSER_TOOL_NAMES.map(
-          (n) => `mcp__assistme-browser__${n}`
-        ),
+        ...BROWSER_TOOL_NAMES.map((n) => `mcp__assistme-browser__${n}`),
         // Agent MCP tools (memory, skills)
         "mcp__assistme-agent__memory_store",
         "mcp__assistme-agent__skill_create",
@@ -2830,9 +2879,7 @@ var TaskProcessor = class {
                   });
                 } else if (block.type === "thinking" && "thinking" in block) {
                   const thinkingText = block.thinking;
-                  log.debug(
-                    `Thinking: ${thinkingText.slice(0, 100)}...`
-                  );
+                  log.debug(`Thinking: ${thinkingText.slice(0, 100)}...`);
                   await emitEvent(task.id, "thinking", {
                     text: thinkingText
                   });
@@ -2871,14 +2918,11 @@ var TaskProcessor = class {
       } finally {
         clearTimeout(timeoutId);
       }
-      await withRetry(
-        () => completeTask(task.id, finalResponse, tokenUsage),
-        {
-          maxRetries: 2,
-          baseDelayMs: 300,
-          label: "completeTask"
-        }
-      );
+      await withRetry(() => completeTask(task.id, finalResponse, tokenUsage), {
+        maxRetries: 2,
+        baseDelayMs: 300,
+        label: "completeTask"
+      });
       await emitEvent(task.id, "status_change", { status: "completed" });
       log.success("Task completed.");
       if (this.memoryManager && finalResponse) {
@@ -2892,9 +2936,7 @@ var TaskProcessor = class {
                 tags: mem.tags,
                 sourceMessageId: taskIdRef
               });
-              log.info(
-                `Memory extracted: [${mem.category}] ${mem.content.slice(0, 60)}...`
-              );
+              log.info(`Memory extracted: [${mem.category}] ${mem.content.slice(0, 60)}...`);
             } catch {
             }
           }
@@ -2918,11 +2960,7 @@ var TaskProcessor = class {
             );
             return;
           }
-          const filePath = sm.create(
-            extracted.name,
-            extracted.description,
-            extracted.steps
-          );
+          const filePath = sm.create(extracted.name, extracted.description, extracted.steps);
           if (extracted.emoji) {
             const { writeFile: writeFile2 } = await import("fs/promises");
             const metaJson = JSON.stringify({
@@ -2948,19 +2986,10 @@ ${extracted.steps}
         for (const skillName of usedSkillNames) {
           const skill = sm.get(skillName);
           if (!skill) continue;
-          analyzeSkillImprovement(
-            skill.content,
-            task.prompt,
-            finalResponse,
-            realToolCalls
-          ).then(async (improvement) => {
+          analyzeSkillImprovement(skill.content, task.prompt, finalResponse, realToolCalls).then(async (improvement) => {
             if (!improvement) return;
             if (skill.source === "bundled") {
-              sm.create(
-                skillName,
-                skill.description,
-                improvement.improved_steps
-              );
+              sm.create(skillName, skill.description, improvement.improved_steps);
             } else {
               sm.update(skillName, improvement.improved_steps);
             }
@@ -3216,11 +3245,20 @@ program.command("start", { isDefault: true }).description("Start the agent and l
       case "chrome_not_found":
         launchSpinner.fail("Chrome not found on this system");
         log.info("Please install Google Chrome and try again.");
-        log.info('Or run "assistme browser setup" for manual instructions.');
+        break;
+      case "port_conflict":
+        launchSpinner.fail("Port 9222 is in use by another process");
+        log.info(launchResult.detail ?? "Stop the conflicting process or use a different port.");
         break;
       default:
         launchSpinner.fail("Failed to start Chrome with remote debugging");
-        log.info('Run "assistme browser setup" for manual setup instructions.');
+        if (launchResult.detail) {
+          log.info(launchResult.detail);
+        }
+        if (launchResult.chromePath) {
+          log.info(`Chrome binary: ${launchResult.chromePath}`);
+        }
+        log.info("Browser will be auto-launched when the first task needs it.");
         break;
     }
   }

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "assistme",
-  "version": "0.1.7",
+  "version": "0.1.9",
   "description": "AssistMe CLI Agent - AI-powered assistant that controls your real browser",
   "type": "module",
   "main": "dist/index.js",

package/src/agent/mcp-servers.ts CHANGED

@@ -49,7 +49,7 @@ export function createBrowserMcpServer(): McpSdkServerConfigWithInstance {
     tools: [
       tool(
         "browser_connect",
-        "Connect to the user's real Chrome browser via CDP. The user must have Chrome running with --remote-debugging-port=9222.",
+        "Connect to the user's real Chrome browser via CDP. Chrome will be auto-launched if not already running.",
         { tab_index: z.number().optional().describe("Tab index (default: 0)") },
         async (args) => callTool("browser_connect", args)
       ),
@@ -126,11 +126,8 @@ export function createBrowserMcpServer(): McpSdkServerConfigWithInstance {
         { expression: z.string().describe("JavaScript expression to evaluate") },
         async (args) => callTool("browser_evaluate", args)
       ),
-      tool(
-        "browser_list_tabs",
-        "List all open tabs in the user's browser.",
-        {},
-        async () => callTool("browser_list_tabs", {})
+      tool("browser_list_tabs", "List all open tabs in the user's browser.", {}, async () =>
+        callTool("browser_list_tabs", {})
       ),
       tool(
         "browser_switch_tab",
@@ -148,13 +145,8 @@ export function createBrowserMcpServer(): McpSdkServerConfigWithInstance {
         "browser_request_user_action",
         "Request the user to perform an action in their browser (login, CAPTCHA, 2FA, etc.).",
         {
-          message: z
-            .string()
-            .describe("Clear description of what the user needs to do"),
-          wait_seconds: z
-            .number()
-            .optional()
-            .describe("How long to wait (default: 60)"),
+          message: z.string().describe("Clear description of what the user needs to do"),
+          wait_seconds: z.number().optional().describe("How long to wait (default: 60)"),
         },
         async (args) => callTool("browser_request_user_action", args)
       ),
@@ -170,9 +162,7 @@ export interface AgentToolsDeps {
   taskId: string;
 }
-export function createAgentToolsServer(
-  deps: AgentToolsDeps
-): McpSdkServerConfigWithInstance {
+export function createAgentToolsServer(deps: AgentToolsDeps): McpSdkServerConfigWithInstance {
   const { memoryManager, skillManager, taskId } = deps;
   return createSdkMcpServer({
@@ -183,30 +173,21 @@ export function createAgentToolsServer(
         "memory_store",
         "Store a memory about the user that persists across conversations. Use when you learn preferences, habits, or standing instructions.",
         {
-          content: z
-            .string()
-            .describe("What to remember (concise, factual statement)"),
+          content: z.string().describe("What to remember (concise, factual statement)"),
           category: z
             .string()
             .optional()
-            .describe(
-              "Category: general, preference, instruction, context, skill_learned, fact"
-            ),
+            .describe("Category: general, preference, instruction, context, skill_learned, fact"),
           importance: z
             .number()
             .optional()
             .describe("Importance 1-10 (default: 5). Use 8+ for instructions"),
-          tags: z
-            .array(z.string())
-            .optional()
-            .describe("Optional tags for searchability"),
+          tags: z.array(z.string()).optional().describe("Optional tags for searchability"),
         },
         async (args) => {
           if (!memoryManager) {
             return {
-              content: [
-                { type: "text", text: "Memory manager not available." },
-              ],
+              content: [{ type: "text", text: "Memory manager not available." }],
             };
           }
           const mem = await memoryManager.remember(
@@ -226,19 +207,10 @@ export function createAgentToolsServer(
         "skill_create",
         "Create a new reusable skill from a workflow you just executed. Write generic, reusable instructions with placeholders like {product}, {query}.",
         {
-          name: z
-            .string()
-            .describe("Skill name in kebab-case, e.g. 'flight-booking'"),
-          description: z
-            .string()
-            .describe("One-line description of what this skill does"),
-          instructions: z
-            .string()
-            .describe("Markdown step-by-step instructions"),
-          emoji: z
-            .string()
-            .optional()
-            .describe("Single emoji representing this skill"),
+          name: z.string().describe("Skill name in kebab-case, e.g. 'flight-booking'"),
+          description: z.string().describe("One-line description of what this skill does"),
+          instructions: z.string().describe("Markdown step-by-step instructions"),
+          emoji: z.string().optional().describe("Single emoji representing this skill"),
         },
         async (args) => {
           // Check for duplicates
@@ -254,11 +226,7 @@ export function createAgentToolsServer(
             };
           }
-          const filePath = skillManager.create(
-            args.name,
-            args.description,
-            args.instructions
-          );
+          const filePath = skillManager.create(args.name, args.description, args.instructions);
           // Add emoji metadata if provided
           if (args.emoji) {
@@ -293,10 +261,7 @@ export function createAgentToolsServer(
           improved_instructions: z
             .string()
             .describe("Full updated markdown instructions (not a diff)"),
-          description: z
-            .string()
-            .optional()
-            .describe("Updated description (optional)"),
+          description: z.string().optional().describe("Updated description (optional)"),
         },
         async (args) => {
           const existing = skillManager.get(args.name);
@@ -321,9 +286,7 @@ export function createAgentToolsServer(
               args.description || existing.description,
               args.improved_instructions
             );
-            log.success(
-              `Self-improvement: overrode bundled skill "${args.name}"`
-            );
+            log.success(`Self-improvement: overrode bundled skill "${args.name}"`);
             return {
               content: [
                 {

package/src/agent/processor.ts CHANGED

@@ -39,6 +39,8 @@ KEY PRINCIPLE: You operate the user's real browser, not a headless sandbox. This
 - When you navigate to amazon.com, you see the user's logged-in Amazon
 - If a site needs login, ask the user to log in using browser_request_user_action
 - You are like a human assistant sitting at the user's computer
+- Chrome is automatically managed — just call browser_connect and it will auto-launch if needed
+- NEVER ask the user to manually start Chrome or run any terminal commands for browser setup
 Available capabilities:
 1. BROWSER CONTROL (user's real Chrome via CDP):
@@ -97,7 +99,9 @@ export class TaskProcessor {
     resetEventSequence();
     // Wall-clock timeout for the entire task (default: 10 minutes)
-    const taskTimeoutMs = ((config as unknown as Record<string, unknown>).taskTimeoutMinutes as number || 10) * 60_000;
+    const taskTimeoutMs =
+      (((config as unknown as Record<string, unknown>).taskTimeoutMinutes as number) || 10) *
+      60_000;
     // Set correlation ID for this task's log messages
     newCorrelationId();
@@ -113,10 +117,7 @@ export class TaskProcessor {
       await emitEvent(task.id, "status_change", { status: "running" });
       // Build system prompt with memories + skills
-      let systemPrompt = BASE_SYSTEM_PROMPT.replace(
-        "{workspace_path}",
-        config.workspacePath
-      );
+      let systemPrompt = BASE_SYSTEM_PROMPT.replace("{workspace_path}", config.workspacePath);
       // Inject memories
       if (this.memoryManager) {
@@ -163,9 +164,7 @@ export class TaskProcessor {
         "Glob",
         "Grep",
         // Browser MCP tools
-        ...BROWSER_TOOL_NAMES.map(
-          (n) => `mcp__assistme-browser__${n}`
-        ),
+        ...BROWSER_TOOL_NAMES.map((n) => `mcp__assistme-browser__${n}`),
         // Agent MCP tools (memory, skills)
         "mcp__assistme-agent__memory_store",
         "mcp__assistme-agent__skill_create",
@@ -233,9 +232,7 @@ export class TaskProcessor {
                   });
                 } else if (block.type === "thinking" && "thinking" in block) {
                   const thinkingText = (block as unknown as { thinking: string }).thinking;
-                  log.debug(
-                    `Thinking: ${thinkingText.slice(0, 100)}...`
-                  );
+                  log.debug(`Thinking: ${thinkingText.slice(0, 100)}...`);
                   await emitEvent(task.id, "thinking", {
                     text: thinkingText,
                   });
@@ -283,14 +280,11 @@ export class TaskProcessor {
       }
       // Complete the task (with retry for transient DB failures)
-      await withRetry(
-        () => completeTask(task.id, finalResponse, tokenUsage),
-        {
-          maxRetries: 2,
-          baseDelayMs: 300,
-          label: "completeTask",
-        }
-      );
+      await withRetry(() => completeTask(task.id, finalResponse, tokenUsage), {
+        maxRetries: 2,
+        baseDelayMs: 300,
+        label: "completeTask",
+      });
       await emitEvent(task.id, "status_change", { status: "completed" });
       log.success("Task completed.");
@@ -309,9 +303,7 @@ export class TaskProcessor {
                   tags: mem.tags,
                   sourceMessageId: taskIdRef,
                 });
-                log.info(
-                  `Memory extracted: [${mem.category}] ${mem.content.slice(0, 60)}...`
-                );
+                log.info(`Memory extracted: [${mem.category}] ${mem.content.slice(0, 60)}...`);
               } catch {
                 // Non-critical — skip individual memory failures
               }
@@ -326,9 +318,7 @@ export class TaskProcessor {
       // Auto-extract skills from multi-step workflows
       const realToolCalls = toolCallRecords.filter(
         (tc) =>
-          tc.name !== "memory_store" &&
-          tc.name !== "skill_create" &&
-          tc.name !== "skill_improve"
+          tc.name !== "memory_store" && tc.name !== "skill_create" && tc.name !== "skill_improve"
       );
       if (realToolCalls.length >= 3 && finalResponse) {
@@ -347,11 +337,7 @@ export class TaskProcessor {
               return;
             }
-            const filePath = sm.create(
-              extracted.name,
-              extracted.description,
-              extracted.steps
-            );
+            const filePath = sm.create(extracted.name, extracted.description, extracted.steps);
             if (extracted.emoji) {
               const { writeFile } = await import("fs/promises");
@@ -373,21 +359,12 @@ export class TaskProcessor {
           const skill = sm.get(skillName);
           if (!skill) continue;
-          analyzeSkillImprovement(
-            skill.content,
-            task.prompt,
-            finalResponse,
-            realToolCalls
-          )
+          analyzeSkillImprovement(skill.content, task.prompt, finalResponse, realToolCalls)
             .then(async (improvement) => {
               if (!improvement) return;
               if (skill.source === "bundled") {
-                sm.create(
-                  skillName,
-                  skill.description,
-                  improvement.improved_steps
-                );
+                sm.create(skillName, skill.description, improvement.improved_steps);
               } else {
                 sm.update(skillName, improvement.improved_steps);
               }

package/src/index.ts CHANGED

@@ -332,11 +332,20 @@ program
         case "chrome_not_found":
           launchSpinner.fail("Chrome not found on this system");
           log.info("Please install Google Chrome and try again.");
-          log.info('Or run "assistme browser setup" for manual instructions.');
+          break;
+        case "port_conflict":
+          launchSpinner.fail("Port 9222 is in use by another process");
+          log.info(launchResult.detail ?? "Stop the conflicting process or use a different port.");
           break;
         default:
           launchSpinner.fail("Failed to start Chrome with remote debugging");
-          log.info('Run "assistme browser setup" for manual setup instructions.');
+          if (launchResult.detail) {
+            log.info(launchResult.detail);
+          }
+          if (launchResult.chromePath) {
+            log.info(`Chrome binary: ${launchResult.chromePath}`);
+          }
+          log.info("Browser will be auto-launched when the first task needs it.");
           break;
       }
     }

package/src/tools/browser.ts CHANGED

@@ -15,9 +15,10 @@
  */
 import { WebSocket } from "ws";
-import { execSync, spawn } from "node:child_process";
+import { execSync, spawn, type ChildProcess } from "node:child_process";
 import { platform } from "node:os";
 import { existsSync } from "node:fs";
+import { log } from "../utils/logger.js";
 interface CDPTab {
   id: string;
@@ -89,9 +90,8 @@ export class BrowserController {
     if (!available) {
       throw new Error(
         `Cannot connect to browser on port ${this.debugPort}. ` +
-          "Please start Chrome with: --remote-debugging-port=9222\n" +
-          "macOS: open -a 'Google Chrome' --args --remote-debugging-port=9222\n" +
-          "Linux: google-chrome --remote-debugging-port=9222"
+          "Chrome remote debugging is not reachable. " +
+          "Please ensure Chrome is running with remote debugging enabled."
       );
     }
@@ -606,9 +606,21 @@ export function isChromeRunning(): boolean {
       });
       return out.includes("chrome.exe");
     }
-    // macOS and Linux — pgrep automatically excludes its own process
-    execSync("pgrep -f chrome", { stdio: ["pipe", "pipe", "pipe"] });
-    return true;
+    if (platform() === "darwin") {
+      // Match the main Chrome process (not helper/renderer sub-processes).
+      // No trailing $ — the process command line includes flags after the binary.
+      const out = execSync('pgrep -f "Google Chrome.app/Contents/MacOS/Google Chrome"', {
+        encoding: "utf-8",
+        stdio: ["pipe", "pipe", "pipe"],
+      });
+      return out.trim().length > 0;
+    }
+    // Linux — match common chrome binary names
+    const out = execSync("pgrep -f '(chrome|chromium)' 2>/dev/null || true", {
+      encoding: "utf-8",
+      stdio: ["pipe", "pipe", "pipe"],
+    });
+    return out.trim().length > 0;
   } catch {
     return false;
   }
@@ -667,93 +679,164 @@ async function killChromeGracefully(): Promise<void> {
 /**
  * Spawn Chrome with the remote-debugging-port flag.
+ * Returns the child process so callers can detect early failures.
  */
-function spawnChrome(chromePath: string, port: number): void {
-  const os = platform();
+function spawnChrome(chromePath: string, port: number): ChildProcess {
   const cdpFlag = `--remote-debugging-port=${port}`;
-  if (os === "darwin") {
-    // Determine app name from binary path
-    const appName = chromePath.includes("Chromium")
-      ? "Chromium"
-      : chromePath.includes("Canary")
-        ? "Google Chrome Canary"
-        : "Google Chrome";
-    spawn("open", ["-a", appName, "--args", cdpFlag, "--restore-last-session"], {
-      detached: true,
-      stdio: "ignore",
-    }).unref();
-  } else {
-    spawn(chromePath, [cdpFlag, "--restore-last-session"], {
-      detached: true,
-      stdio: "ignore",
-    }).unref();
-  }
+  // Always invoke the Chrome binary directly rather than `open -a`.
+  // On macOS, `open -a` silently ignores --args when Chrome is already
+  // running, which would cause CDP to never be enabled.
+  log.debug(`Spawning Chrome: ${chromePath} ${cdpFlag} --restore-last-session`);
+  const child = spawn(chromePath, [cdpFlag, "--restore-last-session"], {
+    detached: true,
+    stdio: "ignore",
+  });
+  child.on("error", (err) => {
+    log.error(`Chrome spawn error: ${err.message}`);
+  });
+  child.unref();
+  return child;
 }
 /**
  * Wait for CDP to become reachable.
  */
-async function waitForCDP(browser: BrowserController, timeoutMs = 15000): Promise<boolean> {
+async function waitForCDP(browser: BrowserController, timeoutMs = 30000): Promise<boolean> {
   const start = Date.now();
+  let attempts = 0;
   while (Date.now() - start < timeoutMs) {
-    if (await browser.isAvailable()) return true;
+    attempts++;
+    if (await browser.isAvailable()) {
+      log.debug(`CDP became reachable after ${attempts} attempts (${Date.now() - start}ms)`);
+      return true;
+    }
     await new Promise((r) => setTimeout(r, 500));
   }
+  log.debug(`CDP not reachable after ${attempts} attempts (${timeoutMs}ms timeout)`);
   return false;
 }
+/**
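+ * Check whether something other than Chrome's CDP endpoint answers HTTP on the port.
+ * Any fetch failure (e.g. connection refused or the 1 s timeout) is treated as "port is free".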
+ */
+async function isPortInUse(port: number): Promise<boolean> {
+  try {
+    const res = await fetch(`http://127.0.0.1:${port}/json/version`, {
+      signal: AbortSignal.timeout(1000),
+    });
+    // If we get a response but it's not Chrome, the port is occupied
+    const body = await res.text();
+    return !body.includes("Chrome");
+  } catch {
+    // Connection refused → port is free
+    return false;
+  }
+}
 /**
  * Result of an auto-launch attempt.
  */
 export interface AutoLaunchResult {
   success: boolean;
-  action: "already_available" | "launched" | "restarted" | "chrome_not_found" | "launch_failed";
+  action:
+    | "already_available"
+    | "launched"
+    | "restarted"
+    | "chrome_not_found"
+    | "launch_failed"
+    | "port_conflict";
   chromePath?: string;
+  detail?: string;
 }
 /**
  * Ensure Chrome is running with CDP enabled.
  *
  * 1. Already listening on the port → return immediately.
- * 2. Chrome not running → launch with --remote-debugging-port.
- * 3. Chrome running without CDP → graceful quit, then relaunch with CDP.
+ * 2. Port occupied by non-Chrome process → report conflict.
+ * 3. Chrome running without CDP → graceful quit, then relaunch with CDP.
+ *    Chrome's session restore brings back all tabs.
+ * 4. Chrome not running → launch with --remote-debugging-port.
+ *
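+ * If CDP is still unreachable after the first wait (30 s by default), waits once more
+ * (15 s) without relaunching Chrome before reporting "launch_failed".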
  */
 export async function ensureBrowserAvailable(port = 9222): Promise<AutoLaunchResult> {
   const browser = getBrowser(port);
   // Case 1: CDP already reachable
   if (await browser.isAvailable()) {
+    log.debug("CDP already reachable — no launch needed");
     return { success: true, action: "already_available" };
   }
+  // Case 2: Port occupied by something else
+  if (await isPortInUse(port)) {
+    log.debug(`Port ${port} is in use by a non-Chrome process`);
+    return {
+      success: false,
+      action: "port_conflict",
+      detail: `Port ${port} is already in use by another process. Try a different port or stop the conflicting process.`,
+    };
+  }
   // Find Chrome binary
   const chromePath = findChromePath();
   if (!chromePath) {
+    log.debug("Chrome binary not found on this system");
     return { success: false, action: "chrome_not_found" };
   }
+  log.debug(`Found Chrome at: ${chromePath}`);
   const running = isChromeRunning();
+  log.debug(`Chrome currently running: ${running}`);
-  // Case 2: Chrome running without CDP → restart
+  // Case 3: Chrome running without CDP → restart
   if (running) {
+    log.debug("Killing Chrome gracefully for restart with CDP...");
     await killChromeGracefully();
     spawnChrome(chromePath, port);
     if (await waitForCDP(browser)) {
       return { success: true, action: "restarted", chromePath };
     }
-    return { success: false, action: "launch_failed", chromePath };
+    // Retry once — Chrome can be slow to start (extensions, session restore)
+    log.debug("First CDP wait timed out after restart, retrying...");
+    if (await waitForCDP(browser, 15000)) {
+      return { success: true, action: "restarted", chromePath };
+    }
+    return {
+      success: false,
+      action: "launch_failed",
+      chromePath,
+      detail: "Chrome was restarted but CDP did not become reachable within timeout.",
+    };
   }
-  // Case 3: Chrome not running → launch
+  // Case 4: Chrome not running → launch
   spawnChrome(chromePath, port);
   if (await waitForCDP(browser)) {
     return { success: true, action: "launched", chromePath };
   }
-  return { success: false, action: "launch_failed", chromePath };
+  // Retry once
+  log.debug("First CDP wait timed out after launch, retrying...");
+  if (await waitForCDP(browser, 15000)) {
+    return { success: true, action: "launched", chromePath };
+  }
+  return {
+    success: false,
+    action: "launch_failed",
+    chromePath,
+    detail: "Chrome was launched but CDP did not become reachable within timeout.",
+  };
 }
 // ── Singleton ───────────────────────────────────────────────────────

package/src/tools/index.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { getBrowser } from "./browser.js";
+import { getBrowser, ensureBrowserAvailable } from "./browser.js";
 import {
   readFileContent,
   writeFileContent,
@@ -99,13 +99,14 @@ export function getToolDefinitions(): ToolDefinition[] {
     {
       name: "browser_connect",
       description:
-        "Connect to the user's real Chrome browser via CDP. The user must have Chrome running with --remote-debugging-port=9222. This shares the user's actual browser session including all logins and cookies.",
+        "Connect to the user's real Chrome browser via CDP. Chrome will be auto-launched if not already running. This shares the user's actual browser session including all logins and cookies.",
       input_schema: {
         type: "object",
         properties: {
           tab_index: {
             type: "number",
-            description: "Tab index to connect to (default: 0, the first tab). Use browser_list_tabs to see available tabs.",
+            description:
+              "Tab index to connect to (default: 0, the first tab). Use browser_list_tabs to see available tabs.",
           },
         },
       },
@@ -143,7 +144,8 @@ export function getToolDefinitions(): ToolDefinition[] {
         properties: {
           selector: {
             type: "string",
-            description: "CSS selector of the element to click (e.g. '#submit-btn', 'a.nav-link', 'button:nth-of-type(2)')",
+            description:
+              "CSS selector of the element to click (e.g. '#submit-btn', 'a.nav-link', 'button:nth-of-type(2)')",
           },
         },
         required: ["selector"],
@@ -268,10 +270,7 @@ export function getToolDefinitions(): ToolDefinition[] {
   ];
 }
-export async function executeTool(
-  name: string,
-  input: Record<string, unknown>
-): Promise<string> {
+export async function executeTool(name: string, input: Record<string, unknown>): Promise<string> {
   const browser = getBrowser();
   switch (name) {
@@ -298,8 +297,19 @@ export async function executeTool(
       return executeShell(input.command as string, input.cwd as string | undefined);
     // ── Browser (CDP) ───────────────────────────────────────
-    case "browser_connect":
+    case "browser_connect": {
+      // Auto-launch Chrome if CDP is not reachable
+      if (!(await browser.isAvailable())) {
+        const result = await ensureBrowserAvailable();
+        if (!result.success) {
+          throw new Error(
+            `Failed to auto-launch Chrome (${result.action}). ` +
+              "Please ensure Google Chrome is installed."
+          );
+        }
+      }
       return browser.connect(input.tab_index as number | undefined);
+    }
     case "browser_navigate":
       if (!browser.isConnected()) await browser.connect();
       return browser.navigate(input.url as string);
@@ -314,9 +324,7 @@ export async function executeTool(
     case "browser_press_key":
       return browser.pressKey(input.key as string);
     case "browser_scroll":
-      return (input.direction as string) === "up"
-        ? browser.scrollUp()
-        : browser.scrollDown();
+      return (input.direction as string) === "up" ? browser.scrollUp() : browser.scrollDown();
     case "browser_get_elements":
       return browser.getInteractiveElements();
     case "browser_evaluate":