assistme 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -432,9 +432,7 @@ var BrowserController = class {
432
432
  const available = await this.isAvailable();
433
433
  if (!available) {
434
434
  throw new Error(
435
- `Cannot connect to browser on port ${this.debugPort}. Please start Chrome with: --remote-debugging-port=9222
436
- macOS: open -a 'Google Chrome' --args --remote-debugging-port=9222
437
- Linux: google-chrome --remote-debugging-port=9222`
435
+ `Cannot connect to browser on port ${this.debugPort}. Chrome remote debugging is not reachable. Please ensure Chrome is running with remote debugging enabled.`
438
436
  );
439
437
  }
440
438
  const tabs = await this.getTabs();
@@ -855,14 +853,24 @@ function findChromePath() {
855
853
  function isChromeRunning() {
856
854
  try {
857
855
  if (platform() === "win32") {
858
- const out = execSync('tasklist /FI "IMAGENAME eq chrome.exe" /NH', {
856
+ const out2 = execSync('tasklist /FI "IMAGENAME eq chrome.exe" /NH', {
859
857
  encoding: "utf-8",
860
858
  stdio: ["pipe", "pipe", "pipe"]
861
859
  });
862
- return out.includes("chrome.exe");
860
+ return out2.includes("chrome.exe");
863
861
  }
864
- execSync("pgrep -f chrome", { stdio: ["pipe", "pipe", "pipe"] });
865
- return true;
862
+ if (platform() === "darwin") {
863
+ const out2 = execSync('pgrep -f "Google Chrome.app/Contents/MacOS/Google Chrome"', {
864
+ encoding: "utf-8",
865
+ stdio: ["pipe", "pipe", "pipe"]
866
+ });
867
+ return out2.trim().length > 0;
868
+ }
869
+ const out = execSync("pgrep -f '(chrome|chromium)' 2>/dev/null || true", {
870
+ encoding: "utf-8",
871
+ stdio: ["pipe", "pipe", "pipe"]
872
+ });
873
+ return out.trim().length > 0;
866
874
  } catch {
867
875
  return false;
868
876
  }
@@ -908,52 +916,97 @@ async function killChromeGracefully() {
908
916
  await new Promise((r) => setTimeout(r, 1e3));
909
917
  }
910
918
  function spawnChrome(chromePath, port) {
911
- const os = platform();
912
919
  const cdpFlag = `--remote-debugging-port=${port}`;
913
- if (os === "darwin") {
914
- const appName = chromePath.includes("Chromium") ? "Chromium" : chromePath.includes("Canary") ? "Google Chrome Canary" : "Google Chrome";
915
- spawn("open", ["-a", appName, "--args", cdpFlag, "--restore-last-session"], {
916
- detached: true,
917
- stdio: "ignore"
918
- }).unref();
919
- } else {
920
- spawn(chromePath, [cdpFlag, "--restore-last-session"], {
921
- detached: true,
922
- stdio: "ignore"
923
- }).unref();
924
- }
920
+ log.debug(`Spawning Chrome: ${chromePath} ${cdpFlag} --restore-last-session`);
921
+ const child = spawn(chromePath, [cdpFlag, "--restore-last-session"], {
922
+ detached: true,
923
+ stdio: "ignore"
924
+ });
925
+ child.on("error", (err) => {
926
+ log.error(`Chrome spawn error: ${err.message}`);
927
+ });
928
+ child.unref();
929
+ return child;
925
930
  }
926
- async function waitForCDP(browser, timeoutMs = 15e3) {
931
+ async function waitForCDP(browser, timeoutMs = 3e4) {
927
932
  const start = Date.now();
933
+ let attempts = 0;
928
934
  while (Date.now() - start < timeoutMs) {
929
- if (await browser.isAvailable()) return true;
935
+ attempts++;
936
+ if (await browser.isAvailable()) {
937
+ log.debug(`CDP became reachable after ${attempts} attempts (${Date.now() - start}ms)`);
938
+ return true;
939
+ }
930
940
  await new Promise((r) => setTimeout(r, 500));
931
941
  }
942
+ log.debug(`CDP not reachable after ${attempts} attempts (${timeoutMs}ms timeout)`);
932
943
  return false;
933
944
  }
945
+ async function isPortInUse(port) {
946
+ try {
947
+ const res = await fetch(`http://127.0.0.1:${port}/json/version`, {
948
+ signal: AbortSignal.timeout(1e3)
949
+ });
950
+ const body = await res.text();
951
+ return !body.includes("Chrome");
952
+ } catch {
953
+ return false;
954
+ }
955
+ }
934
956
  async function ensureBrowserAvailable(port = 9222) {
935
957
  const browser = getBrowser(port);
936
958
  if (await browser.isAvailable()) {
959
+ log.debug("CDP already reachable \u2014 no launch needed");
937
960
  return { success: true, action: "already_available" };
938
961
  }
962
+ if (await isPortInUse(port)) {
963
+ log.debug(`Port ${port} is in use by a non-Chrome process`);
964
+ return {
965
+ success: false,
966
+ action: "port_conflict",
967
+ detail: `Port ${port} is already in use by another process. Try a different port or stop the conflicting process.`
968
+ };
969
+ }
939
970
  const chromePath = findChromePath();
940
971
  if (!chromePath) {
972
+ log.debug("Chrome binary not found on this system");
941
973
  return { success: false, action: "chrome_not_found" };
942
974
  }
975
+ log.debug(`Found Chrome at: ${chromePath}`);
943
976
  const running = isChromeRunning();
977
+ log.debug(`Chrome currently running: ${running}`);
944
978
  if (running) {
979
+ log.debug("Killing Chrome gracefully for restart with CDP...");
945
980
  await killChromeGracefully();
946
981
  spawnChrome(chromePath, port);
947
982
  if (await waitForCDP(browser)) {
948
983
  return { success: true, action: "restarted", chromePath };
949
984
  }
950
- return { success: false, action: "launch_failed", chromePath };
985
+ log.debug("First CDP wait timed out after restart, retrying...");
986
+ if (await waitForCDP(browser, 15e3)) {
987
+ return { success: true, action: "restarted", chromePath };
988
+ }
989
+ return {
990
+ success: false,
991
+ action: "launch_failed",
992
+ chromePath,
993
+ detail: "Chrome was restarted but CDP did not become reachable within timeout."
994
+ };
951
995
  }
952
996
  spawnChrome(chromePath, port);
953
997
  if (await waitForCDP(browser)) {
954
998
  return { success: true, action: "launched", chromePath };
955
999
  }
956
- return { success: false, action: "launch_failed", chromePath };
1000
+ log.debug("First CDP wait timed out after launch, retrying...");
1001
+ if (await waitForCDP(browser, 15e3)) {
1002
+ return { success: true, action: "launched", chromePath };
1003
+ }
1004
+ return {
1005
+ success: false,
1006
+ action: "launch_failed",
1007
+ chromePath,
1008
+ detail: "Chrome was launched but CDP did not become reachable within timeout."
1009
+ };
957
1010
  }
958
1011
  var browserInstance = null;
959
1012
  function getBrowser(port = 9222) {
@@ -2164,8 +2217,17 @@ async function executeTool(name, input) {
2164
2217
  case "execute_command":
2165
2218
  return executeShell(input.command, input.cwd);
2166
2219
  // ── Browser (CDP) ───────────────────────────────────────
2167
- case "browser_connect":
2220
+ case "browser_connect": {
2221
+ if (!await browser.isAvailable()) {
2222
+ const result = await ensureBrowserAvailable();
2223
+ if (!result.success) {
2224
+ throw new Error(
2225
+ `Failed to auto-launch Chrome (${result.action}). Please ensure Google Chrome is installed.`
2226
+ );
2227
+ }
2228
+ }
2168
2229
  return browser.connect(input.tab_index);
2230
+ }
2169
2231
  case "browser_navigate":
2170
2232
  if (!browser.isConnected()) await browser.connect();
2171
2233
  return browser.navigate(input.url);
@@ -2341,7 +2403,7 @@ function createBrowserMcpServer() {
2341
2403
  tools: [
2342
2404
  tool(
2343
2405
  "browser_connect",
2344
- "Connect to the user's real Chrome browser via CDP. The user must have Chrome running with --remote-debugging-port=9222.",
2406
+ "Connect to the user's real Chrome browser via CDP. Chrome will be auto-launched if not already running.",
2345
2407
  { tab_index: z.number().optional().describe("Tab index (default: 0)") },
2346
2408
  async (args) => callTool("browser_connect", args)
2347
2409
  ),
@@ -2459,18 +2521,14 @@ function createAgentToolsServer(deps) {
2459
2521
  "Store a memory about the user that persists across conversations. Use when you learn preferences, habits, or standing instructions.",
2460
2522
  {
2461
2523
  content: z.string().describe("What to remember (concise, factual statement)"),
2462
- category: z.string().optional().describe(
2463
- "Category: general, preference, instruction, context, skill_learned, fact"
2464
- ),
2524
+ category: z.string().optional().describe("Category: general, preference, instruction, context, skill_learned, fact"),
2465
2525
  importance: z.number().optional().describe("Importance 1-10 (default: 5). Use 8+ for instructions"),
2466
2526
  tags: z.array(z.string()).optional().describe("Optional tags for searchability")
2467
2527
  },
2468
2528
  async (args) => {
2469
2529
  if (!memoryManager) {
2470
2530
  return {
2471
- content: [
2472
- { type: "text", text: "Memory manager not available." }
2473
- ]
2531
+ content: [{ type: "text", text: "Memory manager not available." }]
2474
2532
  };
2475
2533
  }
2476
2534
  const mem = await memoryManager.remember(
@@ -2507,11 +2565,7 @@ function createAgentToolsServer(deps) {
2507
2565
  ]
2508
2566
  };
2509
2567
  }
2510
- const filePath = skillManager.create(
2511
- args.name,
2512
- args.description,
2513
- args.instructions
2514
- );
2568
+ const filePath = skillManager.create(args.name, args.description, args.instructions);
2515
2569
  if (args.emoji) {
2516
2570
  const skill = skillManager.get(args.name);
2517
2571
  if (skill) {
@@ -2571,9 +2625,7 @@ ${args.instructions}
2571
2625
  args.description || existing.description,
2572
2626
  args.improved_instructions
2573
2627
  );
2574
- log.success(
2575
- `Self-improvement: overrode bundled skill "${args.name}"`
2576
- );
2628
+ log.success(`Self-improvement: overrode bundled skill "${args.name}"`);
2577
2629
  return {
2578
2630
  content: [
2579
2631
  {
@@ -2670,6 +2722,8 @@ KEY PRINCIPLE: You operate the user's real browser, not a headless sandbox. This
2670
2722
  - When you navigate to amazon.com, you see the user's logged-in Amazon
2671
2723
  - If a site needs login, ask the user to log in using browser_request_user_action
2672
2724
  - You are like a human assistant sitting at the user's computer
2725
+ - Chrome is automatically managed \u2014 just call browser_connect and it will auto-launch if needed
2726
+ - NEVER ask the user to manually start Chrome or run any terminal commands for browser setup
2673
2727
 
2674
2728
  Available capabilities:
2675
2729
  1. BROWSER CONTROL (user's real Chrome via CDP):
@@ -2731,10 +2785,7 @@ var TaskProcessor = class {
2731
2785
  let tokenUsage;
2732
2786
  try {
2733
2787
  await emitEvent(task.id, "status_change", { status: "running" });
2734
- let systemPrompt = BASE_SYSTEM_PROMPT.replace(
2735
- "{workspace_path}",
2736
- config.workspacePath
2737
- );
2788
+ let systemPrompt = BASE_SYSTEM_PROMPT.replace("{workspace_path}", config.workspacePath);
2738
2789
  if (this.memoryManager) {
2739
2790
  try {
2740
2791
  const memoryPrompt = await this.memoryManager.buildMemoryPrompt();
@@ -2769,9 +2820,7 @@ var TaskProcessor = class {
2769
2820
  "Glob",
2770
2821
  "Grep",
2771
2822
  // Browser MCP tools
2772
- ...BROWSER_TOOL_NAMES.map(
2773
- (n) => `mcp__assistme-browser__${n}`
2774
- ),
2823
+ ...BROWSER_TOOL_NAMES.map((n) => `mcp__assistme-browser__${n}`),
2775
2824
  // Agent MCP tools (memory, skills)
2776
2825
  "mcp__assistme-agent__memory_store",
2777
2826
  "mcp__assistme-agent__skill_create",
@@ -2830,9 +2879,7 @@ var TaskProcessor = class {
2830
2879
  });
2831
2880
  } else if (block.type === "thinking" && "thinking" in block) {
2832
2881
  const thinkingText = block.thinking;
2833
- log.debug(
2834
- `Thinking: ${thinkingText.slice(0, 100)}...`
2835
- );
2882
+ log.debug(`Thinking: ${thinkingText.slice(0, 100)}...`);
2836
2883
  await emitEvent(task.id, "thinking", {
2837
2884
  text: thinkingText
2838
2885
  });
@@ -2871,14 +2918,11 @@ var TaskProcessor = class {
2871
2918
  } finally {
2872
2919
  clearTimeout(timeoutId);
2873
2920
  }
2874
- await withRetry(
2875
- () => completeTask(task.id, finalResponse, tokenUsage),
2876
- {
2877
- maxRetries: 2,
2878
- baseDelayMs: 300,
2879
- label: "completeTask"
2880
- }
2881
- );
2921
+ await withRetry(() => completeTask(task.id, finalResponse, tokenUsage), {
2922
+ maxRetries: 2,
2923
+ baseDelayMs: 300,
2924
+ label: "completeTask"
2925
+ });
2882
2926
  await emitEvent(task.id, "status_change", { status: "completed" });
2883
2927
  log.success("Task completed.");
2884
2928
  if (this.memoryManager && finalResponse) {
@@ -2892,9 +2936,7 @@ var TaskProcessor = class {
2892
2936
  tags: mem.tags,
2893
2937
  sourceMessageId: taskIdRef
2894
2938
  });
2895
- log.info(
2896
- `Memory extracted: [${mem.category}] ${mem.content.slice(0, 60)}...`
2897
- );
2939
+ log.info(`Memory extracted: [${mem.category}] ${mem.content.slice(0, 60)}...`);
2898
2940
  } catch {
2899
2941
  }
2900
2942
  }
@@ -2918,11 +2960,7 @@ var TaskProcessor = class {
2918
2960
  );
2919
2961
  return;
2920
2962
  }
2921
- const filePath = sm.create(
2922
- extracted.name,
2923
- extracted.description,
2924
- extracted.steps
2925
- );
2963
+ const filePath = sm.create(extracted.name, extracted.description, extracted.steps);
2926
2964
  if (extracted.emoji) {
2927
2965
  const { writeFile: writeFile2 } = await import("fs/promises");
2928
2966
  const metaJson = JSON.stringify({
@@ -2948,19 +2986,10 @@ ${extracted.steps}
2948
2986
  for (const skillName of usedSkillNames) {
2949
2987
  const skill = sm.get(skillName);
2950
2988
  if (!skill) continue;
2951
- analyzeSkillImprovement(
2952
- skill.content,
2953
- task.prompt,
2954
- finalResponse,
2955
- realToolCalls
2956
- ).then(async (improvement) => {
2989
+ analyzeSkillImprovement(skill.content, task.prompt, finalResponse, realToolCalls).then(async (improvement) => {
2957
2990
  if (!improvement) return;
2958
2991
  if (skill.source === "bundled") {
2959
- sm.create(
2960
- skillName,
2961
- skill.description,
2962
- improvement.improved_steps
2963
- );
2992
+ sm.create(skillName, skill.description, improvement.improved_steps);
2964
2993
  } else {
2965
2994
  sm.update(skillName, improvement.improved_steps);
2966
2995
  }
@@ -3216,11 +3245,20 @@ program.command("start", { isDefault: true }).description("Start the agent and l
3216
3245
  case "chrome_not_found":
3217
3246
  launchSpinner.fail("Chrome not found on this system");
3218
3247
  log.info("Please install Google Chrome and try again.");
3219
- log.info('Or run "assistme browser setup" for manual instructions.');
3248
+ break;
3249
+ case "port_conflict":
3250
+ launchSpinner.fail("Port 9222 is in use by another process");
3251
+ log.info(launchResult.detail ?? "Stop the conflicting process or use a different port.");
3220
3252
  break;
3221
3253
  default:
3222
3254
  launchSpinner.fail("Failed to start Chrome with remote debugging");
3223
- log.info('Run "assistme browser setup" for manual setup instructions.');
3255
+ if (launchResult.detail) {
3256
+ log.info(launchResult.detail);
3257
+ }
3258
+ if (launchResult.chromePath) {
3259
+ log.info(`Chrome binary: ${launchResult.chromePath}`);
3260
+ }
3261
+ log.info("Browser will be auto-launched when the first task needs it.");
3224
3262
  break;
3225
3263
  }
3226
3264
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "assistme",
3
- "version": "0.1.7",
3
+ "version": "0.1.9",
4
4
  "description": "AssistMe CLI Agent - AI-powered assistant that controls your real browser",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -49,7 +49,7 @@ export function createBrowserMcpServer(): McpSdkServerConfigWithInstance {
49
49
  tools: [
50
50
  tool(
51
51
  "browser_connect",
52
- "Connect to the user's real Chrome browser via CDP. The user must have Chrome running with --remote-debugging-port=9222.",
52
+ "Connect to the user's real Chrome browser via CDP. Chrome will be auto-launched if not already running.",
53
53
  { tab_index: z.number().optional().describe("Tab index (default: 0)") },
54
54
  async (args) => callTool("browser_connect", args)
55
55
  ),
@@ -126,11 +126,8 @@ export function createBrowserMcpServer(): McpSdkServerConfigWithInstance {
126
126
  { expression: z.string().describe("JavaScript expression to evaluate") },
127
127
  async (args) => callTool("browser_evaluate", args)
128
128
  ),
129
- tool(
130
- "browser_list_tabs",
131
- "List all open tabs in the user's browser.",
132
- {},
133
- async () => callTool("browser_list_tabs", {})
129
+ tool("browser_list_tabs", "List all open tabs in the user's browser.", {}, async () =>
130
+ callTool("browser_list_tabs", {})
134
131
  ),
135
132
  tool(
136
133
  "browser_switch_tab",
@@ -148,13 +145,8 @@ export function createBrowserMcpServer(): McpSdkServerConfigWithInstance {
148
145
  "browser_request_user_action",
149
146
  "Request the user to perform an action in their browser (login, CAPTCHA, 2FA, etc.).",
150
147
  {
151
- message: z
152
- .string()
153
- .describe("Clear description of what the user needs to do"),
154
- wait_seconds: z
155
- .number()
156
- .optional()
157
- .describe("How long to wait (default: 60)"),
148
+ message: z.string().describe("Clear description of what the user needs to do"),
149
+ wait_seconds: z.number().optional().describe("How long to wait (default: 60)"),
158
150
  },
159
151
  async (args) => callTool("browser_request_user_action", args)
160
152
  ),
@@ -170,9 +162,7 @@ export interface AgentToolsDeps {
170
162
  taskId: string;
171
163
  }
172
164
 
173
- export function createAgentToolsServer(
174
- deps: AgentToolsDeps
175
- ): McpSdkServerConfigWithInstance {
165
+ export function createAgentToolsServer(deps: AgentToolsDeps): McpSdkServerConfigWithInstance {
176
166
  const { memoryManager, skillManager, taskId } = deps;
177
167
 
178
168
  return createSdkMcpServer({
@@ -183,30 +173,21 @@ export function createAgentToolsServer(
183
173
  "memory_store",
184
174
  "Store a memory about the user that persists across conversations. Use when you learn preferences, habits, or standing instructions.",
185
175
  {
186
- content: z
187
- .string()
188
- .describe("What to remember (concise, factual statement)"),
176
+ content: z.string().describe("What to remember (concise, factual statement)"),
189
177
  category: z
190
178
  .string()
191
179
  .optional()
192
- .describe(
193
- "Category: general, preference, instruction, context, skill_learned, fact"
194
- ),
180
+ .describe("Category: general, preference, instruction, context, skill_learned, fact"),
195
181
  importance: z
196
182
  .number()
197
183
  .optional()
198
184
  .describe("Importance 1-10 (default: 5). Use 8+ for instructions"),
199
- tags: z
200
- .array(z.string())
201
- .optional()
202
- .describe("Optional tags for searchability"),
185
+ tags: z.array(z.string()).optional().describe("Optional tags for searchability"),
203
186
  },
204
187
  async (args) => {
205
188
  if (!memoryManager) {
206
189
  return {
207
- content: [
208
- { type: "text", text: "Memory manager not available." },
209
- ],
190
+ content: [{ type: "text", text: "Memory manager not available." }],
210
191
  };
211
192
  }
212
193
  const mem = await memoryManager.remember(
@@ -226,19 +207,10 @@ export function createAgentToolsServer(
226
207
  "skill_create",
227
208
  "Create a new reusable skill from a workflow you just executed. Write generic, reusable instructions with placeholders like {product}, {query}.",
228
209
  {
229
- name: z
230
- .string()
231
- .describe("Skill name in kebab-case, e.g. 'flight-booking'"),
232
- description: z
233
- .string()
234
- .describe("One-line description of what this skill does"),
235
- instructions: z
236
- .string()
237
- .describe("Markdown step-by-step instructions"),
238
- emoji: z
239
- .string()
240
- .optional()
241
- .describe("Single emoji representing this skill"),
210
+ name: z.string().describe("Skill name in kebab-case, e.g. 'flight-booking'"),
211
+ description: z.string().describe("One-line description of what this skill does"),
212
+ instructions: z.string().describe("Markdown step-by-step instructions"),
213
+ emoji: z.string().optional().describe("Single emoji representing this skill"),
242
214
  },
243
215
  async (args) => {
244
216
  // Check for duplicates
@@ -254,11 +226,7 @@ export function createAgentToolsServer(
254
226
  };
255
227
  }
256
228
 
257
- const filePath = skillManager.create(
258
- args.name,
259
- args.description,
260
- args.instructions
261
- );
229
+ const filePath = skillManager.create(args.name, args.description, args.instructions);
262
230
 
263
231
  // Add emoji metadata if provided
264
232
  if (args.emoji) {
@@ -293,10 +261,7 @@ export function createAgentToolsServer(
293
261
  improved_instructions: z
294
262
  .string()
295
263
  .describe("Full updated markdown instructions (not a diff)"),
296
- description: z
297
- .string()
298
- .optional()
299
- .describe("Updated description (optional)"),
264
+ description: z.string().optional().describe("Updated description (optional)"),
300
265
  },
301
266
  async (args) => {
302
267
  const existing = skillManager.get(args.name);
@@ -321,9 +286,7 @@ export function createAgentToolsServer(
321
286
  args.description || existing.description,
322
287
  args.improved_instructions
323
288
  );
324
- log.success(
325
- `Self-improvement: overrode bundled skill "${args.name}"`
326
- );
289
+ log.success(`Self-improvement: overrode bundled skill "${args.name}"`);
327
290
  return {
328
291
  content: [
329
292
  {
@@ -39,6 +39,8 @@ KEY PRINCIPLE: You operate the user's real browser, not a headless sandbox. This
39
39
  - When you navigate to amazon.com, you see the user's logged-in Amazon
40
40
  - If a site needs login, ask the user to log in using browser_request_user_action
41
41
  - You are like a human assistant sitting at the user's computer
42
+ - Chrome is automatically managed — just call browser_connect and it will auto-launch if needed
43
+ - NEVER ask the user to manually start Chrome or run any terminal commands for browser setup
42
44
 
43
45
  Available capabilities:
44
46
  1. BROWSER CONTROL (user's real Chrome via CDP):
@@ -97,7 +99,9 @@ export class TaskProcessor {
97
99
  resetEventSequence();
98
100
 
99
101
  // Wall-clock timeout for the entire task (default: 10 minutes)
100
- const taskTimeoutMs = ((config as unknown as Record<string, unknown>).taskTimeoutMinutes as number || 10) * 60_000;
102
+ const taskTimeoutMs =
103
+ (((config as unknown as Record<string, unknown>).taskTimeoutMinutes as number) || 10) *
104
+ 60_000;
101
105
 
102
106
  // Set correlation ID for this task's log messages
103
107
  newCorrelationId();
@@ -113,10 +117,7 @@ export class TaskProcessor {
113
117
  await emitEvent(task.id, "status_change", { status: "running" });
114
118
 
115
119
  // Build system prompt with memories + skills
116
- let systemPrompt = BASE_SYSTEM_PROMPT.replace(
117
- "{workspace_path}",
118
- config.workspacePath
119
- );
120
+ let systemPrompt = BASE_SYSTEM_PROMPT.replace("{workspace_path}", config.workspacePath);
120
121
 
121
122
  // Inject memories
122
123
  if (this.memoryManager) {
@@ -163,9 +164,7 @@ export class TaskProcessor {
163
164
  "Glob",
164
165
  "Grep",
165
166
  // Browser MCP tools
166
- ...BROWSER_TOOL_NAMES.map(
167
- (n) => `mcp__assistme-browser__${n}`
168
- ),
167
+ ...BROWSER_TOOL_NAMES.map((n) => `mcp__assistme-browser__${n}`),
169
168
  // Agent MCP tools (memory, skills)
170
169
  "mcp__assistme-agent__memory_store",
171
170
  "mcp__assistme-agent__skill_create",
@@ -233,9 +232,7 @@ export class TaskProcessor {
233
232
  });
234
233
  } else if (block.type === "thinking" && "thinking" in block) {
235
234
  const thinkingText = (block as unknown as { thinking: string }).thinking;
236
- log.debug(
237
- `Thinking: ${thinkingText.slice(0, 100)}...`
238
- );
235
+ log.debug(`Thinking: ${thinkingText.slice(0, 100)}...`);
239
236
  await emitEvent(task.id, "thinking", {
240
237
  text: thinkingText,
241
238
  });
@@ -283,14 +280,11 @@ export class TaskProcessor {
283
280
  }
284
281
 
285
282
  // Complete the task (with retry for transient DB failures)
286
- await withRetry(
287
- () => completeTask(task.id, finalResponse, tokenUsage),
288
- {
289
- maxRetries: 2,
290
- baseDelayMs: 300,
291
- label: "completeTask",
292
- }
293
- );
283
+ await withRetry(() => completeTask(task.id, finalResponse, tokenUsage), {
284
+ maxRetries: 2,
285
+ baseDelayMs: 300,
286
+ label: "completeTask",
287
+ });
294
288
  await emitEvent(task.id, "status_change", { status: "completed" });
295
289
  log.success("Task completed.");
296
290
 
@@ -309,9 +303,7 @@ export class TaskProcessor {
309
303
  tags: mem.tags,
310
304
  sourceMessageId: taskIdRef,
311
305
  });
312
- log.info(
313
- `Memory extracted: [${mem.category}] ${mem.content.slice(0, 60)}...`
314
- );
306
+ log.info(`Memory extracted: [${mem.category}] ${mem.content.slice(0, 60)}...`);
315
307
  } catch {
316
308
  // Non-critical — skip individual memory failures
317
309
  }
@@ -326,9 +318,7 @@ export class TaskProcessor {
326
318
  // Auto-extract skills from multi-step workflows
327
319
  const realToolCalls = toolCallRecords.filter(
328
320
  (tc) =>
329
- tc.name !== "memory_store" &&
330
- tc.name !== "skill_create" &&
331
- tc.name !== "skill_improve"
321
+ tc.name !== "memory_store" && tc.name !== "skill_create" && tc.name !== "skill_improve"
332
322
  );
333
323
 
334
324
  if (realToolCalls.length >= 3 && finalResponse) {
@@ -347,11 +337,7 @@ export class TaskProcessor {
347
337
  return;
348
338
  }
349
339
 
350
- const filePath = sm.create(
351
- extracted.name,
352
- extracted.description,
353
- extracted.steps
354
- );
340
+ const filePath = sm.create(extracted.name, extracted.description, extracted.steps);
355
341
 
356
342
  if (extracted.emoji) {
357
343
  const { writeFile } = await import("fs/promises");
@@ -373,21 +359,12 @@ export class TaskProcessor {
373
359
  const skill = sm.get(skillName);
374
360
  if (!skill) continue;
375
361
 
376
- analyzeSkillImprovement(
377
- skill.content,
378
- task.prompt,
379
- finalResponse,
380
- realToolCalls
381
- )
362
+ analyzeSkillImprovement(skill.content, task.prompt, finalResponse, realToolCalls)
382
363
  .then(async (improvement) => {
383
364
  if (!improvement) return;
384
365
 
385
366
  if (skill.source === "bundled") {
386
- sm.create(
387
- skillName,
388
- skill.description,
389
- improvement.improved_steps
390
- );
367
+ sm.create(skillName, skill.description, improvement.improved_steps);
391
368
  } else {
392
369
  sm.update(skillName, improvement.improved_steps);
393
370
  }
package/src/index.ts CHANGED
@@ -332,11 +332,20 @@ program
332
332
  case "chrome_not_found":
333
333
  launchSpinner.fail("Chrome not found on this system");
334
334
  log.info("Please install Google Chrome and try again.");
335
- log.info('Or run "assistme browser setup" for manual instructions.');
335
+ break;
336
+ case "port_conflict":
337
+ launchSpinner.fail("Port 9222 is in use by another process");
338
+ log.info(launchResult.detail ?? "Stop the conflicting process or use a different port.");
336
339
  break;
337
340
  default:
338
341
  launchSpinner.fail("Failed to start Chrome with remote debugging");
339
- log.info('Run "assistme browser setup" for manual setup instructions.');
342
+ if (launchResult.detail) {
343
+ log.info(launchResult.detail);
344
+ }
345
+ if (launchResult.chromePath) {
346
+ log.info(`Chrome binary: ${launchResult.chromePath}`);
347
+ }
348
+ log.info("Browser will be auto-launched when the first task needs it.");
340
349
  break;
341
350
  }
342
351
  }
@@ -15,9 +15,10 @@
15
15
  */
16
16
 
17
17
  import { WebSocket } from "ws";
18
- import { execSync, spawn } from "node:child_process";
18
+ import { execSync, spawn, type ChildProcess } from "node:child_process";
19
19
  import { platform } from "node:os";
20
20
  import { existsSync } from "node:fs";
21
+ import { log } from "../utils/logger.js";
21
22
 
22
23
  interface CDPTab {
23
24
  id: string;
@@ -89,9 +90,8 @@ export class BrowserController {
89
90
  if (!available) {
90
91
  throw new Error(
91
92
  `Cannot connect to browser on port ${this.debugPort}. ` +
92
- "Please start Chrome with: --remote-debugging-port=9222\n" +
93
- "macOS: open -a 'Google Chrome' --args --remote-debugging-port=9222\n" +
94
- "Linux: google-chrome --remote-debugging-port=9222"
93
+ "Chrome remote debugging is not reachable. " +
94
+ "Please ensure Chrome is running with remote debugging enabled."
95
95
  );
96
96
  }
97
97
 
@@ -606,9 +606,21 @@ export function isChromeRunning(): boolean {
606
606
  });
607
607
  return out.includes("chrome.exe");
608
608
  }
609
- // macOS and Linux — pgrep automatically excludes its own process
610
- execSync("pgrep -f chrome", { stdio: ["pipe", "pipe", "pipe"] });
611
- return true;
609
+ if (platform() === "darwin") {
610
+ // Match the main Chrome process (not helper/renderer sub-processes).
611
+ // No trailing $ — the process command line includes flags after the binary.
612
+ const out = execSync('pgrep -f "Google Chrome.app/Contents/MacOS/Google Chrome"', {
613
+ encoding: "utf-8",
614
+ stdio: ["pipe", "pipe", "pipe"],
615
+ });
616
+ return out.trim().length > 0;
617
+ }
618
+ // Linux — match common chrome binary names
619
+ const out = execSync("pgrep -f '(chrome|chromium)' 2>/dev/null || true", {
620
+ encoding: "utf-8",
621
+ stdio: ["pipe", "pipe", "pipe"],
622
+ });
623
+ return out.trim().length > 0;
612
624
  } catch {
613
625
  return false;
614
626
  }
@@ -667,93 +679,164 @@ async function killChromeGracefully(): Promise<void> {
667
679
 
668
680
  /**
669
681
  * Spawn Chrome with the remote-debugging-port flag.
682
+ * Returns the child process so callers can detect early failures.
670
683
  */
671
- function spawnChrome(chromePath: string, port: number): void {
672
- const os = platform();
684
+ function spawnChrome(chromePath: string, port: number): ChildProcess {
673
685
  const cdpFlag = `--remote-debugging-port=${port}`;
674
686
 
675
- if (os === "darwin") {
676
- // Determine app name from binary path
677
- const appName = chromePath.includes("Chromium")
678
- ? "Chromium"
679
- : chromePath.includes("Canary")
680
- ? "Google Chrome Canary"
681
- : "Google Chrome";
682
- spawn("open", ["-a", appName, "--args", cdpFlag, "--restore-last-session"], {
683
- detached: true,
684
- stdio: "ignore",
685
- }).unref();
686
- } else {
687
- spawn(chromePath, [cdpFlag, "--restore-last-session"], {
688
- detached: true,
689
- stdio: "ignore",
690
- }).unref();
691
- }
687
+ // Always invoke the Chrome binary directly rather than `open -a`.
688
+ // On macOS, `open -a` silently ignores --args when Chrome is already
689
+ // running, which would cause CDP to never be enabled.
690
+ log.debug(`Spawning Chrome: ${chromePath} ${cdpFlag} --restore-last-session`);
691
+ const child = spawn(chromePath, [cdpFlag, "--restore-last-session"], {
692
+ detached: true,
693
+ stdio: "ignore",
694
+ });
695
+
696
+ child.on("error", (err) => {
697
+ log.error(`Chrome spawn error: ${err.message}`);
698
+ });
699
+
700
+ child.unref();
701
+ return child;
692
702
  }
693
703
 
694
704
  /**
695
705
  * Wait for CDP to become reachable.
696
706
  */
697
- async function waitForCDP(browser: BrowserController, timeoutMs = 15000): Promise<boolean> {
707
+ async function waitForCDP(browser: BrowserController, timeoutMs = 30000): Promise<boolean> {
698
708
  const start = Date.now();
709
+ let attempts = 0;
699
710
  while (Date.now() - start < timeoutMs) {
700
- if (await browser.isAvailable()) return true;
711
+ attempts++;
712
+ if (await browser.isAvailable()) {
713
+ log.debug(`CDP became reachable after ${attempts} attempts (${Date.now() - start}ms)`);
714
+ return true;
715
+ }
701
716
  await new Promise((r) => setTimeout(r, 500));
702
717
  }
718
+ log.debug(`CDP not reachable after ${attempts} attempts (${timeoutMs}ms timeout)`);
703
719
  return false;
704
720
  }
705
721
 
722
+ /**
723
+ * Check if a port is already in use by another process (not Chrome CDP).
724
+ */
725
+ async function isPortInUse(port: number): Promise<boolean> {
726
+ try {
727
+ const res = await fetch(`http://127.0.0.1:${port}/json/version`, {
728
+ signal: AbortSignal.timeout(1000),
729
+ });
730
+ // If we get a response but it's not Chrome, the port is occupied
731
+ const body = await res.text();
732
+ return !body.includes("Chrome");
733
+ } catch {
734
+ // Connection refused → port is free
735
+ return false;
736
+ }
737
+ }
738
+
706
739
  /**
707
740
  * Result of an auto-launch attempt.
708
741
  */
709
742
  export interface AutoLaunchResult {
710
743
  success: boolean;
711
- action: "already_available" | "launched" | "restarted" | "chrome_not_found" | "launch_failed";
744
+ action:
745
+ | "already_available"
746
+ | "launched"
747
+ | "restarted"
748
+ | "chrome_not_found"
749
+ | "launch_failed"
750
+ | "port_conflict";
712
751
  chromePath?: string;
752
+ detail?: string;
713
753
  }
714
754
 
715
755
  /**
716
756
  * Ensure Chrome is running with CDP enabled.
717
757
  *
718
758
  * 1. Already listening on the port → return immediately.
719
- * 2. Chrome not running → launch with --remote-debugging-port.
720
- * 3. Chrome running without CDP → graceful quit, then relaunch with CDP.
759
+ * 2. Port occupied by non-Chrome process → report conflict.
760
+ * 3. Chrome not running → launch with --remote-debugging-port.
761
+ * 4. Chrome running without CDP → graceful quit, then relaunch with CDP.
721
762
  * Chrome's session restore brings back all tabs.
763
+ *
764
+ * On launch failure, retries once with a longer wait.
722
765
  */
723
766
  export async function ensureBrowserAvailable(port = 9222): Promise<AutoLaunchResult> {
724
767
  const browser = getBrowser(port);
725
768
 
726
769
  // Case 1: CDP already reachable
727
770
  if (await browser.isAvailable()) {
771
+ log.debug("CDP already reachable — no launch needed");
728
772
  return { success: true, action: "already_available" };
729
773
  }
730
774
 
775
+ // Case 2: Port occupied by something else
776
+ if (await isPortInUse(port)) {
777
+ log.debug(`Port ${port} is in use by a non-Chrome process`);
778
+ return {
779
+ success: false,
780
+ action: "port_conflict",
781
+ detail: `Port ${port} is already in use by another process. Try a different port or stop the conflicting process.`,
782
+ };
783
+ }
784
+
731
785
  // Find Chrome binary
732
786
  const chromePath = findChromePath();
733
787
  if (!chromePath) {
788
+ log.debug("Chrome binary not found on this system");
734
789
  return { success: false, action: "chrome_not_found" };
735
790
  }
736
791
 
792
+ log.debug(`Found Chrome at: ${chromePath}`);
793
+
737
794
  const running = isChromeRunning();
795
+ log.debug(`Chrome currently running: ${running}`);
738
796
 
739
- // Case 2: Chrome running without CDP → restart
797
+ // Case 3: Chrome running without CDP → restart
740
798
  if (running) {
799
+ log.debug("Killing Chrome gracefully for restart with CDP...");
741
800
  await killChromeGracefully();
742
801
  spawnChrome(chromePath, port);
743
802
 
744
803
  if (await waitForCDP(browser)) {
745
804
  return { success: true, action: "restarted", chromePath };
746
805
  }
747
- return { success: false, action: "launch_failed", chromePath };
806
+
807
+ // Retry once — Chrome can be slow to start (extensions, session restore)
808
+ log.debug("First CDP wait timed out after restart, retrying...");
809
+ if (await waitForCDP(browser, 15000)) {
810
+ return { success: true, action: "restarted", chromePath };
811
+ }
812
+
813
+ return {
814
+ success: false,
815
+ action: "launch_failed",
816
+ chromePath,
817
+ detail: "Chrome was restarted but CDP did not become reachable within timeout.",
818
+ };
748
819
  }
749
820
 
750
- // Case 3: Chrome not running → launch
821
+ // Case 4: Chrome not running → launch
751
822
  spawnChrome(chromePath, port);
752
823
 
753
824
  if (await waitForCDP(browser)) {
754
825
  return { success: true, action: "launched", chromePath };
755
826
  }
756
- return { success: false, action: "launch_failed", chromePath };
827
+
828
+ // Retry once
829
+ log.debug("First CDP wait timed out after launch, retrying...");
830
+ if (await waitForCDP(browser, 15000)) {
831
+ return { success: true, action: "launched", chromePath };
832
+ }
833
+
834
+ return {
835
+ success: false,
836
+ action: "launch_failed",
837
+ chromePath,
838
+ detail: "Chrome was launched but CDP did not become reachable within timeout.",
839
+ };
757
840
  }
758
841
 
759
842
  // ── Singleton ───────────────────────────────────────────────────────
@@ -1,4 +1,4 @@
1
- import { getBrowser } from "./browser.js";
1
+ import { getBrowser, ensureBrowserAvailable } from "./browser.js";
2
2
  import {
3
3
  readFileContent,
4
4
  writeFileContent,
@@ -99,13 +99,14 @@ export function getToolDefinitions(): ToolDefinition[] {
99
99
  {
100
100
  name: "browser_connect",
101
101
  description:
102
- "Connect to the user's real Chrome browser via CDP. The user must have Chrome running with --remote-debugging-port=9222. This shares the user's actual browser session including all logins and cookies.",
102
+ "Connect to the user's real Chrome browser via CDP. Chrome will be auto-launched if not already running. This shares the user's actual browser session including all logins and cookies.",
103
103
  input_schema: {
104
104
  type: "object",
105
105
  properties: {
106
106
  tab_index: {
107
107
  type: "number",
108
- description: "Tab index to connect to (default: 0, the first tab). Use browser_list_tabs to see available tabs.",
108
+ description:
109
+ "Tab index to connect to (default: 0, the first tab). Use browser_list_tabs to see available tabs.",
109
110
  },
110
111
  },
111
112
  },
@@ -143,7 +144,8 @@ export function getToolDefinitions(): ToolDefinition[] {
143
144
  properties: {
144
145
  selector: {
145
146
  type: "string",
146
- description: "CSS selector of the element to click (e.g. '#submit-btn', 'a.nav-link', 'button:nth-of-type(2)')",
147
+ description:
148
+ "CSS selector of the element to click (e.g. '#submit-btn', 'a.nav-link', 'button:nth-of-type(2)')",
147
149
  },
148
150
  },
149
151
  required: ["selector"],
@@ -268,10 +270,7 @@ export function getToolDefinitions(): ToolDefinition[] {
268
270
  ];
269
271
  }
270
272
 
271
- export async function executeTool(
272
- name: string,
273
- input: Record<string, unknown>
274
- ): Promise<string> {
273
+ export async function executeTool(name: string, input: Record<string, unknown>): Promise<string> {
275
274
  const browser = getBrowser();
276
275
 
277
276
  switch (name) {
@@ -298,8 +297,19 @@ export async function executeTool(
298
297
  return executeShell(input.command as string, input.cwd as string | undefined);
299
298
 
300
299
  // ── Browser (CDP) ───────────────────────────────────────
301
- case "browser_connect":
300
+ case "browser_connect": {
301
+ // Auto-launch Chrome if CDP is not reachable
302
+ if (!(await browser.isAvailable())) {
303
+ const result = await ensureBrowserAvailable();
304
+ if (!result.success) {
305
+ throw new Error(
306
+ `Failed to auto-launch Chrome (${result.action}). ` +
307
+ "Please ensure Google Chrome is installed."
308
+ );
309
+ }
310
+ }
302
311
  return browser.connect(input.tab_index as number | undefined);
312
+ }
303
313
  case "browser_navigate":
304
314
  if (!browser.isConnected()) await browser.connect();
305
315
  return browser.navigate(input.url as string);
@@ -314,9 +324,7 @@ export async function executeTool(
314
324
  case "browser_press_key":
315
325
  return browser.pressKey(input.key as string);
316
326
  case "browser_scroll":
317
- return (input.direction as string) === "up"
318
- ? browser.scrollUp()
319
- : browser.scrollDown();
327
+ return (input.direction as string) === "up" ? browser.scrollUp() : browser.scrollDown();
320
328
  case "browser_get_elements":
321
329
  return browser.getInteractiveElements();
322
330
  case "browser_evaluate":