assistme 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -432,9 +432,7 @@ var BrowserController = class {
432
432
  const available = await this.isAvailable();
433
433
  if (!available) {
434
434
  throw new Error(
435
- `Cannot connect to browser on port ${this.debugPort}. Please start Chrome with: --remote-debugging-port=9222
436
- macOS: open -a 'Google Chrome' --args --remote-debugging-port=9222
437
- Linux: google-chrome --remote-debugging-port=9222`
435
+ `Cannot connect to browser on port ${this.debugPort}. Chrome remote debugging is not reachable. Please ensure Chrome is running with remote debugging enabled.`
438
436
  );
439
437
  }
440
438
  const tabs = await this.getTabs();
@@ -855,14 +853,24 @@ function findChromePath() {
855
853
  function isChromeRunning() {
856
854
  try {
857
855
  if (platform() === "win32") {
858
- const out = execSync('tasklist /FI "IMAGENAME eq chrome.exe" /NH', {
856
+ const out2 = execSync('tasklist /FI "IMAGENAME eq chrome.exe" /NH', {
859
857
  encoding: "utf-8",
860
858
  stdio: ["pipe", "pipe", "pipe"]
861
859
  });
862
- return out.includes("chrome.exe");
860
+ return out2.includes("chrome.exe");
863
861
  }
864
- execSync("pgrep -f chrome", { stdio: ["pipe", "pipe", "pipe"] });
865
- return true;
862
+ if (platform() === "darwin") {
863
+ const out2 = execSync('pgrep -f "Google Chrome.app/Contents/MacOS/Google Chrome$"', {
864
+ encoding: "utf-8",
865
+ stdio: ["pipe", "pipe", "pipe"]
866
+ });
867
+ return out2.trim().length > 0;
868
+ }
869
+ const out = execSync("pgrep -f '(chrome|chromium)' 2>/dev/null || true", {
870
+ encoding: "utf-8",
871
+ stdio: ["pipe", "pipe", "pipe"]
872
+ });
873
+ return out.trim().length > 0;
866
874
  } catch {
867
875
  return false;
868
876
  }
@@ -910,50 +918,106 @@ async function killChromeGracefully() {
910
918
  function spawnChrome(chromePath, port) {
911
919
  const os = platform();
912
920
  const cdpFlag = `--remote-debugging-port=${port}`;
921
+ let child;
913
922
  if (os === "darwin") {
914
923
  const appName = chromePath.includes("Chromium") ? "Chromium" : chromePath.includes("Canary") ? "Google Chrome Canary" : "Google Chrome";
915
- spawn("open", ["-a", appName, "--args", cdpFlag, "--restore-last-session"], {
924
+ log.debug(`Spawning Chrome via: open -a "${appName}" --args ${cdpFlag} --restore-last-session`);
925
+ child = spawn("open", ["-a", appName, "--args", cdpFlag, "--restore-last-session"], {
916
926
  detached: true,
917
927
  stdio: "ignore"
918
- }).unref();
928
+ });
919
929
  } else {
920
- spawn(chromePath, [cdpFlag, "--restore-last-session"], {
930
+ log.debug(`Spawning Chrome via: ${chromePath} ${cdpFlag} --restore-last-session`);
931
+ child = spawn(chromePath, [cdpFlag, "--restore-last-session"], {
921
932
  detached: true,
922
933
  stdio: "ignore"
923
- }).unref();
934
+ });
924
935
  }
936
+ child.on("error", (err) => {
937
+ log.error(`Chrome spawn error: ${err.message}`);
938
+ });
939
+ child.unref();
940
+ return child;
925
941
  }
926
- async function waitForCDP(browser, timeoutMs = 15e3) {
942
+ async function waitForCDP(browser, timeoutMs = 3e4) {
927
943
  const start = Date.now();
944
+ let attempts = 0;
928
945
  while (Date.now() - start < timeoutMs) {
929
- if (await browser.isAvailable()) return true;
946
+ attempts++;
947
+ if (await browser.isAvailable()) {
948
+ log.debug(`CDP became reachable after ${attempts} attempts (${Date.now() - start}ms)`);
949
+ return true;
950
+ }
930
951
  await new Promise((r) => setTimeout(r, 500));
931
952
  }
953
+ log.debug(`CDP not reachable after ${attempts} attempts (${timeoutMs}ms timeout)`);
932
954
  return false;
933
955
  }
956
+ async function isPortInUse(port) {
957
+ try {
958
+ const res = await fetch(`http://127.0.0.1:${port}/json/version`, {
959
+ signal: AbortSignal.timeout(1e3)
960
+ });
961
+ const body = await res.text();
962
+ return !body.includes("Chrome");
963
+ } catch {
964
+ return false;
965
+ }
966
+ }
934
967
  async function ensureBrowserAvailable(port = 9222) {
935
968
  const browser = getBrowser(port);
936
969
  if (await browser.isAvailable()) {
970
+ log.debug("CDP already reachable \u2014 no launch needed");
937
971
  return { success: true, action: "already_available" };
938
972
  }
973
+ if (await isPortInUse(port)) {
974
+ log.debug(`Port ${port} is in use by a non-Chrome process`);
975
+ return {
976
+ success: false,
977
+ action: "port_conflict",
978
+ detail: `Port ${port} is already in use by another process. Try a different port or stop the conflicting process.`
979
+ };
980
+ }
939
981
  const chromePath = findChromePath();
940
982
  if (!chromePath) {
983
+ log.debug("Chrome binary not found on this system");
941
984
  return { success: false, action: "chrome_not_found" };
942
985
  }
986
+ log.debug(`Found Chrome at: ${chromePath}`);
943
987
  const running = isChromeRunning();
988
+ log.debug(`Chrome currently running: ${running}`);
944
989
  if (running) {
990
+ log.debug("Killing Chrome gracefully for restart with CDP...");
945
991
  await killChromeGracefully();
946
992
  spawnChrome(chromePath, port);
947
993
  if (await waitForCDP(browser)) {
948
994
  return { success: true, action: "restarted", chromePath };
949
995
  }
950
- return { success: false, action: "launch_failed", chromePath };
996
+ log.debug("First CDP wait timed out after restart, retrying...");
997
+ if (await waitForCDP(browser, 15e3)) {
998
+ return { success: true, action: "restarted", chromePath };
999
+ }
1000
+ return {
1001
+ success: false,
1002
+ action: "launch_failed",
1003
+ chromePath,
1004
+ detail: "Chrome was restarted but CDP did not become reachable within timeout."
1005
+ };
951
1006
  }
952
1007
  spawnChrome(chromePath, port);
953
1008
  if (await waitForCDP(browser)) {
954
1009
  return { success: true, action: "launched", chromePath };
955
1010
  }
956
- return { success: false, action: "launch_failed", chromePath };
1011
+ log.debug("First CDP wait timed out after launch, retrying...");
1012
+ if (await waitForCDP(browser, 15e3)) {
1013
+ return { success: true, action: "launched", chromePath };
1014
+ }
1015
+ return {
1016
+ success: false,
1017
+ action: "launch_failed",
1018
+ chromePath,
1019
+ detail: "Chrome was launched but CDP did not become reachable within timeout."
1020
+ };
957
1021
  }
958
1022
  var browserInstance = null;
959
1023
  function getBrowser(port = 9222) {
@@ -2164,8 +2228,17 @@ async function executeTool(name, input) {
2164
2228
  case "execute_command":
2165
2229
  return executeShell(input.command, input.cwd);
2166
2230
  // ── Browser (CDP) ───────────────────────────────────────
2167
- case "browser_connect":
2231
+ case "browser_connect": {
2232
+ if (!await browser.isAvailable()) {
2233
+ const result = await ensureBrowserAvailable();
2234
+ if (!result.success) {
2235
+ throw new Error(
2236
+ `Failed to auto-launch Chrome (${result.action}). Please ensure Google Chrome is installed.`
2237
+ );
2238
+ }
2239
+ }
2168
2240
  return browser.connect(input.tab_index);
2241
+ }
2169
2242
  case "browser_navigate":
2170
2243
  if (!browser.isConnected()) await browser.connect();
2171
2244
  return browser.navigate(input.url);
@@ -2341,7 +2414,7 @@ function createBrowserMcpServer() {
2341
2414
  tools: [
2342
2415
  tool(
2343
2416
  "browser_connect",
2344
- "Connect to the user's real Chrome browser via CDP. The user must have Chrome running with --remote-debugging-port=9222.",
2417
+ "Connect to the user's real Chrome browser via CDP. Chrome will be auto-launched if not already running.",
2345
2418
  { tab_index: z.number().optional().describe("Tab index (default: 0)") },
2346
2419
  async (args) => callTool("browser_connect", args)
2347
2420
  ),
@@ -2459,18 +2532,14 @@ function createAgentToolsServer(deps) {
2459
2532
  "Store a memory about the user that persists across conversations. Use when you learn preferences, habits, or standing instructions.",
2460
2533
  {
2461
2534
  content: z.string().describe("What to remember (concise, factual statement)"),
2462
- category: z.string().optional().describe(
2463
- "Category: general, preference, instruction, context, skill_learned, fact"
2464
- ),
2535
+ category: z.string().optional().describe("Category: general, preference, instruction, context, skill_learned, fact"),
2465
2536
  importance: z.number().optional().describe("Importance 1-10 (default: 5). Use 8+ for instructions"),
2466
2537
  tags: z.array(z.string()).optional().describe("Optional tags for searchability")
2467
2538
  },
2468
2539
  async (args) => {
2469
2540
  if (!memoryManager) {
2470
2541
  return {
2471
- content: [
2472
- { type: "text", text: "Memory manager not available." }
2473
- ]
2542
+ content: [{ type: "text", text: "Memory manager not available." }]
2474
2543
  };
2475
2544
  }
2476
2545
  const mem = await memoryManager.remember(
@@ -2507,11 +2576,7 @@ function createAgentToolsServer(deps) {
2507
2576
  ]
2508
2577
  };
2509
2578
  }
2510
- const filePath = skillManager.create(
2511
- args.name,
2512
- args.description,
2513
- args.instructions
2514
- );
2579
+ const filePath = skillManager.create(args.name, args.description, args.instructions);
2515
2580
  if (args.emoji) {
2516
2581
  const skill = skillManager.get(args.name);
2517
2582
  if (skill) {
@@ -2571,9 +2636,7 @@ ${args.instructions}
2571
2636
  args.description || existing.description,
2572
2637
  args.improved_instructions
2573
2638
  );
2574
- log.success(
2575
- `Self-improvement: overrode bundled skill "${args.name}"`
2576
- );
2639
+ log.success(`Self-improvement: overrode bundled skill "${args.name}"`);
2577
2640
  return {
2578
2641
  content: [
2579
2642
  {
@@ -2670,6 +2733,8 @@ KEY PRINCIPLE: You operate the user's real browser, not a headless sandbox. This
2670
2733
  - When you navigate to amazon.com, you see the user's logged-in Amazon
2671
2734
  - If a site needs login, ask the user to log in using browser_request_user_action
2672
2735
  - You are like a human assistant sitting at the user's computer
2736
+ - Chrome is automatically managed \u2014 just call browser_connect and it will auto-launch if needed
2737
+ - NEVER ask the user to manually start Chrome or run any terminal commands for browser setup
2673
2738
 
2674
2739
  Available capabilities:
2675
2740
  1. BROWSER CONTROL (user's real Chrome via CDP):
@@ -2731,10 +2796,7 @@ var TaskProcessor = class {
2731
2796
  let tokenUsage;
2732
2797
  try {
2733
2798
  await emitEvent(task.id, "status_change", { status: "running" });
2734
- let systemPrompt = BASE_SYSTEM_PROMPT.replace(
2735
- "{workspace_path}",
2736
- config.workspacePath
2737
- );
2799
+ let systemPrompt = BASE_SYSTEM_PROMPT.replace("{workspace_path}", config.workspacePath);
2738
2800
  if (this.memoryManager) {
2739
2801
  try {
2740
2802
  const memoryPrompt = await this.memoryManager.buildMemoryPrompt();
@@ -2769,9 +2831,7 @@ var TaskProcessor = class {
2769
2831
  "Glob",
2770
2832
  "Grep",
2771
2833
  // Browser MCP tools
2772
- ...BROWSER_TOOL_NAMES.map(
2773
- (n) => `mcp__assistme-browser__${n}`
2774
- ),
2834
+ ...BROWSER_TOOL_NAMES.map((n) => `mcp__assistme-browser__${n}`),
2775
2835
  // Agent MCP tools (memory, skills)
2776
2836
  "mcp__assistme-agent__memory_store",
2777
2837
  "mcp__assistme-agent__skill_create",
@@ -2830,9 +2890,7 @@ var TaskProcessor = class {
2830
2890
  });
2831
2891
  } else if (block.type === "thinking" && "thinking" in block) {
2832
2892
  const thinkingText = block.thinking;
2833
- log.debug(
2834
- `Thinking: ${thinkingText.slice(0, 100)}...`
2835
- );
2893
+ log.debug(`Thinking: ${thinkingText.slice(0, 100)}...`);
2836
2894
  await emitEvent(task.id, "thinking", {
2837
2895
  text: thinkingText
2838
2896
  });
@@ -2871,14 +2929,11 @@ var TaskProcessor = class {
2871
2929
  } finally {
2872
2930
  clearTimeout(timeoutId);
2873
2931
  }
2874
- await withRetry(
2875
- () => completeTask(task.id, finalResponse, tokenUsage),
2876
- {
2877
- maxRetries: 2,
2878
- baseDelayMs: 300,
2879
- label: "completeTask"
2880
- }
2881
- );
2932
+ await withRetry(() => completeTask(task.id, finalResponse, tokenUsage), {
2933
+ maxRetries: 2,
2934
+ baseDelayMs: 300,
2935
+ label: "completeTask"
2936
+ });
2882
2937
  await emitEvent(task.id, "status_change", { status: "completed" });
2883
2938
  log.success("Task completed.");
2884
2939
  if (this.memoryManager && finalResponse) {
@@ -2892,9 +2947,7 @@ var TaskProcessor = class {
2892
2947
  tags: mem.tags,
2893
2948
  sourceMessageId: taskIdRef
2894
2949
  });
2895
- log.info(
2896
- `Memory extracted: [${mem.category}] ${mem.content.slice(0, 60)}...`
2897
- );
2950
+ log.info(`Memory extracted: [${mem.category}] ${mem.content.slice(0, 60)}...`);
2898
2951
  } catch {
2899
2952
  }
2900
2953
  }
@@ -2918,11 +2971,7 @@ var TaskProcessor = class {
2918
2971
  );
2919
2972
  return;
2920
2973
  }
2921
- const filePath = sm.create(
2922
- extracted.name,
2923
- extracted.description,
2924
- extracted.steps
2925
- );
2974
+ const filePath = sm.create(extracted.name, extracted.description, extracted.steps);
2926
2975
  if (extracted.emoji) {
2927
2976
  const { writeFile: writeFile2 } = await import("fs/promises");
2928
2977
  const metaJson = JSON.stringify({
@@ -2948,19 +2997,10 @@ ${extracted.steps}
2948
2997
  for (const skillName of usedSkillNames) {
2949
2998
  const skill = sm.get(skillName);
2950
2999
  if (!skill) continue;
2951
- analyzeSkillImprovement(
2952
- skill.content,
2953
- task.prompt,
2954
- finalResponse,
2955
- realToolCalls
2956
- ).then(async (improvement) => {
3000
+ analyzeSkillImprovement(skill.content, task.prompt, finalResponse, realToolCalls).then(async (improvement) => {
2957
3001
  if (!improvement) return;
2958
3002
  if (skill.source === "bundled") {
2959
- sm.create(
2960
- skillName,
2961
- skill.description,
2962
- improvement.improved_steps
2963
- );
3003
+ sm.create(skillName, skill.description, improvement.improved_steps);
2964
3004
  } else {
2965
3005
  sm.update(skillName, improvement.improved_steps);
2966
3006
  }
@@ -3216,11 +3256,20 @@ program.command("start", { isDefault: true }).description("Start the agent and l
3216
3256
  case "chrome_not_found":
3217
3257
  launchSpinner.fail("Chrome not found on this system");
3218
3258
  log.info("Please install Google Chrome and try again.");
3219
- log.info('Or run "assistme browser setup" for manual instructions.');
3259
+ break;
3260
+ case "port_conflict":
3261
+ launchSpinner.fail("Port 9222 is in use by another process");
3262
+ log.info(launchResult.detail ?? "Stop the conflicting process or use a different port.");
3220
3263
  break;
3221
3264
  default:
3222
3265
  launchSpinner.fail("Failed to start Chrome with remote debugging");
3223
- log.info('Run "assistme browser setup" for manual setup instructions.');
3266
+ if (launchResult.detail) {
3267
+ log.info(launchResult.detail);
3268
+ }
3269
+ if (launchResult.chromePath) {
3270
+ log.info(`Chrome binary: ${launchResult.chromePath}`);
3271
+ }
3272
+ log.info("Browser will be auto-launched when the first task needs it.");
3224
3273
  break;
3225
3274
  }
3226
3275
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "assistme",
3
- "version": "0.1.7",
3
+ "version": "0.1.8",
4
4
  "description": "AssistMe CLI Agent - AI-powered assistant that controls your real browser",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -49,7 +49,7 @@ export function createBrowserMcpServer(): McpSdkServerConfigWithInstance {
49
49
  tools: [
50
50
  tool(
51
51
  "browser_connect",
52
- "Connect to the user's real Chrome browser via CDP. The user must have Chrome running with --remote-debugging-port=9222.",
52
+ "Connect to the user's real Chrome browser via CDP. Chrome will be auto-launched if not already running.",
53
53
  { tab_index: z.number().optional().describe("Tab index (default: 0)") },
54
54
  async (args) => callTool("browser_connect", args)
55
55
  ),
@@ -126,11 +126,8 @@ export function createBrowserMcpServer(): McpSdkServerConfigWithInstance {
126
126
  { expression: z.string().describe("JavaScript expression to evaluate") },
127
127
  async (args) => callTool("browser_evaluate", args)
128
128
  ),
129
- tool(
130
- "browser_list_tabs",
131
- "List all open tabs in the user's browser.",
132
- {},
133
- async () => callTool("browser_list_tabs", {})
129
+ tool("browser_list_tabs", "List all open tabs in the user's browser.", {}, async () =>
130
+ callTool("browser_list_tabs", {})
134
131
  ),
135
132
  tool(
136
133
  "browser_switch_tab",
@@ -148,13 +145,8 @@ export function createBrowserMcpServer(): McpSdkServerConfigWithInstance {
148
145
  "browser_request_user_action",
149
146
  "Request the user to perform an action in their browser (login, CAPTCHA, 2FA, etc.).",
150
147
  {
151
- message: z
152
- .string()
153
- .describe("Clear description of what the user needs to do"),
154
- wait_seconds: z
155
- .number()
156
- .optional()
157
- .describe("How long to wait (default: 60)"),
148
+ message: z.string().describe("Clear description of what the user needs to do"),
149
+ wait_seconds: z.number().optional().describe("How long to wait (default: 60)"),
158
150
  },
159
151
  async (args) => callTool("browser_request_user_action", args)
160
152
  ),
@@ -170,9 +162,7 @@ export interface AgentToolsDeps {
170
162
  taskId: string;
171
163
  }
172
164
 
173
- export function createAgentToolsServer(
174
- deps: AgentToolsDeps
175
- ): McpSdkServerConfigWithInstance {
165
+ export function createAgentToolsServer(deps: AgentToolsDeps): McpSdkServerConfigWithInstance {
176
166
  const { memoryManager, skillManager, taskId } = deps;
177
167
 
178
168
  return createSdkMcpServer({
@@ -183,30 +173,21 @@ export function createAgentToolsServer(
183
173
  "memory_store",
184
174
  "Store a memory about the user that persists across conversations. Use when you learn preferences, habits, or standing instructions.",
185
175
  {
186
- content: z
187
- .string()
188
- .describe("What to remember (concise, factual statement)"),
176
+ content: z.string().describe("What to remember (concise, factual statement)"),
189
177
  category: z
190
178
  .string()
191
179
  .optional()
192
- .describe(
193
- "Category: general, preference, instruction, context, skill_learned, fact"
194
- ),
180
+ .describe("Category: general, preference, instruction, context, skill_learned, fact"),
195
181
  importance: z
196
182
  .number()
197
183
  .optional()
198
184
  .describe("Importance 1-10 (default: 5). Use 8+ for instructions"),
199
- tags: z
200
- .array(z.string())
201
- .optional()
202
- .describe("Optional tags for searchability"),
185
+ tags: z.array(z.string()).optional().describe("Optional tags for searchability"),
203
186
  },
204
187
  async (args) => {
205
188
  if (!memoryManager) {
206
189
  return {
207
- content: [
208
- { type: "text", text: "Memory manager not available." },
209
- ],
190
+ content: [{ type: "text", text: "Memory manager not available." }],
210
191
  };
211
192
  }
212
193
  const mem = await memoryManager.remember(
@@ -226,19 +207,10 @@ export function createAgentToolsServer(
226
207
  "skill_create",
227
208
  "Create a new reusable skill from a workflow you just executed. Write generic, reusable instructions with placeholders like {product}, {query}.",
228
209
  {
229
- name: z
230
- .string()
231
- .describe("Skill name in kebab-case, e.g. 'flight-booking'"),
232
- description: z
233
- .string()
234
- .describe("One-line description of what this skill does"),
235
- instructions: z
236
- .string()
237
- .describe("Markdown step-by-step instructions"),
238
- emoji: z
239
- .string()
240
- .optional()
241
- .describe("Single emoji representing this skill"),
210
+ name: z.string().describe("Skill name in kebab-case, e.g. 'flight-booking'"),
211
+ description: z.string().describe("One-line description of what this skill does"),
212
+ instructions: z.string().describe("Markdown step-by-step instructions"),
213
+ emoji: z.string().optional().describe("Single emoji representing this skill"),
242
214
  },
243
215
  async (args) => {
244
216
  // Check for duplicates
@@ -254,11 +226,7 @@ export function createAgentToolsServer(
254
226
  };
255
227
  }
256
228
 
257
- const filePath = skillManager.create(
258
- args.name,
259
- args.description,
260
- args.instructions
261
- );
229
+ const filePath = skillManager.create(args.name, args.description, args.instructions);
262
230
 
263
231
  // Add emoji metadata if provided
264
232
  if (args.emoji) {
@@ -293,10 +261,7 @@ export function createAgentToolsServer(
293
261
  improved_instructions: z
294
262
  .string()
295
263
  .describe("Full updated markdown instructions (not a diff)"),
296
- description: z
297
- .string()
298
- .optional()
299
- .describe("Updated description (optional)"),
264
+ description: z.string().optional().describe("Updated description (optional)"),
300
265
  },
301
266
  async (args) => {
302
267
  const existing = skillManager.get(args.name);
@@ -321,9 +286,7 @@ export function createAgentToolsServer(
321
286
  args.description || existing.description,
322
287
  args.improved_instructions
323
288
  );
324
- log.success(
325
- `Self-improvement: overrode bundled skill "${args.name}"`
326
- );
289
+ log.success(`Self-improvement: overrode bundled skill "${args.name}"`);
327
290
  return {
328
291
  content: [
329
292
  {
@@ -39,6 +39,8 @@ KEY PRINCIPLE: You operate the user's real browser, not a headless sandbox. This
39
39
  - When you navigate to amazon.com, you see the user's logged-in Amazon
40
40
  - If a site needs login, ask the user to log in using browser_request_user_action
41
41
  - You are like a human assistant sitting at the user's computer
42
+ - Chrome is automatically managed — just call browser_connect and it will auto-launch if needed
43
+ - NEVER ask the user to manually start Chrome or run any terminal commands for browser setup
42
44
 
43
45
  Available capabilities:
44
46
  1. BROWSER CONTROL (user's real Chrome via CDP):
@@ -97,7 +99,9 @@ export class TaskProcessor {
97
99
  resetEventSequence();
98
100
 
99
101
  // Wall-clock timeout for the entire task (default: 10 minutes)
100
- const taskTimeoutMs = ((config as unknown as Record<string, unknown>).taskTimeoutMinutes as number || 10) * 60_000;
102
+ const taskTimeoutMs =
103
+ (((config as unknown as Record<string, unknown>).taskTimeoutMinutes as number) || 10) *
104
+ 60_000;
101
105
 
102
106
  // Set correlation ID for this task's log messages
103
107
  newCorrelationId();
@@ -113,10 +117,7 @@ export class TaskProcessor {
113
117
  await emitEvent(task.id, "status_change", { status: "running" });
114
118
 
115
119
  // Build system prompt with memories + skills
116
- let systemPrompt = BASE_SYSTEM_PROMPT.replace(
117
- "{workspace_path}",
118
- config.workspacePath
119
- );
120
+ let systemPrompt = BASE_SYSTEM_PROMPT.replace("{workspace_path}", config.workspacePath);
120
121
 
121
122
  // Inject memories
122
123
  if (this.memoryManager) {
@@ -163,9 +164,7 @@ export class TaskProcessor {
163
164
  "Glob",
164
165
  "Grep",
165
166
  // Browser MCP tools
166
- ...BROWSER_TOOL_NAMES.map(
167
- (n) => `mcp__assistme-browser__${n}`
168
- ),
167
+ ...BROWSER_TOOL_NAMES.map((n) => `mcp__assistme-browser__${n}`),
169
168
  // Agent MCP tools (memory, skills)
170
169
  "mcp__assistme-agent__memory_store",
171
170
  "mcp__assistme-agent__skill_create",
@@ -233,9 +232,7 @@ export class TaskProcessor {
233
232
  });
234
233
  } else if (block.type === "thinking" && "thinking" in block) {
235
234
  const thinkingText = (block as unknown as { thinking: string }).thinking;
236
- log.debug(
237
- `Thinking: ${thinkingText.slice(0, 100)}...`
238
- );
235
+ log.debug(`Thinking: ${thinkingText.slice(0, 100)}...`);
239
236
  await emitEvent(task.id, "thinking", {
240
237
  text: thinkingText,
241
238
  });
@@ -283,14 +280,11 @@ export class TaskProcessor {
283
280
  }
284
281
 
285
282
  // Complete the task (with retry for transient DB failures)
286
- await withRetry(
287
- () => completeTask(task.id, finalResponse, tokenUsage),
288
- {
289
- maxRetries: 2,
290
- baseDelayMs: 300,
291
- label: "completeTask",
292
- }
293
- );
283
+ await withRetry(() => completeTask(task.id, finalResponse, tokenUsage), {
284
+ maxRetries: 2,
285
+ baseDelayMs: 300,
286
+ label: "completeTask",
287
+ });
294
288
  await emitEvent(task.id, "status_change", { status: "completed" });
295
289
  log.success("Task completed.");
296
290
 
@@ -309,9 +303,7 @@ export class TaskProcessor {
309
303
  tags: mem.tags,
310
304
  sourceMessageId: taskIdRef,
311
305
  });
312
- log.info(
313
- `Memory extracted: [${mem.category}] ${mem.content.slice(0, 60)}...`
314
- );
306
+ log.info(`Memory extracted: [${mem.category}] ${mem.content.slice(0, 60)}...`);
315
307
  } catch {
316
308
  // Non-critical — skip individual memory failures
317
309
  }
@@ -326,9 +318,7 @@ export class TaskProcessor {
326
318
  // Auto-extract skills from multi-step workflows
327
319
  const realToolCalls = toolCallRecords.filter(
328
320
  (tc) =>
329
- tc.name !== "memory_store" &&
330
- tc.name !== "skill_create" &&
331
- tc.name !== "skill_improve"
321
+ tc.name !== "memory_store" && tc.name !== "skill_create" && tc.name !== "skill_improve"
332
322
  );
333
323
 
334
324
  if (realToolCalls.length >= 3 && finalResponse) {
@@ -347,11 +337,7 @@ export class TaskProcessor {
347
337
  return;
348
338
  }
349
339
 
350
- const filePath = sm.create(
351
- extracted.name,
352
- extracted.description,
353
- extracted.steps
354
- );
340
+ const filePath = sm.create(extracted.name, extracted.description, extracted.steps);
355
341
 
356
342
  if (extracted.emoji) {
357
343
  const { writeFile } = await import("fs/promises");
@@ -373,21 +359,12 @@ export class TaskProcessor {
373
359
  const skill = sm.get(skillName);
374
360
  if (!skill) continue;
375
361
 
376
- analyzeSkillImprovement(
377
- skill.content,
378
- task.prompt,
379
- finalResponse,
380
- realToolCalls
381
- )
362
+ analyzeSkillImprovement(skill.content, task.prompt, finalResponse, realToolCalls)
382
363
  .then(async (improvement) => {
383
364
  if (!improvement) return;
384
365
 
385
366
  if (skill.source === "bundled") {
386
- sm.create(
387
- skillName,
388
- skill.description,
389
- improvement.improved_steps
390
- );
367
+ sm.create(skillName, skill.description, improvement.improved_steps);
391
368
  } else {
392
369
  sm.update(skillName, improvement.improved_steps);
393
370
  }
package/src/index.ts CHANGED
@@ -332,11 +332,20 @@ program
332
332
  case "chrome_not_found":
333
333
  launchSpinner.fail("Chrome not found on this system");
334
334
  log.info("Please install Google Chrome and try again.");
335
- log.info('Or run "assistme browser setup" for manual instructions.');
335
+ break;
336
+ case "port_conflict":
337
+ launchSpinner.fail("Port 9222 is in use by another process");
338
+ log.info(launchResult.detail ?? "Stop the conflicting process or use a different port.");
336
339
  break;
337
340
  default:
338
341
  launchSpinner.fail("Failed to start Chrome with remote debugging");
339
- log.info('Run "assistme browser setup" for manual setup instructions.');
342
+ if (launchResult.detail) {
343
+ log.info(launchResult.detail);
344
+ }
345
+ if (launchResult.chromePath) {
346
+ log.info(`Chrome binary: ${launchResult.chromePath}`);
347
+ }
348
+ log.info("Browser will be auto-launched when the first task needs it.");
340
349
  break;
341
350
  }
342
351
  }
@@ -15,9 +15,10 @@
15
15
  */
16
16
 
17
17
  import { WebSocket } from "ws";
18
- import { execSync, spawn } from "node:child_process";
18
+ import { execSync, spawn, type ChildProcess } from "node:child_process";
19
19
  import { platform } from "node:os";
20
20
  import { existsSync } from "node:fs";
21
+ import { log } from "../utils/logger.js";
21
22
 
22
23
  interface CDPTab {
23
24
  id: string;
@@ -89,9 +90,8 @@ export class BrowserController {
89
90
  if (!available) {
90
91
  throw new Error(
91
92
  `Cannot connect to browser on port ${this.debugPort}. ` +
92
- "Please start Chrome with: --remote-debugging-port=9222\n" +
93
- "macOS: open -a 'Google Chrome' --args --remote-debugging-port=9222\n" +
94
- "Linux: google-chrome --remote-debugging-port=9222"
93
+ "Chrome remote debugging is not reachable. " +
94
+ "Please ensure Chrome is running with remote debugging enabled."
95
95
  );
96
96
  }
97
97
 
@@ -606,9 +606,20 @@ export function isChromeRunning(): boolean {
606
606
  });
607
607
  return out.includes("chrome.exe");
608
608
  }
609
- // macOS and Linux — pgrep automatically excludes its own process
610
- execSync("pgrep -f chrome", { stdio: ["pipe", "pipe", "pipe"] });
611
- return true;
609
+ if (platform() === "darwin") {
610
+ // Match the actual macOS process name "Google Chrome" (not helper processes)
611
+ const out = execSync('pgrep -f "Google Chrome.app/Contents/MacOS/Google Chrome$"', {
612
+ encoding: "utf-8",
613
+ stdio: ["pipe", "pipe", "pipe"],
614
+ });
615
+ return out.trim().length > 0;
616
+ }
617
+ // Linux — match common chrome binary names
618
+ const out = execSync("pgrep -f '(chrome|chromium)' 2>/dev/null || true", {
619
+ encoding: "utf-8",
620
+ stdio: ["pipe", "pipe", "pipe"],
621
+ });
622
+ return out.trim().length > 0;
612
623
  } catch {
613
624
  return false;
614
625
  }
@@ -667,11 +678,14 @@ async function killChromeGracefully(): Promise<void> {
667
678
 
668
679
  /**
669
680
  * Spawn Chrome with the remote-debugging-port flag.
681
+ * Returns the child process so callers can detect early failures.
670
682
  */
671
- function spawnChrome(chromePath: string, port: number): void {
683
+ function spawnChrome(chromePath: string, port: number): ChildProcess {
672
684
  const os = platform();
673
685
  const cdpFlag = `--remote-debugging-port=${port}`;
674
686
 
687
+ let child: ChildProcess;
688
+
675
689
  if (os === "darwin") {
676
690
  // Determine app name from binary path
677
691
  const appName = chromePath.includes("Chromium")
@@ -679,81 +693,163 @@ function spawnChrome(chromePath: string, port: number): void {
679
693
  : chromePath.includes("Canary")
680
694
  ? "Google Chrome Canary"
681
695
  : "Google Chrome";
682
- spawn("open", ["-a", appName, "--args", cdpFlag, "--restore-last-session"], {
696
+ log.debug(`Spawning Chrome via: open -a "${appName}" --args ${cdpFlag} --restore-last-session`);
697
+ child = spawn("open", ["-a", appName, "--args", cdpFlag, "--restore-last-session"], {
683
698
  detached: true,
684
699
  stdio: "ignore",
685
- }).unref();
700
+ });
686
701
  } else {
687
- spawn(chromePath, [cdpFlag, "--restore-last-session"], {
702
+ log.debug(`Spawning Chrome via: ${chromePath} ${cdpFlag} --restore-last-session`);
703
+ child = spawn(chromePath, [cdpFlag, "--restore-last-session"], {
688
704
  detached: true,
689
705
  stdio: "ignore",
690
- }).unref();
706
+ });
691
707
  }
708
+
709
+ child.on("error", (err) => {
710
+ log.error(`Chrome spawn error: ${err.message}`);
711
+ });
712
+
713
+ child.unref();
714
+ return child;
692
715
  }
693
716
 
694
717
  /**
695
718
  * Wait for CDP to become reachable.
696
719
  */
697
- async function waitForCDP(browser: BrowserController, timeoutMs = 15000): Promise<boolean> {
720
+ async function waitForCDP(browser: BrowserController, timeoutMs = 30000): Promise<boolean> {
698
721
  const start = Date.now();
722
+ let attempts = 0;
699
723
  while (Date.now() - start < timeoutMs) {
700
- if (await browser.isAvailable()) return true;
724
+ attempts++;
725
+ if (await browser.isAvailable()) {
726
+ log.debug(`CDP became reachable after ${attempts} attempts (${Date.now() - start}ms)`);
727
+ return true;
728
+ }
701
729
  await new Promise((r) => setTimeout(r, 500));
702
730
  }
731
+ log.debug(`CDP not reachable after ${attempts} attempts (${timeoutMs}ms timeout)`);
703
732
  return false;
704
733
  }
705
734
 
735
+ /**
736
+ * Check if a port is already in use by another process (not Chrome CDP).
737
+ */
738
+ async function isPortInUse(port: number): Promise<boolean> {
739
+ try {
740
+ const res = await fetch(`http://127.0.0.1:${port}/json/version`, {
741
+ signal: AbortSignal.timeout(1000),
742
+ });
743
+ // If we get a response but it's not Chrome, the port is occupied
744
+ const body = await res.text();
745
+ return !body.includes("Chrome");
746
+ } catch {
747
+ // Connection refused → port is free
748
+ return false;
749
+ }
750
+ }
751
+
706
752
  /**
707
753
  * Result of an auto-launch attempt.
708
754
  */
709
755
  export interface AutoLaunchResult {
710
756
  success: boolean;
711
- action: "already_available" | "launched" | "restarted" | "chrome_not_found" | "launch_failed";
757
+ action:
758
+ | "already_available"
759
+ | "launched"
760
+ | "restarted"
761
+ | "chrome_not_found"
762
+ | "launch_failed"
763
+ | "port_conflict";
712
764
  chromePath?: string;
765
+ detail?: string;
713
766
  }
714
767
 
715
768
  /**
716
769
  * Ensure Chrome is running with CDP enabled.
717
770
  *
718
771
  * 1. Already listening on the port → return immediately.
719
- * 2. Chrome not running → launch with --remote-debugging-port.
720
- * 3. Chrome running without CDP → graceful quit, then relaunch with CDP.
772
+ * 2. Port occupied by non-Chrome process → report conflict.
773
+ * 3. Chrome not running → launch with --remote-debugging-port.
774
+ * 4. Chrome running without CDP → graceful quit, then relaunch with CDP.
721
775
  * Chrome's session restore brings back all tabs.
776
+ *
777
+ * On launch failure, retries once with a longer wait.
722
778
  */
723
779
  export async function ensureBrowserAvailable(port = 9222): Promise<AutoLaunchResult> {
724
780
  const browser = getBrowser(port);
725
781
 
726
782
  // Case 1: CDP already reachable
727
783
  if (await browser.isAvailable()) {
784
+ log.debug("CDP already reachable — no launch needed");
728
785
  return { success: true, action: "already_available" };
729
786
  }
730
787
 
788
+ // Case 2: Port occupied by something else
789
+ if (await isPortInUse(port)) {
790
+ log.debug(`Port ${port} is in use by a non-Chrome process`);
791
+ return {
792
+ success: false,
793
+ action: "port_conflict",
794
+ detail: `Port ${port} is already in use by another process. Try a different port or stop the conflicting process.`,
795
+ };
796
+ }
797
+
731
798
  // Find Chrome binary
732
799
  const chromePath = findChromePath();
733
800
  if (!chromePath) {
801
+ log.debug("Chrome binary not found on this system");
734
802
  return { success: false, action: "chrome_not_found" };
735
803
  }
736
804
 
805
+ log.debug(`Found Chrome at: ${chromePath}`);
806
+
737
807
  const running = isChromeRunning();
808
+ log.debug(`Chrome currently running: ${running}`);
738
809
 
739
- // Case 2: Chrome running without CDP → restart
810
+ // Case 3: Chrome running without CDP → restart
740
811
  if (running) {
812
+ log.debug("Killing Chrome gracefully for restart with CDP...");
741
813
  await killChromeGracefully();
742
814
  spawnChrome(chromePath, port);
743
815
 
744
816
  if (await waitForCDP(browser)) {
745
817
  return { success: true, action: "restarted", chromePath };
746
818
  }
747
- return { success: false, action: "launch_failed", chromePath };
819
+
820
+ // Retry once — Chrome can be slow to start (extensions, session restore)
821
+ log.debug("First CDP wait timed out after restart, retrying...");
822
+ if (await waitForCDP(browser, 15000)) {
823
+ return { success: true, action: "restarted", chromePath };
824
+ }
825
+
826
+ return {
827
+ success: false,
828
+ action: "launch_failed",
829
+ chromePath,
830
+ detail: "Chrome was restarted but CDP did not become reachable within timeout.",
831
+ };
748
832
  }
749
833
 
750
- // Case 3: Chrome not running → launch
834
+ // Case 4: Chrome not running → launch
751
835
  spawnChrome(chromePath, port);
752
836
 
753
837
  if (await waitForCDP(browser)) {
754
838
  return { success: true, action: "launched", chromePath };
755
839
  }
756
- return { success: false, action: "launch_failed", chromePath };
840
+
841
+ // Retry once
842
+ log.debug("First CDP wait timed out after launch, retrying...");
843
+ if (await waitForCDP(browser, 15000)) {
844
+ return { success: true, action: "launched", chromePath };
845
+ }
846
+
847
+ return {
848
+ success: false,
849
+ action: "launch_failed",
850
+ chromePath,
851
+ detail: "Chrome was launched but CDP did not become reachable within timeout.",
852
+ };
757
853
  }
758
854
 
759
855
  // ── Singleton ───────────────────────────────────────────────────────
@@ -1,4 +1,4 @@
1
- import { getBrowser } from "./browser.js";
1
+ import { getBrowser, ensureBrowserAvailable } from "./browser.js";
2
2
  import {
3
3
  readFileContent,
4
4
  writeFileContent,
@@ -99,13 +99,14 @@ export function getToolDefinitions(): ToolDefinition[] {
99
99
  {
100
100
  name: "browser_connect",
101
101
  description:
102
- "Connect to the user's real Chrome browser via CDP. The user must have Chrome running with --remote-debugging-port=9222. This shares the user's actual browser session including all logins and cookies.",
102
+ "Connect to the user's real Chrome browser via CDP. Chrome will be auto-launched if not already running. This shares the user's actual browser session including all logins and cookies.",
103
103
  input_schema: {
104
104
  type: "object",
105
105
  properties: {
106
106
  tab_index: {
107
107
  type: "number",
108
- description: "Tab index to connect to (default: 0, the first tab). Use browser_list_tabs to see available tabs.",
108
+ description:
109
+ "Tab index to connect to (default: 0, the first tab). Use browser_list_tabs to see available tabs.",
109
110
  },
110
111
  },
111
112
  },
@@ -143,7 +144,8 @@ export function getToolDefinitions(): ToolDefinition[] {
143
144
  properties: {
144
145
  selector: {
145
146
  type: "string",
146
- description: "CSS selector of the element to click (e.g. '#submit-btn', 'a.nav-link', 'button:nth-of-type(2)')",
147
+ description:
148
+ "CSS selector of the element to click (e.g. '#submit-btn', 'a.nav-link', 'button:nth-of-type(2)')",
147
149
  },
148
150
  },
149
151
  required: ["selector"],
@@ -268,10 +270,7 @@ export function getToolDefinitions(): ToolDefinition[] {
268
270
  ];
269
271
  }
270
272
 
271
- export async function executeTool(
272
- name: string,
273
- input: Record<string, unknown>
274
- ): Promise<string> {
273
+ export async function executeTool(name: string, input: Record<string, unknown>): Promise<string> {
275
274
  const browser = getBrowser();
276
275
 
277
276
  switch (name) {
@@ -298,8 +297,19 @@ export async function executeTool(
298
297
  return executeShell(input.command as string, input.cwd as string | undefined);
299
298
 
300
299
  // ── Browser (CDP) ───────────────────────────────────────
301
- case "browser_connect":
300
+ case "browser_connect": {
301
+ // Auto-launch Chrome if CDP is not reachable
302
+ if (!(await browser.isAvailable())) {
303
+ const result = await ensureBrowserAvailable();
304
+ if (!result.success) {
305
+ throw new Error(
306
+ `Failed to auto-launch Chrome (${result.action}). ` +
307
+ "Please ensure Google Chrome is installed."
308
+ );
309
+ }
310
+ }
302
311
  return browser.connect(input.tab_index as number | undefined);
312
+ }
303
313
  case "browser_navigate":
304
314
  if (!browser.isConnected()) await browser.connect();
305
315
  return browser.navigate(input.url as string);
@@ -314,9 +324,7 @@ export async function executeTool(
314
324
  case "browser_press_key":
315
325
  return browser.pressKey(input.key as string);
316
326
  case "browser_scroll":
317
- return (input.direction as string) === "up"
318
- ? browser.scrollUp()
319
- : browser.scrollDown();
327
+ return (input.direction as string) === "up" ? browser.scrollUp() : browser.scrollDown();
320
328
  case "browser_get_elements":
321
329
  return browser.getInteractiveElements();
322
330
  case "browser_evaluate":