crawlio-browser 1.4.7 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,11 @@
1
1
  // src/shared/constants.ts
2
2
  import { homedir } from "os";
3
3
  import { join } from "path";
4
- var PKG_VERSION = "1.4.7";
4
+ var PKG_VERSION = "1.5.0";
5
5
  var WS_PORT = 9333;
6
+ var WS_PORT_MAX = 9342;
6
7
  var WS_HOST = "127.0.0.1";
8
+ var BRIDGE_DIR = join(homedir(), ".crawlio", "bridges");
7
9
  var CRAWLIO_PORT_FILE = join(
8
10
  homedir(),
9
11
  "Library",
@@ -30,7 +32,9 @@ var WS_RECONNECT_GRACE = 5e3;
30
32
  export {
31
33
  PKG_VERSION,
32
34
  WS_PORT,
35
+ WS_PORT_MAX,
33
36
  WS_HOST,
37
+ BRIDGE_DIR,
34
38
  CRAWLIO_PORT_FILE,
35
39
  TIMEOUTS,
36
40
  WS_HEARTBEAT_INTERVAL,
@@ -1,13 +1,15 @@
1
1
  import {
2
+ BRIDGE_DIR,
2
3
  CRAWLIO_PORT_FILE,
3
4
  PKG_VERSION,
4
5
  TIMEOUTS,
5
6
  WS_HEARTBEAT_INTERVAL,
6
7
  WS_HOST,
7
8
  WS_PORT,
9
+ WS_PORT_MAX,
8
10
  WS_RECONNECT_GRACE,
9
11
  WS_STALE_THRESHOLD
10
- } from "./chunk-RA3LVVKH.js";
12
+ } from "./chunk-GJH6PLST.js";
11
13
 
12
14
  // src/mcp-server/index.ts
13
15
  import { randomBytes as randomBytes2 } from "crypto";
@@ -25,7 +27,8 @@ import { WebSocketServer, WebSocket } from "ws";
25
27
  import { createServer } from "http";
26
28
  import { randomUUID } from "crypto";
27
29
  import { fileURLToPath } from "url";
28
- import { dirname, resolve } from "path";
30
+ import { dirname, join, resolve } from "path";
31
+ import { mkdirSync, writeFileSync, unlinkSync, readdirSync, readFileSync } from "fs";
29
32
  function resolveIndexPath() {
30
33
  if (process.argv[1] && process.argv[1].includes("dist")) {
31
34
  return process.argv[1];
@@ -219,6 +222,66 @@ var MessageQueue = class {
219
222
  };
220
223
  var HEARTBEAT_INTERVAL = WS_HEARTBEAT_INTERVAL;
221
224
  var STALE_THRESHOLD = WS_STALE_THRESHOLD;
225
/**
 * Finds the first usable bridge port in the range `start`..`WS_PORT_MAX`.
 *
 * Each candidate is probed with `GET http://WS_HOST:<port>/health` (300 ms
 * budget). A port is returned when either:
 *   - the connection attempt itself fails (nothing is listening), or
 *   - the responder identifies itself as crawlio-mcp with this process's pid.
 *
 * A port is skipped when anything else answers (another crawlio-mcp instance,
 * a foreign service, a non-JSON body) or when the probe times out, because in
 * those cases something is holding the port and `listen()` would fail.
 *
 * @param {number} start - First port to probe (inclusive).
 * @returns {Promise<number>} The first usable port.
 * @throws {Error} When every port in the scanned range is occupied.
 */
async function findAvailablePort(start) {
  for (let port = start; port <= WS_PORT_MAX; port++) {
    let res;
    try {
      res = await fetch(`http://${WS_HOST}:${port}/health`, {
        signal: AbortSignal.timeout(300)
      });
    } catch (err) {
      // A timeout means a peer accepted the connection but never answered:
      // the port is taken, so move on to the next candidate.
      if (err?.name === "TimeoutError" || err?.name === "AbortError") continue;
      // Any other failure to connect is treated as "nothing is listening".
      return port;
    }
    let body = null;
    try {
      body = await res.json();
    } catch {
      // Something answered, but not with JSON: a foreign service owns the port.
    }
    if (body?.service === "crawlio-mcp" && body.pid === process.pid) {
      return port;
    }
  }
  throw new Error(`All ports ${start}-${WS_PORT_MAX} in use`);
}
241
/**
 * Deletes bridge descriptor files in BRIDGE_DIR whose owning process is gone.
 *
 * Only `*.json` entries are considered. A file is removed when:
 *   - it cannot be read or parsed, or
 *   - it names a pid other than this process and signalling that pid with
 *     signal 0 fails for any reason except EPERM.
 *
 * EPERM means the process exists but belongs to another user, so its file is
 * kept. Files without a pid, or carrying this process's own pid, are kept too.
 * Every filesystem failure is ignored: the cleanup is best-effort and never
 * throws.
 */
function cleanStaleBridgeFiles() {
  let files;
  try {
    files = readdirSync(BRIDGE_DIR);
  } catch {
    // Directory missing or unreadable: nothing to clean.
    return;
  }
  for (const file of files) {
    if (!file.endsWith(".json")) continue;
    const filePath = join(BRIDGE_DIR, file);
    let stale = false;
    try {
      const data = JSON.parse(readFileSync(filePath, "utf-8"));
      if (data.pid && data.pid !== process.pid) {
        try {
          // Signal 0 delivers nothing; it only checks that the pid can be signalled.
          process.kill(data.pid, 0);
        } catch (err) {
          // EPERM: the process is alive but not ours to signal, so keep its file.
          stale = err?.code !== "EPERM";
        }
      }
    } catch {
      // Unreadable or corrupt descriptor.
      stale = true;
    }
    if (!stale) continue;
    try {
      unlinkSync(filePath);
    } catch {
      // Already removed or not removable: ignore.
    }
  }
}
268
/**
 * Records this process's bridge endpoint in `BRIDGE_DIR/<pid>.json`.
 *
 * Creates BRIDGE_DIR (and any missing parents) first, then writes a JSON
 * object with the port, this process's pid, its working directory and the
 * current timestamp in milliseconds.
 *
 * @param {number} port - Port to record in the descriptor.
 * @returns {string} Path of the file that was written.
 */
function writeBridgeFile(port) {
  mkdirSync(BRIDGE_DIR, { recursive: true });
  const descriptor = {
    port,
    pid: process.pid,
    cwd: process.cwd(),
    startedAt: Date.now()
  };
  const target = join(BRIDGE_DIR, `${process.pid}.json`);
  writeFileSync(target, JSON.stringify(descriptor));
  return target;
}
279
/**
 * Deletes this process's own descriptor, `BRIDGE_DIR/<pid>.json`.
 * Best-effort: any failure (for example the file no longer existing) is ignored.
 */
function removeBridgeFile() {
  const ownDescriptor = `${process.pid}.json`;
  try {
    unlinkSync(join(BRIDGE_DIR, ownDescriptor));
  } catch {
    // Nothing to remove, or removal failed: ignored on purpose.
  }
}
222
285
  var WebSocketBridge = class {
223
286
  wss = null;
224
287
  httpServer = null;
@@ -233,8 +296,12 @@ var WebSocketBridge = class {
233
296
  connectTime = 0;
234
297
  reconnectCount = -1;
235
298
  // first connection is not a "reconnect"
299
+ actualPort = WS_PORT;
236
300
  onClientConnected;
237
301
  onPortRefreshRequested;
302
+ get port() {
303
+ return this.actualPort;
304
+ }
238
305
  startHeartbeat() {
239
306
  this.stopHeartbeat();
240
307
  this.heartbeatTimer = setInterval(() => {
@@ -283,7 +350,7 @@ var WebSocketBridge = class {
283
350
  "Cache-Control": "no-store",
284
351
  "Access-Control-Allow-Origin": "*"
285
352
  });
286
- res.end(JSON.stringify({ service: "crawlio-mcp", ...this.getHealth() }));
353
+ res.end(JSON.stringify({ service: "crawlio-mcp", pid: process.pid, port: this.actualPort, ...this.getHealth() }));
287
354
  return;
288
355
  }
289
356
  if (req.method === "OPTIONS") {
@@ -424,34 +491,36 @@ var WebSocketBridge = class {
424
491
  console.error("[Bridge] WebSocket handshake error:", err.message);
425
492
  socket.destroy();
426
493
  });
427
- try {
428
- const probe = await fetch(`http://${WS_HOST}:${WS_PORT}/health`, { signal: AbortSignal.timeout(500) });
429
- const body = await probe.json();
430
- if (body.service === "crawlio-mcp") {
431
- const { execFileSync } = await import("child_process");
432
- const pid = execFileSync("lsof", ["-ti", `:${WS_PORT}`, "-sTCP:LISTEN"]).toString().trim().split("\n")[0];
433
- if (pid && Number(pid) !== process.pid) {
434
- console.error(`[Bridge] Killing stale crawlio-mcp on port ${WS_PORT} (PID ${pid})`);
435
- process.kill(Number(pid), "SIGTERM");
436
- for (let i = 0; i < 8; i++) {
437
- await new Promise((r) => setTimeout(r, 250));
438
- try {
439
- execFileSync("lsof", ["-ti", `:${WS_PORT}`, "-sTCP:LISTEN"]);
440
- } catch {
441
- break;
442
- }
443
- }
444
- }
445
- }
446
- } catch {
447
- }
494
+ cleanStaleBridgeFiles();
495
+ const requestedPort = parseInt(process.env.CRAWLIO_PORT || "", 10);
496
+ const startPort = requestedPort >= WS_PORT && requestedPort <= WS_PORT_MAX ? requestedPort : WS_PORT;
497
+ this.actualPort = await findAvailablePort(startPort);
448
498
  await new Promise((resolve2, reject) => {
449
- this.httpServer.once("error", (err) => reject(err));
450
- this.httpServer.listen(WS_PORT, WS_HOST, () => {
451
- console.error(`[Bridge] WebSocket server listening on ws://${WS_HOST}:${WS_PORT}`);
452
- resolve2();
453
- });
499
+ let attempts = 0;
500
+ const tryListen = () => {
501
+ const onError = (err) => {
502
+ if (err.code === "EADDRINUSE" && attempts < 3) {
503
+ attempts++;
504
+ console.error(`[Bridge] Port ${this.actualPort} still in use, retry ${attempts}/3...`);
505
+ setTimeout(tryListen, 1e3 * attempts);
506
+ } else {
507
+ reject(err);
508
+ }
509
+ };
510
+ this.httpServer.once("error", onError);
511
+ this.httpServer.listen(this.actualPort, WS_HOST, () => {
512
+ this.httpServer.removeListener("error", onError);
513
+ console.error(`[Bridge] WebSocket server listening on ws://${WS_HOST}:${this.actualPort}`);
514
+ resolve2();
515
+ });
516
+ };
517
+ tryListen();
454
518
  });
519
+ writeBridgeFile(this.actualPort);
520
+ const cleanup = () => removeBridgeFile();
521
+ process.on("SIGTERM", cleanup);
522
+ process.on("SIGINT", cleanup);
523
+ process.on("beforeExit", cleanup);
455
524
  }
456
525
  get isConnected() {
457
526
  return this.client?.readyState === WebSocket.OPEN;
@@ -536,6 +605,7 @@ var WebSocketBridge = class {
536
605
  this.client?.close();
537
606
  this.wss?.close();
538
607
  this.httpServer?.close();
608
+ removeBridgeFile();
539
609
  }
540
610
  };
541
611
 
@@ -710,7 +780,7 @@ import { z } from "zod";
710
780
  import { execFile } from "child_process";
711
781
  import { writeFile, unlink } from "fs/promises";
712
782
  import { tmpdir } from "os";
713
- import { join as join2, dirname as dirname3 } from "path";
783
+ import { join as join3, dirname as dirname3 } from "path";
714
784
  import { randomBytes } from "crypto";
715
785
  import { fileURLToPath as fileURLToPath3 } from "url";
716
786
  import { promisify } from "util";
@@ -1078,8 +1148,8 @@ function escapeString(s) {
1078
1148
  }
1079
1149
 
1080
1150
  // src/mcp-server/semantic-search.ts
1081
- import { readFileSync } from "fs";
1082
- import { join, dirname as dirname2 } from "path";
1151
+ import { readFileSync as readFileSync2 } from "fs";
1152
+ import { join as join2, dirname as dirname2 } from "path";
1083
1153
  import { fileURLToPath as fileURLToPath2 } from "url";
1084
1154
  function cosineSimilarity(a, b) {
1085
1155
  if (a.length !== b.length || a.length === 0) return 0;
@@ -1099,8 +1169,8 @@ function loadEmbeddings() {
1099
1169
  const map = /* @__PURE__ */ new Map();
1100
1170
  try {
1101
1171
  const thisDir = dirname2(fileURLToPath2(import.meta.url));
1102
- const assetPath = join(thisDir, "tool-embeddings.json");
1103
- const raw = readFileSync(assetPath, "utf-8");
1172
+ const assetPath = join2(thisDir, "tool-embeddings.json");
1173
+ const raw = readFileSync2(assetPath, "utf-8");
1104
1174
  const asset = JSON.parse(raw);
1105
1175
  if (!asset.embeddings || typeof asset.embeddings !== "object") return map;
1106
1176
  for (const [name, vec] of Object.entries(asset.embeddings)) {
@@ -1429,7 +1499,7 @@ function createTools(bridge2, crawlio2) {
1429
1499
  // --- AI orchestration tools ---
1430
1500
  {
1431
1501
  name: "connect_tab",
1432
- description: "Connect to a browser tab for capture and interaction. Three modes: (1) provide a URL to find or create a tab, (2) provide a tabId to connect to a specific tab, (3) no args to connect to the active tab. Starts CDP capture automatically.",
1502
+ description: "Pin a specific browser tab for all subsequent commands. Optional \u2014 without this, tools auto-connect to the active tab. Three modes: (1) provide a URL to find or create a tab, (2) provide a tabId to connect to a specific tab, (3) no args to pin the active tab. Starts CDP capture automatically.",
1433
1503
  inputSchema: {
1434
1504
  type: "object",
1435
1505
  properties: {
@@ -3604,9 +3674,9 @@ function createTools(bridge2, crawlio2) {
3604
3674
  if (!data?.data) {
3605
3675
  return toolError("Screenshot capture failed \u2014 no image data returned");
3606
3676
  }
3607
- tmpPath = join2(tmpdir(), `crawlio-ocr-${randomBytes(6).toString("hex")}.png`);
3677
+ tmpPath = join3(tmpdir(), `crawlio-ocr-${randomBytes(6).toString("hex")}.png`);
3608
3678
  await writeFile(tmpPath, Buffer.from(data.data, "base64"));
3609
- const shimPath = join2(dirname3(fileURLToPath3(import.meta.url)), "..", "..", "bin", "ocr-shim.swift");
3679
+ const shimPath = join3(dirname3(fileURLToPath3(import.meta.url)), "..", "..", "bin", "ocr-shim.swift");
3610
3680
  const { stdout, stderr } = await execFileAsync("swift", [shimPath, tmpPath], { timeout: 25e3 });
3611
3681
  if (!stdout.trim()) {
3612
3682
  return toolError(`OCR produced no output${stderr ? `: ${stderr.trim()}` : ""}`);
@@ -4042,7 +4112,7 @@ function createCodeModeTools(bridge2, crawlio2) {
4042
4112
  process.title = "Crawlio Agent";
4043
4113
  var initMode = process.argv.includes("init") || process.argv.includes("--setup") || process.argv.includes("setup");
4044
4114
  if (initMode) {
4045
- const { runInit } = await import("./init-4D3LCB7J.js");
4115
+ const { runInit } = await import("./init-CG5XREFD.js");
4046
4116
  await runInit(process.argv.slice(2));
4047
4117
  process.exit(0);
4048
4118
  }
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  PKG_VERSION
3
- } from "./chunk-RA3LVVKH.js";
3
+ } from "./chunk-GJH6PLST.js";
4
4
 
5
5
  // src/mcp-server/init.ts
6
6
  import { execFileSync, spawn } from "child_process";
@@ -830,21 +830,11 @@ async function configureMetaMcp(found, options) {
830
830
  }
831
831
  const conflicts = findConflictingConfigs();
832
832
  if (conflicts.length > 0) {
833
- console.log(` ${yellow("!")} crawlio-browser already configured in:`);
833
+ console.log(` ${dim("i")} crawlio-browser also configured in:`);
834
834
  for (const c of conflicts) {
835
835
  console.log(` ${dim("\u2192")} ${c}`);
836
836
  }
837
- console.log(` ${yellow("!")} Adding a second entry would cause a port collision (port 9333)`);
838
- if (!options.yes) {
839
- const proceed = await confirm("Add anyway? (not recommended)", false);
840
- if (!proceed) {
841
- console.log(` ${dim("Skipped \u2014 using existing configuration")}`);
842
- return;
843
- }
844
- } else {
845
- console.log(` ${dim("Skipped \u2014 existing configuration takes priority")}`);
846
- return;
847
- }
837
+ console.log(` ${dim(" Multi-instance supported \u2014 each server gets its own port (9333-9342)")}`);
848
838
  }
849
839
  if (!options.yes) {
850
840
  const proceed = await confirm("Add crawlio-browser to this config?");
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "crawlio-browser",
3
- "version": "1.4.7",
3
+ "version": "1.5.0",
4
4
  "description": "MCP server with 96 CDP-backed tools for browser automation — screenshots, DOM, network capture, framework detection, cookies, storage, session recording, performance metrics via Chrome",
5
5
  "type": "module",
6
6
  "main": "dist/mcp-server/index.js",