crawlio-browser 1.4.7 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
// src/shared/constants.ts
|
|
2
2
|
import { homedir } from "os";
|
|
3
3
|
import { join } from "path";
|
|
4
|
-
var PKG_VERSION = "1.4.7";
|
|
4
|
+
var PKG_VERSION = "1.5.0";
|
|
5
5
|
var WS_PORT = 9333;
|
|
6
|
+
var WS_PORT_MAX = 9342;
|
|
6
7
|
var WS_HOST = "127.0.0.1";
|
|
8
|
+
var BRIDGE_DIR = join(homedir(), ".crawlio", "bridges");
|
|
7
9
|
var CRAWLIO_PORT_FILE = join(
|
|
8
10
|
homedir(),
|
|
9
11
|
"Library",
|
|
@@ -30,7 +32,9 @@ var WS_RECONNECT_GRACE = 5e3;
|
|
|
30
32
|
export {
|
|
31
33
|
PKG_VERSION,
|
|
32
34
|
WS_PORT,
|
|
35
|
+
WS_PORT_MAX,
|
|
33
36
|
WS_HOST,
|
|
37
|
+
BRIDGE_DIR,
|
|
34
38
|
CRAWLIO_PORT_FILE,
|
|
35
39
|
TIMEOUTS,
|
|
36
40
|
WS_HEARTBEAT_INTERVAL,
|
package/dist/mcp-server/index.js
CHANGED
|
@@ -1,13 +1,15 @@
|
|
|
1
1
|
import {
|
|
2
|
+
BRIDGE_DIR,
|
|
2
3
|
CRAWLIO_PORT_FILE,
|
|
3
4
|
PKG_VERSION,
|
|
4
5
|
TIMEOUTS,
|
|
5
6
|
WS_HEARTBEAT_INTERVAL,
|
|
6
7
|
WS_HOST,
|
|
7
8
|
WS_PORT,
|
|
9
|
+
WS_PORT_MAX,
|
|
8
10
|
WS_RECONNECT_GRACE,
|
|
9
11
|
WS_STALE_THRESHOLD
|
|
10
|
-
} from "./chunk-
|
|
12
|
+
} from "./chunk-GJH6PLST.js";
|
|
11
13
|
|
|
12
14
|
// src/mcp-server/index.ts
|
|
13
15
|
import { randomBytes as randomBytes2 } from "crypto";
|
|
@@ -25,7 +27,8 @@ import { WebSocketServer, WebSocket } from "ws";
|
|
|
25
27
|
import { createServer } from "http";
|
|
26
28
|
import { randomUUID } from "crypto";
|
|
27
29
|
import { fileURLToPath } from "url";
|
|
28
|
-
import { dirname, resolve } from "path";
|
|
30
|
+
import { dirname, join, resolve } from "path";
|
|
31
|
+
import { mkdirSync, writeFileSync, unlinkSync, readdirSync, readFileSync } from "fs";
|
|
29
32
|
function resolveIndexPath() {
|
|
30
33
|
if (process.argv[1] && process.argv[1].includes("dist")) {
|
|
31
34
|
return process.argv[1];
|
|
@@ -219,6 +222,66 @@ var MessageQueue = class {
|
|
|
219
222
|
};
|
|
220
223
|
var HEARTBEAT_INTERVAL = WS_HEARTBEAT_INTERVAL;
|
|
221
224
|
var STALE_THRESHOLD = WS_STALE_THRESHOLD;
|
|
225
|
+
/**
 * Scans ports from `start` up to and including `WS_PORT_MAX` and returns the
 * first one this process may bind.
 *
 * Each candidate is probed with `GET http://WS_HOST:<port>/health`:
 * - the request fails outright (e.g. nothing is listening) -> port is free;
 * - the probe times out -> something accepted the connection but never
 *   answered, so the port is occupied and is skipped;
 * - any HTTP response arrives -> the port is occupied; it is only returned
 *   when the health payload identifies this very process (same pid).
 *
 * @param {number} start - First port to probe.
 * @returns {Promise<number>} The first usable port in the range.
 * @throws {Error} When every port in `start..WS_PORT_MAX` is occupied.
 */
async function findAvailablePort(start) {
  for (let port = start; port <= WS_PORT_MAX; port++) {
    let res;
    try {
      res = await fetch(`http://${WS_HOST}:${port}/health`, {
        signal: AbortSignal.timeout(300)
      });
    } catch (err) {
      // A timeout means a listener exists but is unresponsive; binding the
      // port would fail with EADDRINUSE, so move on to the next candidate.
      if (err?.name === "TimeoutError" || err?.name === "AbortError") {
        continue;
      }
      // Any other failure is treated as "nobody is listening here".
      return port;
    }
    let body = null;
    try {
      body = await res.json();
    } catch {
      // A non-JSON answer still proves that another service owns the port.
    }
    if (body?.service === "crawlio-mcp" && body.pid === process.pid) {
      return port;
    }
  }
  throw new Error(`All ports ${start}-${WS_PORT_MAX} in use (by crawlio-mcp instances or other services)`);
}
|
|
241
|
+
/**
 * Deletes leftover `*.json` descriptors in `BRIDGE_DIR`.
 *
 * A descriptor is removed when it cannot be read or parsed, or when the `pid`
 * it records belongs to a process that no longer exists. Descriptors of this
 * process, descriptors without a `pid`, and non-`.json` entries are left
 * untouched. The whole operation is best effort: a missing or unreadable
 * directory and failed deletions are silently ignored.
 */
function cleanStaleBridgeFiles() {
  // Signal 0 performs the existence/permission check without delivering a
  // signal. EPERM means the process exists but is owned by someone else, so
  // it must still count as alive; any other failure is treated as "gone".
  const isAlive = (pid) => {
    try {
      process.kill(pid, 0);
      return true;
    } catch (err) {
      return err?.code === "EPERM";
    }
  };
  const removeQuietly = (filePath) => {
    try {
      unlinkSync(filePath);
    } catch {
      // Best effort: the file may already be gone or not be ours to delete.
    }
  };

  let files;
  try {
    files = readdirSync(BRIDGE_DIR);
  } catch {
    // No bridge directory (yet) means there is nothing to clean.
    return;
  }

  for (const file of files) {
    if (!file.endsWith(".json")) continue;
    const filePath = join(BRIDGE_DIR, file);
    let ownerPid;
    try {
      ownerPid = JSON.parse(readFileSync(filePath, "utf-8")).pid;
    } catch {
      // Unreadable or malformed descriptor.
      removeQuietly(filePath);
      continue;
    }
    if (ownerPid && ownerPid !== process.pid && !isAlive(ownerPid)) {
      removeQuietly(filePath);
    }
  }
}
|
|
268
|
+
/**
 * Writes this process's bridge descriptor to `BRIDGE_DIR/<pid>.json`,
 * creating the directory (and any missing parents) first.
 *
 * The descriptor is compact JSON with the keys `port`, `pid`, `cwd` and
 * `startedAt` (milliseconds since the epoch), in that order.
 *
 * @param {number} port - Port to record in the descriptor.
 * @returns {string} Path of the descriptor file that was written.
 */
function writeBridgeFile(port) {
  mkdirSync(BRIDGE_DIR, { recursive: true });
  const { pid } = process;
  const target = join(BRIDGE_DIR, `${pid}.json`);
  const descriptor = {
    port,
    pid,
    cwd: process.cwd(),
    startedAt: Date.now()
  };
  writeFileSync(target, JSON.stringify(descriptor));
  return target;
}
|
|
279
|
+
/**
 * Deletes this process's own descriptor, `BRIDGE_DIR/<pid>.json`.
 * Best effort: any failure (for example the file not existing) is ignored.
 */
function removeBridgeFile() {
  try {
    const ownDescriptor = join(BRIDGE_DIR, `${process.pid}.json`);
    unlinkSync(ownDescriptor);
  } catch {
    // Nothing to do: the descriptor is already gone or cannot be removed.
  }
}
|
|
222
285
|
var WebSocketBridge = class {
|
|
223
286
|
wss = null;
|
|
224
287
|
httpServer = null;
|
|
@@ -233,8 +296,12 @@ var WebSocketBridge = class {
|
|
|
233
296
|
connectTime = 0;
|
|
234
297
|
reconnectCount = -1;
|
|
235
298
|
// first connection is not a "reconnect"
|
|
299
|
+
actualPort = WS_PORT;
|
|
236
300
|
onClientConnected;
|
|
237
301
|
onPortRefreshRequested;
|
|
302
|
+
get port() {
|
|
303
|
+
return this.actualPort;
|
|
304
|
+
}
|
|
238
305
|
startHeartbeat() {
|
|
239
306
|
this.stopHeartbeat();
|
|
240
307
|
this.heartbeatTimer = setInterval(() => {
|
|
@@ -283,7 +350,7 @@ var WebSocketBridge = class {
|
|
|
283
350
|
"Cache-Control": "no-store",
|
|
284
351
|
"Access-Control-Allow-Origin": "*"
|
|
285
352
|
});
|
|
286
|
-
res.end(JSON.stringify({ service: "crawlio-mcp", ...this.getHealth() }));
|
|
353
|
+
res.end(JSON.stringify({ service: "crawlio-mcp", pid: process.pid, port: this.actualPort, ...this.getHealth() }));
|
|
287
354
|
return;
|
|
288
355
|
}
|
|
289
356
|
if (req.method === "OPTIONS") {
|
|
@@ -424,34 +491,36 @@ var WebSocketBridge = class {
|
|
|
424
491
|
console.error("[Bridge] WebSocket handshake error:", err.message);
|
|
425
492
|
socket.destroy();
|
|
426
493
|
});
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
const { execFileSync } = await import("child_process");
|
|
432
|
-
const pid = execFileSync("lsof", ["-ti", `:${WS_PORT}`, "-sTCP:LISTEN"]).toString().trim().split("\n")[0];
|
|
433
|
-
if (pid && Number(pid) !== process.pid) {
|
|
434
|
-
console.error(`[Bridge] Killing stale crawlio-mcp on port ${WS_PORT} (PID ${pid})`);
|
|
435
|
-
process.kill(Number(pid), "SIGTERM");
|
|
436
|
-
for (let i = 0; i < 8; i++) {
|
|
437
|
-
await new Promise((r) => setTimeout(r, 250));
|
|
438
|
-
try {
|
|
439
|
-
execFileSync("lsof", ["-ti", `:${WS_PORT}`, "-sTCP:LISTEN"]);
|
|
440
|
-
} catch {
|
|
441
|
-
break;
|
|
442
|
-
}
|
|
443
|
-
}
|
|
444
|
-
}
|
|
445
|
-
}
|
|
446
|
-
} catch {
|
|
447
|
-
}
|
|
494
|
+
cleanStaleBridgeFiles();
|
|
495
|
+
const requestedPort = parseInt(process.env.CRAWLIO_PORT || "", 10);
|
|
496
|
+
const startPort = requestedPort >= WS_PORT && requestedPort <= WS_PORT_MAX ? requestedPort : WS_PORT;
|
|
497
|
+
this.actualPort = await findAvailablePort(startPort);
|
|
448
498
|
await new Promise((resolve2, reject) => {
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
499
|
+
let attempts = 0;
|
|
500
|
+
const tryListen = () => {
|
|
501
|
+
const onError = (err) => {
|
|
502
|
+
if (err.code === "EADDRINUSE" && attempts < 3) {
|
|
503
|
+
attempts++;
|
|
504
|
+
console.error(`[Bridge] Port ${this.actualPort} still in use, retry ${attempts}/3...`);
|
|
505
|
+
setTimeout(tryListen, 1e3 * attempts);
|
|
506
|
+
} else {
|
|
507
|
+
reject(err);
|
|
508
|
+
}
|
|
509
|
+
};
|
|
510
|
+
this.httpServer.once("error", onError);
|
|
511
|
+
this.httpServer.listen(this.actualPort, WS_HOST, () => {
|
|
512
|
+
this.httpServer.removeListener("error", onError);
|
|
513
|
+
console.error(`[Bridge] WebSocket server listening on ws://${WS_HOST}:${this.actualPort}`);
|
|
514
|
+
resolve2();
|
|
515
|
+
});
|
|
516
|
+
};
|
|
517
|
+
tryListen();
|
|
454
518
|
});
|
|
519
|
+
writeBridgeFile(this.actualPort);
|
|
520
|
+
const cleanup = () => removeBridgeFile();
|
|
521
|
+
process.on("SIGTERM", cleanup);
|
|
522
|
+
process.on("SIGINT", cleanup);
|
|
523
|
+
process.on("beforeExit", cleanup);
|
|
455
524
|
}
|
|
456
525
|
get isConnected() {
|
|
457
526
|
return this.client?.readyState === WebSocket.OPEN;
|
|
@@ -536,6 +605,7 @@ var WebSocketBridge = class {
|
|
|
536
605
|
this.client?.close();
|
|
537
606
|
this.wss?.close();
|
|
538
607
|
this.httpServer?.close();
|
|
608
|
+
removeBridgeFile();
|
|
539
609
|
}
|
|
540
610
|
};
|
|
541
611
|
|
|
@@ -710,7 +780,7 @@ import { z } from "zod";
|
|
|
710
780
|
import { execFile } from "child_process";
|
|
711
781
|
import { writeFile, unlink } from "fs/promises";
|
|
712
782
|
import { tmpdir } from "os";
|
|
713
|
-
import { join as
|
|
783
|
+
import { join as join3, dirname as dirname3 } from "path";
|
|
714
784
|
import { randomBytes } from "crypto";
|
|
715
785
|
import { fileURLToPath as fileURLToPath3 } from "url";
|
|
716
786
|
import { promisify } from "util";
|
|
@@ -1078,8 +1148,8 @@ function escapeString(s) {
|
|
|
1078
1148
|
}
|
|
1079
1149
|
|
|
1080
1150
|
// src/mcp-server/semantic-search.ts
|
|
1081
|
-
import { readFileSync } from "fs";
|
|
1082
|
-
import { join, dirname as dirname2 } from "path";
|
|
1151
|
+
import { readFileSync as readFileSync2 } from "fs";
|
|
1152
|
+
import { join as join2, dirname as dirname2 } from "path";
|
|
1083
1153
|
import { fileURLToPath as fileURLToPath2 } from "url";
|
|
1084
1154
|
function cosineSimilarity(a, b) {
|
|
1085
1155
|
if (a.length !== b.length || a.length === 0) return 0;
|
|
@@ -1099,8 +1169,8 @@ function loadEmbeddings() {
|
|
|
1099
1169
|
const map = /* @__PURE__ */ new Map();
|
|
1100
1170
|
try {
|
|
1101
1171
|
const thisDir = dirname2(fileURLToPath2(import.meta.url));
|
|
1102
|
-
const assetPath = join(thisDir, "tool-embeddings.json");
|
|
1103
|
-
const raw = readFileSync(assetPath, "utf-8");
|
|
1172
|
+
const assetPath = join2(thisDir, "tool-embeddings.json");
|
|
1173
|
+
const raw = readFileSync2(assetPath, "utf-8");
|
|
1104
1174
|
const asset = JSON.parse(raw);
|
|
1105
1175
|
if (!asset.embeddings || typeof asset.embeddings !== "object") return map;
|
|
1106
1176
|
for (const [name, vec] of Object.entries(asset.embeddings)) {
|
|
@@ -1429,7 +1499,7 @@ function createTools(bridge2, crawlio2) {
|
|
|
1429
1499
|
// --- AI orchestration tools ---
|
|
1430
1500
|
{
|
|
1431
1501
|
name: "connect_tab",
|
|
1432
|
-
description: "
|
|
1502
|
+
description: "Pin a specific browser tab for all subsequent commands. Optional \u2014 without this, tools auto-connect to the active tab. Three modes: (1) provide a URL to find or create a tab, (2) provide a tabId to connect to a specific tab, (3) no args to pin the active tab. Starts CDP capture automatically.",
|
|
1433
1503
|
inputSchema: {
|
|
1434
1504
|
type: "object",
|
|
1435
1505
|
properties: {
|
|
@@ -3604,9 +3674,9 @@ function createTools(bridge2, crawlio2) {
|
|
|
3604
3674
|
if (!data?.data) {
|
|
3605
3675
|
return toolError("Screenshot capture failed \u2014 no image data returned");
|
|
3606
3676
|
}
|
|
3607
|
-
tmpPath =
|
|
3677
|
+
tmpPath = join3(tmpdir(), `crawlio-ocr-${randomBytes(6).toString("hex")}.png`);
|
|
3608
3678
|
await writeFile(tmpPath, Buffer.from(data.data, "base64"));
|
|
3609
|
-
const shimPath =
|
|
3679
|
+
const shimPath = join3(dirname3(fileURLToPath3(import.meta.url)), "..", "..", "bin", "ocr-shim.swift");
|
|
3610
3680
|
const { stdout, stderr } = await execFileAsync("swift", [shimPath, tmpPath], { timeout: 25e3 });
|
|
3611
3681
|
if (!stdout.trim()) {
|
|
3612
3682
|
return toolError(`OCR produced no output${stderr ? `: ${stderr.trim()}` : ""}`);
|
|
@@ -4042,7 +4112,7 @@ function createCodeModeTools(bridge2, crawlio2) {
|
|
|
4042
4112
|
process.title = "Crawlio Agent";
|
|
4043
4113
|
var initMode = process.argv.includes("init") || process.argv.includes("--setup") || process.argv.includes("setup");
|
|
4044
4114
|
if (initMode) {
|
|
4045
|
-
const { runInit } = await import("./init-
|
|
4115
|
+
const { runInit } = await import("./init-CG5XREFD.js");
|
|
4046
4116
|
await runInit(process.argv.slice(2));
|
|
4047
4117
|
process.exit(0);
|
|
4048
4118
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import {
|
|
2
2
|
PKG_VERSION
|
|
3
|
-
} from "./chunk-
|
|
3
|
+
} from "./chunk-GJH6PLST.js";
|
|
4
4
|
|
|
5
5
|
// src/mcp-server/init.ts
|
|
6
6
|
import { execFileSync, spawn } from "child_process";
|
|
@@ -830,21 +830,11 @@ async function configureMetaMcp(found, options) {
|
|
|
830
830
|
}
|
|
831
831
|
const conflicts = findConflictingConfigs();
|
|
832
832
|
if (conflicts.length > 0) {
|
|
833
|
-
console.log(` ${
|
|
833
|
+
console.log(` ${dim("i")} crawlio-browser also configured in:`);
|
|
834
834
|
for (const c of conflicts) {
|
|
835
835
|
console.log(` ${dim("\u2192")} ${c}`);
|
|
836
836
|
}
|
|
837
|
-
console.log(` ${
|
|
838
|
-
if (!options.yes) {
|
|
839
|
-
const proceed = await confirm("Add anyway? (not recommended)", false);
|
|
840
|
-
if (!proceed) {
|
|
841
|
-
console.log(` ${dim("Skipped \u2014 using existing configuration")}`);
|
|
842
|
-
return;
|
|
843
|
-
}
|
|
844
|
-
} else {
|
|
845
|
-
console.log(` ${dim("Skipped \u2014 existing configuration takes priority")}`);
|
|
846
|
-
return;
|
|
847
|
-
}
|
|
837
|
+
console.log(` ${dim(" Multi-instance supported \u2014 each server gets its own port (9333-9342)")}`);
|
|
848
838
|
}
|
|
849
839
|
if (!options.yes) {
|
|
850
840
|
const proceed = await confirm("Add crawlio-browser to this config?");
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "crawlio-browser",
|
|
3
|
-
"version": "1.4.7",
|
|
3
|
+
"version": "1.5.0",
|
|
4
4
|
"description": "MCP server with 96 CDP-backed tools for browser automation — screenshots, DOM, network capture, framework detection, cookies, storage, session recording, performance metrics via Chrome",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/mcp-server/index.js",
|