crawlio-browser 1.4.8 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
// src/shared/constants.ts
|
|
2
2
|
import { homedir } from "os";
|
|
3
3
|
import { join } from "path";
|
|
4
|
-
var PKG_VERSION = "1.
|
|
4
|
+
var PKG_VERSION = "1.5.0";
|
|
5
5
|
var WS_PORT = 9333;
|
|
6
|
+
var WS_PORT_MAX = 9342;
|
|
6
7
|
var WS_HOST = "127.0.0.1";
|
|
8
|
+
var BRIDGE_DIR = join(homedir(), ".crawlio", "bridges");
|
|
7
9
|
var CRAWLIO_PORT_FILE = join(
|
|
8
10
|
homedir(),
|
|
9
11
|
"Library",
|
|
@@ -30,7 +32,9 @@ var WS_RECONNECT_GRACE = 5e3;
|
|
|
30
32
|
export {
|
|
31
33
|
PKG_VERSION,
|
|
32
34
|
WS_PORT,
|
|
35
|
+
WS_PORT_MAX,
|
|
33
36
|
WS_HOST,
|
|
37
|
+
BRIDGE_DIR,
|
|
34
38
|
CRAWLIO_PORT_FILE,
|
|
35
39
|
TIMEOUTS,
|
|
36
40
|
WS_HEARTBEAT_INTERVAL,
|
package/dist/mcp-server/index.js
CHANGED
|
@@ -1,13 +1,15 @@
|
|
|
1
1
|
import {
|
|
2
|
+
BRIDGE_DIR,
|
|
2
3
|
CRAWLIO_PORT_FILE,
|
|
3
4
|
PKG_VERSION,
|
|
4
5
|
TIMEOUTS,
|
|
5
6
|
WS_HEARTBEAT_INTERVAL,
|
|
6
7
|
WS_HOST,
|
|
7
8
|
WS_PORT,
|
|
9
|
+
WS_PORT_MAX,
|
|
8
10
|
WS_RECONNECT_GRACE,
|
|
9
11
|
WS_STALE_THRESHOLD
|
|
10
|
-
} from "./chunk-
|
|
12
|
+
} from "./chunk-GJH6PLST.js";
|
|
11
13
|
|
|
12
14
|
// src/mcp-server/index.ts
|
|
13
15
|
import { randomBytes as randomBytes2 } from "crypto";
|
|
@@ -25,7 +27,8 @@ import { WebSocketServer, WebSocket } from "ws";
|
|
|
25
27
|
import { createServer } from "http";
|
|
26
28
|
import { randomUUID } from "crypto";
|
|
27
29
|
import { fileURLToPath } from "url";
|
|
28
|
-
import { dirname, resolve } from "path";
|
|
30
|
+
import { dirname, join, resolve } from "path";
|
|
31
|
+
import { mkdirSync, writeFileSync, renameSync, unlinkSync, readdirSync, readFileSync } from "fs";
|
|
29
32
|
function resolveIndexPath() {
|
|
30
33
|
if (process.argv[1] && process.argv[1].includes("dist")) {
|
|
31
34
|
return process.argv[1];
|
|
@@ -219,6 +222,66 @@ var MessageQueue = class {
|
|
|
219
222
|
};
|
|
220
223
|
var HEARTBEAT_INTERVAL = WS_HEARTBEAT_INTERVAL;
|
|
221
224
|
var STALE_THRESHOLD = WS_STALE_THRESHOLD;
|
|
225
|
+
/**
 * Finds the first usable bridge port in the range `start`..`WS_PORT_MAX`.
 *
 * Each candidate port is probed with a GET to its `/health` endpoint:
 *  - the connection fails            -> nothing is listening, the port is returned;
 *  - the probe identifies this very
 *    process (`service` + `pid`)     -> the port is already ours, it is returned;
 *  - anything else answers (another
 *    crawlio-mcp, a foreign service,
 *    or a listener that times out)   -> the port is occupied, the next one is tried.
 *
 * @param {number} start First port to probe (inclusive).
 * @returns {Promise<number>} The selected port.
 * @throws {Error} When every port in the range is occupied.
 */
async function findAvailablePort(start) {
  for (let port = start; port <= WS_PORT_MAX; port++) {
    let body;
    try {
      const res = await fetch(`http://${WS_HOST}:${port}/health`, {
        signal: AbortSignal.timeout(300)
      });
      try {
        body = await res.json();
      } catch {
        // Something answered HTTP but not with JSON (or stalled mid-body):
        // a foreign service owns this port, so it is NOT free.
        continue;
      }
    } catch (err) {
      // A timeout means a listener accepted the connection but never replied,
      // so the port is taken. Any other failure (e.g. connection refused)
      // means nothing is listening there.
      if (err?.name === "TimeoutError") continue;
      return port;
    }
    // `body` may legitimately be `null` (valid JSON), hence the optional chaining.
    if (body?.service === "crawlio-mcp" && body.pid === process.pid) {
      return port;
    }
  }
  // Report the range that was actually scanned, which starts at `start`.
  throw new Error(`All ports ${start}-${WS_PORT_MAX} in use`);
}
|
|
241
|
+
/**
 * Removes bridge files in `BRIDGE_DIR` that no longer describe a live process.
 *
 * A `*.json` file is deleted when it cannot be read or parsed, or when it
 * names a pid (other than this process) that is no longer running. Files
 * without a pid, files for this process, and non-`.json` entries are left
 * alone. Best-effort: this function never throws.
 */
function cleanStaleBridgeFiles() {
  let files;
  try {
    files = readdirSync(BRIDGE_DIR);
  } catch {
    // Directory missing or unreadable: nothing to clean.
    return;
  }
  for (const file of files) {
    if (!file.endsWith(".json")) continue;
    const filePath = join(BRIDGE_DIR, file);
    let stale = false;
    try {
      const data = JSON.parse(readFileSync(filePath, "utf-8"));
      if (data.pid && data.pid !== process.pid) {
        try {
          // Signal 0 performs the existence/permission check without
          // delivering a signal.
          process.kill(data.pid, 0);
        } catch (err) {
          // EPERM means the process exists but belongs to someone else:
          // it is alive, so its bridge file must be kept. Everything else
          // (ESRCH, invalid pid value) marks the file as stale.
          stale = err?.code !== "EPERM";
        }
      }
    } catch {
      // Unreadable or unparsable bridge file.
      stale = true;
    }
    if (!stale) continue;
    try {
      unlinkSync(filePath);
    } catch {
      // Best-effort: another instance may have removed it already.
    }
  }
}
|
|
268
|
+
/**
 * Publishes this process's bridge file (`<pid>.json`) in `BRIDGE_DIR`,
 * creating the directory if needed.
 *
 * The payload is written to a sibling temp file and then renamed into place,
 * so the final path never holds an empty or partial document. This matters
 * because `cleanStaleBridgeFiles` in a concurrently starting instance deletes
 * any `.json` file it fails to parse. The temp name does not end in `.json`,
 * so that cleaner ignores it.
 *
 * @param {number} port Port the bridge is listening on.
 * @returns {string} Absolute path of the bridge file.
 * @throws Propagates any filesystem error from creating the directory,
 *   writing, or renaming.
 */
function writeBridgeFile(port) {
  mkdirSync(BRIDGE_DIR, { recursive: true });
  const bridgeFile = join(BRIDGE_DIR, `${process.pid}.json`);
  const tmpFile = `${bridgeFile}.tmp`;
  const payload = JSON.stringify({
    port,
    pid: process.pid,
    cwd: process.cwd(),
    startedAt: Date.now()
  });
  try {
    writeFileSync(tmpFile, payload);
    renameSync(tmpFile, bridgeFile);
  } catch (err) {
    // Do not leave a half-written temp file behind; surface the original error.
    try {
      unlinkSync(tmpFile);
    } catch {
      // Temp file may never have been created.
    }
    throw err;
  }
  return bridgeFile;
}
|
|
279
|
+
/**
 * Deletes this process's bridge file (`<pid>.json`) from `BRIDGE_DIR`.
 *
 * Safe to call repeatedly: a file that is already gone is the expected
 * outcome on repeat calls and is ignored. Any other failure is logged to
 * stderr instead of being silently dropped. Never throws.
 */
function removeBridgeFile() {
  try {
    unlinkSync(join(BRIDGE_DIR, `${process.pid}.json`));
  } catch (err) {
    if (err?.code !== "ENOENT") {
      console.error("[Bridge] Failed to remove bridge file:", err?.message ?? err);
    }
  }
}
|
|
222
285
|
var WebSocketBridge = class {
|
|
223
286
|
wss = null;
|
|
224
287
|
httpServer = null;
|
|
@@ -233,8 +296,12 @@ var WebSocketBridge = class {
|
|
|
233
296
|
connectTime = 0;
|
|
234
297
|
reconnectCount = -1;
|
|
235
298
|
// first connection is not a "reconnect"
|
|
299
|
+
actualPort = WS_PORT;
|
|
236
300
|
onClientConnected;
|
|
237
301
|
onPortRefreshRequested;
|
|
302
|
+
get port() {
|
|
303
|
+
return this.actualPort;
|
|
304
|
+
}
|
|
238
305
|
startHeartbeat() {
|
|
239
306
|
this.stopHeartbeat();
|
|
240
307
|
this.heartbeatTimer = setInterval(() => {
|
|
@@ -283,7 +350,7 @@ var WebSocketBridge = class {
|
|
|
283
350
|
"Cache-Control": "no-store",
|
|
284
351
|
"Access-Control-Allow-Origin": "*"
|
|
285
352
|
});
|
|
286
|
-
res.end(JSON.stringify({ service: "crawlio-mcp", ...this.getHealth() }));
|
|
353
|
+
res.end(JSON.stringify({ service: "crawlio-mcp", pid: process.pid, port: this.actualPort, ...this.getHealth() }));
|
|
287
354
|
return;
|
|
288
355
|
}
|
|
289
356
|
if (req.method === "OPTIONS") {
|
|
@@ -424,48 +491,36 @@ var WebSocketBridge = class {
|
|
|
424
491
|
console.error("[Bridge] WebSocket handshake error:", err.message);
|
|
425
492
|
socket.destroy();
|
|
426
493
|
});
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
const { execFileSync } = await import("child_process");
|
|
432
|
-
const pid = execFileSync("lsof", ["-ti", `:${WS_PORT}`, "-sTCP:LISTEN"]).toString().trim().split("\n")[0];
|
|
433
|
-
if (pid && Number(pid) !== process.pid) {
|
|
434
|
-
console.error(`[Bridge] Killing stale crawlio-mcp on port ${WS_PORT} (PID ${pid})`);
|
|
435
|
-
process.kill(Number(pid), "SIGTERM");
|
|
436
|
-
for (let i = 0; i < 8; i++) {
|
|
437
|
-
await new Promise((r) => setTimeout(r, 250));
|
|
438
|
-
try {
|
|
439
|
-
execFileSync("lsof", ["-ti", `:${WS_PORT}`, "-sTCP:LISTEN"]);
|
|
440
|
-
} catch {
|
|
441
|
-
break;
|
|
442
|
-
}
|
|
443
|
-
}
|
|
444
|
-
}
|
|
445
|
-
}
|
|
446
|
-
} catch {
|
|
447
|
-
}
|
|
494
|
+
cleanStaleBridgeFiles();
|
|
495
|
+
const requestedPort = parseInt(process.env.CRAWLIO_PORT || "", 10);
|
|
496
|
+
const startPort = requestedPort >= WS_PORT && requestedPort <= WS_PORT_MAX ? requestedPort : WS_PORT;
|
|
497
|
+
this.actualPort = await findAvailablePort(startPort);
|
|
448
498
|
await new Promise((resolve2, reject) => {
|
|
449
499
|
let attempts = 0;
|
|
450
500
|
const tryListen = () => {
|
|
451
501
|
const onError = (err) => {
|
|
452
502
|
if (err.code === "EADDRINUSE" && attempts < 3) {
|
|
453
503
|
attempts++;
|
|
454
|
-
console.error(`[Bridge] Port ${
|
|
504
|
+
console.error(`[Bridge] Port ${this.actualPort} still in use, retry ${attempts}/3...`);
|
|
455
505
|
setTimeout(tryListen, 1e3 * attempts);
|
|
456
506
|
} else {
|
|
457
507
|
reject(err);
|
|
458
508
|
}
|
|
459
509
|
};
|
|
460
510
|
this.httpServer.once("error", onError);
|
|
461
|
-
this.httpServer.listen(
|
|
511
|
+
this.httpServer.listen(this.actualPort, WS_HOST, () => {
|
|
462
512
|
this.httpServer.removeListener("error", onError);
|
|
463
|
-
console.error(`[Bridge] WebSocket server listening on ws://${WS_HOST}:${
|
|
513
|
+
console.error(`[Bridge] WebSocket server listening on ws://${WS_HOST}:${this.actualPort}`);
|
|
464
514
|
resolve2();
|
|
465
515
|
});
|
|
466
516
|
};
|
|
467
517
|
tryListen();
|
|
468
518
|
});
|
|
519
|
+
writeBridgeFile(this.actualPort);
|
|
520
|
+
const cleanup = () => removeBridgeFile();
|
|
521
|
+
process.on("SIGTERM", cleanup);
|
|
522
|
+
process.on("SIGINT", cleanup);
|
|
523
|
+
process.on("beforeExit", cleanup);
|
|
469
524
|
}
|
|
470
525
|
get isConnected() {
|
|
471
526
|
return this.client?.readyState === WebSocket.OPEN;
|
|
@@ -550,6 +605,7 @@ var WebSocketBridge = class {
|
|
|
550
605
|
this.client?.close();
|
|
551
606
|
this.wss?.close();
|
|
552
607
|
this.httpServer?.close();
|
|
608
|
+
removeBridgeFile();
|
|
553
609
|
}
|
|
554
610
|
};
|
|
555
611
|
|
|
@@ -724,7 +780,7 @@ import { z } from "zod";
|
|
|
724
780
|
import { execFile } from "child_process";
|
|
725
781
|
import { writeFile, unlink } from "fs/promises";
|
|
726
782
|
import { tmpdir } from "os";
|
|
727
|
-
import { join as
|
|
783
|
+
import { join as join3, dirname as dirname3 } from "path";
|
|
728
784
|
import { randomBytes } from "crypto";
|
|
729
785
|
import { fileURLToPath as fileURLToPath3 } from "url";
|
|
730
786
|
import { promisify } from "util";
|
|
@@ -1092,8 +1148,8 @@ function escapeString(s) {
|
|
|
1092
1148
|
}
|
|
1093
1149
|
|
|
1094
1150
|
// src/mcp-server/semantic-search.ts
|
|
1095
|
-
import { readFileSync } from "fs";
|
|
1096
|
-
import { join, dirname as dirname2 } from "path";
|
|
1151
|
+
import { readFileSync as readFileSync2 } from "fs";
|
|
1152
|
+
import { join as join2, dirname as dirname2 } from "path";
|
|
1097
1153
|
import { fileURLToPath as fileURLToPath2 } from "url";
|
|
1098
1154
|
function cosineSimilarity(a, b) {
|
|
1099
1155
|
if (a.length !== b.length || a.length === 0) return 0;
|
|
@@ -1113,8 +1169,8 @@ function loadEmbeddings() {
|
|
|
1113
1169
|
const map = /* @__PURE__ */ new Map();
|
|
1114
1170
|
try {
|
|
1115
1171
|
const thisDir = dirname2(fileURLToPath2(import.meta.url));
|
|
1116
|
-
const assetPath =
|
|
1117
|
-
const raw =
|
|
1172
|
+
const assetPath = join2(thisDir, "tool-embeddings.json");
|
|
1173
|
+
const raw = readFileSync2(assetPath, "utf-8");
|
|
1118
1174
|
const asset = JSON.parse(raw);
|
|
1119
1175
|
if (!asset.embeddings || typeof asset.embeddings !== "object") return map;
|
|
1120
1176
|
for (const [name, vec] of Object.entries(asset.embeddings)) {
|
|
@@ -3618,9 +3674,9 @@ function createTools(bridge2, crawlio2) {
|
|
|
3618
3674
|
if (!data?.data) {
|
|
3619
3675
|
return toolError("Screenshot capture failed \u2014 no image data returned");
|
|
3620
3676
|
}
|
|
3621
|
-
tmpPath =
|
|
3677
|
+
tmpPath = join3(tmpdir(), `crawlio-ocr-${randomBytes(6).toString("hex")}.png`);
|
|
3622
3678
|
await writeFile(tmpPath, Buffer.from(data.data, "base64"));
|
|
3623
|
-
const shimPath =
|
|
3679
|
+
const shimPath = join3(dirname3(fileURLToPath3(import.meta.url)), "..", "..", "bin", "ocr-shim.swift");
|
|
3624
3680
|
const { stdout, stderr } = await execFileAsync("swift", [shimPath, tmpPath], { timeout: 25e3 });
|
|
3625
3681
|
if (!stdout.trim()) {
|
|
3626
3682
|
return toolError(`OCR produced no output${stderr ? `: ${stderr.trim()}` : ""}`);
|
|
@@ -4056,7 +4112,7 @@ function createCodeModeTools(bridge2, crawlio2) {
|
|
|
4056
4112
|
process.title = "Crawlio Agent";
|
|
4057
4113
|
var initMode = process.argv.includes("init") || process.argv.includes("--setup") || process.argv.includes("setup");
|
|
4058
4114
|
if (initMode) {
|
|
4059
|
-
const { runInit } = await import("./init-
|
|
4115
|
+
const { runInit } = await import("./init-CG5XREFD.js");
|
|
4060
4116
|
await runInit(process.argv.slice(2));
|
|
4061
4117
|
process.exit(0);
|
|
4062
4118
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import {
|
|
2
2
|
PKG_VERSION
|
|
3
|
-
} from "./chunk-
|
|
3
|
+
} from "./chunk-GJH6PLST.js";
|
|
4
4
|
|
|
5
5
|
// src/mcp-server/init.ts
|
|
6
6
|
import { execFileSync, spawn } from "child_process";
|
|
@@ -830,21 +830,11 @@ async function configureMetaMcp(found, options) {
|
|
|
830
830
|
}
|
|
831
831
|
const conflicts = findConflictingConfigs();
|
|
832
832
|
if (conflicts.length > 0) {
|
|
833
|
-
console.log(` ${
|
|
833
|
+
console.log(` ${dim("i")} crawlio-browser also configured in:`);
|
|
834
834
|
for (const c of conflicts) {
|
|
835
835
|
console.log(` ${dim("\u2192")} ${c}`);
|
|
836
836
|
}
|
|
837
|
-
console.log(` ${
|
|
838
|
-
if (!options.yes) {
|
|
839
|
-
const proceed = await confirm("Add anyway? (not recommended)", false);
|
|
840
|
-
if (!proceed) {
|
|
841
|
-
console.log(` ${dim("Skipped \u2014 using existing configuration")}`);
|
|
842
|
-
return;
|
|
843
|
-
}
|
|
844
|
-
} else {
|
|
845
|
-
console.log(` ${dim("Skipped \u2014 existing configuration takes priority")}`);
|
|
846
|
-
return;
|
|
847
|
-
}
|
|
837
|
+
console.log(` ${dim(" Multi-instance supported \u2014 each server gets its own port (9333-9342)")}`);
|
|
848
838
|
}
|
|
849
839
|
if (!options.yes) {
|
|
850
840
|
const proceed = await confirm("Add crawlio-browser to this config?");
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "crawlio-browser",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.5.0",
|
|
4
4
|
"description": "MCP server with 96 CDP-backed tools for browser automation — screenshots, DOM, network capture, framework detection, cookies, storage, session recording, performance metrics via Chrome",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/mcp-server/index.js",
|