peeky-search 1.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +256 -0
- package/dist/chunk-F3PNR32Z.js +227 -0
- package/dist/chunk-S3WZDJCP.js +2716 -0
- package/dist/cli.js +357 -0
- package/dist/docker-IWGZDSIP.js +20 -0
- package/dist/index.js +179 -0
- package/dist/mcp/server.js +67 -0
- package/package.json +39 -0
package/dist/cli.js
ADDED
|
@@ -0,0 +1,357 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import {
|
|
3
|
+
createConfigFiles,
|
|
4
|
+
getConfigDir,
|
|
5
|
+
getMcpConfigJson,
|
|
6
|
+
readConfig,
|
|
7
|
+
startContainer,
|
|
8
|
+
stopContainer,
|
|
9
|
+
waitForReady
|
|
10
|
+
} from "./chunk-F3PNR32Z.js";
|
|
11
|
+
|
|
12
|
+
// src/setup/checks.ts
|
|
13
|
+
import { exec } from "child_process";
|
|
14
|
+
import { promisify } from "util";
|
|
15
|
+
import * as net from "net";
|
|
16
|
+
var execAsync = promisify(exec);
|
|
17
|
+
/**
 * Checks that the Docker CLI can be invoked by running `docker --version`.
 *
 * @returns {Promise<{success: boolean, message: string}>} A check result;
 *   `success` is false when the command fails for any reason.
 */
async function checkDockerInstalled() {
  // Any failure of the command (missing binary, non-zero exit) counts as "not installed".
  const found = await execAsync("docker --version").then(
    () => true,
    () => false
  );
  if (found) {
    return { success: true, message: "Docker is installed" };
  }
  return {
    success: false,
    message: "Docker not found. Install from https://docker.com"
  };
}
|
|
28
|
+
/**
 * Checks that the Docker daemon is reachable by running `docker info`.
 *
 * @returns {Promise<{success: boolean, message: string}>} A check result;
 *   `success` is false when the command fails for any reason.
 */
async function checkDockerRunning() {
  const reachable = await execAsync("docker info").then(
    () => true,
    () => false
  );
  if (reachable) {
    return { success: true, message: "Docker is running" };
  }
  return {
    success: false,
    message: "Docker is not running. Please start Docker Desktop."
  };
}
|
|
39
|
+
/**
 * Probes whether a TCP port can be bound on 127.0.0.1 by briefly listening on it.
 *
 * The returned promise always resolves (never rejects): every failure mode is
 * reported as `{ success: false, message }`.
 *
 * @param {number} port - Port number to probe.
 * @returns {Promise<{success: boolean, message: string}>} Check result.
 */
async function checkPortAvailable(port) {
  return new Promise((resolve) => {
    const server = net.createServer();

    server.once("error", (err) => {
      const message =
        err.code === "EADDRINUSE"
          ? `Port ${port} is in use. Use --port to specify another.`
          : `Port check failed: ${err.message}`;
      resolve({ success: false, message });
    });

    server.once("listening", () => {
      // Release the port again before reporting it as available.
      server.close(() => {
        resolve({ success: true, message: `Port ${port} is available` });
      });
    });

    try {
      server.listen(port, "127.0.0.1");
    } catch (err) {
      // listen() validates the port synchronously and throws (e.g. for NaN or
      // values outside 0-65535) instead of emitting "error"; report that as a
      // failed check rather than letting the promise reject.
      const reason = err instanceof Error ? err.message : String(err);
      resolve({ success: false, message: `Port check failed: ${reason}` });
    }
  });
}
|
|
63
|
+
/**
 * Runs the prerequisite checks in order: Docker installed, Docker running,
 * then port availability. Stops at the first failing Docker check, so later
 * checks are not attempted and do not appear in `results`.
 *
 * @param {number} port - Port handed to the port-availability check.
 * @returns {Promise<{allPassed: boolean, results: Array<{success: boolean, message: string}>}>}
 */
async function runAllChecks(port) {
  const results = [];

  // The Docker checks are gates: a failure ends the run immediately.
  for (const gate of [checkDockerInstalled, checkDockerRunning]) {
    const outcome = await gate();
    results.push(outcome);
    if (!outcome.success) {
      return { allPassed: false, results };
    }
  }

  results.push(await checkPortAvailable(port));
  const allPassed = results.every((entry) => entry.success);
  return { allPassed, results };
}
|
|
82
|
+
|
|
83
|
+
// src/setup/index.ts
|
|
84
|
+
var DEFAULT_PORT = 8888;
|
|
85
|
+
/**
 * Prints one status line prefixed with a check mark (success) or a cross (failure).
 *
 * @param {boolean} success - Selects the prefix symbol.
 * @param {string} message - Text printed after the symbol.
 */
function printStep(success, message) {
  let symbol = "\u2717";
  if (success) {
    symbol = "\u2713";
  }
  console.log(` ${symbol} ${message}`);
}
|
|
89
|
+
/**
 * Prints a section heading preceded by a blank line.
 *
 * @param {string} title - Heading text.
 */
function printSection(title) {
  const heading = `\n${title}`;
  console.log(heading);
}
|
|
93
|
+
/**
 * Interactive setup flow: checks prerequisites, writes the config files,
 * starts the container, waits for SearXNG to respond, then prints the MCP
 * client config and a command summary.
 *
 * @param {{port?: number, checkOnly?: boolean}} [options]
 *   `port` falls back to DEFAULT_PORT when null/undefined; `checkOnly` stops
 *   after the prerequisite checks.
 * @returns {Promise<boolean>} true when setup (or the check-only run)
 *   succeeded, false on the first failing step.
 */
async function runSetup(options = {}) {
  const port = options.port ?? DEFAULT_PORT;
  const printRule = () => console.log(" " + "-".repeat(56));

  console.log("\npeeky-search setup");
  console.log("=".repeat(60));

  // Step 1: prerequisites
  printSection("Checking prerequisites...");
  const checks = await runAllChecks(port);
  for (const { success, message } of checks.results) {
    printStep(success, message);
  }
  if (!checks.allPassed) {
    console.log("\n Setup cannot continue. Please fix the issues above.\n");
    return false;
  }
  if (options.checkOnly) {
    console.log("\n All prerequisites passed!\n");
    return true;
  }

  // Step 2: config files
  printSection("Setting up SearXNG...");
  try {
    createConfigFiles(port);
    printStep(true, `Created ${getConfigDir()}/docker-compose.yml`);
    printStep(true, `Created ${getConfigDir()}/settings.yml (secret key generated)`);
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    printStep(false, `Failed to create config files: ${message}`);
    return false;
  }

  // Step 3: container start-up
  const started = await startContainer();
  if (!started.success) {
    printStep(false, started.message);
    return false;
  }
  printStep(true, "Started SearXNG container");

  // Step 4: readiness (60 000 ms budget, matching the message below)
  console.log(" Waiting for SearXNG to be ready...");
  const isReady = await waitForReady(port, 6e4);
  if (!isReady) {
    printStep(false, "SearXNG did not respond within 60 seconds");
    console.log(" Try running 'peeky-search status' to check the container.\n");
    return false;
  }
  printStep(true, `SearXNG is responding at http://localhost:${port}`);

  // Step 5: summary
  console.log("\n \u2705 Setup complete!");
  console.log("\n Add this to your MCP client config:");
  printRule();
  getMcpConfigJson(port)
    .split("\n")
    .forEach((line) => console.log(" " + line));
  printRule();

  const closingLines = [
    "\n Commands:",
    " peeky-search start Start the SearXNG container",
    " peeky-search stop Stop the SearXNG container",
    " peeky-search status Check if SearXNG is running",
    " peeky-search uninstall Remove container and config",
    "\n Requirements:",
    " Docker must be running for SearXNG to work.",
    " The container runs in the background and restarts automatically.",
    ""
  ];
  for (const text of closingLines) {
    console.log(text);
  }
  return true;
}
|
|
153
|
+
/**
 * Prints the installation status: config directory, port, install timestamp,
 * and whether the container and SearXNG are currently up.
 *
 * Prints a "not installed" hint and returns early when no config is found.
 *
 * @returns {Promise<void>}
 */
async function printStatus() {
  const config = readConfig();

  console.log("\npeeky-search status");
  console.log("=".repeat(60));

  if (!config) {
    console.log(" Not installed. Run 'peeky-search setup' first.\n");
    return;
  }

  console.log(` Config directory: ${getConfigDir()}`);
  console.log(` Port: ${config.port}`);
  console.log(` Installed: ${config.installedAt}`);

  // getStatus is loaded on demand from the docker chunk.
  const dockerModule = await import("./docker-IWGZDSIP.js");
  const status = await dockerModule.getStatus();
  console.log("");

  const containerWord = status.containerRunning ? "running" : "not running";
  printStep(status.containerRunning, `Container ${containerWord}`);

  const searxWord = status.searxngResponding ? "responding" : "not responding";
  printStep(
    status.searxngResponding,
    `SearXNG ${searxWord} at http://localhost:${config.port}`
  );

  const footer = status.containerRunning
    ? ""
    : "\n Run 'peeky-search start' to start the container.\n";
  console.log(footer);
}
|
|
178
|
+
|
|
179
|
+
// src/setup/uninstall.ts
|
|
180
|
+
import * as fs from "fs";
|
|
181
|
+
/**
 * Stops the container and deletes the config directory, recording the outcome
 * of each step. A missing config directory counts as success.
 *
 * @returns {Promise<{success: boolean, steps: Array<{step: string, success: boolean, message: string}>}>}
 *   `success` is true only when every recorded step succeeded.
 */
async function uninstall() {
  const configDir = getConfigDir();

  const { success: stopped, message: stopMessage } = await stopContainer();
  const steps = [{ step: "Stop container", success: stopped, message: stopMessage }];

  // Config removal is attempted regardless of whether the stop step succeeded.
  const removal = { step: "Remove config", success: true, message: "" };
  if (!fs.existsSync(configDir)) {
    removal.message = "Config directory not found (already removed)";
  } else {
    try {
      fs.rmSync(configDir, { recursive: true, force: true });
      removal.message = `Removed ${configDir}`;
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      removal.success = false;
      removal.message = `Failed to remove config: ${reason}`;
    }
  }
  steps.push(removal);

  return { success: steps.every((entry) => entry.success), steps };
}
|
|
218
|
+
|
|
219
|
+
// src/cli.ts
|
|
220
|
+
var HELP_TEXT = `
|
|
221
|
+
peeky-search - Web search tool for MCP (Model Context Protocol)
|
|
222
|
+
|
|
223
|
+
Searches the web via a local SearXNG instance, scrapes pages, and extracts
|
|
224
|
+
relevant excerpts using IR (information retrieval) techniques. Exposes a
|
|
225
|
+
"peeky_web_search" tool to any MCP-compatible client.
|
|
226
|
+
|
|
227
|
+
REQUIREMENTS:
|
|
228
|
+
Docker Required to run SearXNG (the search backend)
|
|
229
|
+
|
|
230
|
+
QUICK START:
|
|
231
|
+
npx peeky-search setup One-time setup (starts SearXNG in Docker)
|
|
232
|
+
Then add the MCP config to your client and restart it.
|
|
233
|
+
|
|
234
|
+
COMMANDS:
|
|
235
|
+
setup [options] Install and start SearXNG
|
|
236
|
+
--port <port> Use custom port (default: 8888)
|
|
237
|
+
--check Check prerequisites only, don't install
|
|
238
|
+
|
|
239
|
+
start Start the SearXNG container
|
|
240
|
+
stop Stop the SearXNG container
|
|
241
|
+
status Check if SearXNG is running
|
|
242
|
+
uninstall Stop container and remove all config
|
|
243
|
+
|
|
244
|
+
mcp Start the MCP server (called by MCP clients)
|
|
245
|
+
help, --help Show this help message
|
|
246
|
+
|
|
247
|
+
EXAMPLES:
|
|
248
|
+
npx peeky-search setup # First-time setup
|
|
249
|
+
npx peeky-search setup --port 9999 # Use a different port
|
|
250
|
+
npx peeky-search status # Check if running
|
|
251
|
+
npx peeky-search stop # Stop SearXNG
|
|
252
|
+
npx peeky-search start # Start SearXNG again
|
|
253
|
+
npx peeky-search uninstall # Remove everything
|
|
254
|
+
`;
|
|
255
|
+
/**
 * CLI entry point. Dispatches on the first command-line argument:
 * `setup`, `start`, `stop`, `status`, `uninstall`, `mcp`, or help. An
 * argument starting with "-" that is not a help flag is handed to the
 * extraction entry point; anything else is reported as an unknown command.
 *
 * Exits the process with status 1 on failures, and with 0/1 after `setup`.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // A bare "--" separator (e.g. inserted by package-manager script runners) is dropped.
  const args = process.argv.slice(2).filter((a) => a !== "--");
  const command = args[0];

  /**
   * Parses the flags that follow `setup`.
   * `--port` must be followed by an integer in 1-65535; otherwise an error is
   * printed and the process exits with status 1 instead of handing NaN (or a
   * silently ignored flag) to the setup flow.
   */
  function parseSetupOptions() {
    const options = {};
    for (let i = 1; i < args.length; i++) {
      const arg = args[i];
      if (arg === "--port") {
        const raw = args[i + 1];
        const port = raw !== void 0 && /^\d+$/.test(raw) ? Number(raw) : NaN;
        if (!Number.isInteger(port) || port < 1 || port > 65535) {
          console.error(
            `Invalid --port value: ${raw ?? "(missing)"}. Expected an integer between 1 and 65535.`
          );
          process.exit(1);
        }
        options.port = port;
        i++;
      } else if (arg === "--check") {
        options.checkOnly = true;
      }
    }
    return options;
  }

  switch (command) {
    case "setup": {
      const options = parseSetupOptions();
      const success = await runSetup(options);
      process.exit(success ? 0 : 1);
      break;
    }
    case "start": {
      console.log("\nStarting SearXNG container...");
      const result = await startContainer();
      if (result.success) {
        console.log(" \u2713 " + result.message);
        console.log("\nRun 'peeky-search status' to verify.\n");
      } else {
        console.log(" \u2717 " + result.message + "\n");
        process.exit(1);
      }
      break;
    }
    case "stop": {
      console.log("\nStopping SearXNG container...");
      const result = await stopContainer();
      if (result.success) {
        console.log(" \u2713 " + result.message + "\n");
      } else {
        console.log(" \u2717 " + result.message + "\n");
        process.exit(1);
      }
      break;
    }
    case "status": {
      await printStatus();
      break;
    }
    case "uninstall": {
      console.log("\npeeky-search uninstall");
      console.log("=".repeat(60));
      const result = await uninstall();
      console.log("");
      for (const step of result.steps) {
        const symbol = step.success ? "\u2713" : "\u2717";
        console.log(` ${symbol} ${step.message}`);
      }
      if (result.success) {
        console.log("\n \u2705 Uninstall complete!");
        console.log(
          "\n Note: Remember to remove peeky-search from your MCP client config.\n"
        );
      } else {
        console.log("\n Some steps failed. Check the errors above.\n");
        process.exit(1);
      }
      break;
    }
    case "mcp": {
      // Importing the module starts the MCP server as a side effect.
      await import("./mcp/server.js");
      break;
    }
    case "--help":
    case "-h":
    case "help":
    case void 0: {
      // No command at all behaves like an explicit help request.
      console.log(HELP_TEXT);
      break;
    }
    default: {
      // Any other dash-prefixed first argument is treated as extraction-mode flags.
      if (command.startsWith("-")) {
        await runExtraction(args);
      } else {
        console.log(`Unknown command: ${command}`);
        console.log("Run 'peeky-search --help' for usage.\n");
        process.exit(1);
      }
    }
  }
}
|
|
351
|
+
/**
 * Loads the extraction entry module, which runs as a side effect of being imported.
 *
 * @param {string[]} _args - Unused by this function.
 * @returns {Promise<void>}
 */
async function runExtraction(_args) {
  const entryModule = "./index.js";
  await import(entryModule);
}
|
|
354
|
+
// Last-resort handler: report any failure that escaped main() and exit non-zero.
main().catch((failure) => {
  const report = `Unexpected error: ${failure}`;
  console.error(report);
  process.exit(1);
});
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import {
|
|
2
|
+
createConfigFiles,
|
|
3
|
+
getConfigDir,
|
|
4
|
+
getStatus,
|
|
5
|
+
readConfig,
|
|
6
|
+
startContainer,
|
|
7
|
+
stopContainer,
|
|
8
|
+
streamLogs,
|
|
9
|
+
waitForReady
|
|
10
|
+
} from "./chunk-F3PNR32Z.js";
|
|
11
|
+
export {
|
|
12
|
+
createConfigFiles,
|
|
13
|
+
getConfigDir,
|
|
14
|
+
getStatus,
|
|
15
|
+
readConfig,
|
|
16
|
+
startContainer,
|
|
17
|
+
stopContainer,
|
|
18
|
+
streamLogs,
|
|
19
|
+
waitForReady
|
|
20
|
+
};
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
import {
|
|
2
|
+
extractExcerpts,
|
|
3
|
+
formatExcerpts,
|
|
4
|
+
logger_default,
|
|
5
|
+
search,
|
|
6
|
+
tokenize
|
|
7
|
+
} from "./chunk-S3WZDJCP.js";
|
|
8
|
+
|
|
9
|
+
// src/index.ts
|
|
10
|
+
import * as fs from "fs";
|
|
11
|
+
import * as path from "path";
|
|
12
|
+
import { fileURLToPath } from "url";
|
|
13
|
+
var __filename = fileURLToPath(import.meta.url);
|
|
14
|
+
var __dirname = path.dirname(__filename);
|
|
15
|
+
var logger = logger_default.getInstance();
|
|
16
|
+
/**
 * Entry point of the extraction tool. Parses command-line flags and runs one
 * of three modes: `--search` (query SearXNG), `--url` (fetch a single page),
 * or file mode (the default).
 *
 * Flags that take a value are ignored when the value is missing. `--max` must
 * be a positive integer; otherwise an error is logged and the process exits
 * with status 1 instead of passing NaN or a non-positive count to the search.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = process.argv.slice(2).filter((a) => a !== "--");
  let query = "";
  let filePath = "";
  let url = "";
  let searchMode = false;
  let debug = false;
  let timing = false;
  let maxResults = 5;

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    const nextArg = args[i + 1];
    const hasValue = nextArg !== void 0;

    if ((arg === "--query" || arg === "-q") && hasValue) {
      query = nextArg;
      i++;
    } else if ((arg === "--file" || arg === "-f") && hasValue) {
      filePath = nextArg;
      i++;
    } else if ((arg === "--url" || arg === "-u") && hasValue) {
      url = nextArg;
      i++;
    } else if (arg === "--search" || arg === "-s") {
      searchMode = true;
    } else if (arg === "--max" && hasValue) {
      const parsed = Number.parseInt(nextArg, 10);
      if (!Number.isInteger(parsed) || parsed < 1) {
        logger.error(`--max expects a positive integer, got "${nextArg}"`);
        process.exit(1);
      }
      maxResults = parsed;
      i++;
    } else if (arg === "--debug") {
      debug = true;
    } else if (arg === "--timing" || arg === "-t") {
      timing = true;
    } else if (arg === "--help" || arg === "-h") {
      printUsage();
      return;
    }
  }

  if (timing) {
    logger.setTimingEnabled(true);
  }

  if (searchMode) {
    if (query === "") {
      logger.error("--search requires --query");
      process.exit(1);
    }
    logger.log(`Searching: "${query}" (max ${maxResults} results)`);
    const result = await search(query, { maxResults, debug });
    console.log("\n" + result);
    return;
  }

  if (url !== "") {
    if (query === "") {
      logger.error("--url requires --query");
      process.exit(1);
    }
    await processUrl(url, query, debug);
    return;
  }

  // Neither --search nor --url: fall back to file mode.
  await processFile(filePath, query, debug);
}
|
|
74
|
+
/**
 * Fetches a page over HTTP and runs excerpt extraction on its HTML.
 *
 * Network failures and non-OK HTTP statuses are logged and end the process
 * with status 1. Errors thrown by the extraction step are NOT caught here, so
 * they are no longer misreported as "Fetch failed"; they propagate to the caller.
 *
 * @param {string} url - Page to fetch.
 * @param {string} query - Query the excerpts are ranked against.
 * @param {boolean} debug - Forwarded to the extraction step.
 * @returns {Promise<void>}
 */
async function processUrl(url, query, debug) {
  logger.log(`Fetching: ${url}`);
  logger.log(`Query: "${query}"`);
  logger.log(`Query tokens: ${JSON.stringify(tokenize(query))}`);

  let html;
  try {
    const response = await fetch(url, {
      headers: {
        "User-Agent": "Mozilla/5.0 (compatible; PeekyBot/1.0)",
        "Accept": "text/html"
      }
    });
    if (!response.ok) {
      logger.error(`HTTP ${response.status}: ${response.statusText}`);
      process.exit(1);
    }
    html = await response.text();
  } catch (err) {
    // Only request/response-body failures reach this handler.
    logger.error(`Fetch failed: ${err}`);
    process.exit(1);
  }

  processHtml(html, query, debug);
}
|
|
96
|
+
/**
 * Reads a local HTML file and runs excerpt extraction on it.
 *
 * An empty `filePath` falls back to a bundled sample path resolved relative
 * to this module, and an empty `query` falls back to a built-in sample query.
 * A missing file is logged and ends the process with status 1.
 *
 * @param {string} filePath - HTML file to read ("" selects the sample path).
 * @param {string} query - Query text ("" selects the sample query).
 * @param {boolean} debug - Forwarded to the extraction step.
 * @returns {Promise<void>}
 */
async function processFile(filePath, query, debug) {
  const targetPath =
    filePath === ""
      ? path.join(__dirname, "../samples", "clerk-how-to-add-user-to-org-sample.html")
      : filePath;
  const effectiveQuery = query === "" ? "how to add user to organization" : query;

  if (!fs.existsSync(targetPath)) {
    logger.error(`File not found: ${targetPath}`);
    process.exit(1);
  }

  logger.log(`Reading file: ${targetPath}`);
  logger.log(`Query: "${effectiveQuery}"`);
  logger.log(`Query tokens: ${JSON.stringify(tokenize(effectiveQuery))}`);

  const contents = fs.readFileSync(targetPath, "utf8");
  processHtml(contents, effectiveQuery, debug);
}
|
|
113
|
+
/**
 * Extracts excerpts from an HTML string for a query and prints them, framed
 * by divider lines. When `debug` is set and the result carries debug data,
 * extraction statistics and the top-scoring sentences are logged as well.
 *
 * @param {string} html - HTML document text.
 * @param {string} query - Query the excerpts are ranked against.
 * @param {boolean} debug - Enables debug data in the extraction config and the debug dump.
 */
function processHtml(html, query, debug) {
  const divider = "=".repeat(60);
  const result = extractExcerpts(html, query, {
    debug,
    excerpts: { maxExcerpts: 3, charBudget: 4e3 }
  });

  logger.printTimings();
  console.log("\n" + divider);
  console.log(formatExcerpts(result));

  const info = debug && "debug" in result ? result.debug : null;
  if (info) {
    logger.debug(divider);
    logger.debug("Debug Info:");
    logger.debug(` Sentences: ${info.sentenceCount}`);
    // Optional metrics default to 0 / "N/A" when absent.
    logger.debug(` Query term coverage: ${((info.queryTermCoverage ?? 0) * 100).toFixed(1)}%`);
    logger.debug(` Max raw BM25: ${(info.maxRawBm25 ?? 0).toFixed(3)}`);
    logger.debug(` Has relevant results: ${info.hasRelevantResults ?? "N/A"}`);
    logger.debug(` Anchors selected: ${info.anchorCount}`);
    logger.debug(` Chunks before dedupe: ${info.chunkCount}`);
    logger.debug(` Chunks after dedupe: ${info.dedupedChunkCount}`);
    logger.debug("\nTop 10 sentences by score:");
    info.topSentences.forEach((sentence) => {
      const trail =
        sentence.headingPath.length > 0 ? ` [${sentence.headingPath.join(" > ")}]` : "";
      logger.debug(` ${sentence.score.toFixed(3)}${trail}: ${sentence.text}`);
    });
  }

  console.log(divider);
}
|
|
143
|
+
/**
 * Prints the usage/help text of the extraction tool to stdout.
 * The text starts and ends with a newline.
 */
function printUsage() {
  const usageLines = [
    "",
    "peeky-search: IR-based excerpt extraction from HTML",
    "",
    "Usage:",
    "node dist/index.js [options]",
    "",
    "Modes:",
    "--file path Process a local HTML file (default mode)",
    "--url URL Fetch and process a single URL",
    "--search Search via SearXNG and extract from multiple pages",
    "",
    "Options:",
    '--query "text" Search query (required for --url and --search)',
    "--max N Max results for --search mode (default: 5)",
    "--debug Show debug information",
    "--timing, -t Show performance timing breakdown",
    "--help, -h Show this help message",
    "",
    "Examples:",
    "# Process local file",
    'node dist/index.js --query "invite member" --file page.html',
    "",
    "# Fetch and process a URL",
    'node dist/index.js --url "https://clerk.com/docs/..." --query "invitation redirect"',
    "",
    "# Search via SearXNG (requires SEARXNG_URL env var)",
    'node dist/index.js --search --query "Clerk organization invitation redirect_url"',
    "",
    "# Run with timing enabled",
    'node dist/index.js --search --query "React hooks" --timing',
    ""
  ];
  console.log(usageLines.join("\n"));
}
|
|
176
|
+
// Last-resort handler: log any failure that escaped main() and exit non-zero.
main().catch((failure) => {
  const report = `Unexpected error: ${failure}`;
  logger.error(report);
  process.exit(1);
});
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import {
|
|
2
|
+
logger_default,
|
|
3
|
+
search
|
|
4
|
+
} from "../chunk-S3WZDJCP.js";
|
|
5
|
+
|
|
6
|
+
// src/mcp/server.ts
|
|
7
|
+
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
8
|
+
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
9
|
+
import { z } from "zod";
|
|
10
|
+
var logger = logger_default.getInstance();
|
|
11
|
+
var server = new McpServer({
|
|
12
|
+
name: "peeky_mcp",
|
|
13
|
+
version: "1.0.0"
|
|
14
|
+
});
|
|
15
|
+
// Registers the "peeky_web_search" tool: its description, its input schema
// (query, optional maxResults, optional diagnostics) and the handler that
// runs the search and returns the result as a single text content item.
server.tool(
  "peeky_web_search",
  `Search the web for technical information. Use this tool for ANY request involving research, documentation lookup, or questions you're not 100% confident about.

WHEN TO USE (prefer this over browsing or guessing):
- User says "research", "look up", "find docs", "search for"
- User asks "how do I...", "what is...", "best way to..."
- User asks about libraries, frameworks, APIs, tools
- User has an error message or needs debugging help
- You need current/accurate information beyond your training data

QUERY FORMULATION:
- For vague or project-level requests, extract the underlying technical concepts and search for those
- Always include the specific technology/framework/library name
- Focus on patterns, APIs, or techniques that have documentation - not the user's exact project
- If a topic is niche, think about what general concept it falls under

SEARCH OPERATORS:
- site:domain.com - limit to specific domain
- "exact phrase" - exact phrase match
- -term - exclude term

RETURNS: Extracted text excerpts with source URLs (not raw HTML).`,
  {
    query: z.string().describe(
      'Search query with technical terms. Supports operators: site:, "quotes", -exclude.'
    ),
    maxResults: z.number().optional().describe("Maximum pages to scrape (default: 5, max: 10)"),
    diagnostics: z.boolean().optional().describe("Include detailed diagnostics about why pages were filtered or failed (default: false). Diagnostics are always shown when no results are found.")
  },
  async function handleWebSearch({ query, maxResults, diagnostics }) {
    // Forward only the options the caller actually supplied.
    const searchOptions = {};
    if (maxResults !== void 0) {
      searchOptions.maxResults = maxResults;
    }
    if (diagnostics !== void 0) {
      searchOptions.diagnostics = diagnostics;
    }
    const text = await search(query, searchOptions);
    return { content: [{ type: "text", text }] };
  }
);
|
|
60
|
+
/**
 * Connects the MCP server to a stdio transport.
 *
 * @returns {Promise<void>} Settles once `server.connect` has completed.
 */
async function main() {
  const stdio = new StdioServerTransport();
  await server.connect(stdio);
}
|
|
64
|
+
// Start-up failures are fatal: log the error and exit non-zero.
main().catch((failure) => {
  const report = `Fatal error: ${failure}`;
  logger.error(report);
  process.exit(1);
});
|
package/package.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "peeky-search",
|
|
3
|
+
"version": "1.0.10",
|
|
4
|
+
"description": "IR-based HTML content extraction with MCP server for web search",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "dist/index.js",
|
|
7
|
+
"bin": {
|
|
8
|
+
"peeky-search": "dist/cli.js"
|
|
9
|
+
},
|
|
10
|
+
"files": [
|
|
11
|
+
"dist",
|
|
12
|
+
"README.md"
|
|
13
|
+
],
|
|
14
|
+
"scripts": {
|
|
15
|
+
"build": "tsup",
|
|
16
|
+
"build:watch": "tsup --watch",
|
|
17
|
+
"build:tsc": "tsc --project tsconfig.json",
|
|
18
|
+
"start": "node dist/cli.js",
|
|
19
|
+
"start:watch": "nodemon --watch src --ext *.ts --exec \"pnpm build && pnpm start\"",
|
|
20
|
+
"mcp": "node dist/cli.js mcp",
|
|
21
|
+
"cli": "node dist/cli.js"
|
|
22
|
+
},
|
|
23
|
+
"keywords": [],
|
|
24
|
+
"author": "",
|
|
25
|
+
"license": "MIT",
|
|
26
|
+
"packageManager": "pnpm@10.14.0",
|
|
27
|
+
"devDependencies": {
|
|
28
|
+
"@types/node": "^22.19.7",
|
|
29
|
+
"tsup": "^8.5.1",
|
|
30
|
+
"typescript": "^5.9.3"
|
|
31
|
+
},
|
|
32
|
+
"dependencies": {
|
|
33
|
+
"@modelcontextprotocol/sdk": "^1.0.0",
|
|
34
|
+
"cheerio": "^1.2.0",
|
|
35
|
+
"domhandler": "^5.0.3",
|
|
36
|
+
"stemmer": "^2.0.1",
|
|
37
|
+
"zod": "^3.24.0"
|
|
38
|
+
}
|
|
39
|
+
}
|