docshark 0.1.13 → 0.1.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +243 -55
- package/dist/server.d.ts +7 -7
- package/dist/server.js +123 -113
- package/dist/services/library.d.ts +8 -3
- package/dist/services/library.js +42 -12
- package/dist/storage/db.d.ts +4 -3
- package/dist/storage/db.js +45 -24
- package/dist/version.d.ts +1 -1
- package/dist/version.js +6 -2
- package/package.json +2 -2
- package/LICENSE +0 -21
- package/README.md +0 -176
package/dist/cli.js
CHANGED
|
@@ -1,23 +1,30 @@
|
|
|
1
1
|
#!/usr/bin/env bun
|
|
2
2
|
// src/cli.ts — DocShark CLI entry point
|
|
3
|
-
import {
|
|
3
|
+
import { cac } from "cac";
|
|
4
4
|
import { startHttpServer } from "./http.js";
|
|
5
5
|
import { StdioTransport } from "@tmcp/transport-stdio";
|
|
6
6
|
import { server, db, searchEngine, libraryService } from "./server.js";
|
|
7
7
|
import { maybeNotifyAboutUpdate, runUpdateCommand } from "./cli-update.js";
|
|
8
8
|
import { VERSION } from "./version.js";
|
|
9
|
-
const
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
9
|
+
// Colour is emitted only when stdout is an interactive terminal AND the user has
// not opted out through the NO_COLOR convention (any non-empty value disables it).
// Boolean() keeps the flag a real boolean: `isTTY` is undefined on non-TTY streams.
const useColor = Boolean(process.stdout.isTTY) && !process.env.NO_COLOR;
// ANSI SGR escape sequences used by paint(); `reset` terminates every painted span.
const color = {
    reset: "\x1b[0m",
    bold: "\x1b[1m",
    dim: "\x1b[2m",
    cyan: "\x1b[36m",
    yellow: "\x1b[33m",
    gray: "\x1b[90m",
};
|
|
18
|
+
const cli = cac("docshark");
|
|
19
|
+
cli
|
|
20
|
+
.command("", "Start the MCP server")
|
|
21
|
+
.alias("start")
|
|
15
22
|
.alias("s")
|
|
16
|
-
.
|
|
17
|
-
.option("-p, --port <port>", "HTTP server port", "6380")
|
|
23
|
+
.option("-p, --port <port>", "HTTP server port", { default: "6380" })
|
|
18
24
|
.option("-S, --stdio", "Run in STDIO mode (for Claude Desktop, Cursor, etc.)")
|
|
19
|
-
.option("-D, --data-dir <path>", "Data directory"
|
|
25
|
+
.option("-D, --data-dir <path>", "Data directory")
|
|
20
26
|
.action(async (opts) => {
|
|
27
|
+
await maybeNotifyForCommand("start", opts.stdio === true);
|
|
21
28
|
if (opts.dataDir) {
|
|
22
29
|
process.env.DOCSHARK_DATA_DIR = opts.dataDir;
|
|
23
30
|
}
|
|
@@ -31,14 +38,70 @@ program
|
|
|
31
38
|
await startHttpServer(parseInt(opts.port));
|
|
32
39
|
}
|
|
33
40
|
});
|
|
34
|
-
|
|
35
|
-
|
|
41
|
+
const helpCommands = [
|
|
42
|
+
{
|
|
43
|
+
name: "start",
|
|
44
|
+
aliases: ["s", "-s"],
|
|
45
|
+
args: "",
|
|
46
|
+
description: "Start server",
|
|
47
|
+
},
|
|
48
|
+
{
|
|
49
|
+
name: "add",
|
|
50
|
+
aliases: ["a", "-a"],
|
|
51
|
+
args: "<url>",
|
|
52
|
+
description: "Add & crawl library",
|
|
53
|
+
},
|
|
54
|
+
{
|
|
55
|
+
name: "search",
|
|
56
|
+
aliases: ["f", "-f"],
|
|
57
|
+
args: "<query>",
|
|
58
|
+
description: "Search docs",
|
|
59
|
+
},
|
|
60
|
+
{
|
|
61
|
+
name: "list",
|
|
62
|
+
aliases: ["l", "-l"],
|
|
63
|
+
args: "",
|
|
64
|
+
description: "List libraries",
|
|
65
|
+
},
|
|
66
|
+
{
|
|
67
|
+
name: "refresh",
|
|
68
|
+
aliases: ["r", "-r"],
|
|
69
|
+
args: "<name>",
|
|
70
|
+
description: "Refresh library",
|
|
71
|
+
},
|
|
72
|
+
{
|
|
73
|
+
name: "remove",
|
|
74
|
+
aliases: ["rm", "-rm"],
|
|
75
|
+
args: "<name>",
|
|
76
|
+
description: "Remove library",
|
|
77
|
+
},
|
|
78
|
+
{
|
|
79
|
+
name: "get",
|
|
80
|
+
aliases: ["g", "-g"],
|
|
81
|
+
args: "[url]",
|
|
82
|
+
description: "Get page markdown",
|
|
83
|
+
},
|
|
84
|
+
{
|
|
85
|
+
name: "update",
|
|
86
|
+
aliases: ["u", "-u"],
|
|
87
|
+
args: "",
|
|
88
|
+
description: "Update DocShark",
|
|
89
|
+
},
|
|
90
|
+
{
|
|
91
|
+
name: "info",
|
|
92
|
+
aliases: ["i", "-i"],
|
|
93
|
+
args: "<name>",
|
|
94
|
+
description: "Library info + pages",
|
|
95
|
+
},
|
|
96
|
+
];
|
|
97
|
+
cli
|
|
98
|
+
.command("add <url>", "Add a documentation library and start crawling")
|
|
36
99
|
.alias("a")
|
|
37
|
-
.description("Add a documentation library and start crawling (aliases: a, -a)")
|
|
38
100
|
.option("-n, --name <name>", "Library name (auto-generated from URL if omitted)")
|
|
39
|
-
.option("-d, --depth <n>", "Max crawl depth", "3")
|
|
101
|
+
.option("-d, --depth <n>", "Max crawl depth", { default: "3" })
|
|
40
102
|
.option("-V, --lib-version <version>", "Library version")
|
|
41
103
|
.action(async (url, opts) => {
|
|
104
|
+
await maybeNotifyForCommand("add");
|
|
42
105
|
db.init();
|
|
43
106
|
try {
|
|
44
107
|
const lib = await libraryService.add({
|
|
@@ -58,13 +121,36 @@ program
|
|
|
58
121
|
process.exit(1);
|
|
59
122
|
}
|
|
60
123
|
});
|
|
61
|
-
|
|
62
|
-
|
|
124
|
+
cli.command("help [command]", "Show help for a command").action((command) => {
|
|
125
|
+
if (command) {
|
|
126
|
+
printCommandHelp(command);
|
|
127
|
+
return;
|
|
128
|
+
}
|
|
129
|
+
printRootHelp();
|
|
130
|
+
});
|
|
131
|
+
cli
|
|
132
|
+
.command("rename <current-name> <new-name>", "Rename an existing documentation library")
|
|
133
|
+
.alias("mv")
|
|
134
|
+
.action(async (currentName, newName) => {
|
|
135
|
+
await maybeNotifyForCommand("rename");
|
|
136
|
+
db.init();
|
|
137
|
+
try {
|
|
138
|
+
const library = libraryService.rename({ currentName, newName });
|
|
139
|
+
console.log(`\n✅ Renamed library to "${library.display_name}" (${library.name}).\n`);
|
|
140
|
+
}
|
|
141
|
+
catch (err) {
|
|
142
|
+
const message = err instanceof Error ? err.message : "Unknown error";
|
|
143
|
+
console.error(`\n❌ ${message}\n`);
|
|
144
|
+
process.exit(1);
|
|
145
|
+
}
|
|
146
|
+
});
|
|
147
|
+
cli
|
|
148
|
+
.command("search <query>", "Search indexed documentation")
|
|
63
149
|
.alias("f")
|
|
64
|
-
.description("Search indexed documentation (aliases: f, -f)")
|
|
65
150
|
.option("-l, --library <name>", "Filter by library")
|
|
66
|
-
.option("-m, --limit <n>", "Max results", "5")
|
|
151
|
+
.option("-m, --limit <n>", "Max results", { default: "5" })
|
|
67
152
|
.action(async (query, opts) => {
|
|
153
|
+
await maybeNotifyForCommand("search");
|
|
68
154
|
db.init();
|
|
69
155
|
const results = searchEngine.search(query, {
|
|
70
156
|
library: opts.library,
|
|
@@ -81,12 +167,12 @@ program
|
|
|
81
167
|
console.log(`Source: ${r.page_url}\n`);
|
|
82
168
|
}
|
|
83
169
|
});
|
|
84
|
-
|
|
85
|
-
.command("list")
|
|
170
|
+
cli
|
|
171
|
+
.command("list", "List indexed libraries")
|
|
86
172
|
.alias("l")
|
|
87
|
-
.
|
|
88
|
-
.
|
|
89
|
-
|
|
173
|
+
.option("-s, --status <status>", "Filter by status (indexed, crawling, error, all)", { default: "all" })
|
|
174
|
+
.action(async (opts) => {
|
|
175
|
+
await maybeNotifyForCommand("list");
|
|
90
176
|
db.init();
|
|
91
177
|
const libs = db.listLibraries(opts.status);
|
|
92
178
|
if (libs.length === 0) {
|
|
@@ -102,11 +188,11 @@ program
|
|
|
102
188
|
"Last Crawled": l.last_crawled_at || "never",
|
|
103
189
|
})));
|
|
104
190
|
});
|
|
105
|
-
|
|
106
|
-
.command("refresh <name>")
|
|
191
|
+
cli
|
|
192
|
+
.command("refresh <name>", "Refresh an existing documentation library")
|
|
107
193
|
.alias("r")
|
|
108
|
-
.description("Refresh an existing documentation library (aliases: r, -r)")
|
|
109
194
|
.action(async (name) => {
|
|
195
|
+
await maybeNotifyForCommand("refresh");
|
|
110
196
|
db.init();
|
|
111
197
|
try {
|
|
112
198
|
const lib = db.getLibraryByName(name);
|
|
@@ -123,11 +209,11 @@ program
|
|
|
123
209
|
process.exit(1);
|
|
124
210
|
}
|
|
125
211
|
});
|
|
126
|
-
|
|
127
|
-
.command("remove <name>")
|
|
212
|
+
cli
|
|
213
|
+
.command("remove <name>", "Remove a documentation library and its index")
|
|
128
214
|
.alias("rm")
|
|
129
|
-
.
|
|
130
|
-
|
|
215
|
+
.action(async (name) => {
|
|
216
|
+
await maybeNotifyForCommand("remove");
|
|
131
217
|
db.init();
|
|
132
218
|
try {
|
|
133
219
|
const lib = db.getLibraryByName(name);
|
|
@@ -141,13 +227,13 @@ program
|
|
|
141
227
|
process.exit(1);
|
|
142
228
|
}
|
|
143
229
|
});
|
|
144
|
-
|
|
145
|
-
.command("get [url]")
|
|
230
|
+
cli
|
|
231
|
+
.command("get [url]", "Get the full markdown content of a specific indexed page")
|
|
146
232
|
.alias("g")
|
|
147
|
-
.description("Get the full markdown content of a specific indexed page (aliases: g, -g)")
|
|
148
233
|
.option("-l, --library <name>", "Library name to search within")
|
|
149
234
|
.option("-p, --path <path>", "Relative path within the library")
|
|
150
|
-
.action((url, opts) => {
|
|
235
|
+
.action(async (url, opts) => {
|
|
236
|
+
await maybeNotifyForCommand("get");
|
|
151
237
|
if (!url && (!opts.library || !opts.path)) {
|
|
152
238
|
console.error(`\n❌ Please provide either a URL, or both --library and --path\n`);
|
|
153
239
|
process.exit(1);
|
|
@@ -163,20 +249,20 @@ program
|
|
|
163
249
|
console.log(page.content_markdown);
|
|
164
250
|
console.log("\n");
|
|
165
251
|
});
|
|
166
|
-
|
|
167
|
-
.command("update")
|
|
252
|
+
cli
|
|
253
|
+
.command("update", "Update the global Bun installation of DocShark")
|
|
168
254
|
.alias("u")
|
|
169
|
-
.description("Update the global Bun installation of DocShark (aliases: u, -u)")
|
|
170
255
|
.option("-c, --check", "Only check whether a newer DocShark version is available")
|
|
171
256
|
.option("-q, --quiet", "Suppress DocShark status output and rely on exit codes")
|
|
172
257
|
.action(async (opts) => {
|
|
258
|
+
await maybeNotifyForCommand("update");
|
|
173
259
|
await runUpdateCommand({
|
|
174
260
|
checkOnly: opts.check,
|
|
175
261
|
quiet: opts.quiet,
|
|
176
262
|
});
|
|
177
263
|
});
|
|
178
264
|
// Intercept manual short flags (e.g., -l instead of l) so they act as command aliases
|
|
179
|
-
const args = process.argv;
|
|
265
|
+
const args = process.argv.slice(2);
|
|
180
266
|
const cmdAliases = {
|
|
181
267
|
"-s": "start",
|
|
182
268
|
"-a": "add",
|
|
@@ -188,14 +274,29 @@ const cmdAliases = {
|
|
|
188
274
|
"-i": "info",
|
|
189
275
|
"-u": "update",
|
|
190
276
|
};
|
|
191
|
-
|
|
192
|
-
|
|
277
|
+
const normalizedArgs = [...args];
|
|
278
|
+
if (normalizedArgs[0] && cmdAliases[normalizedArgs[0]]) {
|
|
279
|
+
normalizedArgs[0] = cmdAliases[normalizedArgs[0]];
|
|
280
|
+
}
|
|
281
|
+
const helpRequest = getHelpRequest(normalizedArgs);
|
|
282
|
+
if (helpRequest === "root") {
|
|
283
|
+
printRootHelp();
|
|
284
|
+
process.exit(0);
|
|
193
285
|
}
|
|
194
|
-
|
|
195
|
-
|
|
286
|
+
if (helpRequest && helpRequest !== "root") {
|
|
287
|
+
printCommandHelp(helpRequest);
|
|
288
|
+
process.exit(0);
|
|
289
|
+
}
|
|
290
|
+
if (normalizedArgs.includes("-v") || normalizedArgs.includes("--version")) {
|
|
291
|
+
printVersion();
|
|
292
|
+
process.exit(0);
|
|
293
|
+
}
|
|
294
|
+
const parseArgv = [process.argv[0], process.argv[1], ...normalizedArgs];
|
|
295
|
+
cli
|
|
296
|
+
.command("info <name>", "Get information about a library and list its pages")
|
|
196
297
|
.alias("i")
|
|
197
|
-
.
|
|
198
|
-
|
|
298
|
+
.action(async (name) => {
|
|
299
|
+
await maybeNotifyForCommand("info");
|
|
199
300
|
db.init();
|
|
200
301
|
const lib = db.getLibraryByName(name);
|
|
201
302
|
if (!lib) {
|
|
@@ -221,17 +322,13 @@ program
|
|
|
221
322
|
console.log(`\nNo pages found for this library.\n`);
|
|
222
323
|
}
|
|
223
324
|
});
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
stdioMode: commandName === "start" && options.stdio === true,
|
|
232
|
-
});
|
|
233
|
-
});
|
|
234
|
-
await program.parseAsync(args);
|
|
325
|
+
try {
|
|
326
|
+
cli.parse(parseArgv, { run: false });
|
|
327
|
+
await cli.runMatchedCommand();
|
|
328
|
+
}
|
|
329
|
+
catch (error) {
|
|
330
|
+
handleCliError(error);
|
|
331
|
+
}
|
|
235
332
|
/** Helper to wait for a crawl job to finish (CLI blocking mode) */
|
|
236
333
|
async function waitForCrawl(jobId) {
|
|
237
334
|
const { jobManager } = await import("./server.js");
|
|
@@ -256,3 +353,94 @@ async function waitForCrawl(jobId) {
|
|
|
256
353
|
check();
|
|
257
354
|
});
|
|
258
355
|
}
|
|
356
|
+
/**
 * Runs the update notification check for a CLI command and waits for it to finish.
 *
 * @param {string} commandName Name of the command being executed (e.g. "add").
 * @param {boolean} [stdioMode=false] Forwarded as-is; callers in this file pass
 *   `true` only for `start` invoked with `--stdio`.
 * @returns {Promise<void>} Settles once maybeNotifyAboutUpdate has settled.
 */
async function maybeNotifyForCommand(commandName, stdioMode = false) {
    const notificationContext = { commandName, stdioMode };
    await maybeNotifyAboutUpdate(notificationContext);
}
|
|
359
|
+
/**
 * Decides whether the invocation is a request for help instead of a command run.
 *
 * Recognised forms:
 *   - `-h` / `--help` as the first argument        -> "root"
 *   - `help` or `help <command>`                   -> "root" or the command name
 *   - `<command> help`                             -> the command name
 *   - `<command> ... -h|--help`                    -> the command name
 *
 * A leading option that is not a command alias (e.g. `--port 8080 --help`) belongs
 * to the default command, so it is never reported as a command name: help flags
 * then resolve to "root", and `--data-dir help` is treated as an option value
 * rather than as a help request.
 *
 * @param {string[]} args Command-line arguments without the runtime and script path.
 * @returns {string|null} "root" for the general help screen, a command name for
 *   command-specific help, or null when no help was requested.
 */
function getHelpRequest(args) {
    if (args.length === 0) {
        return null;
    }
    const [first, second] = args;
    if (first === "-h" || first === "--help") {
        return "root";
    }
    // An option is "bare" when it starts with "-" and alias resolution leaves it unchanged.
    const isBareOption = (token) => token.startsWith("-") && normalizeCommandName(token) === token;
    const resolveTopic = (token) => (isBareOption(token) ? "root" : normalizeCommandName(token));
    if (first === "help") {
        return second ? resolveTopic(second) : "root";
    }
    if (second === "help" && !isBareOption(first)) {
        return normalizeCommandName(first);
    }
    if (args.includes("-h") || args.includes("--help")) {
        return resolveTopic(first);
    }
    return null;
}
|
|
377
|
+
/**
 * Resolves a short-flag alias (for example "-a") to its canonical command name.
 *
 * @param {string} name Command name or alias as typed by the user.
 * @returns {string} The mapped command name, or `name` unchanged when it is not an alias.
 */
function normalizeCommandName(name) {
    // Only own keys count as aliases: a plain `cmdAliases[name]` lookup would also
    // resolve inherited Object.prototype members such as "constructor" or "toString"
    // and hand a function back to the caller.
    const isAlias = Object.prototype.hasOwnProperty.call(cmdAliases, name);
    return isAlias ? cmdAliases[name] : name;
}
|
|
380
|
+
/** Writes the product name (cyan when colour is enabled) followed by VERSION to stdout. */
function printVersion() {
    const productLabel = paint("DocShark", color.cyan);
    console.log(`${productLabel} ${VERSION}`);
}
|
|
383
|
+
/**
 * Prints the top-level help screen: banner, usage line, global options, and one
 * row per entry of `helpCommands`.
 *
 * Column padding is always applied to the plain text BEFORE it is painted, so the
 * invisible ANSI escape bytes never count towards the column width and the layout
 * is the same with and without colour.
 */
function printRootHelp() {
    printHeader();
    console.log(`${paint("USAGE", color.gray)}`);
    console.log(` docshark [options] [command]\n`);
    console.log(`${paint("OPTIONS", color.gray)}`);
    console.log(` ${paint("-v, --version".padEnd(18), color.cyan)} Show version`);
    console.log(` ${paint("-h, --help".padEnd(18), color.cyan)} Show this help\n`);
    console.log(`${paint("COMMANDS", color.gray)}`);
    // Split each command's aliases into word aliases (shown next to the name)
    // and dash-prefixed aliases (shown in the trailing "[aliases: …]" suffix).
    const rows = helpCommands.map((command) => ({
        primary: [
            command.name,
            ...command.aliases.filter((alias) => !alias.startsWith("-")),
        ].join(", "),
        shortAliases: command.aliases.filter((alias) => alias.startsWith("-")),
        args: command.args,
        description: command.description,
    }));
    const primaryWidth = Math.max(...rows.map((row) => row.primary.length));
    const argsWidth = Math.max(...rows.map((row) => row.args.length));
    for (const row of rows) {
        const aliasSuffix = row.shortAliases.length > 0
            ? ` [aliases: ${row.shortAliases.join(", ")}]`
            : "";
        // Commands without arguments get blank filler so the alias suffix still lines up.
        const argsColumn = row.args
            ? ` ${row.args.padEnd(argsWidth)}`
            : "".padEnd(argsWidth + 1);
        const label = `${row.primary.padEnd(primaryWidth)}${argsColumn}${aliasSuffix}`.trimEnd();
        console.log(` ${paint(label.padEnd(36), color.cyan)} ${row.description}`);
    }
    console.log(`\n${paint("Run `docshark help <command>` for more information.", color.dim)}`);
}
|
|
411
|
+
/**
 * Prints the help screen for a single command: usage, aliases and summary.
 *
 * When the name does not match any `helpCommands` entry, an "Unknown command"
 * notice is printed and the root help is shown instead.
 *
 * @param {string} commandName Command name or alias; resolved through normalizeCommandName.
 */
function printCommandHelp(commandName) {
    const canonicalName = normalizeCommandName(commandName);
    const command = helpCommands.find((item) => item.name === canonicalName);
    if (!command) {
        // printRootHelp() prints the banner itself; printing it here as well would
        // show the banner twice, so only the notice is emitted before delegating.
        console.log(`\n${paint(`Unknown command: ${commandName}`, color.cyan)}`);
        printRootHelp();
        return;
    }
    printHeader();
    console.log(`${paint("USAGE", color.gray)}`);
    // trimEnd() drops the dangling space left when the command takes no arguments.
    console.log(` docshark ${command.name} ${command.args ? paint(command.args, color.yellow) : ""}`.trimEnd());
    console.log(``);
    console.log(`${paint("ALIASES", color.gray)}`);
    console.log(` ${command.aliases.join(", ")}\n`);
    console.log(`${paint("SUMMARY", color.gray)}`);
    console.log(` ${command.description}\n`);
    console.log(`${paint("Run `docshark help` to see all commands.", color.dim)}`);
}
|
|
428
|
+
/** Prints the DocShark banner: a blank line, the title line, then the tagline followed by a blank line. */
function printHeader() {
    const title = paint("🦈 DocShark", color.cyan);
    const subtitle = paint("Documentation MCP Server", color.bold);
    const tagline = paint("Scrape • Index • Search any docs site", color.dim);
    console.log();
    console.log(`${title} ${subtitle}`);
    console.log(` ${tagline}\n`);
}
|
|
433
|
+
/**
 * Wraps text in an ANSI escape sequence when colour output is enabled.
 *
 * @param {string} text Text to decorate.
 * @param {string} code Escape sequence to prepend (one of the `color` entries).
 * @returns {string} `code + text + color.reset` when `useColor` is truthy, otherwise `text` unchanged.
 */
function paint(text, code) {
    return useColor ? `${code}${text}${color.reset}` : text;
}
|
|
439
|
+
/**
 * Reports a CLI failure on stderr and terminates the process with exit code 1.
 *
 * @param {unknown} error Value caught while parsing or running a command. Only
 *   Error instances contribute their message; anything else gets a generic text.
 */
function handleCliError(error) {
    let message = "Unknown command error";
    if (error instanceof Error) {
        message = error.message;
    }
    // Messages starting with "Unused args:" are swapped for a friendlier hint
    // (presumably raised by the argument parser for surplus positionals — confirm).
    if (message.startsWith("Unused args:")) {
        message = "Too many arguments passed. Run `docshark help <command>` for usage.";
    }
    console.error(`\n❌ ${message}\n`);
    process.exit(1);
}
|
package/dist/server.d.ts
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
import { McpServer } from
|
|
2
|
-
import * as v from
|
|
3
|
-
import { Database } from
|
|
4
|
-
import { SearchEngine } from
|
|
5
|
-
import { LibraryService } from
|
|
6
|
-
import { JobManager } from
|
|
7
|
-
import { EventBus } from
|
|
1
|
+
import { McpServer } from "tmcp";
|
|
2
|
+
import * as v from "valibot";
|
|
3
|
+
import { Database } from "./storage/db.js";
|
|
4
|
+
import { SearchEngine } from "./storage/search.js";
|
|
5
|
+
import { LibraryService } from "./services/library.js";
|
|
6
|
+
import { JobManager } from "./jobs/manager.js";
|
|
7
|
+
import { EventBus } from "./jobs/events.js";
|
|
8
8
|
export declare const db: Database;
|
|
9
9
|
export declare const eventBus: EventBus;
|
|
10
10
|
export declare const searchEngine: SearchEngine;
|
package/dist/server.js
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
// src/server.ts — TMCP McpServer setup + tool registration
|
|
2
|
-
import { McpServer } from
|
|
3
|
-
import { ValibotJsonSchemaAdapter } from
|
|
4
|
-
import * as v from
|
|
5
|
-
import { tool } from
|
|
6
|
-
import { Database } from
|
|
7
|
-
import { SearchEngine } from
|
|
8
|
-
import { LibraryService } from
|
|
9
|
-
import { JobManager } from
|
|
10
|
-
import { VERSION } from
|
|
11
|
-
import { EventBus } from
|
|
2
|
+
import { McpServer } from "tmcp";
|
|
3
|
+
import { ValibotJsonSchemaAdapter } from "@tmcp/adapter-valibot";
|
|
4
|
+
import * as v from "valibot";
|
|
5
|
+
import { tool } from "tmcp/utils";
|
|
6
|
+
import { Database } from "./storage/db.js";
|
|
7
|
+
import { SearchEngine } from "./storage/search.js";
|
|
8
|
+
import { LibraryService } from "./services/library.js";
|
|
9
|
+
import { JobManager } from "./jobs/manager.js";
|
|
10
|
+
import { VERSION } from "./version.js";
|
|
11
|
+
import { EventBus } from "./jobs/events.js";
|
|
12
12
|
// Initialize core services
|
|
13
13
|
export const db = new Database();
|
|
14
14
|
export const eventBus = new EventBus();
|
|
@@ -17,9 +17,9 @@ export const jobManager = new JobManager(db, eventBus);
|
|
|
17
17
|
export const libraryService = new LibraryService(db, jobManager);
|
|
18
18
|
// Create TMCP server
|
|
19
19
|
export const server = new McpServer({
|
|
20
|
-
name:
|
|
20
|
+
name: "docshark",
|
|
21
21
|
version: VERSION,
|
|
22
|
-
description:
|
|
22
|
+
description: "🦈 Documentation MCP Server — scrape, index, and search any doc website",
|
|
23
23
|
}, {
|
|
24
24
|
adapter: new ValibotJsonSchemaAdapter(),
|
|
25
25
|
capabilities: {
|
|
@@ -31,14 +31,14 @@ export const server = new McpServer({
|
|
|
31
31
|
// Tool 1: search_docs — Primary search tool
|
|
32
32
|
// ──────────────────────────────────────
|
|
33
33
|
server.tool({
|
|
34
|
-
name:
|
|
35
|
-
description:
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
34
|
+
name: "search_docs",
|
|
35
|
+
description: "Search through indexed documentation libraries for relevant information. " +
|
|
36
|
+
"Returns ranked documentation sections with code examples and source URLs. " +
|
|
37
|
+
"Use this when you need to find information about a library, framework, API, " +
|
|
38
|
+
"or any technical concept.",
|
|
39
39
|
schema: v.object({
|
|
40
|
-
query: v.pipe(v.string(), v.description(
|
|
41
|
-
library: v.optional(v.pipe(v.string(), v.description(
|
|
40
|
+
query: v.pipe(v.string(), v.description("Search query. Use natural language.")),
|
|
41
|
+
library: v.optional(v.pipe(v.string(), v.description("Filter to a specific library."))),
|
|
42
42
|
limit: v.optional(v.pipe(v.number(), v.integer(), v.minValue(1), v.maxValue(20)), 5),
|
|
43
43
|
}),
|
|
44
44
|
}, async ({ query, library, limit }) => {
|
|
@@ -53,27 +53,59 @@ server.tool({
|
|
|
53
53
|
block += r.content;
|
|
54
54
|
return block;
|
|
55
55
|
})
|
|
56
|
-
.join(
|
|
56
|
+
.join("\n\n---\n\n");
|
|
57
57
|
return tool.text(`## Results for "${query}"\n\n${formatted}`);
|
|
58
58
|
});
|
|
59
|
+
/**
 * Returns `value` unchanged when it is present, otherwise throws.
 *
 * A value counts as missing when it is undefined, null, or a string that is empty
 * or contains only whitespace. Blank strings are rejected for consistency with
 * LibraryService.rename, which trims names and refuses empty results.
 *
 * @param {*} value Tool input to validate.
 * @param {string} message Error message used when the value is missing.
 * @returns {*} The original, untrimmed value.
 * @throws {Error} With `message` when the value is missing.
 */
function requireValue(value, message) {
    const isBlankString = typeof value === "string" && value.trim() === "";
    if (value === undefined || value === null || isBlankString) {
        throw new Error(message);
    }
    return value;
}
|
|
65
|
+
/**
 * Renders a markdown summary of a library followed by a table of its pages.
 *
 * @param {*} libraryId Identifier passed to db.getLibraryById.
 * @returns {string} The markdown report, or "Library not found." when the lookup yields nothing.
 */
function formatLibraryInfo(libraryId) {
    const lib = db.getLibraryById(libraryId);
    if (!lib) {
        return `Library not found.`;
    }
    const pages = db.getPagesByLibrary(lib.id);
    const summary = [
        `## Library: ${lib.display_name} (${lib.name})\n`,
        `- **URL:** ${lib.url}\n`,
        `- **Status:** ${lib.status}\n`,
        `- **Pages:** ${lib.page_count}\n`,
        `- **Chunks:** ${lib.chunk_count}\n`,
        `- **Last Crawled:** ${lib.last_crawled_at || "never"}\n\n`,
    ].join("");
    if (!(pages.length > 0)) {
        return `${summary}*No pages indexed yet for this library.*\n`;
    }
    const tableRows = Array.from(pages, (page) => {
        // A literal pipe in a title would split the markdown table cell, so it is replaced.
        const safeTitle = page.title?.replace(/\|/g, "-") || "Untitled";
        return `| ${safeTitle} | \`${page.path}\` | ${page.url} |\n`;
    });
    const tableHead = `### Pages (${pages.length})\n\n` +
        "| Title | Path | URL |\n" +
        "| ----- | ---- | --- |\n";
    return summary + tableHead + tableRows.join("");
}
|
|
59
91
|
// ──────────────────────────────────────
|
|
60
92
|
// Tool 2: list_libraries — Discovery tool
|
|
61
93
|
// ──────────────────────────────────────
|
|
62
94
|
server.tool({
|
|
63
|
-
name:
|
|
64
|
-
description:
|
|
65
|
-
|
|
95
|
+
name: "list_libraries",
|
|
96
|
+
description: "List all documentation libraries currently indexed and available for searching. " +
|
|
97
|
+
"Use this to discover what docs are available before running search_docs.",
|
|
66
98
|
schema: v.object({
|
|
67
|
-
status: v.optional(v.pipe(v.picklist([
|
|
99
|
+
status: v.optional(v.pipe(v.picklist(["indexed", "crawling", "error", "all"]), v.description('Filter by status. Default: "all".')), "all"),
|
|
68
100
|
}),
|
|
69
101
|
}, async ({ status }) => {
|
|
70
102
|
const libraries = db.listLibraries(status);
|
|
71
103
|
if (libraries.length === 0) {
|
|
72
|
-
return tool.text(
|
|
104
|
+
return tool.text("No libraries indexed yet. Use manage_library with action=add to add a documentation website.");
|
|
73
105
|
}
|
|
74
106
|
let output = `## Indexed Libraries (${libraries.length} total)\n\n`;
|
|
75
|
-
output +=
|
|
76
|
-
output +=
|
|
107
|
+
output += "| Library | URL | Pages | Chunks | Status |\n";
|
|
108
|
+
output += "| ------- | --- | ----- | ------ | ------ |\n";
|
|
77
109
|
for (const lib of libraries) {
|
|
78
110
|
output += `| ${lib.name} | ${lib.url} | ${lib.page_count} | ${lib.chunk_count} | ${lib.status} |\n`;
|
|
79
111
|
}
|
|
@@ -83,107 +115,85 @@ server.tool({
|
|
|
83
115
|
// Tool 3: get_doc_page — Full page read
|
|
84
116
|
// ──────────────────────────────────────
|
|
85
117
|
server.tool({
|
|
86
|
-
name:
|
|
87
|
-
description:
|
|
88
|
-
|
|
118
|
+
name: "get_doc_page",
|
|
119
|
+
description: "Retrieve the complete content of a specific documentation page as markdown. " +
|
|
120
|
+
"Use when search results reference a page and you need full context.",
|
|
89
121
|
schema: v.object({
|
|
90
|
-
url: v.optional(v.pipe(v.string(), v.description(
|
|
91
|
-
library: v.optional(v.pipe(v.string(), v.description(
|
|
92
|
-
path: v.optional(v.pipe(v.string(), v.description(
|
|
122
|
+
url: v.optional(v.pipe(v.string(), v.description("The full URL of the documentation page."))),
|
|
123
|
+
library: v.optional(v.pipe(v.string(), v.description("Library name to search within."))),
|
|
124
|
+
path: v.optional(v.pipe(v.string(), v.description("Relative path within the library."))),
|
|
93
125
|
}),
|
|
94
126
|
}, async ({ url, library, path }) => {
|
|
95
127
|
const page = db.getPage({ url, library, path });
|
|
96
128
|
if (!page)
|
|
97
|
-
return tool.text(
|
|
129
|
+
return tool.text("Page not found. Use search_docs to find the correct page.");
|
|
98
130
|
return tool.text(`# ${page.title}\n**Source:** ${page.url}\n\n${page.content_markdown}`);
|
|
99
131
|
});
|
|
100
132
|
// ──────────────────────────────────────
|
|
101
|
-
// Tool 4:
|
|
133
|
+
// Tool 4: manage_library — Create, rename, refresh, remove, inspect
|
|
102
134
|
// ──────────────────────────────────────
|
|
103
135
|
server.tool({
|
|
104
|
-
name:
|
|
105
|
-
description:
|
|
106
|
-
'Provide the URL and an optional name. Crawl runs in the background.',
|
|
136
|
+
name: "manage_library",
|
|
137
|
+
description: "Manage a documentation library lifecycle. Use action=add to crawl a new source, action=rename to change the library name, action=refresh to re-crawl, action=remove to delete it, or action=info to inspect its pages and stats.",
|
|
107
138
|
schema: v.object({
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
139
|
+
action: v.pipe(v.picklist(["add", "rename", "refresh", "remove", "info"]), v.description("The management action to perform.")),
|
|
140
|
+
url: v.optional(v.pipe(v.string(), v.url(), v.description("Base URL of the documentation website."))),
|
|
141
|
+
name: v.optional(v.pipe(v.string(), v.description("Short identifier (auto-generated if omitted)."))),
|
|
142
|
+
version: v.optional(v.pipe(v.string(), v.description("Version string."))),
|
|
111
143
|
max_depth: v.optional(v.pipe(v.number(), v.integer(), v.minValue(1), v.maxValue(10)), 3),
|
|
144
|
+
current_name: v.optional(v.pipe(v.string(), v.description("The current library name (for rename)."))),
|
|
145
|
+
new_name: v.optional(v.pipe(v.string(), v.description("The new library name (for rename)."))),
|
|
146
|
+
library: v.optional(v.pipe(v.string(), v.description("The library name to manage."))),
|
|
112
147
|
}),
|
|
113
|
-
}, async (
|
|
148
|
+
}, async (input) => {
|
|
114
149
|
try {
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
});
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
});
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
// ──────────────────────────────────────
|
|
158
|
-
server.tool({
|
|
159
|
-
name: 'library_info',
|
|
160
|
-
description: 'Get detailed information about a specific documentation library, including a list of all its indexed pages and their paths. ' +
|
|
161
|
-
'Use this to see what pages are available in a library before retrieving them.',
|
|
162
|
-
schema: v.object({
|
|
163
|
-
library: v.pipe(v.string(), v.description('The library name to get information for.')),
|
|
164
|
-
}),
|
|
165
|
-
}, async ({ library }) => {
|
|
166
|
-
const lib = db.getLibraryByName(library);
|
|
167
|
-
if (!lib)
|
|
168
|
-
return tool.text(`Library "${library}" not found. Use list_libraries to see available libraries.`);
|
|
169
|
-
const pages = db.getPagesByLibrary(lib.id);
|
|
170
|
-
let output = `## Library: ${lib.display_name} (${lib.name})\n`;
|
|
171
|
-
output += `- **URL:** ${lib.url}\n`;
|
|
172
|
-
output += `- **Status:** ${lib.status}\n`;
|
|
173
|
-
output += `- **Pages:** ${lib.page_count}\n`;
|
|
174
|
-
output += `- **Chunks:** ${lib.chunk_count}\n`;
|
|
175
|
-
output += `- **Last Crawled:** ${lib.last_crawled_at || 'never'}\n\n`;
|
|
176
|
-
if (pages.length > 0) {
|
|
177
|
-
output += `### Pages (${pages.length})\n\n`;
|
|
178
|
-
output += '| Title | Path | URL |\n';
|
|
179
|
-
output += '| ----- | ---- | --- |\n';
|
|
180
|
-
for (const p of pages) {
|
|
181
|
-
const title = p.title?.replace(/\|/g, '-') || 'Untitled';
|
|
182
|
-
output += `| ${title} | \`${p.path}\` | ${p.url} |\n`;
|
|
150
|
+
switch (input.action) {
|
|
151
|
+
case "add": {
|
|
152
|
+
const url = requireValue(input.url, "The URL is required for action=add.");
|
|
153
|
+
const library = await libraryService.add({
|
|
154
|
+
url,
|
|
155
|
+
name: input.name,
|
|
156
|
+
version: input.version,
|
|
157
|
+
maxDepth: input.max_depth,
|
|
158
|
+
});
|
|
159
|
+
return tool.text(`✅ Library "${library.display_name}" added.\n` +
|
|
160
|
+
`Crawl job ${library.jobId} started. Use list_libraries to check progress.`);
|
|
161
|
+
}
|
|
162
|
+
case "rename": {
|
|
163
|
+
const currentName = requireValue(input.current_name, "current_name is required for action=rename.");
|
|
164
|
+
const newName = requireValue(input.new_name, "new_name is required for action=rename.");
|
|
165
|
+
const library = libraryService.rename({ currentName, newName });
|
|
166
|
+
return tool.text(`✅ Library renamed to "${library.display_name}" (${library.name}).\n` +
|
|
167
|
+
`Pages and crawl history remain attached to the same library.`);
|
|
168
|
+
}
|
|
169
|
+
case "refresh": {
|
|
170
|
+
const libraryName = requireValue(input.library, "library is required for action=refresh.");
|
|
171
|
+
const lib = db.getLibraryByName(libraryName);
|
|
172
|
+
if (!lib)
|
|
173
|
+
return tool.text(`Library "${libraryName}" not found. Use list_libraries to see available.`);
|
|
174
|
+
const job = jobManager.startCrawl(lib.id, { incremental: true });
|
|
175
|
+
return tool.text(`🔄 Refresh started for "${lib.display_name}".\nJob ${job.id}: checking for updated pages...`);
|
|
176
|
+
}
|
|
177
|
+
case "remove": {
|
|
178
|
+
const libraryName = requireValue(input.library, "library is required for action=remove.");
|
|
179
|
+
const lib = db.getLibraryByName(libraryName);
|
|
180
|
+
if (!lib)
|
|
181
|
+
return tool.text(`Library "${libraryName}" not found.`);
|
|
182
|
+
db.removeLibrary(lib.id);
|
|
183
|
+
return tool.text(`🗑️ Library "${lib.display_name}" removed.\nDeleted ${lib.page_count} pages and ${lib.chunk_count} chunks.`);
|
|
184
|
+
}
|
|
185
|
+
case "info": {
|
|
186
|
+
const libraryName = requireValue(input.library, "library is required for action=info.");
|
|
187
|
+
const lib = db.getLibraryByName(libraryName);
|
|
188
|
+
if (!lib)
|
|
189
|
+
return tool.text(`Library "${libraryName}" not found. Use list_libraries to see available libraries.`);
|
|
190
|
+
return tool.text(formatLibraryInfo(lib.id));
|
|
191
|
+
}
|
|
183
192
|
}
|
|
184
193
|
}
|
|
185
|
-
|
|
186
|
-
|
|
194
|
+
catch (err) {
|
|
195
|
+
const message = err instanceof Error ? err.message : "Unknown error";
|
|
196
|
+
return tool.text(`❌ Failed: ${message}`);
|
|
187
197
|
}
|
|
188
|
-
return tool.text(
|
|
198
|
+
return tool.text(`❌ Failed: Unsupported action.`);
|
|
189
199
|
});
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import type { Database } from
|
|
2
|
-
import type { JobManager } from
|
|
3
|
-
import type { Library } from
|
|
1
|
+
import type { Database } from "../storage/db.js";
|
|
2
|
+
import type { JobManager } from "../jobs/manager.js";
|
|
3
|
+
import type { Library } from "../types.js";
|
|
4
4
|
export declare class LibraryService {
|
|
5
5
|
private db;
|
|
6
6
|
private jobManager;
|
|
@@ -14,4 +14,9 @@ export declare class LibraryService {
|
|
|
14
14
|
}): Promise<Library & {
|
|
15
15
|
jobId: string;
|
|
16
16
|
}>;
|
|
17
|
+
/** Rename an existing documentation library */
|
|
18
|
+
rename(opts: {
|
|
19
|
+
currentName: string;
|
|
20
|
+
newName: string;
|
|
21
|
+
}): Library;
|
|
17
22
|
}
|
package/dist/services/library.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
// src/services/library.ts — Library management service
|
|
2
|
-
import { nanoid } from
|
|
2
|
+
import { nanoid } from "nanoid";
|
|
3
3
|
export class LibraryService {
|
|
4
4
|
db;
|
|
5
5
|
jobManager;
|
|
@@ -15,12 +15,12 @@ export class LibraryService {
|
|
|
15
15
|
// Check if already exists
|
|
16
16
|
const existing = this.db.getLibraryByName(name);
|
|
17
17
|
if (existing) {
|
|
18
|
-
throw new Error(`Library "${name}" already exists. Use
|
|
18
|
+
throw new Error(`Library "${name}" already exists. Use manage_library with action=refresh to re-crawl.`);
|
|
19
19
|
}
|
|
20
20
|
const id = nanoid();
|
|
21
21
|
const crawlConfig = {
|
|
22
22
|
maxDepth: opts.maxDepth ?? 3,
|
|
23
|
-
renderer:
|
|
23
|
+
renderer: "auto",
|
|
24
24
|
};
|
|
25
25
|
this.db.addLibrary({
|
|
26
26
|
id,
|
|
@@ -35,32 +35,62 @@ export class LibraryService {
|
|
|
35
35
|
const library = this.db.getLibraryById(id);
|
|
36
36
|
return { ...library, jobId: job.id };
|
|
37
37
|
}
|
|
38
|
+
/** Rename an existing documentation library */
|
|
39
|
+
rename(opts) {
|
|
40
|
+
const currentName = opts.currentName.trim();
|
|
41
|
+
const newName = opts.newName.trim();
|
|
42
|
+
if (!currentName) {
|
|
43
|
+
throw new Error("Current library name is required.");
|
|
44
|
+
}
|
|
45
|
+
if (!newName) {
|
|
46
|
+
throw new Error("New library name is required.");
|
|
47
|
+
}
|
|
48
|
+
const library = this.db.getLibraryByName(currentName);
|
|
49
|
+
if (!library) {
|
|
50
|
+
throw new Error(`Library "${currentName}" not found.`);
|
|
51
|
+
}
|
|
52
|
+
if (library.name === newName) {
|
|
53
|
+
return library;
|
|
54
|
+
}
|
|
55
|
+
const existing = this.db.getLibraryByName(newName);
|
|
56
|
+
if (existing && existing.id !== library.id) {
|
|
57
|
+
throw new Error(`Library "${newName}" already exists.`);
|
|
58
|
+
}
|
|
59
|
+
const displayName = generateDisplayName(newName);
|
|
60
|
+
this.db.renameLibrary(library.id, newName, displayName);
|
|
61
|
+
const updated = this.db.getLibraryById(library.id);
|
|
62
|
+
if (!updated) {
|
|
63
|
+
throw new Error(`Failed to rename library "${currentName}".`);
|
|
64
|
+
}
|
|
65
|
+
return updated;
|
|
66
|
+
}
|
|
38
67
|
}
|
|
39
68
|
/** Normalize URL: ensure trailing slash for base docs */
|
|
40
69
|
function normalizeUrl(url) {
|
|
41
70
|
const parsed = new URL(url);
|
|
42
71
|
// Remove trailing hash and query for base URL
|
|
43
|
-
parsed.hash =
|
|
72
|
+
parsed.hash = "";
|
|
44
73
|
return parsed.href;
|
|
45
74
|
}
|
|
46
75
|
/** Generate a slug name from URL */
|
|
47
76
|
function generateName(url) {
|
|
48
77
|
const parsed = new URL(url);
|
|
49
|
-
const host = parsed.hostname.replace(/^www\./,
|
|
50
|
-
const path = parsed.pathname.replace(/\/$/,
|
|
78
|
+
const host = parsed.hostname.replace(/^www\./, "");
|
|
79
|
+
const path = parsed.pathname.replace(/\/$/, "").replace(/^\//, "");
|
|
51
80
|
if (path) {
|
|
52
81
|
// e.g. svelte.dev/docs → "svelte-docs"
|
|
53
|
-
const hostPart = host.split(
|
|
54
|
-
const pathPart = path.split(
|
|
55
|
-
return `${hostPart}-${pathPart}`.toLowerCase().replace(/[^a-z0-9-]/g,
|
|
82
|
+
const hostPart = host.split(".")[0];
|
|
83
|
+
const pathPart = path.split("/").slice(0, 2).join("-");
|
|
84
|
+
return `${hostPart}-${pathPart}`.toLowerCase().replace(/[^a-z0-9-]/g, "-");
|
|
56
85
|
}
|
|
57
86
|
// Just the hostname
|
|
58
|
-
return host.replace(/\./g,
|
|
87
|
+
return host.replace(/\./g, "-").toLowerCase();
|
|
59
88
|
}
|
|
60
89
|
/** Generate a display name from the slug */
|
|
61
90
|
function generateDisplayName(name) {
|
|
62
91
|
return name
|
|
63
|
-
.split(
|
|
92
|
+
.split(/[-_\s]+/)
|
|
93
|
+
.filter(Boolean)
|
|
64
94
|
.map((word) => word.charAt(0).toUpperCase() + word.slice(1))
|
|
65
|
-
.join(
|
|
95
|
+
.join(" ");
|
|
66
96
|
}
|
package/dist/storage/db.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { Database as BunDatabase } from
|
|
2
|
-
import type { Library, Page, CrawlJob } from
|
|
1
|
+
import { Database as BunDatabase } from "bun:sqlite";
|
|
2
|
+
import type { Library, Page, CrawlJob } from "../types.js";
|
|
3
3
|
export declare class Database {
|
|
4
4
|
private db;
|
|
5
5
|
init(): void;
|
|
@@ -18,6 +18,7 @@ export declare class Database {
|
|
|
18
18
|
getLibraryByName(name: string): Library | undefined;
|
|
19
19
|
getLibraryById(id: string): Library | undefined;
|
|
20
20
|
removeLibrary(id: string): import("bun:sqlite").Changes;
|
|
21
|
+
renameLibrary(id: string, name: string, displayName: string): import("bun:sqlite").Changes;
|
|
21
22
|
updateLibraryStatus(id: string, status: string): import("bun:sqlite").Changes;
|
|
22
23
|
updateLibraryStats(id: string, pageCount: number, chunkCount: number): import("bun:sqlite").Changes;
|
|
23
24
|
upsertPage(page: {
|
|
@@ -52,6 +53,6 @@ export declare class Database {
|
|
|
52
53
|
libraryId: string;
|
|
53
54
|
}): CrawlJob;
|
|
54
55
|
getJob(id: string): CrawlJob | undefined;
|
|
55
|
-
updateJob(id: string, updates: Partial<Pick<CrawlJob,
|
|
56
|
+
updateJob(id: string, updates: Partial<Pick<CrawlJob, "status" | "pages_discovered" | "pages_crawled" | "pages_failed" | "chunks_created" | "error_message" | "started_at" | "completed_at">>): void;
|
|
56
57
|
listJobs(libraryId?: string): CrawlJob[];
|
|
57
58
|
}
|
package/dist/storage/db.js
CHANGED
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
// src/storage/db.ts — SQLite + FTS5 storage layer (bun:sqlite)
|
|
2
|
-
import { Database as BunDatabase } from
|
|
3
|
-
import { resolve } from
|
|
4
|
-
import { mkdirSync } from
|
|
5
|
-
import { homedir } from
|
|
2
|
+
import { Database as BunDatabase } from "bun:sqlite";
|
|
3
|
+
import { resolve } from "path";
|
|
4
|
+
import { mkdirSync } from "fs";
|
|
5
|
+
import { homedir } from "os";
|
|
6
6
|
export class Database {
|
|
7
7
|
db;
|
|
8
8
|
init() {
|
|
9
|
-
const dir = process.env.DOCSHARK_DATA_DIR || resolve(homedir(),
|
|
9
|
+
const dir = process.env.DOCSHARK_DATA_DIR || resolve(homedir(), ".docshark");
|
|
10
10
|
mkdirSync(dir, { recursive: true });
|
|
11
|
-
this.db = new BunDatabase(resolve(dir,
|
|
12
|
-
this.db.run(
|
|
13
|
-
this.db.run(
|
|
11
|
+
this.db = new BunDatabase(resolve(dir, "docshark.db"));
|
|
12
|
+
this.db.run("PRAGMA journal_mode = WAL");
|
|
13
|
+
this.db.run("PRAGMA foreign_keys = ON");
|
|
14
14
|
this.migrate();
|
|
15
15
|
}
|
|
16
16
|
/** Expose raw DB for search engine direct queries */
|
|
@@ -114,19 +114,32 @@ export class Database {
|
|
|
114
114
|
.run(lib.id, lib.name, lib.displayName, lib.url, lib.version ?? null, lib.crawlConfig ? JSON.stringify(lib.crawlConfig) : null);
|
|
115
115
|
}
|
|
116
116
|
listLibraries(status) {
|
|
117
|
-
if (status && status !==
|
|
118
|
-
return this.db
|
|
117
|
+
if (status && status !== "all") {
|
|
118
|
+
return this.db
|
|
119
|
+
.prepare("SELECT * FROM libraries WHERE status = ?")
|
|
120
|
+
.all(status);
|
|
119
121
|
}
|
|
120
|
-
return this.db
|
|
122
|
+
return this.db
|
|
123
|
+
.prepare("SELECT * FROM libraries ORDER BY name")
|
|
124
|
+
.all();
|
|
121
125
|
}
|
|
122
126
|
getLibraryByName(name) {
|
|
123
|
-
return this.db
|
|
127
|
+
return this.db
|
|
128
|
+
.prepare("SELECT * FROM libraries WHERE name = ?")
|
|
129
|
+
.get(name);
|
|
124
130
|
}
|
|
125
131
|
getLibraryById(id) {
|
|
126
|
-
return this.db.prepare(
|
|
132
|
+
return this.db.prepare("SELECT * FROM libraries WHERE id = ?").get(id);
|
|
127
133
|
}
|
|
128
134
|
removeLibrary(id) {
|
|
129
|
-
return this.db.prepare(
|
|
135
|
+
return this.db.prepare("DELETE FROM libraries WHERE id = ?").run(id);
|
|
136
|
+
}
|
|
137
|
+
renameLibrary(id, name, displayName) {
|
|
138
|
+
return this.db
|
|
139
|
+
.prepare(`UPDATE libraries
|
|
140
|
+
SET name = ?, display_name = ?, updated_at = datetime('now')
|
|
141
|
+
WHERE id = ?`)
|
|
142
|
+
.run(name, displayName, id);
|
|
130
143
|
}
|
|
131
144
|
updateLibraryStatus(id, status) {
|
|
132
145
|
return this.db
|
|
@@ -154,12 +167,16 @@ export class Database {
|
|
|
154
167
|
headings = excluded.headings,
|
|
155
168
|
updated_at = datetime('now')`)
|
|
156
169
|
.run(page.id, page.libraryId, page.url, page.path, page.title, page.contentMarkdown, page.contentHash, JSON.stringify(page.headings));
|
|
157
|
-
const row = this.db
|
|
170
|
+
const row = this.db
|
|
171
|
+
.prepare("SELECT id FROM pages WHERE library_id = ? AND url = ?")
|
|
172
|
+
.get(page.libraryId, page.url);
|
|
158
173
|
return row.id;
|
|
159
174
|
}
|
|
160
175
|
getPage(opts) {
|
|
161
176
|
if (opts.url) {
|
|
162
|
-
return this.db
|
|
177
|
+
return this.db
|
|
178
|
+
.prepare("SELECT * FROM pages WHERE url = ?")
|
|
179
|
+
.get(opts.url);
|
|
163
180
|
}
|
|
164
181
|
if (opts.library && opts.path) {
|
|
165
182
|
return this.db
|
|
@@ -172,7 +189,7 @@ export class Database {
|
|
|
172
189
|
}
|
|
173
190
|
getPagesByLibrary(libraryId) {
|
|
174
191
|
return this.db
|
|
175
|
-
.prepare(
|
|
192
|
+
.prepare("SELECT * FROM pages WHERE library_id = ? ORDER BY path")
|
|
176
193
|
.all(libraryId);
|
|
177
194
|
}
|
|
178
195
|
// ──────────────────────────────────────
|
|
@@ -189,19 +206,21 @@ export class Database {
|
|
|
189
206
|
tx();
|
|
190
207
|
}
|
|
191
208
|
deleteChunksByPage(pageId) {
|
|
192
|
-
this.db.prepare(
|
|
209
|
+
this.db.prepare("DELETE FROM chunks WHERE page_id = ?").run(pageId);
|
|
193
210
|
}
|
|
194
211
|
// ──────────────────────────────────────
|
|
195
212
|
// Crawl Jobs
|
|
196
213
|
// ──────────────────────────────────────
|
|
197
214
|
createJob(job) {
|
|
198
215
|
this.db
|
|
199
|
-
.prepare(
|
|
216
|
+
.prepare("INSERT INTO crawl_jobs (id, library_id) VALUES (?, ?)")
|
|
200
217
|
.run(job.id, job.libraryId);
|
|
201
|
-
return this.db
|
|
218
|
+
return this.db
|
|
219
|
+
.prepare("SELECT * FROM crawl_jobs WHERE id = ?")
|
|
220
|
+
.get(job.id);
|
|
202
221
|
}
|
|
203
222
|
getJob(id) {
|
|
204
|
-
return this.db.prepare(
|
|
223
|
+
return this.db.prepare("SELECT * FROM crawl_jobs WHERE id = ?").get(id);
|
|
205
224
|
}
|
|
206
225
|
updateJob(id, updates) {
|
|
207
226
|
const sets = [];
|
|
@@ -213,16 +232,18 @@ export class Database {
|
|
|
213
232
|
if (sets.length === 0)
|
|
214
233
|
return;
|
|
215
234
|
values.push(id);
|
|
216
|
-
this.db
|
|
235
|
+
this.db
|
|
236
|
+
.prepare(`UPDATE crawl_jobs SET ${sets.join(", ")} WHERE id = ?`)
|
|
237
|
+
.run(...values);
|
|
217
238
|
}
|
|
218
239
|
listJobs(libraryId) {
|
|
219
240
|
if (libraryId) {
|
|
220
241
|
return this.db
|
|
221
|
-
.prepare(
|
|
242
|
+
.prepare("SELECT * FROM crawl_jobs WHERE library_id = ? ORDER BY created_at DESC")
|
|
222
243
|
.all(libraryId);
|
|
223
244
|
}
|
|
224
245
|
return this.db
|
|
225
|
-
.prepare(
|
|
246
|
+
.prepare("SELECT * FROM crawl_jobs ORDER BY created_at DESC")
|
|
226
247
|
.all();
|
|
227
248
|
}
|
|
228
249
|
}
|
package/dist/version.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export declare const VERSION
|
|
1
|
+
export declare const VERSION: string;
|
package/dist/version.js
CHANGED
|
@@ -1,2 +1,6 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
import { readFileSync } from "node:fs";
|
|
2
|
+
import { dirname, resolve } from "node:path";
|
|
3
|
+
import { fileURLToPath } from "node:url";
|
|
4
|
+
const packageJsonPath = resolve(dirname(fileURLToPath(import.meta.url)), "../package.json");
|
|
5
|
+
export const VERSION = JSON.parse(readFileSync(packageJsonPath, "utf8"))
|
|
6
|
+
.version;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "docshark",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.16",
|
|
4
4
|
"description": "🦈 Documentation MCP Server — scrape, index, and search any doc website",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|
|
@@ -58,8 +58,8 @@
|
|
|
58
58
|
"@tmcp/transport-http": "^0.8.4",
|
|
59
59
|
"@tmcp/transport-sse": "^0.5.3",
|
|
60
60
|
"@tmcp/transport-stdio": "^0.4.1",
|
|
61
|
+
"cac": "^7.0.0",
|
|
61
62
|
"cheerio": "^1.2.0",
|
|
62
|
-
"commander": "^14.0.3",
|
|
63
63
|
"linkedom": "^0.18.12",
|
|
64
64
|
"nanoid": "^5.1.6",
|
|
65
65
|
"puppeteer-core": "^24.37.5",
|
package/LICENSE
DELETED
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
MIT License
|
|
2
|
-
|
|
3
|
-
Copyright (c) 2026 Michael-Obele
|
|
4
|
-
|
|
5
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
-
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
-
in the Software without restriction, including without limitation the rights
|
|
8
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
-
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
-
furnished to do so, subject to the following conditions:
|
|
11
|
-
|
|
12
|
-
The above copyright notice and this permission notice shall be included in all
|
|
13
|
-
copies or substantial portions of the Software.
|
|
14
|
-
|
|
15
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
-
SOFTWARE.
|
package/README.md
DELETED
|
@@ -1,176 +0,0 @@
|
|
|
1
|
-
# 🦈 DocShark
|
|
2
|
-
|
|
3
|
-
[Bun](https://bun.sh/)
|
|
4
|
-
[npm package](https://www.npmjs.com/package/docshark)
|
|
5
|
-
[Model Context Protocol](https://modelcontextprotocol.io/)
|
|
6
|
-
[Releases](https://github.com/Michael-Obele/docshark/releases)
|
|
7
|
-
[License: MIT](https://opensource.org/licenses/MIT)
|
|
8
|
-
|
|
9
|
-
**DocShark** is a powerful MCP (Model Context Protocol) server designed to scrape, index, and search any documentation website. It creates a local, highly searchable knowledge base from public documentation pages using FTS5 (Full-Text Search) and BM25 ranking, allowing AI assistants to query the latest docs effortlessly.
|
|
10
|
-
|
|
11
|
-
---
|
|
12
|
-
|
|
13
|
-
## 🚀 Features
|
|
14
|
-
|
|
15
|
-
- **Automated Crawling**: Discovers pages via `sitemap.xml` with fallback to BFS link crawling.
|
|
16
|
-
- **Smart Extraction**: Uses Readability and Turndown to extract main content and convert it to clean Markdown, filtering out navbars and sidebars.
|
|
17
|
-
- **Semantic Chunking**: Splits content based on headings, preserving contextual headers for better AI understanding.
|
|
18
|
-
- **High-Performance Search**: Built-in SQLite + FTS5 indexing with BM25 ranking for accurate and lightning-fast search results.
|
|
19
|
-
- **JS-Rendered Site Support**: A tiered fetching strategy automatically detects React/Vue SPAs (pages served as empty HTML shells) and upgrades to `puppeteer-core` when it is installed (zero-config, automatic fallback).
|
|
20
|
-
- **Polite Crawling**: Respects `robots.txt` and implements rate limiting to prevent overloading documentation servers.
|
|
21
|
-
- **Standard MCP Tooling**: Connects seamlessly with Claude Desktop, VS Code, Cursor, and any other MCP-compatible client via the standard `stdio` or `http`/`sse` transports.
|
|
22
|
-
|
|
23
|
-
## 📦 What We Have Done (Phase 1)
|
|
24
|
-
|
|
25
|
-
**Phase 1: Core Engine** is fully implemented and tested.
|
|
26
|
-
|
|
27
|
-
- ✅ Custom SQLite Database with FTS5 virtual tables and auto-sync triggers.
|
|
28
|
-
- ✅ Web scraping engine supporting standard `fetch()` and `puppeteer-core`.
|
|
29
|
-
- ✅ Markdown processor utilizing Readability + Turndown.
|
|
30
|
-
- ✅ Heading-based semantic chunker (500-1200 tokens per chunk).
|
|
31
|
-
- ✅ Asynchronous job manager and queue system.
|
|
32
|
-
- ✅ Complete HTTP API (REST endpoints + SSE event streams).
|
|
33
|
-
- ✅ Seamless integration of 6 MCP tools: `add_library`, `search_docs`, `list_libraries`, `get_doc_page`, `refresh_library`, and `remove_library`.
|
|
34
|
-
- ✅ Robust CLI interface (`start`, `add`, `search`, `list`).
|
|
35
|
-
|
|
36
|
-
## 🏗️ What We Are Doing
|
|
37
|
-
|
|
38
|
-
We are actively polishing the integration between the core engine and external MCP clients (like VS Code Agents and Claude Desktop).
|
|
39
|
-
|
|
40
|
-
## 🔮 What We Plan To Do (Phase 2 & Beyond)
|
|
41
|
-
|
|
42
|
-
- **Web Dashboard**: An intuitive SvelteKit dashboard to manage your synced libraries, view crawl progress in real-time (via SSE), and test searches manually.
|
|
43
|
-
- **Incremental Crawling**: Smarter `refresh` jobs that compare `ETag` and `Last-Modified` headers to only re-scrape updated pages.
|
|
44
|
-
- **Vector Search (RAG)**: Integration of lightweight vector embeddings for semantic similarity search alongside the existing FTS5 keyword search.
|
|
45
|
-
- **Advanced Scraping Setup**: Support for custom CSS selectors to define exactly where content lives in non-standard documentation websites.
|
|
46
|
-
|
|
47
|
-
---
|
|
48
|
-
|
|
49
|
-
## 🛠️ Usage
|
|
50
|
-
|
|
51
|
-
### Quick Start (from npm)
|
|
52
|
-
|
|
53
|
-
You can run DocShark directly without installing it globally using `bunx`:
|
|
54
|
-
|
|
55
|
-
```bash
|
|
56
|
-
# Add a documentation library to the index
|
|
57
|
-
bunx docshark add https://valibot.dev/guides/ --depth 2
|
|
58
|
-
|
|
59
|
-
# Search your indexed docs
|
|
60
|
-
bunx docshark search "schema validation"
|
|
61
|
-
```
|
|
62
|
-
|
|
63
|
-
### Installation
|
|
64
|
-
|
|
65
|
-
To install DocShark globally as a CLI tool:
|
|
66
|
-
|
|
67
|
-
DocShark is intended to be installed and run with Bun.
|
|
68
|
-
|
|
69
|
-
```bash
|
|
70
|
-
# Global Bun installation
|
|
71
|
-
bun add -g docshark
|
|
72
|
-
```
|
|
73
|
-
|
|
74
|
-
After installation, you can use the `docshark` command:
|
|
75
|
-
|
|
76
|
-
```bash
|
|
77
|
-
docshark list
|
|
78
|
-
|
|
79
|
-
# Update the global Bun installation when a new release is published
|
|
80
|
-
docshark update
|
|
81
|
-
|
|
82
|
-
# Script-friendly update check
|
|
83
|
-
docshark update --check --quiet
|
|
84
|
-
```
|
|
85
|
-
|
|
86
|
-
Interactive CLI runs will also let you know when a newer version is available. Update notices are intentionally skipped for MCP `stdio` mode so they never interfere with protocol output.
|
|
87
|
-
|
|
88
|
-
For scripts, `docshark update --check` exits `0` when current, `10` when a newer version is available, and `1` when the version check could not be completed.
|
|
89
|
-
|
|
90
|
-
## 🔌 MCP Integration
|
|
91
|
-
|
|
92
|
-
### VS Code (GitHub Copilot / MCP Extension)
|
|
93
|
-
|
|
94
|
-
Add DocShark to your `.vscode/settings.json` or global MCP configuration:
|
|
95
|
-
|
|
96
|
-
```json
|
|
97
|
-
{
|
|
98
|
-
"mcpServers": {
|
|
99
|
-
"docshark": {
|
|
100
|
-
"command": "bunx",
|
|
101
|
-
"args": ["-y", "docshark", "start", "--stdio"]
|
|
102
|
-
}
|
|
103
|
-
}
|
|
104
|
-
}
|
|
105
|
-
```
|
|
106
|
-
|
|
107
|
-
### Cursor
|
|
108
|
-
|
|
109
|
-
1. Open **Cursor Settings** > **Models** > **MCP**.
|
|
110
|
-
2. Click **+ Add New MCP Server**.
|
|
111
|
-
3. Name: `docshark`
|
|
112
|
-
4. Type: `command`
|
|
113
|
-
5. Command: `bunx -y docshark start --stdio`
|
|
114
|
-
|
|
115
|
-
### Claude Desktop
|
|
116
|
-
|
|
117
|
-
Edit your Claude Desktop configuration file:
|
|
118
|
-
|
|
119
|
-
- **macOS**: `~/Library/Application Support/Claude/claude_desktop_config.json`
|
|
120
|
-
- **Windows**: `%APPDATA%\Claude\claude_desktop_config.json`
|
|
121
|
-
|
|
122
|
-
```json
|
|
123
|
-
{
|
|
124
|
-
"mcpServers": {
|
|
125
|
-
"docshark": {
|
|
126
|
-
"command": "bunx",
|
|
127
|
-
"args": ["-y", "docshark", "start", "--stdio"]
|
|
128
|
-
}
|
|
129
|
-
}
|
|
130
|
-
}
|
|
131
|
-
```
|
|
132
|
-
|
|
133
|
-
---
|
|
134
|
-
|
|
135
|
-
## 🛠️ Development
|
|
136
|
-
|
|
137
|
-
### Local Setup
|
|
138
|
-
|
|
139
|
-
Ensure you have [Bun](https://bun.sh/) installed.
|
|
140
|
-
|
|
141
|
-
```bash
|
|
142
|
-
# Clone the repository
|
|
143
|
-
git clone https://github.com/Michael-Obele/docshark.git
|
|
144
|
-
cd docshark
|
|
145
|
-
|
|
146
|
-
# Install dependencies
|
|
147
|
-
bun install
|
|
148
|
-
|
|
149
|
-
# (Optional) Enable auto-detection & scraping of JavaScript React/Vue single-page apps
|
|
150
|
-
bun add puppeteer-core
|
|
151
|
-
|
|
152
|
-
# Start the DocShark MCP server in HTTP mode for local testing
|
|
153
|
-
bun run src/cli.ts start --port 6380
|
|
154
|
-
```
|
|
155
|
-
|
|
156
|
-
### Local CLI Debugging
|
|
157
|
-
|
|
158
|
-
```bash
|
|
159
|
-
# Run CLI directly while developing
|
|
160
|
-
bun run src/cli.ts list
|
|
161
|
-
```
|
|
162
|
-
|
|
163
|
-
## 🔄 Versioning & Changelog
|
|
164
|
-
|
|
165
|
-
This project uses [Google's Release Please](https://github.com/googleapis/release-please) to automate versioning and changelog generation.
|
|
166
|
-
|
|
167
|
-
- **Semantic Versioning**: Versions are bumped automatically (e.g. `0.0.1` -> `0.0.2` or `0.1.0`) based on standard Conventional Commits (`feat:`, `fix:`, `chore:`, etc.).
|
|
168
|
-
- **Automated**: When Conventional Commits are merged into `master`, a PR is created automatically and a standard `CHANGELOG.md` is generated.
|
|
169
|
-
|
|
170
|
-
## 📜 License
|
|
171
|
-
|
|
172
|
-
This project is open-source and available under the [MIT License](LICENSE).
|
|
173
|
-
|
|
174
|
-
---
|
|
175
|
-
|
|
176
|
-
_Built to empower AI agents with the latest knowledge._
|