docshark 0.1.12 → 0.1.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +9 -0
- package/dist/cli-update.d.ts +10 -0
- package/dist/cli-update.js +186 -0
- package/dist/cli.js +254 -42
- package/dist/server.d.ts +7 -7
- package/dist/server.js +123 -113
- package/dist/services/library.d.ts +8 -3
- package/dist/services/library.js +42 -12
- package/dist/storage/db.d.ts +4 -3
- package/dist/storage/db.js +45 -24
- package/dist/tools/list-libraries.d.ts +1 -1
- package/dist/version.d.ts +1 -1
- package/dist/version.js +6 -2
- package/package.json +2 -2
- package/LICENSE +0 -21
- package/README.md +0 -167
package/dist/server.js
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
// src/server.ts — TMCP McpServer setup + tool registration
|
|
2
|
-
import { McpServer } from
|
|
3
|
-
import { ValibotJsonSchemaAdapter } from
|
|
4
|
-
import * as v from
|
|
5
|
-
import { tool } from
|
|
6
|
-
import { Database } from
|
|
7
|
-
import { SearchEngine } from
|
|
8
|
-
import { LibraryService } from
|
|
9
|
-
import { JobManager } from
|
|
10
|
-
import { VERSION } from
|
|
11
|
-
import { EventBus } from
|
|
2
|
+
import { McpServer } from "tmcp";
|
|
3
|
+
import { ValibotJsonSchemaAdapter } from "@tmcp/adapter-valibot";
|
|
4
|
+
import * as v from "valibot";
|
|
5
|
+
import { tool } from "tmcp/utils";
|
|
6
|
+
import { Database } from "./storage/db.js";
|
|
7
|
+
import { SearchEngine } from "./storage/search.js";
|
|
8
|
+
import { LibraryService } from "./services/library.js";
|
|
9
|
+
import { JobManager } from "./jobs/manager.js";
|
|
10
|
+
import { VERSION } from "./version.js";
|
|
11
|
+
import { EventBus } from "./jobs/events.js";
|
|
12
12
|
// Initialize core services
|
|
13
13
|
export const db = new Database();
|
|
14
14
|
export const eventBus = new EventBus();
|
|
@@ -17,9 +17,9 @@ export const jobManager = new JobManager(db, eventBus);
|
|
|
17
17
|
export const libraryService = new LibraryService(db, jobManager);
|
|
18
18
|
// Create TMCP server
|
|
19
19
|
export const server = new McpServer({
|
|
20
|
-
name:
|
|
20
|
+
name: "docshark",
|
|
21
21
|
version: VERSION,
|
|
22
|
-
description:
|
|
22
|
+
description: "🦈 Documentation MCP Server — scrape, index, and search any doc website",
|
|
23
23
|
}, {
|
|
24
24
|
adapter: new ValibotJsonSchemaAdapter(),
|
|
25
25
|
capabilities: {
|
|
@@ -31,14 +31,14 @@ export const server = new McpServer({
|
|
|
31
31
|
// Tool 1: search_docs — Primary search tool
|
|
32
32
|
// ──────────────────────────────────────
|
|
33
33
|
server.tool({
|
|
34
|
-
name:
|
|
35
|
-
description:
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
34
|
+
name: "search_docs",
|
|
35
|
+
description: "Search through indexed documentation libraries for relevant information. " +
|
|
36
|
+
"Returns ranked documentation sections with code examples and source URLs. " +
|
|
37
|
+
"Use this when you need to find information about a library, framework, API, " +
|
|
38
|
+
"or any technical concept.",
|
|
39
39
|
schema: v.object({
|
|
40
|
-
query: v.pipe(v.string(), v.description(
|
|
41
|
-
library: v.optional(v.pipe(v.string(), v.description(
|
|
40
|
+
query: v.pipe(v.string(), v.description("Search query. Use natural language.")),
|
|
41
|
+
library: v.optional(v.pipe(v.string(), v.description("Filter to a specific library."))),
|
|
42
42
|
limit: v.optional(v.pipe(v.number(), v.integer(), v.minValue(1), v.maxValue(20)), 5),
|
|
43
43
|
}),
|
|
44
44
|
}, async ({ query, library, limit }) => {
|
|
@@ -53,27 +53,59 @@ server.tool({
|
|
|
53
53
|
block += r.content;
|
|
54
54
|
return block;
|
|
55
55
|
})
|
|
56
|
-
.join(
|
|
56
|
+
.join("\n\n---\n\n");
|
|
57
57
|
return tool.text(`## Results for "${query}"\n\n${formatted}`);
|
|
58
58
|
});
|
|
59
|
+
function requireValue(value, message) {
|
|
60
|
+
if (value === undefined || value === null || value === "") {
|
|
61
|
+
throw new Error(message);
|
|
62
|
+
}
|
|
63
|
+
return value;
|
|
64
|
+
}
|
|
65
|
+
function formatLibraryInfo(libraryId) {
|
|
66
|
+
const lib = db.getLibraryById(libraryId);
|
|
67
|
+
if (!lib) {
|
|
68
|
+
return `Library not found.`;
|
|
69
|
+
}
|
|
70
|
+
const pages = db.getPagesByLibrary(lib.id);
|
|
71
|
+
let output = `## Library: ${lib.display_name} (${lib.name})\n`;
|
|
72
|
+
output += `- **URL:** ${lib.url}\n`;
|
|
73
|
+
output += `- **Status:** ${lib.status}\n`;
|
|
74
|
+
output += `- **Pages:** ${lib.page_count}\n`;
|
|
75
|
+
output += `- **Chunks:** ${lib.chunk_count}\n`;
|
|
76
|
+
output += `- **Last Crawled:** ${lib.last_crawled_at || "never"}\n\n`;
|
|
77
|
+
if (pages.length > 0) {
|
|
78
|
+
output += `### Pages (${pages.length})\n\n`;
|
|
79
|
+
output += "| Title | Path | URL |\n";
|
|
80
|
+
output += "| ----- | ---- | --- |\n";
|
|
81
|
+
for (const p of pages) {
|
|
82
|
+
const title = p.title?.replace(/\|/g, "-") || "Untitled";
|
|
83
|
+
output += `| ${title} | \`${p.path}\` | ${p.url} |\n`;
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
else {
|
|
87
|
+
output += `*No pages indexed yet for this library.*\n`;
|
|
88
|
+
}
|
|
89
|
+
return output;
|
|
90
|
+
}
|
|
59
91
|
// ──────────────────────────────────────
|
|
60
92
|
// Tool 2: list_libraries — Discovery tool
|
|
61
93
|
// ──────────────────────────────────────
|
|
62
94
|
server.tool({
|
|
63
|
-
name:
|
|
64
|
-
description:
|
|
65
|
-
|
|
95
|
+
name: "list_libraries",
|
|
96
|
+
description: "List all documentation libraries currently indexed and available for searching. " +
|
|
97
|
+
"Use this to discover what docs are available before running search_docs.",
|
|
66
98
|
schema: v.object({
|
|
67
|
-
status: v.optional(v.pipe(v.picklist([
|
|
99
|
+
status: v.optional(v.pipe(v.picklist(["indexed", "crawling", "error", "all"]), v.description('Filter by status. Default: "all".')), "all"),
|
|
68
100
|
}),
|
|
69
101
|
}, async ({ status }) => {
|
|
70
102
|
const libraries = db.listLibraries(status);
|
|
71
103
|
if (libraries.length === 0) {
|
|
72
|
-
return tool.text(
|
|
104
|
+
return tool.text("No libraries indexed yet. Use manage_library with action=add to add a documentation website.");
|
|
73
105
|
}
|
|
74
106
|
let output = `## Indexed Libraries (${libraries.length} total)\n\n`;
|
|
75
|
-
output +=
|
|
76
|
-
output +=
|
|
107
|
+
output += "| Library | URL | Pages | Chunks | Status |\n";
|
|
108
|
+
output += "| ------- | --- | ----- | ------ | ------ |\n";
|
|
77
109
|
for (const lib of libraries) {
|
|
78
110
|
output += `| ${lib.name} | ${lib.url} | ${lib.page_count} | ${lib.chunk_count} | ${lib.status} |\n`;
|
|
79
111
|
}
|
|
@@ -83,107 +115,85 @@ server.tool({
|
|
|
83
115
|
// Tool 3: get_doc_page — Full page read
|
|
84
116
|
// ──────────────────────────────────────
|
|
85
117
|
server.tool({
|
|
86
|
-
name:
|
|
87
|
-
description:
|
|
88
|
-
|
|
118
|
+
name: "get_doc_page",
|
|
119
|
+
description: "Retrieve the complete content of a specific documentation page as markdown. " +
|
|
120
|
+
"Use when search results reference a page and you need full context.",
|
|
89
121
|
schema: v.object({
|
|
90
|
-
url: v.optional(v.pipe(v.string(), v.description(
|
|
91
|
-
library: v.optional(v.pipe(v.string(), v.description(
|
|
92
|
-
path: v.optional(v.pipe(v.string(), v.description(
|
|
122
|
+
url: v.optional(v.pipe(v.string(), v.description("The full URL of the documentation page."))),
|
|
123
|
+
library: v.optional(v.pipe(v.string(), v.description("Library name to search within."))),
|
|
124
|
+
path: v.optional(v.pipe(v.string(), v.description("Relative path within the library."))),
|
|
93
125
|
}),
|
|
94
126
|
}, async ({ url, library, path }) => {
|
|
95
127
|
const page = db.getPage({ url, library, path });
|
|
96
128
|
if (!page)
|
|
97
|
-
return tool.text(
|
|
129
|
+
return tool.text("Page not found. Use search_docs to find the correct page.");
|
|
98
130
|
return tool.text(`# ${page.title}\n**Source:** ${page.url}\n\n${page.content_markdown}`);
|
|
99
131
|
});
|
|
100
132
|
// ──────────────────────────────────────
|
|
101
|
-
// Tool 4:
|
|
133
|
+
// Tool 4: manage_library — Create, rename, refresh, remove, inspect
|
|
102
134
|
// ──────────────────────────────────────
|
|
103
135
|
server.tool({
|
|
104
|
-
name:
|
|
105
|
-
description:
|
|
106
|
-
'Provide the URL and an optional name. Crawl runs in the background.',
|
|
136
|
+
name: "manage_library",
|
|
137
|
+
description: "Manage a documentation library lifecycle. Use action=add to crawl a new source, action=rename to change the library name, action=refresh to re-crawl, action=remove to delete it, or action=info to inspect its pages and stats.",
|
|
107
138
|
schema: v.object({
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
139
|
+
action: v.pipe(v.picklist(["add", "rename", "refresh", "remove", "info"]), v.description("The management action to perform.")),
|
|
140
|
+
url: v.optional(v.pipe(v.string(), v.url(), v.description("Base URL of the documentation website."))),
|
|
141
|
+
name: v.optional(v.pipe(v.string(), v.description("Short identifier (auto-generated if omitted)."))),
|
|
142
|
+
version: v.optional(v.pipe(v.string(), v.description("Version string."))),
|
|
111
143
|
max_depth: v.optional(v.pipe(v.number(), v.integer(), v.minValue(1), v.maxValue(10)), 3),
|
|
144
|
+
current_name: v.optional(v.pipe(v.string(), v.description("The current library name (for rename)."))),
|
|
145
|
+
new_name: v.optional(v.pipe(v.string(), v.description("The new library name (for rename)."))),
|
|
146
|
+
library: v.optional(v.pipe(v.string(), v.description("The library name to manage."))),
|
|
112
147
|
}),
|
|
113
|
-
}, async (
|
|
148
|
+
}, async (input) => {
|
|
114
149
|
try {
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
});
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
});
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
// ──────────────────────────────────────
|
|
158
|
-
server.tool({
|
|
159
|
-
name: 'library_info',
|
|
160
|
-
description: 'Get detailed information about a specific documentation library, including a list of all its indexed pages and their paths. ' +
|
|
161
|
-
'Use this to see what pages are available in a library before retrieving them.',
|
|
162
|
-
schema: v.object({
|
|
163
|
-
library: v.pipe(v.string(), v.description('The library name to get information for.')),
|
|
164
|
-
}),
|
|
165
|
-
}, async ({ library }) => {
|
|
166
|
-
const lib = db.getLibraryByName(library);
|
|
167
|
-
if (!lib)
|
|
168
|
-
return tool.text(`Library "${library}" not found. Use list_libraries to see available libraries.`);
|
|
169
|
-
const pages = db.getPagesByLibrary(lib.id);
|
|
170
|
-
let output = `## Library: ${lib.display_name} (${lib.name})\n`;
|
|
171
|
-
output += `- **URL:** ${lib.url}\n`;
|
|
172
|
-
output += `- **Status:** ${lib.status}\n`;
|
|
173
|
-
output += `- **Pages:** ${lib.page_count}\n`;
|
|
174
|
-
output += `- **Chunks:** ${lib.chunk_count}\n`;
|
|
175
|
-
output += `- **Last Crawled:** ${lib.last_crawled_at || 'never'}\n\n`;
|
|
176
|
-
if (pages.length > 0) {
|
|
177
|
-
output += `### Pages (${pages.length})\n\n`;
|
|
178
|
-
output += '| Title | Path | URL |\n';
|
|
179
|
-
output += '| ----- | ---- | --- |\n';
|
|
180
|
-
for (const p of pages) {
|
|
181
|
-
const title = p.title?.replace(/\|/g, '-') || 'Untitled';
|
|
182
|
-
output += `| ${title} | \`${p.path}\` | ${p.url} |\n`;
|
|
150
|
+
switch (input.action) {
|
|
151
|
+
case "add": {
|
|
152
|
+
const url = requireValue(input.url, "The URL is required for action=add.");
|
|
153
|
+
const library = await libraryService.add({
|
|
154
|
+
url,
|
|
155
|
+
name: input.name,
|
|
156
|
+
version: input.version,
|
|
157
|
+
maxDepth: input.max_depth,
|
|
158
|
+
});
|
|
159
|
+
return tool.text(`✅ Library "${library.display_name}" added.\n` +
|
|
160
|
+
`Crawl job ${library.jobId} started. Use list_libraries to check progress.`);
|
|
161
|
+
}
|
|
162
|
+
case "rename": {
|
|
163
|
+
const currentName = requireValue(input.current_name, "current_name is required for action=rename.");
|
|
164
|
+
const newName = requireValue(input.new_name, "new_name is required for action=rename.");
|
|
165
|
+
const library = libraryService.rename({ currentName, newName });
|
|
166
|
+
return tool.text(`✅ Library renamed to "${library.display_name}" (${library.name}).\n` +
|
|
167
|
+
`Pages and crawl history remain attached to the same library.`);
|
|
168
|
+
}
|
|
169
|
+
case "refresh": {
|
|
170
|
+
const libraryName = requireValue(input.library, "library is required for action=refresh.");
|
|
171
|
+
const lib = db.getLibraryByName(libraryName);
|
|
172
|
+
if (!lib)
|
|
173
|
+
return tool.text(`Library "${libraryName}" not found. Use list_libraries to see available.`);
|
|
174
|
+
const job = jobManager.startCrawl(lib.id, { incremental: true });
|
|
175
|
+
return tool.text(`🔄 Refresh started for "${lib.display_name}".\nJob ${job.id}: checking for updated pages...`);
|
|
176
|
+
}
|
|
177
|
+
case "remove": {
|
|
178
|
+
const libraryName = requireValue(input.library, "library is required for action=remove.");
|
|
179
|
+
const lib = db.getLibraryByName(libraryName);
|
|
180
|
+
if (!lib)
|
|
181
|
+
return tool.text(`Library "${libraryName}" not found.`);
|
|
182
|
+
db.removeLibrary(lib.id);
|
|
183
|
+
return tool.text(`🗑️ Library "${lib.display_name}" removed.\nDeleted ${lib.page_count} pages and ${lib.chunk_count} chunks.`);
|
|
184
|
+
}
|
|
185
|
+
case "info": {
|
|
186
|
+
const libraryName = requireValue(input.library, "library is required for action=info.");
|
|
187
|
+
const lib = db.getLibraryByName(libraryName);
|
|
188
|
+
if (!lib)
|
|
189
|
+
return tool.text(`Library "${libraryName}" not found. Use list_libraries to see available libraries.`);
|
|
190
|
+
return tool.text(formatLibraryInfo(lib.id));
|
|
191
|
+
}
|
|
183
192
|
}
|
|
184
193
|
}
|
|
185
|
-
|
|
186
|
-
|
|
194
|
+
catch (err) {
|
|
195
|
+
const message = err instanceof Error ? err.message : "Unknown error";
|
|
196
|
+
return tool.text(`❌ Failed: ${message}`);
|
|
187
197
|
}
|
|
188
|
-
return tool.text(
|
|
198
|
+
return tool.text(`❌ Failed: Unsupported action.`);
|
|
189
199
|
});
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import type { Database } from
|
|
2
|
-
import type { JobManager } from
|
|
3
|
-
import type { Library } from
|
|
1
|
+
import type { Database } from "../storage/db.js";
|
|
2
|
+
import type { JobManager } from "../jobs/manager.js";
|
|
3
|
+
import type { Library } from "../types.js";
|
|
4
4
|
export declare class LibraryService {
|
|
5
5
|
private db;
|
|
6
6
|
private jobManager;
|
|
@@ -14,4 +14,9 @@ export declare class LibraryService {
|
|
|
14
14
|
}): Promise<Library & {
|
|
15
15
|
jobId: string;
|
|
16
16
|
}>;
|
|
17
|
+
/** Rename an existing documentation library */
|
|
18
|
+
rename(opts: {
|
|
19
|
+
currentName: string;
|
|
20
|
+
newName: string;
|
|
21
|
+
}): Library;
|
|
17
22
|
}
|
package/dist/services/library.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
// src/services/library.ts — Library management service
|
|
2
|
-
import { nanoid } from
|
|
2
|
+
import { nanoid } from "nanoid";
|
|
3
3
|
export class LibraryService {
|
|
4
4
|
db;
|
|
5
5
|
jobManager;
|
|
@@ -15,12 +15,12 @@ export class LibraryService {
|
|
|
15
15
|
// Check if already exists
|
|
16
16
|
const existing = this.db.getLibraryByName(name);
|
|
17
17
|
if (existing) {
|
|
18
|
-
throw new Error(`Library "${name}" already exists. Use
|
|
18
|
+
throw new Error(`Library "${name}" already exists. Use manage_library with action=refresh to re-crawl.`);
|
|
19
19
|
}
|
|
20
20
|
const id = nanoid();
|
|
21
21
|
const crawlConfig = {
|
|
22
22
|
maxDepth: opts.maxDepth ?? 3,
|
|
23
|
-
renderer:
|
|
23
|
+
renderer: "auto",
|
|
24
24
|
};
|
|
25
25
|
this.db.addLibrary({
|
|
26
26
|
id,
|
|
@@ -35,32 +35,62 @@ export class LibraryService {
|
|
|
35
35
|
const library = this.db.getLibraryById(id);
|
|
36
36
|
return { ...library, jobId: job.id };
|
|
37
37
|
}
|
|
38
|
+
/** Rename an existing documentation library */
|
|
39
|
+
rename(opts) {
|
|
40
|
+
const currentName = opts.currentName.trim();
|
|
41
|
+
const newName = opts.newName.trim();
|
|
42
|
+
if (!currentName) {
|
|
43
|
+
throw new Error("Current library name is required.");
|
|
44
|
+
}
|
|
45
|
+
if (!newName) {
|
|
46
|
+
throw new Error("New library name is required.");
|
|
47
|
+
}
|
|
48
|
+
const library = this.db.getLibraryByName(currentName);
|
|
49
|
+
if (!library) {
|
|
50
|
+
throw new Error(`Library "${currentName}" not found.`);
|
|
51
|
+
}
|
|
52
|
+
if (library.name === newName) {
|
|
53
|
+
return library;
|
|
54
|
+
}
|
|
55
|
+
const existing = this.db.getLibraryByName(newName);
|
|
56
|
+
if (existing && existing.id !== library.id) {
|
|
57
|
+
throw new Error(`Library "${newName}" already exists.`);
|
|
58
|
+
}
|
|
59
|
+
const displayName = generateDisplayName(newName);
|
|
60
|
+
this.db.renameLibrary(library.id, newName, displayName);
|
|
61
|
+
const updated = this.db.getLibraryById(library.id);
|
|
62
|
+
if (!updated) {
|
|
63
|
+
throw new Error(`Failed to rename library "${currentName}".`);
|
|
64
|
+
}
|
|
65
|
+
return updated;
|
|
66
|
+
}
|
|
38
67
|
}
|
|
39
68
|
/** Normalize URL: ensure trailing slash for base docs */
|
|
40
69
|
function normalizeUrl(url) {
|
|
41
70
|
const parsed = new URL(url);
|
|
42
71
|
// Remove trailing hash and query for base URL
|
|
43
|
-
parsed.hash =
|
|
72
|
+
parsed.hash = "";
|
|
44
73
|
return parsed.href;
|
|
45
74
|
}
|
|
46
75
|
/** Generate a slug name from URL */
|
|
47
76
|
function generateName(url) {
|
|
48
77
|
const parsed = new URL(url);
|
|
49
|
-
const host = parsed.hostname.replace(/^www\./,
|
|
50
|
-
const path = parsed.pathname.replace(/\/$/,
|
|
78
|
+
const host = parsed.hostname.replace(/^www\./, "");
|
|
79
|
+
const path = parsed.pathname.replace(/\/$/, "").replace(/^\//, "");
|
|
51
80
|
if (path) {
|
|
52
81
|
// e.g. svelte.dev/docs → "svelte-docs"
|
|
53
|
-
const hostPart = host.split(
|
|
54
|
-
const pathPart = path.split(
|
|
55
|
-
return `${hostPart}-${pathPart}`.toLowerCase().replace(/[^a-z0-9-]/g,
|
|
82
|
+
const hostPart = host.split(".")[0];
|
|
83
|
+
const pathPart = path.split("/").slice(0, 2).join("-");
|
|
84
|
+
return `${hostPart}-${pathPart}`.toLowerCase().replace(/[^a-z0-9-]/g, "-");
|
|
56
85
|
}
|
|
57
86
|
// Just the hostname
|
|
58
|
-
return host.replace(/\./g,
|
|
87
|
+
return host.replace(/\./g, "-").toLowerCase();
|
|
59
88
|
}
|
|
60
89
|
/** Generate a display name from the slug */
|
|
61
90
|
function generateDisplayName(name) {
|
|
62
91
|
return name
|
|
63
|
-
.split(
|
|
92
|
+
.split(/[-_\s]+/)
|
|
93
|
+
.filter(Boolean)
|
|
64
94
|
.map((word) => word.charAt(0).toUpperCase() + word.slice(1))
|
|
65
|
-
.join(
|
|
95
|
+
.join(" ");
|
|
66
96
|
}
|
package/dist/storage/db.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { Database as BunDatabase } from
|
|
2
|
-
import type { Library, Page, CrawlJob } from
|
|
1
|
+
import { Database as BunDatabase } from "bun:sqlite";
|
|
2
|
+
import type { Library, Page, CrawlJob } from "../types.js";
|
|
3
3
|
export declare class Database {
|
|
4
4
|
private db;
|
|
5
5
|
init(): void;
|
|
@@ -18,6 +18,7 @@ export declare class Database {
|
|
|
18
18
|
getLibraryByName(name: string): Library | undefined;
|
|
19
19
|
getLibraryById(id: string): Library | undefined;
|
|
20
20
|
removeLibrary(id: string): import("bun:sqlite").Changes;
|
|
21
|
+
renameLibrary(id: string, name: string, displayName: string): import("bun:sqlite").Changes;
|
|
21
22
|
updateLibraryStatus(id: string, status: string): import("bun:sqlite").Changes;
|
|
22
23
|
updateLibraryStats(id: string, pageCount: number, chunkCount: number): import("bun:sqlite").Changes;
|
|
23
24
|
upsertPage(page: {
|
|
@@ -52,6 +53,6 @@ export declare class Database {
|
|
|
52
53
|
libraryId: string;
|
|
53
54
|
}): CrawlJob;
|
|
54
55
|
getJob(id: string): CrawlJob | undefined;
|
|
55
|
-
updateJob(id: string, updates: Partial<Pick<CrawlJob,
|
|
56
|
+
updateJob(id: string, updates: Partial<Pick<CrawlJob, "status" | "pages_discovered" | "pages_crawled" | "pages_failed" | "chunks_created" | "error_message" | "started_at" | "completed_at">>): void;
|
|
56
57
|
listJobs(libraryId?: string): CrawlJob[];
|
|
57
58
|
}
|
package/dist/storage/db.js
CHANGED
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
// src/storage/db.ts — SQLite + FTS5 storage layer (bun:sqlite)
|
|
2
|
-
import { Database as BunDatabase } from
|
|
3
|
-
import { resolve } from
|
|
4
|
-
import { mkdirSync } from
|
|
5
|
-
import { homedir } from
|
|
2
|
+
import { Database as BunDatabase } from "bun:sqlite";
|
|
3
|
+
import { resolve } from "path";
|
|
4
|
+
import { mkdirSync } from "fs";
|
|
5
|
+
import { homedir } from "os";
|
|
6
6
|
export class Database {
|
|
7
7
|
db;
|
|
8
8
|
init() {
|
|
9
|
-
const dir = process.env.DOCSHARK_DATA_DIR || resolve(homedir(),
|
|
9
|
+
const dir = process.env.DOCSHARK_DATA_DIR || resolve(homedir(), ".docshark");
|
|
10
10
|
mkdirSync(dir, { recursive: true });
|
|
11
|
-
this.db = new BunDatabase(resolve(dir,
|
|
12
|
-
this.db.run(
|
|
13
|
-
this.db.run(
|
|
11
|
+
this.db = new BunDatabase(resolve(dir, "docshark.db"));
|
|
12
|
+
this.db.run("PRAGMA journal_mode = WAL");
|
|
13
|
+
this.db.run("PRAGMA foreign_keys = ON");
|
|
14
14
|
this.migrate();
|
|
15
15
|
}
|
|
16
16
|
/** Expose raw DB for search engine direct queries */
|
|
@@ -114,19 +114,32 @@ export class Database {
|
|
|
114
114
|
.run(lib.id, lib.name, lib.displayName, lib.url, lib.version ?? null, lib.crawlConfig ? JSON.stringify(lib.crawlConfig) : null);
|
|
115
115
|
}
|
|
116
116
|
listLibraries(status) {
|
|
117
|
-
if (status && status !==
|
|
118
|
-
return this.db
|
|
117
|
+
if (status && status !== "all") {
|
|
118
|
+
return this.db
|
|
119
|
+
.prepare("SELECT * FROM libraries WHERE status = ?")
|
|
120
|
+
.all(status);
|
|
119
121
|
}
|
|
120
|
-
return this.db
|
|
122
|
+
return this.db
|
|
123
|
+
.prepare("SELECT * FROM libraries ORDER BY name")
|
|
124
|
+
.all();
|
|
121
125
|
}
|
|
122
126
|
getLibraryByName(name) {
|
|
123
|
-
return this.db
|
|
127
|
+
return this.db
|
|
128
|
+
.prepare("SELECT * FROM libraries WHERE name = ?")
|
|
129
|
+
.get(name);
|
|
124
130
|
}
|
|
125
131
|
getLibraryById(id) {
|
|
126
|
-
return this.db.prepare(
|
|
132
|
+
return this.db.prepare("SELECT * FROM libraries WHERE id = ?").get(id);
|
|
127
133
|
}
|
|
128
134
|
removeLibrary(id) {
|
|
129
|
-
return this.db.prepare(
|
|
135
|
+
return this.db.prepare("DELETE FROM libraries WHERE id = ?").run(id);
|
|
136
|
+
}
|
|
137
|
+
renameLibrary(id, name, displayName) {
|
|
138
|
+
return this.db
|
|
139
|
+
.prepare(`UPDATE libraries
|
|
140
|
+
SET name = ?, display_name = ?, updated_at = datetime('now')
|
|
141
|
+
WHERE id = ?`)
|
|
142
|
+
.run(name, displayName, id);
|
|
130
143
|
}
|
|
131
144
|
updateLibraryStatus(id, status) {
|
|
132
145
|
return this.db
|
|
@@ -154,12 +167,16 @@ export class Database {
|
|
|
154
167
|
headings = excluded.headings,
|
|
155
168
|
updated_at = datetime('now')`)
|
|
156
169
|
.run(page.id, page.libraryId, page.url, page.path, page.title, page.contentMarkdown, page.contentHash, JSON.stringify(page.headings));
|
|
157
|
-
const row = this.db
|
|
170
|
+
const row = this.db
|
|
171
|
+
.prepare("SELECT id FROM pages WHERE library_id = ? AND url = ?")
|
|
172
|
+
.get(page.libraryId, page.url);
|
|
158
173
|
return row.id;
|
|
159
174
|
}
|
|
160
175
|
getPage(opts) {
|
|
161
176
|
if (opts.url) {
|
|
162
|
-
return this.db
|
|
177
|
+
return this.db
|
|
178
|
+
.prepare("SELECT * FROM pages WHERE url = ?")
|
|
179
|
+
.get(opts.url);
|
|
163
180
|
}
|
|
164
181
|
if (opts.library && opts.path) {
|
|
165
182
|
return this.db
|
|
@@ -172,7 +189,7 @@ export class Database {
|
|
|
172
189
|
}
|
|
173
190
|
getPagesByLibrary(libraryId) {
|
|
174
191
|
return this.db
|
|
175
|
-
.prepare(
|
|
192
|
+
.prepare("SELECT * FROM pages WHERE library_id = ? ORDER BY path")
|
|
176
193
|
.all(libraryId);
|
|
177
194
|
}
|
|
178
195
|
// ──────────────────────────────────────
|
|
@@ -189,19 +206,21 @@ export class Database {
|
|
|
189
206
|
tx();
|
|
190
207
|
}
|
|
191
208
|
deleteChunksByPage(pageId) {
|
|
192
|
-
this.db.prepare(
|
|
209
|
+
this.db.prepare("DELETE FROM chunks WHERE page_id = ?").run(pageId);
|
|
193
210
|
}
|
|
194
211
|
// ──────────────────────────────────────
|
|
195
212
|
// Crawl Jobs
|
|
196
213
|
// ──────────────────────────────────────
|
|
197
214
|
createJob(job) {
|
|
198
215
|
this.db
|
|
199
|
-
.prepare(
|
|
216
|
+
.prepare("INSERT INTO crawl_jobs (id, library_id) VALUES (?, ?)")
|
|
200
217
|
.run(job.id, job.libraryId);
|
|
201
|
-
return this.db
|
|
218
|
+
return this.db
|
|
219
|
+
.prepare("SELECT * FROM crawl_jobs WHERE id = ?")
|
|
220
|
+
.get(job.id);
|
|
202
221
|
}
|
|
203
222
|
getJob(id) {
|
|
204
|
-
return this.db.prepare(
|
|
223
|
+
return this.db.prepare("SELECT * FROM crawl_jobs WHERE id = ?").get(id);
|
|
205
224
|
}
|
|
206
225
|
updateJob(id, updates) {
|
|
207
226
|
const sets = [];
|
|
@@ -213,16 +232,18 @@ export class Database {
|
|
|
213
232
|
if (sets.length === 0)
|
|
214
233
|
return;
|
|
215
234
|
values.push(id);
|
|
216
|
-
this.db
|
|
235
|
+
this.db
|
|
236
|
+
.prepare(`UPDATE crawl_jobs SET ${sets.join(", ")} WHERE id = ?`)
|
|
237
|
+
.run(...values);
|
|
217
238
|
}
|
|
218
239
|
listJobs(libraryId) {
|
|
219
240
|
if (libraryId) {
|
|
220
241
|
return this.db
|
|
221
|
-
.prepare(
|
|
242
|
+
.prepare("SELECT * FROM crawl_jobs WHERE library_id = ? ORDER BY created_at DESC")
|
|
222
243
|
.all(libraryId);
|
|
223
244
|
}
|
|
224
245
|
return this.db
|
|
225
|
-
.prepare(
|
|
246
|
+
.prepare("SELECT * FROM crawl_jobs ORDER BY created_at DESC")
|
|
226
247
|
.all();
|
|
227
248
|
}
|
|
228
249
|
}
|
|
@@ -5,7 +5,7 @@ export declare function createListLibrariesTool(db: Database): {
|
|
|
5
5
|
name: "list_libraries";
|
|
6
6
|
description: string;
|
|
7
7
|
schema: v.ObjectSchema<{
|
|
8
|
-
readonly status: v.OptionalSchema<v.SchemaWithPipe<readonly [v.PicklistSchema<["indexed", "crawling", "error", "all"], undefined>, v.DescriptionAction<"
|
|
8
|
+
readonly status: v.OptionalSchema<v.SchemaWithPipe<readonly [v.PicklistSchema<["indexed", "crawling", "error", "all"], undefined>, v.DescriptionAction<"error" | "crawling" | "indexed" | "all", "Filter by indexing status. Default: \"all\".">]>, "all">;
|
|
9
9
|
}, undefined>;
|
|
10
10
|
};
|
|
11
11
|
handler: ({ status }: {
|
package/dist/version.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export declare const VERSION
|
|
1
|
+
export declare const VERSION: string;
|
package/dist/version.js
CHANGED
|
@@ -1,2 +1,6 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
import { readFileSync } from "node:fs";
|
|
2
|
+
import { dirname, resolve } from "node:path";
|
|
3
|
+
import { fileURLToPath } from "node:url";
|
|
4
|
+
const packageJsonPath = resolve(dirname(fileURLToPath(import.meta.url)), "../package.json");
|
|
5
|
+
export const VERSION = JSON.parse(readFileSync(packageJsonPath, "utf8"))
|
|
6
|
+
.version;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "docshark",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.16",
|
|
4
4
|
"description": "🦈 Documentation MCP Server — scrape, index, and search any doc website",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|
|
@@ -58,8 +58,8 @@
|
|
|
58
58
|
"@tmcp/transport-http": "^0.8.4",
|
|
59
59
|
"@tmcp/transport-sse": "^0.5.3",
|
|
60
60
|
"@tmcp/transport-stdio": "^0.4.1",
|
|
61
|
+
"cac": "^7.0.0",
|
|
61
62
|
"cheerio": "^1.2.0",
|
|
62
|
-
"commander": "^14.0.3",
|
|
63
63
|
"linkedom": "^0.18.12",
|
|
64
64
|
"nanoid": "^5.1.6",
|
|
65
65
|
"puppeteer-core": "^24.37.5",
|