docshark 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -0
- package/README.md +83 -30
- package/dist/api/router.js +77 -0
- package/dist/cli.d.ts +1 -1
- package/dist/cli.js +160 -164
- package/dist/http.js +84 -0
- package/dist/index.js +0 -1
- package/dist/jobs/events.js +15 -0
- package/dist/jobs/manager.js +49 -0
- package/dist/jobs/worker.js +120 -0
- package/dist/processor/chunker.js +79 -0
- package/dist/processor/extractor.js +81 -0
- package/dist/scraper/discoverer.js +206 -0
- package/dist/scraper/fetcher.js +129 -0
- package/dist/scraper/rate-limiter.js +18 -0
- package/dist/scraper/robots.js +26 -0
- package/dist/server.js +154 -0
- package/dist/services/library.js +66 -0
- package/dist/storage/db.js +228 -0
- package/dist/storage/search.js +49 -0
- package/dist/tools/add-library.js +35 -0
- package/dist/tools/get-doc-page.js +25 -0
- package/dist/tools/list-libraries.js +29 -0
- package/dist/tools/refresh-library.js +25 -0
- package/dist/tools/remove-library.js +25 -0
- package/dist/tools/search-docs.js +35 -0
- package/dist/types.js +2 -0
- package/dist/version.d.ts +1 -1
- package/dist/version.js +2 -0
- package/package.json +6 -2
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,13 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.1.7 (2026-03-07)
|
|
4
|
+
|
|
5
|
+
**Full Changelog**: https://github.com/Michael-Obele/docshark/compare/v0.1.6...v0.1.7
|
|
6
|
+
|
|
7
|
+
## 0.1.6 (2026-03-07)
|
|
8
|
+
|
|
9
|
+
**Full Changelog**: https://github.com/Michael-Obele/docshark/compare/v0.1.5...v0.1.6
|
|
10
|
+
|
|
3
11
|
## 0.1.5 (2026-03-02)
|
|
4
12
|
|
|
5
13
|
**Full Changelog**: https://github.com/Michael-Obele/docshark/compare/v0.1.4...v0.1.5
|
package/README.md
CHANGED
|
@@ -22,6 +22,7 @@
|
|
|
22
22
|
## 🦈 What We Have Done (Phase 1)
|
|
23
23
|
|
|
24
24
|
**Phase 1: Core Engine** is fully implemented and tested.
|
|
25
|
+
|
|
25
26
|
- ✅ Custom SQLite Database with FTS5 virtual tables and auto-sync triggers.
|
|
26
27
|
- ✅ Web scraping engine supporting standard `fetch()` and `puppeteer-core`.
|
|
27
28
|
- ✅ Markdown processor utilizing Readability + Turndown.
|
|
@@ -46,62 +47,113 @@ We are actively polishing the integration between the core engine and external M
|
|
|
46
47
|
|
|
47
48
|
## 🛠️ Usage
|
|
48
49
|
|
|
49
|
-
###
|
|
50
|
+
### Quick Start (from npm)
|
|
50
51
|
|
|
51
|
-
|
|
52
|
+
You can run DocShark directly without installing it globally using `bunx`:
|
|
52
53
|
|
|
53
54
|
```bash
|
|
54
|
-
#
|
|
55
|
-
|
|
55
|
+
# Add a documentation library to the index
|
|
56
|
+
bunx docshark add https://valibot.dev/guides/ --depth 2
|
|
56
57
|
|
|
57
|
-
#
|
|
58
|
-
|
|
58
|
+
# Search your indexed docs
|
|
59
|
+
bunx docshark search "schema validation"
|
|
60
|
+
```
|
|
59
61
|
|
|
60
|
-
|
|
61
|
-
|
|
62
|
+
### Installation
|
|
63
|
+
|
|
64
|
+
To install DocShark globally as a CLI tool:
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
# Using npm
|
|
68
|
+
npm install -g docshark
|
|
69
|
+
|
|
70
|
+
# Using Bun
|
|
71
|
+
bun add -g docshark
|
|
62
72
|
```
|
|
63
73
|
|
|
64
|
-
|
|
74
|
+
After installation, you can use the `docshark` command:
|
|
65
75
|
|
|
66
76
|
```bash
|
|
67
|
-
|
|
68
|
-
|
|
77
|
+
docshark list
|
|
78
|
+
```
|
|
69
79
|
|
|
70
|
-
|
|
71
|
-
bun run src/cli.ts search "schema validation"
|
|
80
|
+
## š MCP Integration
|
|
72
81
|
|
|
73
|
-
|
|
74
|
-
|
|
82
|
+
### VS Code (GitHub Copilot / MCP Extension)
|
|
83
|
+
|
|
84
|
+
Add DocShark to your `.vscode/settings.json` or global MCP configuration:
|
|
85
|
+
|
|
86
|
+
```json
|
|
87
|
+
{
|
|
88
|
+
"mcpServers": {
|
|
89
|
+
"docshark": {
|
|
90
|
+
"command": "bunx",
|
|
91
|
+
"args": ["-y", "docshark", "start", "--stdio"]
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
}
|
|
75
95
|
```
|
|
76
96
|
|
|
77
|
-
###
|
|
97
|
+
### Cursor
|
|
98
|
+
|
|
99
|
+
1. Open **Cursor Settings** > **Models** > **MCP**.
|
|
100
|
+
2. Click **+ Add New MCP Server**.
|
|
101
|
+
3. Name: `docshark`
|
|
102
|
+
4. Type: `command`
|
|
103
|
+
5. Command: `bunx -y docshark start --stdio`
|
|
104
|
+
|
|
105
|
+
### Claude Desktop
|
|
106
|
+
|
|
107
|
+
Edit your Claude Desktop configuration file:
|
|
108
|
+
|
|
109
|
+
- **macOS**: `~/Library/Application Support/Claude/claude_desktop_config.json`
|
|
110
|
+
- **Windows**: `%APPDATA%\Claude\claude_desktop_config.json`
|
|
78
111
|
|
|
79
|
-
To use DocShark as an MCP server in VS Code:
|
|
80
|
-
1. Enable MCP discovery in your VS Code settings.
|
|
81
|
-
2. Create `.vscode/mcp.json` in your workspace:
|
|
82
112
|
```json
|
|
83
113
|
{
|
|
84
|
-
"
|
|
114
|
+
"mcpServers": {
|
|
85
115
|
"docshark": {
|
|
86
|
-
"
|
|
87
|
-
"
|
|
88
|
-
"args": [
|
|
89
|
-
"run",
|
|
90
|
-
"/absolute/path/to/docshark/src/cli.ts",
|
|
91
|
-
"start",
|
|
92
|
-
"--stdio"
|
|
93
|
-
]
|
|
116
|
+
"command": "bunx",
|
|
117
|
+
"args": ["-y", "docshark", "start", "--stdio"]
|
|
94
118
|
}
|
|
95
119
|
}
|
|
96
120
|
}
|
|
97
121
|
```
|
|
98
|
-
3. Restart the server in VS Code properties, and your Copilot agent will now have access to the docshark tools.
|
|
99
122
|
|
|
100
123
|
---
|
|
101
124
|
|
|
125
|
+
## 🛠️ Development
|
|
126
|
+
|
|
127
|
+
### Local Setup
|
|
128
|
+
|
|
129
|
+
Ensure you have [Bun](https://bun.sh/) installed.
|
|
130
|
+
|
|
131
|
+
```bash
|
|
132
|
+
# Clone the repository
|
|
133
|
+
git clone https://github.com/Michael-Obele/docshark.git
|
|
134
|
+
cd docshark
|
|
135
|
+
|
|
136
|
+
# Install dependencies
|
|
137
|
+
bun install
|
|
138
|
+
|
|
139
|
+
# (Optional) Enable auto-detection & scraping of Javascript React/Vue single-page apps
|
|
140
|
+
bun add puppeteer-core
|
|
141
|
+
|
|
142
|
+
# Start the DocShark MCP server in HTTP mode for local testing
|
|
143
|
+
bun run src/cli.ts start --port 6380
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
### Local CLI Debugging
|
|
147
|
+
|
|
148
|
+
```bash
|
|
149
|
+
# Run CLI directly while developing
|
|
150
|
+
bun run src/cli.ts list
|
|
151
|
+
```
|
|
152
|
+
|
|
102
153
|
## š Versioning & Changelog
|
|
103
154
|
|
|
104
155
|
This project uses [Google's Release Please](https://github.com/googleapis/release-please) to automate versioning and changelog generation.
|
|
156
|
+
|
|
105
157
|
- **Semantic Versioning**: Our versions automatically bump (e.g. `0.0.1` -> `0.0.2` or `0.1.0`) based on standard Conventional Commits (`feat:`, `fix:`, `chore:`, etc.).
|
|
106
158
|
- **Automated**: A PR is automatically created on `master` when standard commits are merged, generating a standard `CHANGELOG.md`.
|
|
107
159
|
|
|
@@ -110,4 +162,5 @@ This project uses [Google's Release Please](https://github.com/googleapis/releas
|
|
|
110
162
|
This project is open-source and available under the [MIT License](LICENSE).
|
|
111
163
|
|
|
112
164
|
---
|
|
113
|
-
|
|
165
|
+
|
|
166
|
+
_Built to empower AI agents with the latest knowledge._
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
import { VERSION } from '../version.js';
|
|
2
|
+
/** Result count used by `GET /api/search` when `limit` is missing or invalid. */
const DEFAULT_SEARCH_LIMIT = 5;

/**
 * Create the HTTP API router.
 *
 * @param {object} deps - Collaborators used by the routes: `db`
 *   (`listLibraries`, `removeLibrary`), `libraryService` (`add`),
 *   `jobManager` (`startCrawl`, `listJobs`) and `searchEngine` (`search`).
 * @returns {{ handle(request: Request): Promise<Response> }} Router whose
 *   `handle` maps a request under `/api` to a JSON response, a plain-text 404
 *   for unknown routes, a JSON 400 for an unparsable POST body, or a JSON 500
 *   when a route throws.
 */
export function createApiRouter(deps) {
  return {
    async handle(request) {
      const url = new URL(request.url);
      // Routes below are written relative to the `/api` prefix.
      const path = url.pathname.replace(/^\/api/, '');
      const { method } = request;
      try {
        // GET /api/libraries
        if (method === 'GET' && path === '/libraries') {
          const status = url.searchParams.get('status') || 'all';
          return json(deps.db.listLibraries(status));
        }

        // POST /api/libraries
        if (method === 'POST' && path === '/libraries') {
          let body;
          try {
            body = await request.json();
          } catch {
            // A malformed body is the caller's mistake, not a server fault.
            return json({ error: 'Request body must be valid JSON' }, 400);
          }
          const lib = await deps.libraryService.add(body);
          return json(lib, 201);
        }

        // DELETE /api/libraries/:id
        const deleteMatch = path.match(/^\/libraries\/(.+)$/);
        if (method === 'DELETE' && deleteMatch) {
          deps.db.removeLibrary(deleteMatch[1]);
          return json({ ok: true });
        }

        // POST /api/libraries/:id/refresh
        const refreshMatch = path.match(/^\/libraries\/(.+)\/refresh$/);
        if (method === 'POST' && refreshMatch) {
          const job = deps.jobManager.startCrawl(refreshMatch[1]);
          return json({ jobId: job.id });
        }

        // GET /api/search?q=...&library=...&limit=...
        if (method === 'GET' && path === '/search') {
          const q = url.searchParams.get('q') || '';
          const library = url.searchParams.get('library') || undefined;
          const limit = parseSearchLimit(url.searchParams.get('limit'));
          return json(deps.searchEngine.search(q, { library, limit }));
        }

        // GET /api/crawls
        if (method === 'GET' && path === '/crawls') {
          const libraryId = url.searchParams.get('library_id') || undefined;
          return json(deps.jobManager.listJobs(libraryId));
        }

        // GET /api/stats
        if (method === 'GET' && path === '/stats') {
          const libs = deps.db.listLibraries();
          return json({
            libraries: libs.length,
            pages: libs.reduce((sum, lib) => sum + lib.page_count, 0),
            chunks: libs.reduce((sum, lib) => sum + lib.chunk_count, 0),
          });
        }

        // GET /api/health
        if (method === 'GET' && path === '/health') {
          return json({ status: 'ok', version: VERSION });
        }

        return new Response('Not Found', { status: 404 });
      } catch (err) {
        console.error('[DocShark API]', err);
        // Non-Error values can be thrown too; never read `.message` blindly.
        const message = err instanceof Error ? err.message : String(err);
        return json({ error: message }, 500);
      }
    },
  };
}

/**
 * Parse the `limit` query parameter of the search route.
 *
 * @param {string | null} raw - Raw query-string value, or `null` when absent.
 * @returns {number} A positive integer; `DEFAULT_SEARCH_LIMIT` when `raw` is
 *   missing, not numeric, zero or negative.
 */
function parseSearchLimit(raw) {
  const limit = Number.parseInt(raw ?? '', 10);
  return Number.isInteger(limit) && limit > 0 ? limit : DEFAULT_SEARCH_LIMIT;
}

/**
 * Serialize `data` as a JSON response with a permissive CORS header.
 *
 * @param {*} data - Value passed to `JSON.stringify`.
 * @param {number} [status=200] - HTTP status code.
 * @returns {Response}
 */
function json(data, status = 200) {
  return new Response(JSON.stringify(data), {
    status,
    headers: {
      'Content-Type': 'application/json',
      'Access-Control-Allow-Origin': '*',
    },
  });
}
|
package/dist/cli.d.ts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
#!/usr/bin/env
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
2
|
export {};
|
package/dist/cli.js
CHANGED
|
@@ -1,179 +1,175 @@
|
|
|
1
|
-
#!/usr/bin/env
|
|
2
|
-
|
|
3
|
-
var __create = Object.create;
|
|
4
|
-
var __getProtoOf = Object.getPrototypeOf;
|
|
5
|
-
var __defProp = Object.defineProperty;
|
|
6
|
-
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
7
|
-
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
8
|
-
var __toESM = (mod, isNodeMode, target) => {
|
|
9
|
-
target = mod != null ? __create(__getProtoOf(mod)) : {};
|
|
10
|
-
const to = isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target;
|
|
11
|
-
for (let key of __getOwnPropNames(mod))
|
|
12
|
-
if (!__hasOwnProp.call(to, key))
|
|
13
|
-
__defProp(to, key, {
|
|
14
|
-
get: () => mod[key],
|
|
15
|
-
enumerable: true
|
|
16
|
-
});
|
|
17
|
-
return to;
|
|
18
|
-
};
|
|
19
|
-
var __require = /* @__PURE__ */ createRequire(import.meta.url);
|
|
20
|
-
|
|
21
|
-
// src/cli.ts
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
// src/cli.ts — DocShark CLI entry point
|
|
22
3
|
import { Command } from "commander";
|
|
23
4
|
import { startHttpServer } from "./http.js";
|
|
24
5
|
import { StdioTransport } from "@tmcp/transport-stdio";
|
|
25
6
|
import { server, db, searchEngine, libraryService } from "./server.js";
|
|
26
7
|
import { VERSION } from "./version.js";
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
stdio.
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
8
|
+
// Root Commander program; every subcommand below is registered on it.
// The description's emoji and dash were mis-encoded and are restored here.
const program = new Command()
  .name("docshark")
  .description("🦈 Documentation MCP Server — scrape, index, and search any doc website")
  .version(VERSION, "-v, --version", "output the current version");
|
|
12
|
+
// `start` (default command) — run the MCP server over STDIO or HTTP.
program
  .command("start", { isDefault: true })
  .description("Start the MCP server")
  .option("-p, --port <port>", "HTTP server port", "6380")
  .option("--stdio", "Run in STDIO mode (for Claude Desktop, Cursor, etc.)")
  .option("--data-dir <path>", "Data directory", "")
  .action(async (opts) => {
    if (opts.dataDir) {
      // Exported before db.init(); presumably the storage layer reads this
      // variable to locate its files — confirm against storage/db.js.
      process.env.DOCSHARK_DATA_DIR = opts.dataDir;
    }
    db.init();

    if (opts.stdio) {
      // STDIO mode — direct pipe, no HTTP
      const stdio = new StdioTransport(server);
      stdio.listen();
      return;
    }

    // Reject garbage such as "--port abc" instead of handing NaN to the server.
    const port = Number.parseInt(opts.port, 10);
    if (!Number.isInteger(port) || port < 0 || port > 65535) {
      console.error(`\n❌ Invalid --port value: ${opts.port}\n`);
      process.exit(1);
    }
    await startHttpServer(port);
  });
|
|
40
|
-
program
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
32
|
+
// `add <url>` — register a documentation site, then block until its crawl ends.
program
  .command("add <url>")
  .description("Add a documentation library and start crawling")
  .option("-n, --name <name>", "Library name (auto-generated from URL if omitted)")
  .option("-d, --depth <n>", "Max crawl depth", "3")
  .option("--lib-version <version>", "Library version")
  .action(async (url, opts) => {
    // Validate up front so a typo like "--depth two" never reaches the crawler as NaN.
    const maxDepth = Number.parseInt(opts.depth, 10);
    if (!Number.isInteger(maxDepth) || maxDepth < 0) {
      console.error(`\n❌ Invalid --depth value: ${opts.depth}\n`);
      process.exit(1);
    }

    db.init();
    try {
      const lib = await libraryService.add({
        url,
        name: opts.name,
        version: opts.libVersion,
        maxDepth,
      });
      console.log(`\n✅ Added "${lib.display_name}" — crawling ${lib.url}...`);
      console.log(` Job ID: ${lib.jobId}`);
      console.log(` Use "docshark list" to check progress.\n`);
      // Wait for the crawl to finish
      await waitForCrawl(lib.jobId);
    } catch (err) {
      console.error(`\n❌ ${err.message}\n`);
      process.exit(1);
    }
  });
|
|
62
|
-
program
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
library
|
|
66
|
-
limit
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
58
|
+
// `search <query>` — query the index and print a short excerpt per hit.
program
  .command("search <query>")
  .description("Search indexed documentation")
  .option("-l, --library <name>", "Filter by library")
  .option("--limit <n>", "Max results", "5")
  .action(async (query, opts) => {
    // Explicit radix plus validation: a non-numeric or non-positive limit is
    // reported instead of being forwarded to the search engine.
    const limit = Number.parseInt(opts.limit, 10);
    if (!Number.isInteger(limit) || limit < 1) {
      console.error(`\n❌ Invalid --limit value: ${opts.limit}\n`);
      process.exit(1);
    }

    db.init();
    const results = searchEngine.search(query, {
      library: opts.library,
      limit,
    });
    if (results.length === 0) {
      console.log(`\nNo results found for "${query}".\n`);
      return;
    }
    for (const r of results) {
      console.log(`\n--- ${r.page_title} (${r.library_display_name}) ---`);
      console.log(`Section: ${r.heading_context}`);
      // Only the first 300 characters of each chunk are shown.
      console.log(r.content.slice(0, 300));
      console.log(`Source: ${r.page_url}\n`);
    }
  });
|
|
83
|
-
program
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
80
|
+
// `list` — print one table row per library returned by the database.
program
  .command("list")
  .description("List indexed libraries")
  .action(() => {
    db.init();
    const libraries = db.listLibraries();

    if (libraries.length > 0) {
      const rows = [];
      for (const library of libraries) {
        rows.push({
          Name: library.name,
          URL: library.url,
          Pages: library.page_count,
          Chunks: library.chunk_count,
          Status: library.status,
          // A falsy timestamp is shown as "never".
          "Last Crawled": library.last_crawled_at || "never",
        });
      }
      console.table(rows);
      return;
    }

    console.log('\nNo libraries indexed. Use "docshark add <url>" to add one.\n');
  });
|
|
101
|
-
program
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
99
|
+
// `refresh <name>` — start an incremental crawl of a known library and wait for it.
// User-facing strings had mis-encoded glyphs; they are restored here.
program
  .command("refresh <name>")
  .description("Refresh an existing documentation library")
  .action(async (name) => {
    db.init();
    try {
      const lib = db.getLibraryByName(name);
      if (!lib) {
        throw new Error(`Library "${name}" not found.`);
      }
      // jobManager is loaded on demand from the server module.
      const { jobManager } = await import("./server.js");
      const job = jobManager.startCrawl(lib.id, { incremental: true });
      console.log(`\n🔄 Refreshing "${lib.display_name}" — crawling ${lib.url}...`);
      console.log(` Job ID: ${job.id}`);
      await waitForCrawl(job.id);
    } catch (err) {
      console.error(`\n❌ ${err.message}\n`);
      process.exit(1);
    }
  });
|
|
120
|
-
program
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
`);
|
|
134
|
-
|
|
135
|
-
|
|
119
|
+
// `remove <name>` — delete a library, looked up by name, from the database.
// User-facing strings had mis-encoded glyphs; they are restored here.
program
  .command("remove <name>")
  .description("Remove a documentation library and its index")
  .action((name) => {
    db.init();
    try {
      const lib = db.getLibraryByName(name);
      if (!lib) {
        throw new Error(`Library "${name}" not found.`);
      }
      db.removeLibrary(lib.id);
      // page_count comes from the record fetched before removal.
      console.log(`\n🗑️ Removed library "${lib.display_name}". Deleted ${lib.page_count} pages.\n`);
    } catch (err) {
      console.error(`\n❌ ${err.message}\n`);
      process.exit(1);
    }
  });
|
|
137
|
-
program
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
--- ${page.title} ---`);
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
console.log(page.content_markdown);
|
|
152
|
-
console.log(`
|
|
153
|
-
`);
|
|
136
|
+
// `get <url>` — print the stored markdown of one indexed page.
// The error glyph was mis-encoded; it is restored here.
program
  .command("get <url>")
  .description("Get the full markdown content of a specific indexed page")
  .action((url) => {
    db.init();
    const page = db.getPage({ url });
    if (!page) {
      console.error(`\n❌ Page not found in index: ${url}\n`);
      process.exit(1);
    }
    console.log(`\n--- ${page.title} ---`);
    console.log(`Source: ${page.url}\n\n`);
    console.log(page.content_markdown);
    console.log("\n");
  });
|
|
155
151
|
// Several actions above are async. parseAsync() makes Commander await them,
// so a rejected action surfaces here instead of as an unhandled rejection.
await program.parseAsync();
|
|
152
|
+
/**
 * Block the CLI until a crawl job finishes, then print its outcome.
 *
 * Polls `jobManager.getJob(jobId)` once per second until the job reports
 * `"completed"` or `"failed"`, or can no longer be found. A failed or missing
 * job sets `process.exitCode` to 1 so scripts can detect the failure.
 *
 * @param jobId - Identifier handed to `jobManager.getJob`.
 * @returns {Promise<void>} Resolves once the outcome has been printed.
 */
async function waitForCrawl(jobId) {
  const { jobManager } = await import("./server.js");
  return new Promise((resolve) => {
    const poll = () => {
      const job = jobManager.getJob(jobId);

      // Still in progress: look again in a second.
      if (job && job.status !== "completed" && job.status !== "failed") {
        setTimeout(poll, 1000);
        return;
      }

      if (!job) {
        console.error(`\n❌ Crawl job ${jobId} not found.`);
        process.exitCode = 1;
      } else if (job.status === "completed") {
        console.log(`\n🦈 Crawl complete: ${job.pages_crawled} pages, ${job.chunks_created} chunks indexed.`);
        if (job.pages_failed > 0) {
          console.log(` ⚠️ ${job.pages_failed} pages failed.`);
        }
      } else {
        console.error(`\n❌ Crawl failed: ${job.error_message}`);
        process.exitCode = 1;
      }
      resolve();
    };
    poll();
  });
}
|