npm - docshark - Versions diffs - 0.1.5 → 0.1.7 - Mend

docshark 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

package/CHANGELOG.md +8 -0
package/README.md +83 -30
package/dist/api/router.js +77 -0
package/dist/cli.d.ts +1 -1
package/dist/cli.js +160 -164
package/dist/http.js +84 -0
package/dist/index.js +0 -1
package/dist/jobs/events.js +15 -0
package/dist/jobs/manager.js +49 -0
package/dist/jobs/worker.js +120 -0
package/dist/processor/chunker.js +79 -0
package/dist/processor/extractor.js +81 -0
package/dist/scraper/discoverer.js +206 -0
package/dist/scraper/fetcher.js +129 -0
package/dist/scraper/rate-limiter.js +18 -0
package/dist/scraper/robots.js +26 -0
package/dist/server.js +154 -0
package/dist/services/library.js +66 -0
package/dist/storage/db.js +228 -0
package/dist/storage/search.js +49 -0
package/dist/tools/add-library.js +35 -0
package/dist/tools/get-doc-page.js +25 -0
package/dist/tools/list-libraries.js +29 -0
package/dist/tools/refresh-library.js +25 -0
package/dist/tools/remove-library.js +25 -0
package/dist/tools/search-docs.js +35 -0
package/dist/types.js +2 -0
package/dist/version.d.ts +1 -1
package/dist/version.js +2 -0
package/package.json +6 -2

package/dist/tools/add-library.js ADDED Viewed

@@ -0,0 +1,35 @@
+// src/tools/add-library.ts — Add new documentation source
+import * as v from 'valibot';
+import { tool } from 'tmcp/utils';
+export function createAddLibraryTool(libraryService) {
+    return {
+        definition: {
+            name: 'add_library',
+            description: 'Add a new documentation library to be crawled and indexed for searching. ' +
+                'Provide the documentation website URL and an optional name. ' +
+                'The library will be crawled in the background. ' +
+                'Use list_libraries to check crawl progress.',
+            schema: v.object({
+                url: v.pipe(v.string(), v.url(), v.description('The base URL of the documentation website to crawl.')),
+                name: v.optional(v.pipe(v.string(), v.description('A short identifier for the library (e.g., "svelte-5"). Auto-generated from URL if omitted.'))),
+                version: v.optional(v.pipe(v.string(), v.description('Version string (e.g., "5.0.0", "v4").'))),
+                max_depth: v.optional(v.pipe(v.number(), v.integer(), v.minValue(1), v.maxValue(10), v.description('Maximum link depth to crawl. Default: 3.')), 3),
+            }),
+        },
+        handler: async ({ url, name, version, max_depth, }) => {
+            try {
+                const library = await libraryService.add({
+                    url,
+                    name,
+                    version,
+                    maxDepth: max_depth,
+                });
+                return tool.text(`✅ Library "${library.display_name}" added.\n` +
+                    `Crawl job ${library.jobId} started. Use list_libraries to check progress.`);
+            }
+            catch (err) {
+                return tool.text(`❌ Failed to add library: ${err.message}`);
+            }
+        },
+    };
+}

package/dist/tools/get-doc-page.js ADDED Viewed

@@ -0,0 +1,25 @@
+// src/tools/get-doc-page.ts — Full page retrieval
+import * as v from 'valibot';
+import { tool } from 'tmcp/utils';
+export function createGetDocPageTool(db) {
+    return {
+        definition: {
+            name: 'get_doc_page',
+            description: 'Retrieve the complete content of a specific documentation page as markdown. ' +
+                'Use this when search results reference a page and you need the full context, ' +
+                'or when you know the exact page URL. Returns the entire page content.',
+            schema: v.object({
+                url: v.optional(v.pipe(v.string(), v.description('The full URL of the documentation page.'))),
+                library: v.optional(v.pipe(v.string(), v.description('Library name to search within.'))),
+                path: v.optional(v.pipe(v.string(), v.description('Relative path within the library (e.g., "/getting-started").'))),
+            }),
+        },
+        handler: async ({ url, library, path }) => {
+            const page = db.getPage({ url, library, path });
+            if (!page) {
+                return tool.text('Page not found. Use search_docs to find the correct page.');
+            }
+            return tool.text(`# ${page.title}\n**Source:** ${page.url}\n\n${page.content_markdown}`);
+        },
+    };
+}

package/dist/tools/list-libraries.js ADDED Viewed

@@ -0,0 +1,29 @@
+// src/tools/list-libraries.ts — Discovery tool
+import * as v from 'valibot';
+import { tool } from 'tmcp/utils';
+export function createListLibrariesTool(db) {
+    return {
+        definition: {
+            name: 'list_libraries',
+            description: 'List all documentation libraries currently indexed and available for searching. ' +
+                'Use this to discover what documentation is available before running search_docs. ' +
+                'Returns library names, URLs, page counts, and indexing status.',
+            schema: v.object({
+                status: v.optional(v.pipe(v.picklist(['indexed', 'crawling', 'error', 'all']), v.description('Filter by indexing status. Default: "all".')), 'all'),
+            }),
+        },
+        handler: async ({ status }) => {
+            const libraries = db.listLibraries(status);
+            if (libraries.length === 0) {
+                return tool.text('No libraries indexed yet. Use add_library to add a documentation website.');
+            }
+            let output = `## Indexed Libraries (${libraries.length} total)\n\n`;
+            output += '| Library | URL | Pages | Chunks | Status | Last Updated |\n';
+            output += '| ------- | --- | ----- | ------ | ------ | ------------ |\n';
+            for (const lib of libraries) {
+                output += `| ${lib.name} | ${lib.url} | ${lib.page_count} | ${lib.chunk_count} | ${lib.status} | ${lib.last_crawled_at || 'never'} |\n`;
+            }
+            return tool.text(output);
+        },
+    };
+}

package/dist/tools/refresh-library.js ADDED Viewed

@@ -0,0 +1,25 @@
+// src/tools/refresh-library.ts — Re-crawl existing library
+import * as v from 'valibot';
+import { tool } from 'tmcp/utils';
+export function createRefreshLibraryTool(jobManager, db) {
+    return {
+        definition: {
+            name: 'refresh_library',
+            description: 'Re-crawl and re-index an existing documentation library to get the latest content. ' +
+                'Use this when documentation may have been updated since it was last indexed. ' +
+                'Only re-fetches pages that have changed (via HTTP ETags/Last-Modified).',
+            schema: v.object({
+                library: v.pipe(v.string(), v.description('The library name to refresh (e.g., "svelte-5").')),
+            }),
+        },
+        handler: async ({ library }) => {
+            const lib = db.getLibraryByName(library);
+            if (!lib) {
+                return tool.text(`Library "${library}" not found. Use list_libraries to see available libraries.`);
+            }
+            const job = jobManager.startCrawl(lib.id, { incremental: true });
+            return tool.text(`🔄 Refresh started for "${lib.display_name}".\n` +
+                `Job ${job.id}: checking for updated pages...`);
+        },
+    };
+}

package/dist/tools/remove-library.js ADDED Viewed

@@ -0,0 +1,25 @@
+// src/tools/remove-library.ts — Remove a library and all its data
+import * as v from 'valibot';
+import { tool } from 'tmcp/utils';
+export function createRemoveLibraryTool(db) {
+    return {
+        definition: {
+            name: 'remove_library',
+            description: 'Remove a documentation library and all its indexed content. ' +
+                'This permanently deletes the library, its pages, and search index. ' +
+                'Use list_libraries first to confirm the library name.',
+            schema: v.object({
+                library: v.pipe(v.string(), v.description('The library name to remove (e.g., "svelte-5").')),
+            }),
+        },
+        handler: async ({ library }) => {
+            const lib = db.getLibraryByName(library);
+            if (!lib) {
+                return tool.text(`Library "${library}" not found.`);
+            }
+            db.removeLibrary(lib.id);
+            return tool.text(`🗑️ Library "${lib.display_name}" removed.\n` +
+                `Deleted ${lib.page_count} pages and ${lib.chunk_count} search chunks.`);
+        },
+    };
+}

package/dist/tools/search-docs.js ADDED Viewed

@@ -0,0 +1,35 @@
+// src/tools/search-docs.ts — Primary search tool (80% of usage)
+import * as v from 'valibot';
+import { tool } from 'tmcp/utils';
+export function createSearchDocsTool(searchEngine) {
+    return {
+        definition: {
+            name: 'search_docs',
+            description: 'Search through indexed documentation libraries for relevant information. ' +
+                'Returns ranked documentation sections with code examples and source URLs. ' +
+                'Use this when you need to find information about a library, framework, API, ' +
+                'or any technical concept. You can optionally filter by a specific library name.',
+            schema: v.object({
+                query: v.pipe(v.string(), v.description('The search query. Use natural language or specific terms.')),
+                library: v.optional(v.pipe(v.string(), v.description('Filter results to a specific library name.'))),
+                limit: v.optional(v.pipe(v.number(), v.integer(), v.minValue(1), v.maxValue(20), v.description('Max results to return. Default: 5.')), 5),
+            }),
+        },
+        handler: async ({ query, library, limit }) => {
+            const results = searchEngine.search(query, { library, limit });
+            if (results.length === 0) {
+                return tool.text(`No results found for "${query}".`);
+            }
+            const formatted = results
+                .map((r, i) => {
+                let block = `### ${i + 1}. ${r.page_title} — ${r.library_display_name}\n`;
+                block += `**Source:** ${r.page_url}\n`;
+                block += `**Section:** ${r.heading_context}\n\n`;
+                block += r.content;
+                return block;
+            })
+                .join('\n\n---\n\n');
+            return tool.text(`## Results for "${query}"\n\n${formatted}`);
+        },
+    };
+}

package/dist/types.js ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ // src/types.ts — Shared type definitions for DocShark
2	+ export {};

package/dist/version.d.ts CHANGED Viewed

	@@ -1 +1 @@
1	- export declare const VERSION = "0.1.5";
1	+ export declare const VERSION = "0.1.7";

package/dist/version.js ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ // This file is automatically updated by release-please
2	+ export const VERSION = '0.1.7'; // x-release-please-version

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "docshark",
-  "version": "0.1.5",
+  "version": "0.1.7",
   "description": "🦈 Documentation MCP Server — scrape, index, and search any doc website",
   "type": "module",
   "main": "./dist/index.js",
@@ -26,10 +26,14 @@
     "dev": "bun run --watch src/cli.ts start",
     "cli": "bun run src/cli.ts",
     "check": "tsc --noEmit",
-    "build": "rm -rf dist && bun build ./src/cli.ts ./src/index.ts --outdir ./dist --target node --external '*' && tsc --emitDeclarationOnly",
+    "build": "rm -rf dist && tsc && chmod +x dist/cli.js",
     "prepublishOnly": "bun run build",
     "test:crawl": "bun run src/cli.ts add https://svelte.dev/docs/svelte/overview"
   },
+  "engines": {
+    "node": ">=20",
+    "bun": ">=1.1.0"
+  },
   "keywords": [
     "tmcp",
     "mcp",