docshark 0.1.8 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,33 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.1.12](https://github.com/Michael-Obele/docshark/compare/v0.1.11...v0.1.12) (2026-03-12)
4
+
5
+
6
+ ### 🐛 Bug Fixes
7
+
8
+ * **release:** update Node.js version to 24 and set registry URL ([2887a29](https://github.com/Michael-Obele/docshark/commit/2887a2920ab61adf6eb4bf4aa6b6d86143ffd34f))
9
+
10
+ ## [0.1.11](https://github.com/Michael-Obele/docshark/compare/v0.1.10...v0.1.11) (2026-03-11)
11
+
12
+
13
+ ### ✨ Features
14
+
15
+ * Directly process raw Markdown and plain text files by checking content type and URL extension. ([a4d66b8](https://github.com/Michael-Obele/docshark/commit/a4d66b8cdd6342645b3cab292ef82c133c9ae52c))
16
+
17
+ ## [0.1.10](https://github.com/Michael-Obele/docshark/compare/v0.1.9...v0.1.10) (2026-03-11)
18
+
19
+
20
+ ### 📚 Documentation
21
+
22
+ * Add NPM version badge to README. ([ae9c6ca](https://github.com/Michael-Obele/docshark/commit/ae9c6ca71dbd844de6b065a0e47021c7d1e3ec72))
23
+
24
+ ## [0.1.9](https://github.com/Michael-Obele/docshark/compare/v0.1.8...v0.1.9) (2026-03-11)
25
+
26
+
27
+ ### ✨ Features
28
+
29
+ * Add NPM provenance publishing to the release workflow and set the release-please changelog type to default. ([7c6ed54](https://github.com/Michael-Obele/docshark/commit/7c6ed54ca8e51a3b73abf64cc775352937c9c1cf))
30
+
3
31
  ## 0.1.8 (2026-03-11)
4
32
 
5
33
  **Full Changelog**: https://github.com/Michael-Obele/docshark/compare/v0.1.7...v0.1.8
package/README.md CHANGED
@@ -1,6 +1,7 @@
1
1
  # 🦈 DocShark
2
2
 
3
3
  [![Built with Bun](https://img.shields.io/badge/Bun-%23000000.svg?style=flat&logo=bun&logoColor=white)](https://bun.sh/)
4
+ [![NPM Version](https://img.shields.io/npm/v/docshark.svg?style=flat&color=blue)](https://www.npmjs.com/package/docshark)
4
5
  [![MCP Compatible](https://img.shields.io/badge/MCP-Ready-0D1117.svg?style=flat&logo=github&logoColor=white)](https://modelcontextprotocol.io/)
5
6
  [![GitHub Release](https://img.shields.io/github/v/release/Michael-Obele/docshark?color=success)](https://github.com/Michael-Obele/docshark/releases)
6
7
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
@@ -41,7 +41,7 @@ export class CrawlWorker {
41
41
  await rateLimiter.wait();
42
42
  const result = await fetchPage(url, config.renderer);
43
43
  // Extract content + convert to markdown
44
- const { markdown, title, headings } = extractAndConvert(result.html, url);
44
+ const { markdown, title, headings } = extractAndConvert(result.html, url, result.contentType);
45
45
  if (!markdown || markdown.length < 50) {
46
46
  crawled++;
47
47
  continue; // Skip essentially empty pages
@@ -1,4 +1,4 @@
1
- export declare function extractAndConvert(html: string, url: string): {
1
+ export declare function extractAndConvert(html: string, url: string, contentType?: string): {
2
2
  markdown: string;
3
3
  title: string;
4
4
  headings: Array<{
@@ -25,7 +25,23 @@ turndown.addRule('removeImages', {
25
25
  filter: 'img',
26
26
  replacement: () => '',
27
27
  });
28
- export function extractAndConvert(html, url) {
28
+ export function extractAndConvert(html, url, contentType = 'text/html') {
29
+ const isMarkdown = url.endsWith('.md') || url.endsWith('.txt') || contentType.includes('text/plain') || contentType.includes('text/markdown');
30
+ if (isMarkdown && !html.trim().startsWith('<!DOCTYPE') && !html.trim().startsWith('<html')) {
31
+ const markdown = html.trim();
32
+ const titleMatch = markdown.match(/^#\s+(.+)$/m);
33
+ let title = titleMatch ? titleMatch[1].trim() : url.split('/').pop() || 'Untitled';
34
+ if (title.endsWith('.txt') || title.endsWith('.md')) {
35
+ title = title.slice(0, -4);
36
+ }
37
+ const headings = [];
38
+ const headingRegex = /^(#{1,6})\s+(.+)$/gm;
39
+ let match;
40
+ while ((match = headingRegex.exec(markdown)) !== null) {
41
+ headings.push({ level: match[1].length, text: match[2].trim() });
42
+ }
43
+ return { markdown, title, headings };
44
+ }
29
45
  const { document } = parseHTML(html);
30
46
  // Set the document URL for Readability to resolve relative links
31
47
  if (url) {
@@ -17,7 +17,7 @@ export async function fetchPage(url, renderer = 'auto') {
17
17
  return result;
18
18
  }
19
19
  // Auto mode: check if content is too short (possibly JS-rendered)
20
- const { markdown } = extractAndConvert(result.html, url);
20
+ const { markdown } = extractAndConvert(result.html, url, result.contentType);
21
21
  if (markdown.length >= MIN_CONTENT_LENGTH) {
22
22
  return result;
23
23
  }
@@ -52,6 +52,7 @@ async function fetchWithRetry(url, retries = MAX_RETRIES) {
52
52
  status: response.status,
53
53
  etag: response.headers.get('etag'),
54
54
  lastModified: response.headers.get('last-modified'),
55
+ contentType: response.headers.get('content-type') || undefined,
55
56
  };
56
57
  }
57
58
  catch (err) {
package/dist/types.d.ts CHANGED
@@ -59,6 +59,7 @@ export interface FetchResult {
59
59
  etag?: string | null;
60
60
  lastModified?: string | null;
61
61
  unchanged?: boolean;
62
+ contentType?: string;
62
63
  }
63
64
  export interface CrawlConfig {
64
65
  renderer?: 'auto' | 'fetch' | 'puppeteer';
package/dist/version.d.ts CHANGED
@@ -1 +1 @@
1
- export declare const VERSION = "0.1.8";
1
+ export declare const VERSION = "0.1.12";
package/dist/version.js CHANGED
@@ -1,2 +1,2 @@
1
1
  // This file is automatically updated by release-please
2
- export const VERSION = '0.1.8'; // x-release-please-version
2
+ export const VERSION = '0.1.12'; // x-release-please-version
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "docshark",
3
- "version": "0.1.8",
3
+ "version": "0.1.12",
4
4
  "description": "🦈 Documentation MCP Server — scrape, index, and search any doc website",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
@@ -21,6 +21,16 @@
21
21
  "LICENSE",
22
22
  "CHANGELOG.md"
23
23
  ],
24
+ "author": "Michael Obele",
25
+ "license": "MIT",
26
+ "repository": {
27
+ "type": "git",
28
+ "url": "git+https://github.com/Michael-Obele/docshark.git"
29
+ },
30
+ "bugs": {
31
+ "url": "https://github.com/Michael-Obele/docshark/issues"
32
+ },
33
+ "homepage": "https://github.com/Michael-Obele/docshark#readme",
24
34
  "scripts": {
25
35
  "start": "bun run src/cli.ts start",
26
36
  "dev": "bun run --watch src/cli.ts start",