@j0hanz/superfetch 2.2.2 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. package/README.md +358 -363
  2. package/dist/assets/logo.svg +24835 -0
  3. package/dist/cache.d.ts +0 -1
  4. package/dist/cache.js +71 -29
  5. package/dist/config.d.ts +2 -1
  6. package/dist/config.js +11 -7
  7. package/dist/crypto.d.ts +0 -1
  8. package/dist/crypto.js +0 -1
  9. package/dist/dom-noise-removal.d.ts +0 -1
  10. package/dist/dom-noise-removal.js +50 -45
  11. package/dist/errors.d.ts +0 -1
  12. package/dist/errors.js +0 -1
  13. package/dist/fetch.d.ts +0 -1
  14. package/dist/fetch.js +61 -54
  15. package/dist/host-normalization.d.ts +1 -0
  16. package/dist/host-normalization.js +47 -0
  17. package/dist/http-native.d.ts +0 -1
  18. package/dist/http-native.js +92 -28
  19. package/dist/index.d.ts +0 -1
  20. package/dist/index.js +0 -1
  21. package/dist/instructions.md +41 -41
  22. package/dist/json.d.ts +0 -1
  23. package/dist/json.js +0 -1
  24. package/dist/language-detection.d.ts +0 -1
  25. package/dist/language-detection.js +10 -2
  26. package/dist/markdown-cleanup.d.ts +6 -13
  27. package/dist/markdown-cleanup.js +252 -34
  28. package/dist/mcp-validator.d.ts +14 -0
  29. package/dist/mcp-validator.js +22 -0
  30. package/dist/mcp.d.ts +0 -1
  31. package/dist/mcp.js +20 -10
  32. package/dist/observability.d.ts +2 -1
  33. package/dist/observability.js +30 -3
  34. package/dist/server-tuning.d.ts +9 -0
  35. package/dist/server-tuning.js +30 -0
  36. package/dist/{http-utils.d.ts → session.d.ts} +0 -25
  37. package/dist/{http-utils.js → session.js} +11 -104
  38. package/dist/tools.d.ts +5 -4
  39. package/dist/tools.js +46 -41
  40. package/dist/transform-types.d.ts +38 -1
  41. package/dist/transform-types.js +0 -1
  42. package/dist/transform.d.ts +12 -7
  43. package/dist/transform.js +205 -344
  44. package/dist/type-guards.d.ts +0 -1
  45. package/dist/type-guards.js +0 -1
  46. package/dist/workers/transform-worker.d.ts +0 -1
  47. package/dist/workers/transform-worker.js +29 -19
  48. package/package.json +84 -85
  49. package/dist/cache.d.ts.map +0 -1
  50. package/dist/cache.js.map +0 -1
  51. package/dist/config.d.ts.map +0 -1
  52. package/dist/config.js.map +0 -1
  53. package/dist/crypto.d.ts.map +0 -1
  54. package/dist/crypto.js.map +0 -1
  55. package/dist/dom-noise-removal.d.ts.map +0 -1
  56. package/dist/dom-noise-removal.js.map +0 -1
  57. package/dist/errors.d.ts.map +0 -1
  58. package/dist/errors.js.map +0 -1
  59. package/dist/fetch.d.ts.map +0 -1
  60. package/dist/fetch.js.map +0 -1
  61. package/dist/http-native.d.ts.map +0 -1
  62. package/dist/http-native.js.map +0 -1
  63. package/dist/http-utils.d.ts.map +0 -1
  64. package/dist/http-utils.js.map +0 -1
  65. package/dist/index.d.ts.map +0 -1
  66. package/dist/index.js.map +0 -1
  67. package/dist/json.d.ts.map +0 -1
  68. package/dist/json.js.map +0 -1
  69. package/dist/language-detection.d.ts.map +0 -1
  70. package/dist/language-detection.js.map +0 -1
  71. package/dist/markdown-cleanup.d.ts.map +0 -1
  72. package/dist/markdown-cleanup.js.map +0 -1
  73. package/dist/mcp.d.ts.map +0 -1
  74. package/dist/mcp.js.map +0 -1
  75. package/dist/observability.d.ts.map +0 -1
  76. package/dist/observability.js.map +0 -1
  77. package/dist/tools.d.ts.map +0 -1
  78. package/dist/tools.js.map +0 -1
  79. package/dist/transform-types.d.ts.map +0 -1
  80. package/dist/transform-types.js.map +0 -1
  81. package/dist/transform.d.ts.map +0 -1
  82. package/dist/transform.js.map +0 -1
  83. package/dist/type-guards.d.ts.map +0 -1
  84. package/dist/type-guards.js.map +0 -1
  85. package/dist/workers/transform-worker.d.ts.map +0 -1
  86. package/dist/workers/transform-worker.js.map +0 -1
@@ -5,13 +5,51 @@ import { URL, URLSearchParams } from 'node:url';
5
5
  import { InvalidTokenError, ServerError, } from '@modelcontextprotocol/sdk/server/auth/errors.js';
6
6
  import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js';
7
7
  import { isInitializeRequest } from '@modelcontextprotocol/sdk/types.js';
8
- import { handleDownload } from './cache.js';
9
- import { config, enableHttpMode } from './config.js';
8
+ import { keys as cacheKeys, handleDownload } from './cache.js';
9
+ import { config, enableHttpMode, serverVersion } from './config.js';
10
10
  import { timingSafeEqualUtf8 } from './crypto.js';
11
- import { acceptsEventStream, applyHttpServerTuning, composeCloseHandlers, createSessionStore, createSlotTracker, drainConnectionsOnShutdown, ensureSessionCapacity, isJsonRpcBatchRequest, isMcpRequestBody, normalizeHost, reserveSessionSlot, startSessionCleanupLoop, } from './http-utils.js';
11
+ import { normalizeHost } from './host-normalization.js';
12
+ import { acceptsEventStream, isJsonRpcBatchRequest, isMcpRequestBody, } from './mcp-validator.js';
12
13
  import { createMcpServer } from './mcp.js';
13
14
  import { logError, logInfo, logWarn } from './observability.js';
15
+ import { applyHttpServerTuning, drainConnectionsOnShutdown, } from './server-tuning.js';
16
+ import { composeCloseHandlers, createSessionStore, createSlotTracker, ensureSessionCapacity, reserveSessionSlot, startSessionCleanupLoop, } from './session.js';
17
+ import { getTransformPoolStats } from './transform.js';
14
18
  import { isObject } from './type-guards.js';
19
+ function createTransportAdapter(transportImpl) {
20
+ const noopOnClose = () => { };
21
+ const noopOnError = () => { };
22
+ const noopOnMessage = () => { };
23
+ let oncloseHandler = noopOnClose;
24
+ let onerrorHandler = noopOnError;
25
+ let onmessageHandler = noopOnMessage;
26
+ return {
27
+ start: () => transportImpl.start(),
28
+ send: (message, options) => transportImpl.send(message, options),
29
+ close: () => transportImpl.close(),
30
+ get onclose() {
31
+ return oncloseHandler;
32
+ },
33
+ set onclose(handler) {
34
+ oncloseHandler = handler;
35
+ transportImpl.onclose = handler;
36
+ },
37
+ get onerror() {
38
+ return onerrorHandler;
39
+ },
40
+ set onerror(handler) {
41
+ onerrorHandler = handler;
42
+ transportImpl.onerror = handler;
43
+ },
44
+ get onmessage() {
45
+ return onmessageHandler;
46
+ },
47
+ set onmessage(handler) {
48
+ onmessageHandler = handler;
49
+ transportImpl.onmessage = handler;
50
+ },
51
+ };
52
+ }
15
53
  function shimResponse(res) {
16
54
  const shim = res;
17
55
  shim.status = function (code) {
@@ -144,26 +182,26 @@ function resolveOriginHost(origin) {
144
182
  return null;
145
183
  }
146
184
  }
185
+ function rejectHostRequest(res, status, message) {
186
+ res.status(status).json({ error: message });
187
+ return false;
188
+ }
147
189
  function validateHostAndOrigin(req, res) {
148
190
  const host = resolveHostHeader(req);
149
191
  if (!host) {
150
- res.status(400).json({ error: 'Missing or invalid Host header' });
151
- return false;
192
+ return rejectHostRequest(res, 400, 'Missing or invalid Host header');
152
193
  }
153
194
  if (!ALLOWED_HOSTS.has(host)) {
154
- res.status(403).json({ error: 'Host not allowed' });
155
- return false;
195
+ return rejectHostRequest(res, 403, 'Host not allowed');
156
196
  }
157
197
  const originHeader = getHeaderValue(req, 'origin');
158
198
  if (originHeader) {
159
199
  const originHost = resolveOriginHost(originHeader);
160
200
  if (!originHost) {
161
- res.status(403).json({ error: 'Invalid Origin header' });
162
- return false;
201
+ return rejectHostRequest(res, 403, 'Invalid Origin header');
163
202
  }
164
203
  if (!ALLOWED_HOSTS.has(originHost)) {
165
- res.status(403).json({ error: 'Origin not allowed' });
166
- return false;
204
+ return rejectHostRequest(res, 403, 'Origin not allowed');
167
205
  }
168
206
  }
169
207
  return true;
@@ -318,24 +356,35 @@ async function verifyWithIntrospection(token) {
318
356
  throw new InvalidTokenError('Token is inactive');
319
357
  return buildIntrospectionAuthInfo(token, payload);
320
358
  }
359
+ function resolveBearerToken(authHeader) {
360
+ const [type, token] = authHeader.split(' ');
361
+ if (type !== 'Bearer' || !token) {
362
+ throw new InvalidTokenError('Invalid Authorization header format');
363
+ }
364
+ return token;
365
+ }
366
+ function authenticateWithToken(token) {
367
+ return config.auth.mode === 'oauth'
368
+ ? verifyWithIntrospection(token)
369
+ : Promise.resolve(verifyStaticToken(token));
370
+ }
371
+ function authenticateWithApiKey(req) {
372
+ const apiKey = getHeaderValue(req, 'x-api-key');
373
+ if (apiKey && config.auth.mode === 'static') {
374
+ return verifyStaticToken(apiKey);
375
+ }
376
+ if (apiKey && config.auth.mode === 'oauth') {
377
+ throw new InvalidTokenError('X-API-Key not supported for OAuth');
378
+ }
379
+ throw new InvalidTokenError('Missing Authorization header');
380
+ }
321
381
  async function authenticate(req) {
322
382
  const authHeader = req.headers.authorization;
323
383
  if (!authHeader) {
324
- const apiKey = getHeaderValue(req, 'x-api-key');
325
- if (apiKey && config.auth.mode === 'static') {
326
- return verifyStaticToken(apiKey);
327
- }
328
- if (apiKey && config.auth.mode === 'oauth') {
329
- throw new InvalidTokenError('X-API-Key not supported for OAuth');
330
- }
331
- throw new InvalidTokenError('Missing Authorization header');
384
+ return authenticateWithApiKey(req);
332
385
  }
333
- const [type, token] = authHeader.split(' ');
334
- if (type !== 'Bearer' || !token)
335
- throw new InvalidTokenError('Invalid Authorization header format');
336
- if (config.auth.mode === 'oauth')
337
- return verifyWithIntrospection(token);
338
- return verifyStaticToken(token);
386
+ const token = resolveBearerToken(authHeader);
387
+ return authenticateWithToken(token);
339
388
  }
340
389
  // --- MCP Routes ---
341
390
  function sendError(res, code, message, status = 400, id = null) {
@@ -394,7 +443,8 @@ async function createNewSession(store, mcpServer, res, requestId) {
394
443
  tracker.releaseSlot();
395
444
  };
396
445
  try {
397
- await mcpServer.connect(transportImpl);
446
+ const transport = createTransportAdapter(transportImpl);
447
+ await mcpServer.connect(transport);
398
448
  }
399
449
  catch (err) {
400
450
  clearTimeout(initTimeout);
@@ -531,7 +581,22 @@ async function dispatchRequest(req, res, url, ctx) {
531
581
  const { method } = req;
532
582
  try {
533
583
  if (method === 'GET' && path === '/health') {
534
- res.status(200).json({ status: 'ok' });
584
+ const poolStats = getTransformPoolStats();
585
+ res.status(200).json({
586
+ status: 'ok',
587
+ version: serverVersion,
588
+ uptime: Math.floor(process.uptime()),
589
+ timestamp: new Date().toISOString(),
590
+ stats: {
591
+ activeSessions: ctx.store.size(),
592
+ cacheKeys: cacheKeys().length,
593
+ workerPool: poolStats ?? {
594
+ queueDepth: 0,
595
+ activeWorkers: 0,
596
+ capacity: 0,
597
+ },
598
+ },
599
+ });
535
600
  return;
536
601
  }
537
602
  if (!(await authenticateRequest(req, res))) {
@@ -642,4 +707,3 @@ async function handleRequest(rawReq, rawRes, rateLimiter, ctx) {
642
707
  // 5. Routing
643
708
  await dispatchRequest(req, res, url, ctx);
644
709
  }
645
- //# sourceMappingURL=http-native.js.map
package/dist/index.d.ts CHANGED
@@ -1,3 +1,2 @@
1
1
  #!/usr/bin/env node
2
2
  export {};
3
- //# sourceMappingURL=index.d.ts.map
package/dist/index.js CHANGED
@@ -52,4 +52,3 @@ catch (error) {
52
52
  process.stderr.write(`Failed to start server: ${message}\n`);
53
53
  process.exit(1);
54
54
  }
55
- //# sourceMappingURL=index.js.map
@@ -1,41 +1,41 @@
1
- # superFetch Instructions
2
-
3
- > Guidance for the Agent: These instructions are available as a resource (`internal://instructions`) or prompt (`get-help`). Load them when you are unsure about tool usage.
4
-
5
- ## 1. Core Capability
6
-
7
- - **Domain:** Fetch public http(s) URLs, extract readable content, and return clean Markdown.
8
- - **Primary Resources:** `fetch-url` output (`markdown`, `title`, `url`) and cache resources (`superfetch://cache/markdown/{urlHash}`).
9
-
10
- ## 2. The "Golden Path" Workflows (Critical)
11
-
12
- _Describe the standard order of operations using ONLY tools that exist._
13
-
14
- ### Workflow A: Fetch and Read
15
-
16
- 1. Call `fetch-url` with `url`.
17
- 2. Read `structuredContent.markdown` and `structuredContent.title` from the result.
18
- 3. If content is truncated (look for `...[truncated]`), follow the returned `resource_link` URI.
19
- > Constraint: Never guess resource URIs. Use the returned `resource_link` or list resources first.
20
-
21
- ### Workflow B: Retrieve Cached Content
22
-
23
- 1. List resources to find available cached pages (`superfetch://cache/...`).
24
- 2. Read the specific `superfetch://cache/markdown/{urlHash}` URI.
25
-
26
- ## 3. Tool Nuances & Gotchas
27
-
28
- _Do NOT repeat JSON schema. Focus on behavior and pitfalls._
29
-
30
- - **`fetch-url`**
31
- - **Purpose:** Fetches a webpage and converts it to clean Markdown format.
32
- - **Inputs:** `url` (Must be public http/https. Private patterns like localhost/127.0.0.1 are blocked).
33
- - **Side effects:** Open world network request; writes to internal LRU cache.
34
- - **Latency/limits:** Network-bound. Large content exceeds inline limits and returns a `resource_link`.
35
- - **Common failure modes:** `VALIDATION_ERROR` (private/blocked URL), `FETCH_ERROR` (network timeout/404).
36
-
37
- ## 4. Error Handling Strategy
38
-
39
- - **`VALIDATION_ERROR`**: Ensure the URL is valid and publicly accessible.
40
- - **`FETCH_ERROR`**: Retry once. If persistent, the site may be blocking automated requests.
41
- - **Truncation**: If `isError` is false but content ends in `...[truncated]`, you MUST read the provided `resource_link` URI to get the full markdown.
1
+ # superFetch Instructions
2
+
3
+ > Guidance for the Agent: These instructions are available as a resource (`internal://instructions`) or prompt (`get-help`). Load them when you are unsure about tool usage.
4
+
5
+ ## 1. Core Capability
6
+
7
+ - **Domain:** Fetch public http(s) URLs, extract readable content, and return clean Markdown.
8
+ - **Primary Resources:** `fetch-url` output (`markdown`, `title`, `url`) and cache resources (`superfetch://cache/markdown/{urlHash}`).
9
+
10
+ ## 2. The "Golden Path" Workflows (Critical)
11
+
12
+ _Describe the standard order of operations using ONLY tools that exist._
13
+
14
+ ### Workflow A: Fetch and Read
15
+
16
+ 1. Call `fetch-url` with `url`.
17
+ 2. Read `structuredContent.markdown` and `structuredContent.title` from the result.
18
+ 3. If content is truncated (look for `...[truncated]`), follow the returned `resource_link` URI.
19
+ > Constraint: Never guess resource URIs. Use the returned `resource_link` or list resources first.
20
+
21
+ ### Workflow B: Retrieve Cached Content
22
+
23
+ 1. List resources to find available cached pages (`superfetch://cache/...`).
24
+ 2. Read the specific `superfetch://cache/markdown/{urlHash}` URI.
25
+
26
+ ## 3. Tool Nuances & Gotchas
27
+
28
+ _Do NOT repeat JSON schema. Focus on behavior and pitfalls._
29
+
30
+ - **`fetch-url`**
31
+ - **Purpose:** Fetches a webpage and converts it to clean Markdown format.
32
+ - **Inputs:** `url` (Must be public http/https. Private patterns like localhost/127.0.0.1 are blocked).
33
+ - **Side effects:** Open world network request; writes to internal LRU cache.
34
+ - **Latency/limits:** Network-bound. Large content exceeds inline limits and returns a `resource_link`.
35
+ - **Common failure modes:** `VALIDATION_ERROR` (private/blocked URL), `FETCH_ERROR` (network timeout/404).
36
+
37
+ ## 4. Error Handling Strategy
38
+
39
+ - **`VALIDATION_ERROR`**: Ensure the URL is valid and publicly accessible.
40
+ - **`FETCH_ERROR`**: Retry once. If persistent, the site may be blocking automated requests.
41
+ - **Truncation**: If `isError` is false but content ends in `...[truncated]`, you MUST read the provided `resource_link` URI to get the full markdown.
package/dist/json.d.ts CHANGED
@@ -1,2 +1 @@
1
1
  export declare function stableStringify(obj: unknown, depth?: number, seen?: WeakSet<object>): string;
2
- //# sourceMappingURL=json.d.ts.map
package/dist/json.js CHANGED
@@ -27,4 +27,3 @@ export function stableStringify(obj, depth = 0, seen = new WeakSet()) {
27
27
  const processed = processValue(obj, depth, seen);
28
28
  return JSON.stringify(processed);
29
29
  }
30
- //# sourceMappingURL=json.js.map
@@ -10,4 +10,3 @@ export declare function detectLanguageFromCode(code: string): string | undefined
10
10
  * Resolve language from HTML attributes (class name and data-language).
11
11
  */
12
12
  export declare function resolveLanguageFromAttributes(className: string, dataLang: string): string | undefined;
13
- //# sourceMappingURL=language-detection.d.ts.map
@@ -6,7 +6,16 @@
6
6
  * Check if source contains the given word as a standalone word (not part of another word).
7
7
  */
8
8
  function containsWord(source, word) {
9
- return new RegExp(`\\b${word}\\b`).test(source);
9
+ return getWordRegex(word).test(source);
10
+ }
11
+ const WORD_REGEX_CACHE = new Map();
12
+ function getWordRegex(word) {
13
+ const cached = WORD_REGEX_CACHE.get(word);
14
+ if (cached)
15
+ return cached;
16
+ const compiled = new RegExp(`\\b${word}\\b`);
17
+ WORD_REGEX_CACHE.set(word, compiled);
18
+ return compiled;
10
19
  }
11
20
  /**
12
21
  * Extract language from class name (e.g., "language-typescript", "lang-js", "hljs javascript").
@@ -280,4 +289,3 @@ export function resolveLanguageFromAttributes(className, dataLang) {
280
289
  const classMatch = extractLanguageFromClassName(className);
281
290
  return classMatch ?? resolveLanguageFromDataAttribute(dataLang);
282
291
  }
283
- //# sourceMappingURL=language-detection.js.map
@@ -1,19 +1,12 @@
1
- /**
2
- * Markdown cleanup utilities for post-processing converted content.
3
- *
4
- * Goals:
5
- * - Never mutate fenced code blocks (``` / ~~~) content.
6
- * - Keep rules localized and readable.
7
- * - Avoid multi-pass regexes that accidentally hit code blocks.
8
- */
9
- /**
10
- * Clean up common markdown artifacts and formatting issues.
11
- * IMPORTANT: All rules are applied ONLY outside fenced code blocks.
12
- */
1
+ import type { MetadataBlock } from './transform-types.js';
13
2
  export declare function cleanupMarkdownArtifacts(content: string): string;
3
+ export declare function extractTitleFromRawMarkdown(content: string): string | undefined;
4
+ export declare function addSourceToMarkdown(content: string, url: string): string;
5
+ export declare function isRawTextContent(content: string): boolean;
6
+ export declare function isLikelyHtmlContent(content: string): boolean;
7
+ export declare function buildMetadataFooter(metadata?: MetadataBlock, fallbackUrl?: string): string;
14
8
  /**
15
9
  * Promote standalone lines that look like headings to proper markdown headings.
16
10
  * Fence-aware: never modifies content inside fenced code blocks.
17
11
  */
18
12
  export declare function promoteOrphanHeadings(markdown: string): string;
19
- //# sourceMappingURL=markdown-cleanup.d.ts.map
@@ -1,11 +1,4 @@
1
- /**
2
- * Markdown cleanup utilities for post-processing converted content.
3
- *
4
- * Goals:
5
- * - Never mutate fenced code blocks (``` / ~~~) content.
6
- * - Keep rules localized and readable.
7
- * - Avoid multi-pass regexes that accidentally hit code blocks.
8
- */
1
+ import { config } from './config.js';
9
2
  // ─────────────────────────────────────────────────────────────────────────────
10
3
  // Fence state helpers
11
4
  // ─────────────────────────────────────────────────────────────────────────────
@@ -76,15 +69,6 @@ function splitByFences(content) {
76
69
  }
77
70
  return segments;
78
71
  }
79
- /**
80
- * Apply a transformation function only to non-fenced content.
81
- */
82
- function mapOutsideFences(content, transform) {
83
- const segments = splitByFences(content);
84
- return segments
85
- .map((seg) => (seg.inFence ? seg.content : transform(seg.content)))
86
- .join('\n');
87
- }
88
72
  // ─────────────────────────────────────────────────────────────────────────────
89
73
  // Cleanup rules (OUTSIDE fences only)
90
74
  // ─────────────────────────────────────────────────────────────────────────────
@@ -165,28 +149,263 @@ function normalizeListsAndSpacing(text) {
165
149
  // Collapse excessive blank lines
166
150
  return text.replace(/\n{3,}/g, '\n\n');
167
151
  }
152
+ const CLEANUP_STEPS = [
153
+ fixOrphanHeadings,
154
+ removeEmptyHeadings,
155
+ removeSkipLinksAndEmptyAnchors,
156
+ ensureBlankLineAfterHeadings,
157
+ removeTocBlocks,
158
+ tidyLinksAndEscapes,
159
+ normalizeListsAndSpacing,
160
+ ];
168
161
  // ─────────────────────────────────────────────────────────────────────────────
169
162
  // Public API
170
163
  // ─────────────────────────────────────────────────────────────────────────────
171
- /**
172
- * Clean up common markdown artifacts and formatting issues.
173
- * IMPORTANT: All rules are applied ONLY outside fenced code blocks.
174
- */
164
+ function getLastLine(text) {
165
+ const index = text.lastIndexOf('\n');
166
+ return index === -1 ? text : text.slice(index + 1);
167
+ }
175
168
  export function cleanupMarkdownArtifacts(content) {
176
169
  if (!content)
177
170
  return '';
178
- const cleaned = mapOutsideFences(content, (outside) => {
179
- let text = outside;
180
- text = fixOrphanHeadings(text);
181
- text = removeEmptyHeadings(text);
182
- text = removeSkipLinksAndEmptyAnchors(text);
183
- text = ensureBlankLineAfterHeadings(text);
184
- text = removeTocBlocks(text);
185
- text = tidyLinksAndEscapes(text);
186
- text = normalizeListsAndSpacing(text);
187
- return text;
188
- });
189
- return cleaned.trim();
171
+ const segments = splitByFences(content);
172
+ return segments
173
+ .map((seg, index) => {
174
+ if (seg.inFence)
175
+ return seg.content;
176
+ const prevSeg = segments[index - 1];
177
+ const prevLineContext = prevSeg ? getLastLine(prevSeg.content) : '';
178
+ const lines = seg.content.split('\n');
179
+ const promotedLines = [];
180
+ for (let i = 0; i < lines.length; i += 1) {
181
+ const line = lines[i] ?? '';
182
+ const prevLine = i > 0 ? (lines[i - 1] ?? '') : prevLineContext;
183
+ promotedLines.push(processNonFencedLine(line, prevLine));
184
+ }
185
+ const promoted = promotedLines.join('\n');
186
+ return CLEANUP_STEPS.reduce((text, step) => step(text), promoted);
187
+ })
188
+ .join('\n')
189
+ .trim();
190
+ }
191
+ // ─────────────────────────────────────────────────────────────────────────────
192
+ // Raw markdown handling + metadata footer
193
+ // ─────────────────────────────────────────────────────────────────────────────
194
+ const HEADING_PATTERN = /^#{1,6}\s/m;
195
+ const LIST_PATTERN = /^(?:[-*+])\s/m;
196
+ const HTML_DOCUMENT_PATTERN = /^(<!doctype|<html)/i;
197
+ function containsMarkdownHeading(content) {
198
+ return HEADING_PATTERN.test(content);
199
+ }
200
+ function containsMarkdownList(content) {
201
+ return LIST_PATTERN.test(content);
202
+ }
203
+ function containsFencedCodeBlock(content) {
204
+ const first = content.indexOf('```');
205
+ if (first === -1)
206
+ return false;
207
+ return content.includes('```', first + 3);
208
+ }
209
+ function looksLikeMarkdown(content) {
210
+ return (containsMarkdownHeading(content) ||
211
+ containsMarkdownList(content) ||
212
+ containsFencedCodeBlock(content));
213
+ }
214
+ function detectLineEnding(content) {
215
+ return content.includes('\r\n') ? '\r\n' : '\n';
216
+ }
217
+ const FRONTMATTER_DELIMITER = '---';
218
+ function findFrontmatterLines(content) {
219
+ const lineEnding = detectLineEnding(content);
220
+ const lines = content.split(lineEnding);
221
+ if (lines[0] !== FRONTMATTER_DELIMITER)
222
+ return null;
223
+ const endIndex = lines.indexOf(FRONTMATTER_DELIMITER, 1);
224
+ if (endIndex === -1)
225
+ return null;
226
+ return { lineEnding, lines, endIndex };
227
+ }
228
+ function stripOptionalQuotes(value) {
229
+ const trimmed = value.trim();
230
+ if (trimmed.length < 2)
231
+ return trimmed;
232
+ const first = trimmed[0];
233
+ const last = trimmed[trimmed.length - 1];
234
+ if ((first === '"' && last === '"') || (first === "'" && last === "'")) {
235
+ return trimmed.slice(1, -1).trim();
236
+ }
237
+ return trimmed;
238
+ }
239
+ function parseFrontmatterEntry(line) {
240
+ const trimmed = line.trim();
241
+ if (!trimmed)
242
+ return null;
243
+ const separatorIndex = trimmed.indexOf(':');
244
+ if (separatorIndex <= 0)
245
+ return null;
246
+ const key = trimmed.slice(0, separatorIndex).trim().toLowerCase();
247
+ const value = trimmed.slice(separatorIndex + 1);
248
+ return { key, value };
249
+ }
250
+ function isTitleKey(key) {
251
+ return key === 'title' || key === 'name';
252
+ }
253
+ function extractTitleFromHeading(content) {
254
+ const lineEnding = detectLineEnding(content);
255
+ const lines = content.split(lineEnding);
256
+ for (const line of lines) {
257
+ const trimmed = line.trim();
258
+ if (!trimmed)
259
+ continue;
260
+ let index = 0;
261
+ while (index < trimmed.length && trimmed[index] === '#') {
262
+ index += 1;
263
+ }
264
+ if (index === 0 || index > 6)
265
+ return undefined;
266
+ const nextChar = trimmed[index];
267
+ if (nextChar !== ' ' && nextChar !== '\t')
268
+ return undefined;
269
+ const heading = trimmed.slice(index).trim();
270
+ return heading.length > 0 ? heading : undefined;
271
+ }
272
+ return undefined;
273
+ }
274
+ export function extractTitleFromRawMarkdown(content) {
275
+ const frontmatter = findFrontmatterLines(content);
276
+ if (!frontmatter) {
277
+ return extractTitleFromHeading(content);
278
+ }
279
+ const { lines, endIndex } = frontmatter;
280
+ const entry = lines
281
+ .slice(1, endIndex)
282
+ .map((line) => parseFrontmatterEntry(line))
283
+ .find((parsed) => parsed !== null && isTitleKey(parsed.key));
284
+ if (!entry)
285
+ return undefined;
286
+ const value = stripOptionalQuotes(entry.value);
287
+ return value || undefined;
288
+ }
289
+ function hasMarkdownSourceLine(content) {
290
+ const lineEnding = detectLineEnding(content);
291
+ const lines = content.split(lineEnding);
292
+ const limit = Math.min(lines.length, 50);
293
+ for (let index = 0; index < limit; index += 1) {
294
+ const line = lines[index];
295
+ if (!line)
296
+ continue;
297
+ if (line.trimStart().toLowerCase().startsWith('source:')) {
298
+ return true;
299
+ }
300
+ }
301
+ return false;
302
+ }
303
+ function addSourceToMarkdownMarkdownFormat(content, url) {
304
+ if (hasMarkdownSourceLine(content))
305
+ return content;
306
+ const lineEnding = detectLineEnding(content);
307
+ const lines = content.split(lineEnding);
308
+ const firstNonEmptyIndex = lines.findIndex((line) => line.trim().length > 0);
309
+ if (firstNonEmptyIndex !== -1) {
310
+ const firstLine = lines[firstNonEmptyIndex];
311
+ if (firstLine && /^#{1,6}\s+/.test(firstLine.trim())) {
312
+ const insertAt = firstNonEmptyIndex + 1;
313
+ const updated = [
314
+ ...lines.slice(0, insertAt),
315
+ '',
316
+ `Source: ${url}`,
317
+ '',
318
+ ...lines.slice(insertAt),
319
+ ];
320
+ return updated.join(lineEnding);
321
+ }
322
+ }
323
+ return [`Source: ${url}`, '', content].join(lineEnding);
324
+ }
325
+ export function addSourceToMarkdown(content, url) {
326
+ const frontmatter = findFrontmatterLines(content);
327
+ if (config.transform.metadataFormat === 'markdown' && !frontmatter) {
328
+ return addSourceToMarkdownMarkdownFormat(content, url);
329
+ }
330
+ if (!frontmatter) {
331
+ return `---\nsource: "${url}"\n---\n\n${content}`;
332
+ }
333
+ const { lineEnding, lines, endIndex } = frontmatter;
334
+ const bodyLines = lines.slice(1, endIndex);
335
+ const hasSource = bodyLines.some((line) => line.trimStart().toLowerCase().startsWith('source:'));
336
+ if (hasSource)
337
+ return content;
338
+ const updatedLines = [
339
+ lines[0],
340
+ ...bodyLines,
341
+ `source: "${url}"`,
342
+ ...lines.slice(endIndex),
343
+ ];
344
+ return updatedLines.join(lineEnding);
345
+ }
346
+ function hasFrontmatter(trimmed) {
347
+ return trimmed.startsWith('---\n') || trimmed.startsWith('---\r\n');
348
+ }
349
+ function looksLikeHtmlDocument(trimmed) {
350
+ return HTML_DOCUMENT_PATTERN.test(trimmed);
351
+ }
352
+ function countCommonHtmlTags(content) {
353
+ const matches = content.match(/<(html|head|body|div|span|script|style|meta|link)\b/gi) ??
354
+ [];
355
+ return matches.length;
356
+ }
357
+ export function isRawTextContent(content) {
358
+ const trimmed = content.trim();
359
+ const isHtmlDocument = looksLikeHtmlDocument(trimmed);
360
+ const hasMarkdownFrontmatter = hasFrontmatter(trimmed);
361
+ const hasTooManyHtmlTags = countCommonHtmlTags(content) > 2;
362
+ const isMarkdown = looksLikeMarkdown(content);
363
+ return (!isHtmlDocument &&
364
+ (hasMarkdownFrontmatter || (!hasTooManyHtmlTags && isMarkdown)));
365
+ }
366
+ export function isLikelyHtmlContent(content) {
367
+ const trimmed = content.trim();
368
+ if (!trimmed)
369
+ return false;
370
+ if (looksLikeHtmlDocument(trimmed))
371
+ return true;
372
+ return countCommonHtmlTags(content) > 2;
373
+ }
374
+ function formatFetchedDate(isoString) {
375
+ try {
376
+ const date = new Date(isoString);
377
+ const day = String(date.getDate()).padStart(2, '0');
378
+ const month = String(date.getMonth() + 1).padStart(2, '0');
379
+ const year = date.getFullYear();
380
+ return `${day}-${month}-${year}`;
381
+ }
382
+ catch {
383
+ return isoString;
384
+ }
385
+ }
386
+ export function buildMetadataFooter(metadata, fallbackUrl) {
387
+ if (!metadata)
388
+ return '';
389
+ const lines = ['---', ''];
390
+ const url = metadata.url || fallbackUrl;
391
+ const parts = [];
392
+ if (metadata.title)
393
+ parts.push(`_${metadata.title}_`);
394
+ if (metadata.author)
395
+ parts.push(`_${metadata.author}_`);
396
+ if (url)
397
+ parts.push(`[_Original Source_](${url})`);
398
+ if (metadata.fetchedAt) {
399
+ const formattedDate = formatFetchedDate(metadata.fetchedAt);
400
+ parts.push(`_${formattedDate}_`);
401
+ }
402
+ if (parts.length > 0) {
403
+ lines.push(` ${parts.join(' | ')}`);
404
+ }
405
+ if (metadata.description) {
406
+ lines.push(` <sub>${metadata.description}</sub>`);
407
+ }
408
+ return lines.join('\n');
190
409
  }
191
410
  // ─────────────────────────────────────────────────────────────────────────────
192
411
  // Heading Promotion (fence-aware)
@@ -280,4 +499,3 @@ export function promoteOrphanHeadings(markdown) {
280
499
  }
281
500
  return result.join('\n');
282
501
  }
283
- //# sourceMappingURL=markdown-cleanup.js.map
@@ -0,0 +1,14 @@
1
+ export type JsonRpcId = string | number | null;
2
+ export interface McpRequestParams {
3
+ _meta?: Record<string, unknown>;
4
+ [key: string]: unknown;
5
+ }
6
+ export interface McpRequestBody {
7
+ jsonrpc: '2.0';
8
+ method: string;
9
+ id?: JsonRpcId;
10
+ params?: McpRequestParams;
11
+ }
12
+ export declare function isJsonRpcBatchRequest(body: unknown): boolean;
13
+ export declare function isMcpRequestBody(body: unknown): body is McpRequestBody;
14
+ export declare function acceptsEventStream(header: string | null | undefined): boolean;