@j0hanz/fetch-url-mcp 1.3.1 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -21
- package/dist/cli.d.ts +1 -0
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +15 -7
- package/dist/cli.js.map +1 -1
- package/dist/http/auth.d.ts +1 -3
- package/dist/http/auth.d.ts.map +1 -1
- package/dist/http/auth.js +18 -12
- package/dist/http/auth.js.map +1 -1
- package/dist/http/health.d.ts +1 -1
- package/dist/http/health.d.ts.map +1 -1
- package/dist/http/health.js +2 -2
- package/dist/http/health.js.map +1 -1
- package/dist/http/helpers.d.ts +1 -8
- package/dist/http/helpers.d.ts.map +1 -1
- package/dist/http/helpers.js +12 -10
- package/dist/http/helpers.js.map +1 -1
- package/dist/http/native.d.ts.map +1 -1
- package/dist/http/native.js +41 -27
- package/dist/http/native.js.map +1 -1
- package/dist/http/rate-limit.d.ts +2 -1
- package/dist/http/rate-limit.d.ts.map +1 -1
- package/dist/http/rate-limit.js +11 -15
- package/dist/http/rate-limit.js.map +1 -1
- package/dist/index.js +4 -6
- package/dist/index.js.map +1 -1
- package/dist/lib/cache.d.ts.map +1 -0
- package/dist/{cache.js → lib/cache.js} +5 -2
- package/dist/lib/cache.js.map +1 -0
- package/dist/{config.d.ts → lib/config.d.ts} +2 -0
- package/dist/lib/config.d.ts.map +1 -0
- package/dist/{config.js → lib/config.js} +3 -0
- package/dist/lib/config.js.map +1 -0
- package/dist/lib/crypto.d.ts.map +1 -0
- package/dist/lib/crypto.js.map +1 -0
- package/dist/lib/dom-noise-removal.d.ts.map +1 -0
- package/dist/lib/dom-noise-removal.js.map +1 -0
- package/dist/lib/download.d.ts.map +1 -0
- package/dist/lib/download.js.map +1 -0
- package/dist/{errors.d.ts → lib/errors.d.ts} +3 -0
- package/dist/lib/errors.d.ts.map +1 -0
- package/dist/{errors.js → lib/errors.js} +7 -0
- package/dist/lib/errors.js.map +1 -0
- package/dist/lib/fetch-content.d.ts.map +1 -0
- package/dist/lib/fetch-content.js.map +1 -0
- package/dist/lib/fetch-stream.d.ts.map +1 -0
- package/dist/lib/fetch-stream.js.map +1 -0
- package/dist/lib/fetch.d.ts.map +1 -0
- package/dist/{fetch.js → lib/fetch.js} +97 -142
- package/dist/lib/fetch.js.map +1 -0
- package/dist/lib/host-normalization.d.ts.map +1 -0
- package/dist/lib/host-normalization.js.map +1 -0
- package/dist/lib/ip-blocklist.d.ts.map +1 -0
- package/dist/lib/ip-blocklist.js.map +1 -0
- package/dist/lib/json.d.ts.map +1 -0
- package/dist/lib/json.js.map +1 -0
- package/dist/lib/language-detection.d.ts.map +1 -0
- package/dist/lib/language-detection.js.map +1 -0
- package/dist/{markdown-cleanup.d.ts → lib/markdown-cleanup.d.ts} +1 -1
- package/dist/lib/markdown-cleanup.d.ts.map +1 -0
- package/dist/{markdown-cleanup.js → lib/markdown-cleanup.js} +1 -3
- package/dist/lib/markdown-cleanup.js.map +1 -0
- package/dist/lib/mcp-lifecycle.d.ts +5 -0
- package/dist/lib/mcp-lifecycle.d.ts.map +1 -0
- package/dist/lib/mcp-lifecycle.js +51 -0
- package/dist/lib/mcp-lifecycle.js.map +1 -0
- package/dist/lib/mcp-validator.d.ts.map +1 -0
- package/dist/{mcp-validator.js → lib/mcp-validator.js} +1 -1
- package/dist/lib/mcp-validator.js.map +1 -0
- package/dist/{mcp.d.ts → lib/mcp.d.ts} +1 -1
- package/dist/lib/mcp.d.ts.map +1 -0
- package/dist/{mcp.js → lib/mcp.js} +39 -19
- package/dist/lib/mcp.js.map +1 -0
- package/dist/lib/observability.d.ts.map +1 -0
- package/dist/lib/observability.js.map +1 -0
- package/dist/lib/server-tuning.d.ts.map +1 -0
- package/dist/lib/server-tuning.js.map +1 -0
- package/dist/{session.d.ts → lib/session.d.ts} +4 -1
- package/dist/lib/session.d.ts.map +1 -0
- package/dist/{session.js → lib/session.js} +43 -35
- package/dist/lib/session.js.map +1 -0
- package/dist/lib/timer-utils.d.ts +13 -0
- package/dist/lib/timer-utils.d.ts.map +1 -0
- package/dist/lib/timer-utils.js +44 -0
- package/dist/lib/timer-utils.js.map +1 -0
- package/dist/lib/tool-errors.d.ts.map +1 -0
- package/dist/{tool-errors.js → lib/tool-errors.js} +2 -2
- package/dist/lib/tool-errors.js.map +1 -0
- package/dist/{tool-pipeline.d.ts → lib/tool-pipeline.d.ts} +1 -2
- package/dist/lib/tool-pipeline.d.ts.map +1 -0
- package/dist/{tool-pipeline.js → lib/tool-pipeline.js} +8 -25
- package/dist/lib/tool-pipeline.js.map +1 -0
- package/dist/{tool-progress.d.ts → lib/tool-progress.d.ts} +0 -1
- package/dist/lib/tool-progress.d.ts.map +1 -0
- package/dist/{tool-progress.js → lib/tool-progress.js} +1 -1
- package/dist/lib/tool-progress.js.map +1 -0
- package/dist/lib/type-guards.d.ts.map +1 -0
- package/dist/lib/type-guards.js.map +1 -0
- package/dist/{prompts.d.ts → prompts/index.d.ts} +1 -1
- package/dist/prompts/index.d.ts.map +1 -0
- package/dist/{prompts.js → prompts/index.js} +2 -2
- package/dist/prompts/index.js.map +1 -0
- package/dist/{resources.d.ts → resources/index.d.ts} +1 -1
- package/dist/resources/index.d.ts.map +1 -0
- package/dist/{resources.js → resources/index.js} +19 -37
- package/dist/resources/index.js.map +1 -0
- package/dist/resources/instructions.d.ts.map +1 -0
- package/dist/{instructions.js → resources/instructions.js} +4 -2
- package/dist/resources/instructions.js.map +1 -0
- package/dist/schemas/inputs.d.ts +8 -0
- package/dist/schemas/inputs.d.ts.map +1 -0
- package/dist/schemas/inputs.js +25 -0
- package/dist/schemas/inputs.js.map +1 -0
- package/dist/schemas/outputs.d.ts +24 -0
- package/dist/schemas/outputs.d.ts.map +1 -0
- package/dist/schemas/outputs.js +78 -0
- package/dist/schemas/outputs.js.map +1 -0
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +25 -14
- package/dist/server.js.map +1 -1
- package/dist/tasks/execution.d.ts.map +1 -1
- package/dist/tasks/execution.js +88 -55
- package/dist/tasks/execution.js.map +1 -1
- package/dist/tasks/manager.d.ts +4 -0
- package/dist/tasks/manager.d.ts.map +1 -1
- package/dist/tasks/manager.js +51 -46
- package/dist/tasks/manager.js.map +1 -1
- package/dist/tasks/owner.d.ts +3 -2
- package/dist/tasks/owner.d.ts.map +1 -1
- package/dist/tasks/owner.js +2 -1
- package/dist/tasks/owner.js.map +1 -1
- package/dist/tasks/tool-registry.d.ts +12 -0
- package/dist/tasks/tool-registry.d.ts.map +1 -0
- package/dist/tasks/tool-registry.js +14 -0
- package/dist/tasks/tool-registry.js.map +1 -0
- package/dist/tools/fetch-url.d.ts +29 -0
- package/dist/tools/fetch-url.d.ts.map +1 -0
- package/dist/{tools.js → tools/fetch-url.js} +72 -132
- package/dist/tools/fetch-url.js.map +1 -0
- package/dist/tools/index.d.ts +3 -0
- package/dist/tools/index.d.ts.map +1 -0
- package/dist/tools/index.js +5 -0
- package/dist/tools/index.js.map +1 -0
- package/dist/transform/transform.d.ts.map +1 -1
- package/dist/transform/transform.js +156 -174
- package/dist/transform/transform.js.map +1 -1
- package/dist/transform/types.d.ts +0 -1
- package/dist/transform/types.d.ts.map +1 -1
- package/dist/transform/worker-pool.d.ts.map +1 -1
- package/dist/transform/worker-pool.js +5 -5
- package/dist/transform/worker-pool.js.map +1 -1
- package/dist/transform/workers/shared.d.ts +8 -0
- package/dist/transform/workers/shared.d.ts.map +1 -0
- package/dist/transform/workers/shared.js +131 -0
- package/dist/transform/workers/shared.js.map +1 -0
- package/dist/transform/workers/transform-child.js +5 -134
- package/dist/transform/workers/transform-child.js.map +1 -1
- package/dist/transform/workers/transform-worker.js +7 -127
- package/dist/transform/workers/transform-worker.js.map +1 -1
- package/package.json +8 -7
- package/dist/cache.d.ts.map +0 -1
- package/dist/cache.js.map +0 -1
- package/dist/config.d.ts.map +0 -1
- package/dist/config.js.map +0 -1
- package/dist/crypto.d.ts.map +0 -1
- package/dist/crypto.js.map +0 -1
- package/dist/dom-noise-removal.d.ts.map +0 -1
- package/dist/dom-noise-removal.js.map +0 -1
- package/dist/download.d.ts.map +0 -1
- package/dist/download.js.map +0 -1
- package/dist/errors.d.ts.map +0 -1
- package/dist/errors.js.map +0 -1
- package/dist/examples/mcp-fetch-url-client.js +0 -329
- package/dist/examples/mcp-fetch-url-client.js.map +0 -1
- package/dist/fetch-content.d.ts.map +0 -1
- package/dist/fetch-content.js.map +0 -1
- package/dist/fetch-stream.d.ts.map +0 -1
- package/dist/fetch-stream.js.map +0 -1
- package/dist/fetch.d.ts.map +0 -1
- package/dist/fetch.js.map +0 -1
- package/dist/host-normalization.d.ts.map +0 -1
- package/dist/host-normalization.js.map +0 -1
- package/dist/instructions.d.ts.map +0 -1
- package/dist/instructions.js.map +0 -1
- package/dist/ip-blocklist.d.ts.map +0 -1
- package/dist/ip-blocklist.js.map +0 -1
- package/dist/json.d.ts.map +0 -1
- package/dist/json.js.map +0 -1
- package/dist/language-detection.d.ts.map +0 -1
- package/dist/language-detection.js.map +0 -1
- package/dist/markdown-cleanup.d.ts.map +0 -1
- package/dist/markdown-cleanup.js.map +0 -1
- package/dist/mcp-validator.d.ts.map +0 -1
- package/dist/mcp-validator.js.map +0 -1
- package/dist/mcp.d.ts.map +0 -1
- package/dist/mcp.js.map +0 -1
- package/dist/observability.d.ts.map +0 -1
- package/dist/observability.js.map +0 -1
- package/dist/prompts.d.ts.map +0 -1
- package/dist/prompts.js.map +0 -1
- package/dist/resources.d.ts.map +0 -1
- package/dist/resources.js.map +0 -1
- package/dist/server-tuning.d.ts.map +0 -1
- package/dist/server-tuning.js.map +0 -1
- package/dist/session.d.ts.map +0 -1
- package/dist/session.js.map +0 -1
- package/dist/timer-utils.d.ts +0 -6
- package/dist/timer-utils.d.ts.map +0 -1
- package/dist/timer-utils.js +0 -27
- package/dist/timer-utils.js.map +0 -1
- package/dist/tool-errors.d.ts.map +0 -1
- package/dist/tool-errors.js.map +0 -1
- package/dist/tool-pipeline.d.ts.map +0 -1
- package/dist/tool-pipeline.js.map +0 -1
- package/dist/tool-progress.d.ts.map +0 -1
- package/dist/tool-progress.js.map +0 -1
- package/dist/tools.d.ts +0 -54
- package/dist/tools.d.ts.map +0 -1
- package/dist/tools.js.map +0 -1
- package/dist/type-guards.d.ts.map +0 -1
- package/dist/type-guards.js.map +0 -1
- /package/dist/{cache.d.ts → lib/cache.d.ts} +0 -0
- /package/dist/{crypto.d.ts → lib/crypto.d.ts} +0 -0
- /package/dist/{crypto.js → lib/crypto.js} +0 -0
- /package/dist/{dom-noise-removal.d.ts → lib/dom-noise-removal.d.ts} +0 -0
- /package/dist/{dom-noise-removal.js → lib/dom-noise-removal.js} +0 -0
- /package/dist/{download.d.ts → lib/download.d.ts} +0 -0
- /package/dist/{download.js → lib/download.js} +0 -0
- /package/dist/{fetch-content.d.ts → lib/fetch-content.d.ts} +0 -0
- /package/dist/{fetch-content.js → lib/fetch-content.js} +0 -0
- /package/dist/{fetch-stream.d.ts → lib/fetch-stream.d.ts} +0 -0
- /package/dist/{fetch-stream.js → lib/fetch-stream.js} +0 -0
- /package/dist/{fetch.d.ts → lib/fetch.d.ts} +0 -0
- /package/dist/{host-normalization.d.ts → lib/host-normalization.d.ts} +0 -0
- /package/dist/{host-normalization.js → lib/host-normalization.js} +0 -0
- /package/dist/{ip-blocklist.d.ts → lib/ip-blocklist.d.ts} +0 -0
- /package/dist/{ip-blocklist.js → lib/ip-blocklist.js} +0 -0
- /package/dist/{json.d.ts → lib/json.d.ts} +0 -0
- /package/dist/{json.js → lib/json.js} +0 -0
- /package/dist/{language-detection.d.ts → lib/language-detection.d.ts} +0 -0
- /package/dist/{language-detection.js → lib/language-detection.js} +0 -0
- /package/dist/{mcp-validator.d.ts → lib/mcp-validator.d.ts} +0 -0
- /package/dist/{observability.d.ts → lib/observability.d.ts} +0 -0
- /package/dist/{observability.js → lib/observability.js} +0 -0
- /package/dist/{server-tuning.d.ts → lib/server-tuning.d.ts} +0 -0
- /package/dist/{server-tuning.js → lib/server-tuning.js} +0 -0
- /package/dist/{tool-errors.d.ts → lib/tool-errors.d.ts} +0 -0
- /package/dist/{type-guards.d.ts → lib/type-guards.d.ts} +0 -0
- /package/dist/{type-guards.js → lib/type-guards.js} +0 -0
- /package/dist/{instructions.d.ts → resources/instructions.d.ts} +0 -0
|
@@ -4,14 +4,14 @@ import { performance } from 'node:perf_hooks';
|
|
|
4
4
|
import { isProbablyReaderable, Readability } from '@mozilla/readability';
|
|
5
5
|
import { parseHTML } from 'linkedom';
|
|
6
6
|
import { NodeHtmlMarkdown, } from 'node-html-markdown';
|
|
7
|
-
import { config } from '../config.js';
|
|
8
|
-
import { removeNoiseFromHtml } from '../dom-noise-removal.js';
|
|
9
|
-
import { FetchError, getErrorMessage } from '../errors.js';
|
|
10
|
-
import { isRawTextContentUrl } from '../fetch.js';
|
|
11
|
-
import { detectLanguageFromCode, resolveLanguageFromAttributes, } from '../language-detection.js';
|
|
12
|
-
import { addSourceToMarkdown, buildMetadataFooter, cleanupMarkdownArtifacts, extractTitleFromRawMarkdown, isRawTextContent, } from '../markdown-cleanup.js';
|
|
13
|
-
import { getOperationId, getRequestId, logDebug, logError, logInfo, logWarn, redactUrl, } from '../observability.js';
|
|
14
|
-
import { isLikeNode, isObject } from '../type-guards.js';
|
|
7
|
+
import { config } from '../lib/config.js';
|
|
8
|
+
import { removeNoiseFromHtml } from '../lib/dom-noise-removal.js';
|
|
9
|
+
import { FetchError, getErrorMessage } from '../lib/errors.js';
|
|
10
|
+
import { isRawTextContentUrl } from '../lib/fetch.js';
|
|
11
|
+
import { detectLanguageFromCode, resolveLanguageFromAttributes, } from '../lib/language-detection.js';
|
|
12
|
+
import { addSourceToMarkdown, buildMetadataFooter, cleanupMarkdownArtifacts, extractTitleFromRawMarkdown, isRawTextContent, } from '../lib/markdown-cleanup.js';
|
|
13
|
+
import { getOperationId, getRequestId, logDebug, logError, logInfo, logWarn, redactUrl, } from '../lib/observability.js';
|
|
14
|
+
import { isLikeNode, isObject } from '../lib/type-guards.js';
|
|
15
15
|
import { getOrCreateWorkerPool, getWorkerPoolStats, shutdownWorkerPool, } from './worker-pool.js';
|
|
16
16
|
const utf8Decoder = new TextDecoder('utf-8');
|
|
17
17
|
function decodeInput(input, encoding) {
|
|
@@ -213,7 +213,11 @@ function trimUtf8Buffer(buffer, maxBytes) {
|
|
|
213
213
|
function trimDanglingTagFragment(content) {
|
|
214
214
|
const lastOpen = content.lastIndexOf('<');
|
|
215
215
|
const lastClose = content.lastIndexOf('>');
|
|
216
|
-
|
|
216
|
+
if (lastOpen > lastClose &&
|
|
217
|
+
/^<([a-zA-Z/!?]|$)/.test(content.substring(lastOpen))) {
|
|
218
|
+
return content.substring(0, lastOpen);
|
|
219
|
+
}
|
|
220
|
+
return content;
|
|
217
221
|
}
|
|
218
222
|
function truncateHtml(html, inputTruncated = false) {
|
|
219
223
|
const maxSize = config.constants.maxHtmlSize;
|
|
@@ -270,24 +274,19 @@ function mergeMetadata(early, late) {
|
|
|
270
274
|
if (!early)
|
|
271
275
|
return late;
|
|
272
276
|
const merged = {};
|
|
273
|
-
const
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
merged.image = image;
|
|
287
|
-
if (publishedAt !== undefined)
|
|
288
|
-
merged.publishedAt = publishedAt;
|
|
289
|
-
if (modifiedAt !== undefined)
|
|
290
|
-
merged.modifiedAt = modifiedAt;
|
|
277
|
+
const keys = [
|
|
278
|
+
'title',
|
|
279
|
+
'description',
|
|
280
|
+
'author',
|
|
281
|
+
'image',
|
|
282
|
+
'publishedAt',
|
|
283
|
+
'modifiedAt',
|
|
284
|
+
];
|
|
285
|
+
for (const key of keys) {
|
|
286
|
+
const value = late[key] ?? early[key];
|
|
287
|
+
if (value !== undefined)
|
|
288
|
+
merged[key] = value;
|
|
289
|
+
}
|
|
291
290
|
return merged;
|
|
292
291
|
}
|
|
293
292
|
const META_PROPERTY_HANDLERS = new Map([
|
|
@@ -653,9 +652,6 @@ function isCodeBlock(parent) {
|
|
|
653
652
|
const tagName = getTagName(parent);
|
|
654
653
|
return tagName === 'PRE' || tagName === 'WRAPPED-PRE';
|
|
655
654
|
}
|
|
656
|
-
function isAnchor(node) {
|
|
657
|
-
return getTagName(node) === 'A';
|
|
658
|
-
}
|
|
659
655
|
function resolveAttributeLanguage(node) {
|
|
660
656
|
const getAttribute = hasGetAttribute(node)
|
|
661
657
|
? node.getAttribute.bind(node)
|
|
@@ -762,7 +758,7 @@ function resolveImageSrc(getAttribute) {
|
|
|
762
758
|
function buildImageTranslator(ctx) {
|
|
763
759
|
if (!isObject(ctx))
|
|
764
760
|
return { content: '' };
|
|
765
|
-
const { node
|
|
761
|
+
const { node } = ctx;
|
|
766
762
|
const getAttribute = hasGetAttribute(node)
|
|
767
763
|
? node.getAttribute.bind(node)
|
|
768
764
|
: undefined;
|
|
@@ -770,10 +766,7 @@ function buildImageTranslator(ctx) {
|
|
|
770
766
|
const existingAlt = getAttribute?.('alt') ?? '';
|
|
771
767
|
const alt = existingAlt.trim() || deriveAltFromImageUrl(src);
|
|
772
768
|
const markdown = `![${alt}](${src})`;
|
|
773
|
-
|
|
774
|
-
return { content: markdown };
|
|
775
|
-
}
|
|
776
|
-
return { content: `\n\n${markdown}\n\n` };
|
|
769
|
+
return { content: markdown };
|
|
777
770
|
}
|
|
778
771
|
const GFM_ALERT_MAP = new Map([
|
|
779
772
|
['note', 'NOTE'],
|
|
@@ -831,6 +824,112 @@ function buildPreTranslator(ctx) {
|
|
|
831
824
|
postprocess: createCodeBlockPostprocessor(attributeLanguage),
|
|
832
825
|
};
|
|
833
826
|
}
|
|
827
|
+
function getNodeAttr(node) {
|
|
828
|
+
if (!isLikeNode(node))
|
|
829
|
+
return undefined;
|
|
830
|
+
return typeof node.getAttribute === 'function'
|
|
831
|
+
? node.getAttribute.bind(node)
|
|
832
|
+
: undefined;
|
|
833
|
+
}
|
|
834
|
+
function buildDivTranslator(ctx) {
|
|
835
|
+
if (!isObject(ctx))
|
|
836
|
+
return {};
|
|
837
|
+
const { node } = ctx;
|
|
838
|
+
const getAttribute = getNodeAttr(node);
|
|
839
|
+
if (!getAttribute)
|
|
840
|
+
return {};
|
|
841
|
+
const className = getAttribute('class') ?? '';
|
|
842
|
+
if (className.includes('mermaid')) {
|
|
843
|
+
return {
|
|
844
|
+
noEscape: true,
|
|
845
|
+
preserveWhitespace: true,
|
|
846
|
+
postprocess: ({ content }) => `\n\n\`\`\`mermaid\n${content.trim()}\n\`\`\`\n\n`,
|
|
847
|
+
};
|
|
848
|
+
}
|
|
849
|
+
const isAdmonition = className.includes('admonition') ||
|
|
850
|
+
className.includes('callout') ||
|
|
851
|
+
className.includes('custom-block') ||
|
|
852
|
+
getAttribute('role') === 'alert' ||
|
|
853
|
+
/\b(note|tip|info|warning|danger|caution|important)\b/i.test(className);
|
|
854
|
+
if (isAdmonition) {
|
|
855
|
+
return {
|
|
856
|
+
postprocess: ({ content }) => {
|
|
857
|
+
const alertType = resolveGfmAlertType(className);
|
|
858
|
+
const lines = content.trim().split('\n');
|
|
859
|
+
const header = alertType ? `> [!${alertType}]\n` : '';
|
|
860
|
+
return `\n\n${header}> ${lines.join('\n> ')}\n\n`;
|
|
861
|
+
},
|
|
862
|
+
};
|
|
863
|
+
}
|
|
864
|
+
if (!className.includes('type'))
|
|
865
|
+
return {};
|
|
866
|
+
return {
|
|
867
|
+
postprocess: ({ content }) => {
|
|
868
|
+
const lines = content.split('\n');
|
|
869
|
+
const separated = [];
|
|
870
|
+
for (let i = 0; i < lines.length; i++) {
|
|
871
|
+
const line = lines[i] ?? '';
|
|
872
|
+
const nextLine = i < lines.length - 1 ? (lines[i + 1] ?? '') : '';
|
|
873
|
+
separated.push(line);
|
|
874
|
+
if (line.trim() &&
|
|
875
|
+
nextLine.trim() &&
|
|
876
|
+
line.includes(':') &&
|
|
877
|
+
nextLine.includes(':') &&
|
|
878
|
+
!line.startsWith(' ') &&
|
|
879
|
+
!nextLine.startsWith(' ')) {
|
|
880
|
+
separated.push('');
|
|
881
|
+
}
|
|
882
|
+
}
|
|
883
|
+
return separated.join('\n');
|
|
884
|
+
},
|
|
885
|
+
};
|
|
886
|
+
}
|
|
887
|
+
function buildSectionTranslator(ctx) {
|
|
888
|
+
if (isObject(ctx)) {
|
|
889
|
+
const { node } = ctx;
|
|
890
|
+
const getAttribute = getNodeAttr(node);
|
|
891
|
+
if (getAttribute?.('class')?.includes('tsd-member')) {
|
|
892
|
+
return {
|
|
893
|
+
postprocess: ({ content }) => `\n\n \n\n${content}\n\n`,
|
|
894
|
+
};
|
|
895
|
+
}
|
|
896
|
+
}
|
|
897
|
+
return {
|
|
898
|
+
postprocess: ({ content }) => `\n\n${content}\n\n`,
|
|
899
|
+
};
|
|
900
|
+
}
|
|
901
|
+
function buildSpanTranslator(ctx) {
|
|
902
|
+
if (!isObject(ctx))
|
|
903
|
+
return {};
|
|
904
|
+
const { node } = ctx;
|
|
905
|
+
const getAttribute = getNodeAttr(node);
|
|
906
|
+
if (!getAttribute)
|
|
907
|
+
return {};
|
|
908
|
+
const dataAs = getAttribute('data-as') ?? '';
|
|
909
|
+
if (dataAs === 'p') {
|
|
910
|
+
return {
|
|
911
|
+
postprocess: ({ content }) => `\n\n${content.trim()}\n\n`,
|
|
912
|
+
};
|
|
913
|
+
}
|
|
914
|
+
return {};
|
|
915
|
+
}
|
|
916
|
+
function buildMermaidPreTranslator(ctx) {
|
|
917
|
+
if (!isObject(ctx))
|
|
918
|
+
return buildPreTranslator(ctx);
|
|
919
|
+
const { node } = ctx;
|
|
920
|
+
const getAttribute = getNodeAttr(node);
|
|
921
|
+
if (!getAttribute)
|
|
922
|
+
return buildPreTranslator(ctx);
|
|
923
|
+
const className = getAttribute('class') ?? '';
|
|
924
|
+
if (className.includes('mermaid')) {
|
|
925
|
+
return {
|
|
926
|
+
noEscape: true,
|
|
927
|
+
preserveWhitespace: true,
|
|
928
|
+
postprocess: ({ content }) => `\n\n\`\`\`mermaid\n${content.trim()}\n\`\`\`\n\n`,
|
|
929
|
+
};
|
|
930
|
+
}
|
|
931
|
+
return buildPreTranslator(ctx);
|
|
932
|
+
}
|
|
834
933
|
function createCustomTranslators() {
|
|
835
934
|
return {
|
|
836
935
|
code: (ctx) => buildCodeTranslator(ctx),
|
|
@@ -866,61 +965,7 @@ function createCustomTranslators() {
|
|
|
866
965
|
}
|
|
867
966
|
return { content: items ? `\n${items}\n` : '' };
|
|
868
967
|
},
|
|
869
|
-
div: (ctx) => {
|
|
870
|
-
if (!isObject(ctx))
|
|
871
|
-
return {};
|
|
872
|
-
const { node } = ctx;
|
|
873
|
-
if (!isLikeNode(node))
|
|
874
|
-
return {};
|
|
875
|
-
const getAttribute = typeof node.getAttribute === 'function'
|
|
876
|
-
? node.getAttribute.bind(node)
|
|
877
|
-
: undefined;
|
|
878
|
-
const className = getAttribute?.('class') ?? '';
|
|
879
|
-
if (className.includes('mermaid')) {
|
|
880
|
-
return {
|
|
881
|
-
noEscape: true,
|
|
882
|
-
preserveWhitespace: true,
|
|
883
|
-
postprocess: ({ content }) => `\n\n\`\`\`mermaid\n${content.trim()}\n\`\`\`\n\n`,
|
|
884
|
-
};
|
|
885
|
-
}
|
|
886
|
-
const isAdmonition = className.includes('admonition') ||
|
|
887
|
-
className.includes('callout') ||
|
|
888
|
-
className.includes('custom-block') ||
|
|
889
|
-
getAttribute?.('role') === 'alert' ||
|
|
890
|
-
/\b(note|tip|info|warning|danger|caution|important)\b/i.test(className);
|
|
891
|
-
if (isAdmonition) {
|
|
892
|
-
return {
|
|
893
|
-
postprocess: ({ content }) => {
|
|
894
|
-
const alertType = resolveGfmAlertType(className);
|
|
895
|
-
const lines = content.trim().split('\n');
|
|
896
|
-
const header = alertType ? `> [!${alertType}]\n` : '';
|
|
897
|
-
return `\n\n${header}> ${lines.join('\n> ')}\n\n`;
|
|
898
|
-
},
|
|
899
|
-
};
|
|
900
|
-
}
|
|
901
|
-
if (!className.includes('type'))
|
|
902
|
-
return {};
|
|
903
|
-
return {
|
|
904
|
-
postprocess: ({ content }) => {
|
|
905
|
-
const lines = content.split('\n');
|
|
906
|
-
const separated = [];
|
|
907
|
-
for (let i = 0; i < lines.length; i++) {
|
|
908
|
-
const line = lines[i] ?? '';
|
|
909
|
-
const nextLine = i < lines.length - 1 ? (lines[i + 1] ?? '') : '';
|
|
910
|
-
separated.push(line);
|
|
911
|
-
if (line.trim() &&
|
|
912
|
-
nextLine.trim() &&
|
|
913
|
-
line.includes(':') &&
|
|
914
|
-
nextLine.includes(':') &&
|
|
915
|
-
!line.startsWith(' ') &&
|
|
916
|
-
!nextLine.startsWith(' ')) {
|
|
917
|
-
separated.push('');
|
|
918
|
-
}
|
|
919
|
-
}
|
|
920
|
-
return separated.join('\n');
|
|
921
|
-
},
|
|
922
|
-
};
|
|
923
|
-
},
|
|
968
|
+
div: buildDivTranslator,
|
|
924
969
|
kbd: () => ({
|
|
925
970
|
postprocess: ({ content }) => `\`${content}\``,
|
|
926
971
|
}),
|
|
@@ -933,24 +978,7 @@ function createCustomTranslators() {
|
|
|
933
978
|
sup: () => ({
|
|
934
979
|
postprocess: ({ content }) => `^${content}^`,
|
|
935
980
|
}),
|
|
936
|
-
section: (ctx) => {
|
|
937
|
-
if (isObject(ctx)) {
|
|
938
|
-
const { node } = ctx;
|
|
939
|
-
if (isLikeNode(node)) {
|
|
940
|
-
const getAttribute = typeof node.getAttribute === 'function'
|
|
941
|
-
? node.getAttribute.bind(node)
|
|
942
|
-
: undefined;
|
|
943
|
-
if (getAttribute?.('class')?.includes('tsd-member')) {
|
|
944
|
-
return {
|
|
945
|
-
postprocess: ({ content }) => `\n\n \n\n${content}\n\n`,
|
|
946
|
-
};
|
|
947
|
-
}
|
|
948
|
-
}
|
|
949
|
-
}
|
|
950
|
-
return {
|
|
951
|
-
postprocess: ({ content }) => `\n\n${content}\n\n`,
|
|
952
|
-
};
|
|
953
|
-
},
|
|
981
|
+
section: buildSectionTranslator,
|
|
954
982
|
details: () => ({
|
|
955
983
|
postprocess: ({ content }) => {
|
|
956
984
|
const trimmed = content.trim();
|
|
@@ -962,43 +990,8 @@ function createCustomTranslators() {
|
|
|
962
990
|
summary: () => ({
|
|
963
991
|
postprocess: ({ content }) => `${content.trim()}\n\n`,
|
|
964
992
|
}),
|
|
965
|
-
span: (ctx) => {
|
|
966
|
-
|
|
967
|
-
return {};
|
|
968
|
-
const { node } = ctx;
|
|
969
|
-
if (!isLikeNode(node))
|
|
970
|
-
return {};
|
|
971
|
-
const getAttribute = typeof node.getAttribute === 'function'
|
|
972
|
-
? node.getAttribute.bind(node)
|
|
973
|
-
: undefined;
|
|
974
|
-
const dataAs = getAttribute?.('data-as') ?? '';
|
|
975
|
-
if (dataAs === 'p') {
|
|
976
|
-
return {
|
|
977
|
-
postprocess: ({ content }) => `\n\n${content.trim()}\n\n`,
|
|
978
|
-
};
|
|
979
|
-
}
|
|
980
|
-
return {};
|
|
981
|
-
},
|
|
982
|
-
pre: (ctx) => {
|
|
983
|
-
if (!isObject(ctx))
|
|
984
|
-
return buildPreTranslator(ctx);
|
|
985
|
-
const { node } = ctx;
|
|
986
|
-
if (!isLikeNode(node)) {
|
|
987
|
-
return buildPreTranslator(ctx);
|
|
988
|
-
}
|
|
989
|
-
const getAttribute = typeof node.getAttribute === 'function'
|
|
990
|
-
? node.getAttribute.bind(node)
|
|
991
|
-
: undefined;
|
|
992
|
-
const className = getAttribute?.('class') ?? '';
|
|
993
|
-
if (className.includes('mermaid')) {
|
|
994
|
-
return {
|
|
995
|
-
noEscape: true,
|
|
996
|
-
preserveWhitespace: true,
|
|
997
|
-
postprocess: ({ content }) => `\n\n\`\`\`mermaid\n${content.trim()}\n\`\`\`\n\n`,
|
|
998
|
-
};
|
|
999
|
-
}
|
|
1000
|
-
return buildPreTranslator(ctx);
|
|
1001
|
-
},
|
|
993
|
+
span: buildSpanTranslator,
|
|
994
|
+
pre: buildMermaidPreTranslator,
|
|
1002
995
|
};
|
|
1003
996
|
}
|
|
1004
997
|
let markdownConverter = null;
|
|
@@ -1127,8 +1120,8 @@ function resolveRelativeUrls(markdown, baseUrl, signal) {
|
|
|
1127
1120
|
let lastIndex = 0;
|
|
1128
1121
|
let lineCount = 0;
|
|
1129
1122
|
while (lastIndex < len) {
|
|
1130
|
-
if (++lineCount % 500 === 0
|
|
1131
|
-
|
|
1123
|
+
if (++lineCount % 500 === 0) {
|
|
1124
|
+
abortPolicy.throwIfAborted(signal, baseUrl, 'markdown:resolve-urls');
|
|
1132
1125
|
}
|
|
1133
1126
|
let nextIndex = markdown.indexOf('\n', lastIndex);
|
|
1134
1127
|
let line;
|
|
@@ -1315,7 +1308,12 @@ function getTextContentSkippingHidden(node, parts) {
|
|
|
1315
1308
|
}
|
|
1316
1309
|
if (nodeType !== 1)
|
|
1317
1310
|
return;
|
|
1318
|
-
const
|
|
1311
|
+
const element = node;
|
|
1312
|
+
if (element.hasAttribute('hidden') ||
|
|
1313
|
+
element.getAttribute('aria-hidden') === 'true') {
|
|
1314
|
+
return;
|
|
1315
|
+
}
|
|
1316
|
+
const { tagName } = element;
|
|
1319
1317
|
if (tagName === 'SCRIPT' || tagName === 'STYLE' || tagName === 'NOSCRIPT')
|
|
1320
1318
|
return;
|
|
1321
1319
|
const { childNodes } = node;
|
|
@@ -1479,19 +1477,21 @@ function shouldUseArticleContent(article, originalHtmlOrDocument) {
|
|
|
1479
1477
|
function buildContentSource(params) {
|
|
1480
1478
|
const { html, url, article, extractedMeta, includeMetadata, useArticleContent, document, truncated, skipNoiseRemoval, signal, } = params;
|
|
1481
1479
|
const metadata = createContentMetadataBlock(url, article, extractedMeta, useArticleContent, includeMetadata);
|
|
1480
|
+
const base = {
|
|
1481
|
+
favicon: extractedMeta.favicon,
|
|
1482
|
+
metadata,
|
|
1483
|
+
extractedMetadata: extractedMeta,
|
|
1484
|
+
truncated,
|
|
1485
|
+
};
|
|
1482
1486
|
if (useArticleContent && article) {
|
|
1483
|
-
// Readability output can still be noisy (unless user requested skip).
|
|
1484
1487
|
const cleanedArticleHtml = skipNoiseRemoval
|
|
1485
1488
|
? article.content
|
|
1486
1489
|
: removeNoiseFromHtml(article.content, undefined, url, signal);
|
|
1487
1490
|
return {
|
|
1491
|
+
...base,
|
|
1488
1492
|
sourceHtml: cleanedArticleHtml,
|
|
1489
1493
|
title: article.title,
|
|
1490
|
-
favicon: extractedMeta.favicon,
|
|
1491
|
-
metadata,
|
|
1492
|
-
extractedMetadata: extractedMeta,
|
|
1493
1494
|
skipNoiseRemoval: true,
|
|
1494
|
-
truncated,
|
|
1495
1495
|
};
|
|
1496
1496
|
}
|
|
1497
1497
|
if (document) {
|
|
@@ -1499,36 +1499,18 @@ function buildContentSource(params) {
|
|
|
1499
1499
|
? html
|
|
1500
1500
|
: removeNoiseFromHtml(html, document, url, signal);
|
|
1501
1501
|
const contentRoot = findContentRoot(document);
|
|
1502
|
-
if (contentRoot) {
|
|
1503
|
-
return {
|
|
1504
|
-
sourceHtml: contentRoot,
|
|
1505
|
-
title: extractedMeta.title,
|
|
1506
|
-
favicon: extractedMeta.favicon,
|
|
1507
|
-
metadata,
|
|
1508
|
-
extractedMetadata: extractedMeta,
|
|
1509
|
-
skipNoiseRemoval: true,
|
|
1510
|
-
document,
|
|
1511
|
-
truncated,
|
|
1512
|
-
};
|
|
1513
|
-
}
|
|
1514
1502
|
return {
|
|
1515
|
-
|
|
1503
|
+
...base,
|
|
1504
|
+
sourceHtml: contentRoot ?? cleanedHtml,
|
|
1516
1505
|
title: extractedMeta.title,
|
|
1517
|
-
favicon: extractedMeta.favicon,
|
|
1518
|
-
metadata,
|
|
1519
|
-
extractedMetadata: extractedMeta,
|
|
1520
1506
|
skipNoiseRemoval: true,
|
|
1521
1507
|
document,
|
|
1522
|
-
truncated,
|
|
1523
1508
|
};
|
|
1524
1509
|
}
|
|
1525
1510
|
return {
|
|
1511
|
+
...base,
|
|
1526
1512
|
sourceHtml: html,
|
|
1527
1513
|
title: extractedMeta.title,
|
|
1528
|
-
favicon: extractedMeta.favicon,
|
|
1529
|
-
metadata,
|
|
1530
|
-
extractedMetadata: extractedMeta,
|
|
1531
|
-
truncated,
|
|
1532
1514
|
};
|
|
1533
1515
|
}
|
|
1534
1516
|
function resolveContentSource(params) {
|