@j0hanz/fetch-url-mcp 1.12.7 → 1.12.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/http/auth.d.ts +2 -2
- package/dist/http/auth.d.ts.map +1 -1
- package/dist/http/auth.js +4 -5
- package/dist/http/index.d.ts +6 -0
- package/dist/http/index.d.ts.map +1 -0
- package/dist/http/index.js +5 -0
- package/dist/http/native.d.ts +73 -0
- package/dist/http/native.d.ts.map +1 -1
- package/dist/http/native.js +554 -10
- package/dist/http/rate-limit.d.ts +1 -1
- package/dist/http/rate-limit.d.ts.map +1 -1
- package/dist/http/rate-limit.js +3 -4
- package/dist/index.d.ts +17 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +67 -6
- package/dist/lib/config.js +2 -2
- package/dist/lib/core.d.ts +56 -4
- package/dist/lib/core.d.ts.map +1 -1
- package/dist/lib/core.js +155 -4
- package/dist/lib/error/classes.d.ts +19 -0
- package/dist/lib/error/classes.d.ts.map +1 -0
- package/dist/lib/error/classes.js +107 -0
- package/dist/lib/error/classify.d.ts +4 -0
- package/dist/lib/error/classify.d.ts.map +1 -0
- package/dist/lib/error/classify.js +154 -0
- package/dist/lib/error/codes.d.ts +23 -0
- package/dist/lib/error/codes.d.ts.map +1 -0
- package/dist/lib/error/codes.js +22 -0
- package/dist/lib/error/index.d.ts +6 -0
- package/dist/lib/error/index.d.ts.map +1 -0
- package/dist/lib/error/index.js +5 -0
- package/dist/lib/{error-messages.d.ts → error/messages.d.ts} +2 -2
- package/dist/lib/error/messages.d.ts.map +1 -0
- package/dist/lib/{error-messages.js → error/messages.js} +2 -2
- package/dist/lib/{tool-errors.d.ts → error/payload.d.ts} +7 -13
- package/dist/lib/error/payload.d.ts.map +1 -0
- package/dist/lib/error/payload.js +108 -0
- package/dist/lib/mcp-interop.d.ts.map +1 -1
- package/dist/lib/mcp-interop.js +4 -6
- package/dist/lib/net/http.d.ts.map +1 -0
- package/dist/lib/{http.js → net/http.js} +4 -7
- package/dist/lib/net/index.d.ts +4 -0
- package/dist/lib/net/index.d.ts.map +1 -0
- package/dist/lib/net/index.js +3 -0
- package/dist/lib/{fetch-pipeline.d.ts → net/pipeline.d.ts} +3 -3
- package/dist/lib/net/pipeline.d.ts.map +1 -0
- package/dist/lib/{fetch-pipeline.js → net/pipeline.js} +3 -5
- package/dist/lib/{url.d.ts → net/url.d.ts} +1 -1
- package/dist/lib/net/url.d.ts.map +1 -0
- package/dist/lib/{url.js → net/url.js} +3 -5
- package/dist/lib/utils.d.ts +2 -18
- package/dist/lib/utils.d.ts.map +1 -1
- package/dist/lib/utils.js +29 -104
- package/dist/resources/index.d.ts.map +1 -1
- package/dist/resources/index.js +8 -5
- package/dist/schemas.d.ts +1 -1
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +7 -9
- package/dist/tasks/index.d.ts +2 -0
- package/dist/tasks/index.d.ts.map +1 -0
- package/dist/tasks/index.js +1 -0
- package/dist/tasks/manager.d.ts +123 -1
- package/dist/tasks/manager.d.ts.map +1 -1
- package/dist/tasks/manager.js +745 -10
- package/dist/tools/{fetch-url.d.ts → index.d.ts} +4 -5
- package/dist/tools/index.d.ts.map +1 -0
- package/dist/tools/{fetch-url.js → index.js} +6 -8
- package/dist/transform/index.d.ts +279 -0
- package/dist/transform/index.d.ts.map +1 -0
- package/dist/transform/index.js +5234 -0
- package/package.json +2 -2
- package/dist/cli.d.ts +0 -19
- package/dist/cli.d.ts.map +0 -1
- package/dist/cli.js +0 -65
- package/dist/http/health.d.ts +0 -8
- package/dist/http/health.d.ts.map +0 -1
- package/dist/http/health.js +0 -152
- package/dist/http/helpers.d.ts +0 -68
- package/dist/http/helpers.d.ts.map +0 -1
- package/dist/http/helpers.js +0 -402
- package/dist/lib/error-codes.d.ts +0 -13
- package/dist/lib/error-codes.d.ts.map +0 -1
- package/dist/lib/error-codes.js +0 -12
- package/dist/lib/error-messages.d.ts.map +0 -1
- package/dist/lib/fetch-pipeline.d.ts.map +0 -1
- package/dist/lib/http.d.ts.map +0 -1
- package/dist/lib/logger-names.d.ts +0 -16
- package/dist/lib/logger-names.d.ts.map +0 -1
- package/dist/lib/logger-names.js +0 -15
- package/dist/lib/session.d.ts +0 -44
- package/dist/lib/session.d.ts.map +0 -1
- package/dist/lib/session.js +0 -137
- package/dist/lib/tool-errors.d.ts.map +0 -1
- package/dist/lib/tool-errors.js +0 -253
- package/dist/lib/url.d.ts.map +0 -1
- package/dist/lib/zod.d.ts +0 -3
- package/dist/lib/zod.d.ts.map +0 -1
- package/dist/lib/zod.js +0 -27
- package/dist/tasks/call-contract.d.ts +0 -25
- package/dist/tasks/call-contract.d.ts.map +0 -1
- package/dist/tasks/call-contract.js +0 -59
- package/dist/tasks/execution.d.ts +0 -16
- package/dist/tasks/execution.d.ts.map +0 -1
- package/dist/tasks/execution.js +0 -241
- package/dist/tasks/handlers.d.ts +0 -11
- package/dist/tasks/handlers.d.ts.map +0 -1
- package/dist/tasks/handlers.js +0 -157
- package/dist/tasks/owner.d.ts +0 -43
- package/dist/tasks/owner.d.ts.map +0 -1
- package/dist/tasks/owner.js +0 -144
- package/dist/tasks/registry.d.ts +0 -20
- package/dist/tasks/registry.d.ts.map +0 -1
- package/dist/tasks/registry.js +0 -40
- package/dist/tasks/waiters.d.ts +0 -27
- package/dist/tasks/waiters.d.ts.map +0 -1
- package/dist/tasks/waiters.js +0 -114
- package/dist/tools/fetch-url.d.ts.map +0 -1
- package/dist/transform/dom-prep.d.ts +0 -16
- package/dist/transform/dom-prep.d.ts.map +0 -1
- package/dist/transform/dom-prep.js +0 -1287
- package/dist/transform/html-translators.d.ts +0 -5
- package/dist/transform/html-translators.d.ts.map +0 -1
- package/dist/transform/html-translators.js +0 -697
- package/dist/transform/markdown-cleanup.d.ts +0 -10
- package/dist/transform/markdown-cleanup.d.ts.map +0 -1
- package/dist/transform/markdown-cleanup.js +0 -542
- package/dist/transform/metadata.d.ts +0 -18
- package/dist/transform/metadata.d.ts.map +0 -1
- package/dist/transform/metadata.js +0 -462
- package/dist/transform/next-flight.d.ts +0 -2
- package/dist/transform/next-flight.d.ts.map +0 -1
- package/dist/transform/next-flight.js +0 -374
- package/dist/transform/shared.d.ts +0 -8
- package/dist/transform/shared.d.ts.map +0 -1
- package/dist/transform/shared.js +0 -137
- package/dist/transform/transform.d.ts +0 -38
- package/dist/transform/transform.d.ts.map +0 -1
- package/dist/transform/transform.js +0 -1042
- package/dist/transform/types.d.ts +0 -124
- package/dist/transform/types.d.ts.map +0 -1
- package/dist/transform/types.js +0 -5
- package/dist/transform/worker-pool.d.ts +0 -76
- package/dist/transform/worker-pool.d.ts.map +0 -1
- package/dist/transform/worker-pool.js +0 -725
- /package/dist/lib/{http.d.ts → net/http.d.ts} +0 -0
|
@@ -1,5 +0,0 @@
|
|
|
1
|
-
export declare function extractLanguageFromClassName(className: string): string | undefined;
|
|
2
|
-
export declare function resolveLanguageFromAttributes(className: string, dataLang: string): string | undefined;
|
|
3
|
-
export declare function detectLanguageFromCode(code: string): string | undefined;
|
|
4
|
-
export declare function translateHtmlFragmentToMarkdown(html: string): string;
|
|
5
|
-
//# sourceMappingURL=html-translators.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"html-translators.d.ts","sourceRoot":"","sources":["../../src/transform/html-translators.ts"],"names":[],"mappings":"AAkVA,wBAAgB,4BAA4B,CAC1C,SAAS,EAAE,MAAM,GAChB,MAAM,GAAG,SAAS,CAkCpB;AAOD,wBAAgB,6BAA6B,CAC3C,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,MAAM,GACf,MAAM,GAAG,SAAS,CAKpB;AACD,wBAAgB,sBAAsB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAKvE;AA0cD,wBAAgB,+BAA+B,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEpE"}
|
|
@@ -1,697 +0,0 @@
|
|
|
1
|
-
import { NodeHtmlMarkdown, } from 'node-html-markdown';
|
|
2
|
-
import { isHtmlNode, isObject } from '../lib/utils.js';
|
|
3
|
-
import { WP_PHOTON_HOST_PATTERN } from './dom-prep.js';
|
|
4
|
-
// ---------------------------------------------------------------------------
|
|
5
|
-
// Shared constant
|
|
6
|
-
// ---------------------------------------------------------------------------
|
|
7
|
-
// Fence marker and formatter shared by the fenced code blocks this module emits.
const CODE_BLOCK = {
  fence: '```',
  // Wraps `code` in a fenced block; `language` becomes the info string (empty by default).
  format(code, language = '') {
    const opening = `\`\`\`${language}`;
    return `${opening}\n${code}\n\`\`\``;
  },
};
|
|
11
|
-
// Re-emits the trimmed diagram source as a fenced `mermaid` block surrounded by blank lines.
const MERMAID_POSTPROCESS = ({ content }) => {
  const diagram = content.trim();
  return `\n\n\`\`\`mermaid\n${diagram}\n\`\`\`\n\n`;
};

// Translator options for mermaid containers: no escaping, whitespace preserved,
// and the output wrapped by MERMAID_POSTPROCESS.
const MERMAID_TRANSLATOR_CONFIG = {
  postprocess: MERMAID_POSTPROCESS,
  preserveWhitespace: true,
  noEscape: true,
};
|
|
17
|
-
// ---------------------------------------------------------------------------
|
|
18
|
-
// DOM helpers (translator-only)
|
|
19
|
-
// ---------------------------------------------------------------------------
|
|
20
|
-
// Returns the node's tag name upper-cased, or '' when `node` is not an HTML node
// or its `tagName` is not a string.
function getTagName(node) {
  const tag = isHtmlNode(node) ? node.tagName : undefined;
  return typeof tag === 'string' ? tag.toUpperCase() : '';
}
|
|
26
|
-
// Reads the `node` entry from a translator context; undefined when `ctx` is not an object.
function getNode(ctx) {
  if (!isObject(ctx)) {
    return undefined;
  }
  return ctx['node'];
}
|
|
29
|
-
// Reads the `parent` entry from a translator context; undefined when `ctx` is not an object.
function getParent(ctx) {
  if (!isObject(ctx)) {
    return undefined;
  }
  return ctx['parent'];
}
|
|
32
|
-
// Returns the node's `getAttribute` bound to the node, or undefined when `node`
// is not an HTML node or has no callable `getAttribute`.
function getNodeAttr(node) {
  const canReadAttributes = isHtmlNode(node) && typeof node.getAttribute === 'function';
  return canReadAttributes ? node.getAttribute.bind(node) : undefined;
}
|
|
37
|
-
// ---------------------------------------------------------------------------
|
|
38
|
-
// Code translators
|
|
39
|
-
// ---------------------------------------------------------------------------
|
|
40
|
-
// Holds a code sample plus derived views of it (lower-cased text, line array,
// left-trimmed text). Each view is computed on first access and then cached.
class DetectionContext {
  code;
  _lower;
  _lines;
  _trimmedStart;

  constructor(code) {
    this.code = code;
  }

  // Lower-cased copy of the sample.
  get lower() {
    if (this._lower == null) {
      this._lower = this.code.toLowerCase();
    }
    return this._lower;
  }

  // The sample split on LF or CRLF.
  get lines() {
    if (this._lines == null) {
      this._lines = this.code.split(/\r?\n/);
    }
    return this._lines;
  }

  // The sample with leading whitespace removed.
  get trimmedStart() {
    if (this._trimmedStart == null) {
      this._trimmedStart = this.code.trimStart();
    }
    return this._trimmedStart;
  }
}
|
|
58
|
-
// Commands that mark a line as shell when they appear as its first word.
const BASH_COMMANDS = new Set('sudo chmod mkdir cd ls cat echo'.split(' '));
|
|
67
|
-
// Tool names that mark a line as shell only when followed by arguments.
const BASH_PACKAGE_MANAGERS = 'npm yarn pnpm npx brew apt pip cargo go'.split(' ');
|
|
78
|
-
// Type-annotation fragments (with and without a space after the colon) whose
// presence in lower-cased code suggests TypeScript.
const TYPESCRIPT_HINTS = ['string', 'number', 'boolean', 'void', 'any', 'unknown', 'never'].flatMap(
  (typeName) => [`: ${typeName}`, `:${typeName}`],
);
|
|
94
|
-
// Opening-tag prefixes searched for (in lower-cased code) to recognise HTML.
const HTML_TAGS = ['!doctype', 'html', 'head', 'body', 'div', 'span', 'p', 'a', 'script', 'style'].map(
  (tagName) => `<${tagName}`,
);
|
|
106
|
-
// Decides whether a single line looks like shell input: a shebang, a `$ ` prompt,
// a `...>` continuation prompt, a known command as the first word, or a known
// package manager followed by at least one argument.
function isBashLine(line) {
  const text = line.trimStart();
  if (text === '') {
    return false;
  }
  const looksLikePrompt =
    text.startsWith('#!') ||
    text.startsWith('$ ') ||
    /^\s*\.\.\.\\?>\s+\S/m.test(text);
  if (looksLikePrompt) {
    return true;
  }
  const [firstWord] = text.split(' ', 1);
  if (BASH_COMMANDS.has(firstWord)) {
    return true;
  }
  // A bare package-manager name without arguments is not treated as shell.
  const hasArguments = text.includes(' ');
  return hasArguments && BASH_PACKAGE_MANAGERS.includes(firstWord);
}
|
|
122
|
-
// True when at least one of `lines` is recognised by isBashLine.
function detectBashIndicators(lines) {
  return lines.some((line) => isBashLine(line));
}
|
|
125
|
-
// True when any line looks like CSS: either a `.class {` / `#id {` rule opener,
// or a `property: value;` declaration containing no parenthesis. Blank lines and
// lines starting with `# ` or `//` are ignored.
function detectCssStructure(lines) {
  const ruleOpener = /^[.#][A-Za-z_-][\w-]*\s*\{/;
  const declarationStart = /^\s*[a-z][\w-]*\s*:/;
  return lines.some((line) => {
    const text = line.trimStart();
    if (!text || text.startsWith('# ') || text.startsWith('//')) {
      return false;
    }
    if (ruleOpener.test(text)) {
      return true;
    }
    return text.includes(';') && declarationStart.test(text) && !text.includes('(');
  });
}
|
|
141
|
-
// True when any trimmed line has its first colon after at least one character
// and immediately followed by a space or tab (a `key: value` shape).
function detectYamlStructure(lines) {
  return lines.some((line) => {
    const text = line.trim();
    const colonAt = text.indexOf(':');
    if (colonAt <= 0) {
      return false;
    }
    const following = text[colonAt + 1];
    return following === ' ' || following === '\t';
  });
}
|
|
153
|
-
// Ordered language detectors. Each entry has a `lang` label, a `weight`, and a
// `match(ctx)` predicate over a DetectionContext. Entries are listed from the
// highest weight to the lowest.
const LANGUAGES = [
  {
    lang: 'rust',
    weight: 25,
    match(ctx) {
      const text = ctx.lower;
      if (text.includes('let mut')) {
        return true;
      }
      if (/\b(?:fn|impl|struct|enum)\b/.test(text)) {
        return true;
      }
      return text.includes('use ') && text.includes('::');
    },
  },
  {
    lang: 'go',
    weight: 22,
    match(ctx) {
      return ctx.lower.includes('import "') || /\b(?:package|func)\b/.test(ctx.lower);
    },
  },
  {
    lang: 'jsx',
    weight: 22,
    match(ctx) {
      const text = ctx.lower;
      const reactMarkers = ['classname=', 'jsx:', "from 'react'", 'from "react"'];
      if (reactMarkers.some((marker) => text.includes(marker))) {
        return true;
      }
      // Capitalised element tag, optionally with attributes, in the original-case source.
      return /<\/?[A-Z][A-Za-z0-9]*(?:\s+[A-Za-z_:][\w:.-]*(?:\s*=\s*(?:"[^"]*"|'[^']*'|\{[^}]*\}))?)*\s*\/?>/m.test(ctx.code);
    },
  },
  {
    lang: 'typescript',
    weight: 20,
    match(ctx) {
      if (/\b(?:interface|type)\b/.test(ctx.lower)) {
        return true;
      }
      return TYPESCRIPT_HINTS.some((hint) => ctx.lower.includes(hint));
    },
  },
  {
    lang: 'sql',
    weight: 20,
    match(ctx) {
      return /\b(?:select\s+(?:.+?\s+from|[\d*@])|insert\s+into|update\s+.+?\s+set|delete\s+from|create\s+(?:table|database|index|view|function|procedure|trigger|user|role)|alter\s+(?:table|database|index|view))\b/.test(ctx.lower);
    },
  },
  {
    lang: 'html',
    weight: 19,
    match(ctx) {
      return HTML_TAGS.some((tag) => ctx.lower.includes(tag));
    },
  },
  {
    lang: 'python',
    weight: 18,
    match(ctx) {
      // Anything containing a known HTML tag is never classified as Python here.
      if (HTML_TAGS.some((tag) => ctx.lower.includes(tag))) {
        return false;
      }
      const text = ctx.lower;
      const source = ctx.code;
      const sourcePatterns = [
        /^\s*(?:>>>|\.\.\.)\s/m,
        /<(?:QuerySet|[A-Z][A-Za-z0-9_]*:\s)|\bdatetime\.datetime\(|\bDoesNotExist:/,
        /^\s*[A-Za-z_][\w.]*\s*=\s*[A-Z][\w.]*\(/m,
        /^\s*[A-Za-z_][\w.]*\.[A-Za-z_][\w]*\s*$/m,
      ];
      if (sourcePatterns.some((pattern) => pattern.test(source))) {
        return true;
      }
      if (['None', 'True', 'False'].some((literal) => source.includes(literal))) {
        return true;
      }
      if (['print(', '__name__', 'self.', 'elif '].some((marker) => text.includes(marker))) {
        return true;
      }
      if (/\b(?:def |elif |except |finally:|yield |lambda |raise |pass$)/m.test(text)) {
        return true;
      }
      // `import` / `from` / `class` only count when nothing JavaScript-like is present.
      const hasJsSignals =
        /\b(?:const |let |var |function |require\(|=>|===|!==|console\.)/.test(text) ||
        text.includes('{') ||
        text.includes("from '");
      return /\b(?:import|from|class)\b/.test(text) && !hasJsSignals;
    },
  },
  {
    lang: 'css',
    weight: 18,
    match(ctx) {
      if (/@media|@import|@keyframes|@theme\b|@utility\b|@layer\b|@apply\b|@variant\b|@custom-variant\b|@reference\b|@source\b/.test(ctx.lower)) {
        return true;
      }
      return detectCssStructure(ctx.lines);
    },
  },
  {
    lang: 'bash',
    weight: 15,
    match(ctx) {
      return detectBashIndicators(ctx.lines);
    },
  },
  {
    lang: 'yaml',
    weight: 15,
    match(ctx) {
      return detectYamlStructure(ctx.lines);
    },
  },
  {
    lang: 'javascript',
    weight: 15,
    match(ctx) {
      return /\b(?:const|let|var|function|class|async|await|export|import)\b/.test(ctx.lower);
    },
  },
  {
    lang: 'json',
    weight: 10,
    match(ctx) {
      const head = ctx.trimmedStart;
      return head.startsWith('{') || head.startsWith('[');
    },
  },
];
|
|
242
|
-
// Language names accepted by the last-resort `<name>-...` class-token heuristic.
const KNOWN_LANG_PREFIXES = new Set([
  'css',
  'javascript',
  'js',
  'typescript',
  'ts',
  'python',
  'py',
  'html',
  'xml',
  'sql',
  'bash',
  'sh',
  'yaml',
  'json',
  'ruby',
  'go',
  'rust',
  'java',
  'php',
  'c',
  'cpp',
  'swift',
  'kotlin',
  'scss',
  'sass',
  'less',
  'graphql',
  'markdown',
  'md',
]);

/**
 * Extracts a language identifier from an element's `class` attribute value.
 *
 * Tried in order:
 *  1. a token starting with `language-`, `lang-` or `highlight-` (matched
 *     case-insensitively); the remainder is returned in its original case;
 *  2. when a literal `hljs` token is present, the first token that is neither
 *     `hljs` nor `hljs-*`;
 *  3. a token of the form `<known language>-...`, returning the lower-cased prefix.
 *
 * @param {string} className - Raw class attribute value (may be empty).
 * @returns {string | undefined} The language, or undefined when none is found.
 */
export function extractLanguageFromClassName(className) {
  if (!className) {
    return undefined;
  }
  const tokens = className.match(/\S+/g);
  if (!tokens) {
    return undefined;
  }

  const markerPrefixes = ['language-', 'lang-', 'highlight-'];
  for (const token of tokens) {
    const lower = token.toLowerCase();
    const marker = markerPrefixes.find((prefix) => lower.startsWith(prefix));
    if (marker === undefined) {
      continue;
    }
    const language = token.slice(marker.length);
    // A bare marker such as `language-` names no language. Returning '' would
    // be non-nullish and would block callers' `??` fallbacks, so skip it.
    if (language) {
      return language;
    }
  }

  // hljs commonly puts the language in a separate class next to `hljs`.
  if (tokens.includes('hljs')) {
    const languageClass = tokens.find((token) => {
      const lower = token.toLowerCase();
      return lower !== 'hljs' && !lower.startsWith('hljs-');
    });
    if (languageClass) {
      return languageClass;
    }
  }

  // Last resort: a known language name followed by a dash.
  for (const token of tokens) {
    const dashAt = token.indexOf('-');
    if (dashAt > 0) {
      const prefix = token.slice(0, dashAt).toLowerCase();
      if (KNOWN_LANG_PREFIXES.has(prefix)) {
        return prefix;
      }
    }
  }
  return undefined;
}
|
|
310
|
-
// Accepts a data-attribute value only when, after trimming, it consists solely
// of word characters; otherwise yields undefined.
function resolveLanguageFromDataAttribute(dataLang) {
  const candidate = dataLang.trim();
  if (!/^\w+$/.test(candidate)) {
    return undefined;
  }
  return candidate;
}
|
|
314
|
-
// Resolves a language from the class attribute first; the data-attribute value
// is consulted only when the class lookup yields null/undefined.
export function resolveLanguageFromAttributes(className, dataLang) {
  const fromClass = extractLanguageFromClassName(className);
  if (fromClass != null) {
    return fromClass;
  }
  return resolveLanguageFromDataAttribute(dataLang);
}
|
|
318
|
-
// Returns the `lang` of the first LANGUAGES entry whose matcher accepts the
// sample, or undefined for empty / whitespace-only input or when nothing matches.
export function detectLanguageFromCode(code) {
  const hasVisibleText = Boolean(code) && /\S/.test(code);
  if (!hasVisibleText) {
    return undefined;
  }
  const ctx = new DetectionContext(code);
  for (const definition of LANGUAGES) {
    if (definition.match(ctx)) {
      return definition.lang;
    }
  }
  return undefined;
}
|
|
324
|
-
// Builds an inline code span. The delimiter is one backtick longer than the
// longest backtick run inside the text, and the text is padded with a space
// when it starts or ends with a backtick. Blank input yields an empty span.
function buildInlineCode(content) {
  const text = content.trim();
  if (text === '') {
    return '``';
  }
  let longestRun = 0;
  for (const run of text.match(/`+/g) ?? []) {
    longestRun = Math.max(longestRun, run.length);
  }
  const fence = '`'.repeat(longestRun + 1);
  const touchesBacktick = text.startsWith('`') || text.endsWith('`');
  const pad = touchesBacktick ? ' ' : '';
  return [fence, pad, text, pad, fence].join('');
}
|
|
334
|
-
// True when the given parent node is a PRE or WRAPPED-PRE element.
function isCodeBlock(parent) {
  return ['PRE', 'WRAPPED-PRE'].includes(getTagName(parent));
}
|
|
338
|
-
// Resolves a language from the node's `class` and `data-language` attributes;
// a missing attribute (or an unreadable node) is treated as an empty string.
function resolveAttributeLanguage(node) {
  const readAttribute = getNodeAttr(node);
  const attributeOrEmpty = (name) => readAttribute?.(name) ?? '';
  const className = attributeOrEmpty('class');
  const dataLanguage = attributeOrEmpty('data-language');
  return resolveLanguageFromAttributes(className, dataLanguage);
}
|
|
344
|
-
// Looks at the node's direct children for the first CODE element (by
// `rawTagName`, case-insensitively) and resolves the language from its
// attributes. Returns undefined when there is no such child.
function findLanguageFromCodeChild(node) {
  if (!isHtmlNode(node)) {
    return undefined;
  }
  const isCodeElement = (child) => {
    if (!isHtmlNode(child)) {
      return false;
    }
    const rawTag = child.rawTagName;
    return typeof rawTag === 'string' && rawTag.toUpperCase() === 'CODE';
  };
  const codeChild = Array.from(node.childNodes ?? []).find(isCodeElement);
  return codeChild ? resolveAttributeLanguage(codeChild) : undefined;
}
|
|
358
|
-
// Creates a postprocess hook that fences trimmed content as a code block.
// The explicit `language` wins; otherwise the language is detected from the
// content, falling back to no language tag. Blank content yields ''.
function createCodeBlockPostprocessor(language) {
  return function postprocess({ content }) {
    const body = content.trim();
    if (body === '') {
      return '';
    }
    const tag = language ?? detectLanguageFromCode(body) ?? '';
    return CODE_BLOCK.format(body, tag);
  };
}
|
|
367
|
-
// Translator options for inline <code>: content is left unescaped and is
// rendered through buildInlineCode.
function buildInlineCodeTranslator() {
  const postprocess = ({ content }) => buildInlineCode(content);
  return {
    spaceIfRepeatingChar: true,
    noEscape: true,
    postprocess,
  };
}
|
|
374
|
-
// Chooses the <code> translator: raw pass-through options when the parent is a
// code block (PRE / WRAPPED-PRE), otherwise the inline-code translator.
function buildCodeTranslator(ctx) {
  const inlineTranslator = buildInlineCodeTranslator();
  const insideCodeBlock = isCodeBlock(getParent(ctx));
  return insideCodeBlock
    ? { noEscape: true, preserveWhitespace: true }
    : inlineTranslator;
}
|
|
380
|
-
// ---------------------------------------------------------------------------
|
|
381
|
-
// Image translators
|
|
382
|
-
// ---------------------------------------------------------------------------
|
|
383
|
-
// Returns the URL part of the first comma-separated srcset candidate
// (the text before its first whitespace), or '' when there is none.
function extractFirstSrcsetUrl(srcset) {
  const [firstCandidate] = srcset.split(',');
  if (firstCandidate === undefined) {
    return '';
  }
  const [url] = firstCandidate.trim().split(/\s+/);
  return url ?? '';
}
|
|
386
|
-
// Lazy-loading attributes probed, in this order, for a usable image URL.
const LAZY_SRC_ATTRIBUTES = ['src', 'lazy-src', 'lazy', 'original', 'echo', 'srcset'].map(
  (suffix) => `data-${suffix}`,
);
|
|
394
|
-
// True when the string begins with the `data:` scheme.
function isDataUri(value) {
  const dataScheme = 'data:';
  return value.startsWith(dataScheme);
}
|
|
397
|
-
// Matches (case-insensitively) a path whose final segment is one of the listed
// placeholder names followed by a 3-4 letter extension, e.g. "/img/spacer.gif".
const PLACEHOLDER_FILENAME_PATTERN = /(?:^|\/)(?:blank|spacer|placeholder|grey|gray|pixel|loading|lazy|transparent|empty|dummy)\.[a-z]{3,4}$/i;
|
|
398
|
-
// True for data URIs and for URLs whose path ends in a placeholder filename.
// Relative values are parsed against http://localhost; unparsable values are
// not considered placeholders.
function isPlaceholderSrc(value) {
  if (isDataUri(value)) {
    return true;
  }
  const url = URL.parse(value) ?? URL.parse(value, 'http://localhost');
  return url ? PLACEHOLDER_FILENAME_PATTERN.test(url.pathname) : false;
}
|
|
406
|
-
// First srcset URL, or undefined when it is empty or a data URI.
function extractNonDataSrcsetUrl(value) {
  const firstUrl = extractFirstSrcsetUrl(value);
  if (!firstUrl || isDataUri(firstUrl)) {
    return undefined;
  }
  return firstUrl;
}
|
|
410
|
-
// Walks LAZY_SRC_ATTRIBUTES in order and returns the first usable URL.
// Empty values and data URIs are skipped; `data-srcset` contributes the first
// non-data URL of its candidate list. Returns undefined when nothing qualifies.
function resolveLazySrc(getAttribute) {
  for (const attributeName of LAZY_SRC_ATTRIBUTES) {
    const candidate = getAttribute(attributeName);
    if (!candidate || isDataUri(candidate)) {
      continue;
    }
    const resolved =
      attributeName === 'data-srcset' ? extractNonDataSrcsetUrl(candidate) : candidate;
    if (resolved) {
      return resolved;
    }
  }
  return undefined;
}
|
|
425
|
-
// Some sites (notably WordPress with Photon CDN) use a CDN proxy URL in img src while keeping the original same-domain URL in srcset.
|
|
426
|
-
// Since the converter prefers srcset URLs for CDN-hosted images, we need to detect this pattern and extract the canonical URL from srcset to ensure images are correctly resolved, especially when migrating content to a new domain.
|
|
427
|
-
// True when `src` parses as an absolute URL whose hostname matches
// WP_PHOTON_HOST_PATTERN.
function isWpPhotonUrl(src) {
  const url = URL.parse(src);
  if (url === null) {
    return false;
  }
  return WP_PHOTON_HOST_PATTERN.test(url.hostname);
}
|
|
431
|
-
// Picks the URL to use for an <img>. Priority:
//   1. the srcset URL when `src` is a WP Photon proxy URL and srcset has one;
//   2. `src` itself when it is not a placeholder;
//   3. a lazy-loading attribute;
//   4. the srcset URL;
// and '' when none of these applies or no attribute reader is available.
function resolveImageSrc(getAttribute) {
  if (!getAttribute) {
    return '';
  }
  const src = getAttribute('src') ?? '';
  const srcsetUrl = extractNonDataSrcsetUrl(getAttribute('srcset') ?? '');
  if (src) {
    if (srcsetUrl && isWpPhotonUrl(src)) {
      return srcsetUrl;
    }
    if (!isPlaceholderSrc(src)) {
      return src;
    }
  }
  // Lazy attributes are consulted before the native srcset.
  return resolveLazySrc(getAttribute) || srcsetUrl || '';
}
|
|
451
|
-
function deriveAltFromImageUrl(src) {
|
|
452
|
-
if (!src)
|
|
453
|
-
return '';
|
|
454
|
-
const absoluteParsed = URL.parse(src);
|
|
455
|
-
const parsed = absoluteParsed ?? URL.parse(src, 'http://localhost');
|
|
456
|
-
if (!parsed)
|
|
457
|
-
return '';
|
|
458
|
-
if (absoluteParsed &&
|
|
459
|
-
parsed.protocol !== 'http:' &&
|
|
460
|
-
parsed.protocol !== 'https:') {
|
|
461
|
-
return '';
|
|
462
|
-
}
|
|
463
|
-
const match = /\/([^/]+?)(?:\.[^/.]+)?$/.exec(parsed.pathname);
|
|
464
|
-
if (!match?.[1])
|
|
465
|
-
return '';
|
|
466
|
-
return match[1].replace(/[_-]+/g, ' ').trim();
|
|
467
|
-
}
|
|
468
|
-
/**
 * Translator for <img>: emits a Markdown image for the resolved source URL.
 *
 * When no usable source can be resolved, only the trimmed `alt` text is
 * emitted. Otherwise the alt text is the trimmed `alt` attribute, or, when that
 * is empty, text derived from the image URL.
 *
 * @param {unknown} ctx - Translator context carrying the element under `node`.
 * @returns {{ content: string }} Fixed content for the element.
 */
function buildImageTranslator(ctx) {
  const getAttribute = getNodeAttr(getNode(ctx));
  const src = resolveImageSrc(getAttribute);
  const existingAlt = (getAttribute?.('alt') ?? '').trim();
  if (!src) {
    return { content: existingAlt };
  }
  const alt = existingAlt || deriveAltFromImageUrl(src);
  // Previously this returned an empty string, which computed `alt` without
  // using it and dropped every image from the output.
  return { content: `![${alt}](${src})` };
}
|
|
478
|
-
// ---------------------------------------------------------------------------
|
|
479
|
-
// Pre / Mermaid translators
|
|
480
|
-
// ---------------------------------------------------------------------------
|
|
481
|
-
// Translator for <pre>: keeps whitespace, disables escaping, and fences the
// content. The language comes from the element's own attributes, else from a
// direct CODE child. A context without a node gets empty options.
function buildPreTranslator(ctx) {
  const node = getNode(ctx);
  if (!node) {
    return {};
  }
  const language = resolveAttributeLanguage(node) ?? findLanguageFromCodeChild(node);
  const postprocess = createCodeBlockPostprocessor(language);
  return { noEscape: true, preserveWhitespace: true, postprocess };
}
|
|
492
|
-
// Translator for <pre>: elements whose class contains "mermaid" use the mermaid
// config; everything else goes through the regular <pre> translator.
function buildMermaidPreTranslator(ctx) {
  const classAttribute = getNodeAttr(getNode(ctx))?.('class') ?? '';
  return classAttribute.includes('mermaid')
    ? MERMAID_TRANSLATOR_CONFIG
    : buildPreTranslator(ctx);
}
|
|
500
|
-
// ---------------------------------------------------------------------------
|
|
501
|
-
// Block-level translators (div, section, span, table, dl, etc.)
|
|
502
|
-
// ---------------------------------------------------------------------------
|
|
503
|
-
// Maps lower-cased class tokens to the alert label written into `[!LABEL]`.
const GFM_ALERT_MAP = new Map(
  Object.entries({
    note: 'NOTE',
    info: 'NOTE',
    tip: 'TIP',
    hint: 'TIP',
    warning: 'WARNING',
    warn: 'WARNING',
    caution: 'CAUTION',
    danger: 'CAUTION',
    important: 'IMPORTANT',
  }),
);
|
|
514
|
-
// Returns the alert label for the first whitespace-separated class token
// (compared in lower case) found in GFM_ALERT_MAP, or undefined.
function resolveGfmAlertType(className) {
  const knownToken = className
    .toLowerCase()
    .split(/\s+/)
    .find((token) => GFM_ALERT_MAP.has(token));
  return knownToken === undefined ? undefined : GFM_ALERT_MAP.get(knownToken);
}
|
|
523
|
-
// Builds blockquote-style translator options for admonition-like containers.
// A container qualifies when its class contains "admonition", "callout" or
// "custom-block", when role="alert", or when an alert type was resolved.
// Returns undefined otherwise. Output lines are prefixed with "> " and, when an
// alert type is given, preceded by a "> [!TYPE]" header line.
function buildAdmonitionConfig(className, alertType, getAttribute) {
  const markerClasses = ['admonition', 'callout', 'custom-block'];
  const isAdmonition =
    markerClasses.some((marker) => className.includes(marker)) ||
    getAttribute('role') === 'alert' ||
    alertType !== undefined;
  if (!isAdmonition) {
    return undefined;
  }
  const header = alertType ? `> [!${alertType}]\n` : '';
  return {
    postprocess({ content }) {
      const quoted = content
        .trim()
        .split('\n')
        .map((line) => `> ${line}`)
        .join('\n');
      return `\n\n${header}${quoted}\n\n`;
    },
  };
}
|
|
539
|
-
// Translator options that insert a blank line between two consecutive lines
// when both are non-blank, contain a colon, and do not start with a space.
function buildTypeSpacingConfig() {
  const isTopLevelEntry = (line) =>
    Boolean(line.trim()) && line.includes(':') && !line.startsWith(' ');
  return {
    postprocess({ content }) {
      const lines = content.split('\n');
      return lines
        .flatMap((line, index) => {
          const next = lines[index + 1];
          const needsGap =
            next !== undefined && isTopLevelEntry(line) && isTopLevelEntry(next);
          return needsGap ? [line, ''] : [line];
        })
        .join('\n');
    },
  };
}
|
|
562
|
-
// Translator for <div>, chosen from the class attribute: mermaid containers,
// then admonitions/alerts, then classes containing "type" (entry spacing).
// Anything else, or a node without attribute access, gets empty options.
function buildDivTranslator(ctx) {
  const getAttribute = getNodeAttr(getNode(ctx));
  if (!getAttribute) {
    return {};
  }
  const className = getAttribute('class') ?? '';
  if (className.includes('mermaid')) {
    return MERMAID_TRANSLATOR_CONFIG;
  }
  const admonition = buildAdmonitionConfig(
    className,
    resolveGfmAlertType(className),
    getAttribute,
  );
  if (admonition) {
    return admonition;
  }
  return className.includes('type') ? buildTypeSpacingConfig() : {};
}
|
|
577
|
-
// Translator for <section>: surrounds the content with blank lines. Sections
// whose class contains "tsd-member" additionally get a spacer line first.
// NOTE(review): the spacer is a single space character in this listing — confirm
// whether the original literal was an entity that was lost in extraction.
function buildSectionTranslator(ctx) {
  const classAttribute = getNodeAttr(getNode(ctx))?.('class');
  const lead = classAttribute?.includes('tsd-member') ? '\n\n \n\n' : '\n\n';
  return {
    postprocess({ content }) {
      return `${lead}${content}\n\n`;
    },
  };
}
|
|
588
|
-
// Translator for <span>: a span marked data-as="p" is rendered as its own
// paragraph (trimmed, surrounded by blank lines); other spans get no options.
function buildSpanTranslator(ctx) {
  const getAttribute = getNodeAttr(getNode(ctx));
  const actsAsParagraph = getAttribute?.('data-as') === 'p';
  if (!actsAsParagraph) {
    return {};
  }
  return {
    postprocess({ content }) {
      return `\n\n${content.trim()}\n\n`;
    },
  };
}
|
|
597
|
-
// ---------------------------------------------------------------------------
|
|
598
|
-
// DL helpers
|
|
599
|
-
// ---------------------------------------------------------------------------
|
|
600
|
-
// Normalises definition-list text: lines are trimmed, blank lines dropped, and
// a blank line is inserted between two consecutive term lines (lines that do
// not start with ": "). Returns '' when no non-blank line remains.
function normalizeDefinitionListContent(content) {
  const entries = content
    .split('\n')
    .map((line) => line.trim())
    .filter((line) => line.length > 0);
  if (entries.length === 0) {
    return '';
  }
  const isDefinition = (line) => line.startsWith(': ');
  const output = [];
  entries.forEach((entry, index) => {
    const previous = entries[index - 1];
    const twoTermsInARow =
      previous !== undefined && !isDefinition(previous) && !isDefinition(entry);
    if (twoTermsInARow) {
      output.push('');
    }
    output.push(entry);
  });
  return output.join('\n');
}
|
|
621
|
-
// ---------------------------------------------------------------------------
|
|
622
|
-
// Simple tag translators
|
|
623
|
-
// ---------------------------------------------------------------------------
|
|
624
|
-
// Translator for <dl>: normalises the list body and surrounds it with blank
// lines; an empty body produces no output.
function buildDlTranslator() {
  const postprocess = ({ content }) => {
    const body = normalizeDefinitionListContent(content);
    if (!body) {
      return '';
    }
    return `\n\n${body}\n\n`;
  };
  return { postprocess };
}
|
|
632
|
-
// Translator for <dt>: the trimmed term followed by a newline.
function buildDtTranslator() {
  return {
    postprocess({ content }) {
      const term = content.trim();
      return `${term}\n`;
    },
  };
}
|
|
637
|
-
// Translator for <dd>: ": " plus the trimmed definition and a newline;
// blank definitions produce no output.
function buildDdTranslator() {
  return {
    postprocess({ content }) {
      const definition = content.trim();
      return definition ? `: ${definition}\n` : '';
    },
  };
}
|
|
642
|
-
// Returns a translator factory whose postprocess surrounds the content with
// the given prefix and suffix, leaving the content itself untouched.
function wrapTranslator(prefix, suffix) {
  return function createWrappingTranslator() {
    return {
      postprocess({ content }) {
        return `${prefix}${content}${suffix}`;
      },
    };
  };
}
|
|
647
|
-
// Translator for <details>: trimmed content surrounded by blank lines;
// blank content produces no output.
function buildDetailsTranslator() {
  const postprocess = ({ content }) => {
    const body = content.trim();
    return body ? `\n\n${body}\n\n` : '';
  };
  return { postprocess };
}
|
|
657
|
-
// Translator for <summary>: the trimmed text followed by a blank line.
function buildSummaryTranslator() {
  return {
    postprocess({ content }) {
      const summary = content.trim();
      return `${summary}\n\n`;
    },
  };
}
|
|
662
|
-
// ---------------------------------------------------------------------------
|
|
663
|
-
// Translator registry + converter singleton
|
|
664
|
-
// ---------------------------------------------------------------------------
|
|
665
|
-
// Builds the tag -> translator table handed to the Markdown converter.
function createCustomTranslators() {
  // Inline tags rendered by wrapping their content in fixed delimiters.
  const inlineWrappers = {
    kbd: wrapTranslator('`', '`'),
    mark: wrapTranslator('==', '=='),
    sub: wrapTranslator('~', '~'),
    sup: wrapTranslator('^', '^'),
  };
  const definitionList = {
    dl: buildDlTranslator,
    dt: buildDtTranslator,
    dd: buildDdTranslator,
  };
  return {
    code: buildCodeTranslator,
    img: buildImageTranslator,
    ...definitionList,
    div: buildDivTranslator,
    ...inlineWrappers,
    section: buildSectionTranslator,
    details: buildDetailsTranslator,
    summary: buildSummaryTranslator,
    span: buildSpanTranslator,
    pre: buildMermaidPreTranslator,
  };
}
|
|
684
|
-
// Module-level converter instance, created on first use and reused afterwards.
let markdownConverter = null;

// Returns the shared NodeHtmlMarkdown instance, constructing it on first call
// with this module's options and custom translators.
function getMarkdownConverter() {
  if (markdownConverter == null) {
    const options = {
      codeFence: CODE_BLOCK.fence,
      codeBlockStyle: 'fenced',
      emDelimiter: '_',
      bulletMarker: '-',
      globalEscape: [/[\\`*_~]/gm, '\\$&'],
    };
    markdownConverter = new NodeHtmlMarkdown(options, createCustomTranslators());
  }
  return markdownConverter;
}
|
|
695
|
-
// Converts an HTML fragment to Markdown with the shared converter and trims
// surrounding whitespace from the result.
export function translateHtmlFragmentToMarkdown(html) {
  const markdown = getMarkdownConverter().translate(html);
  return markdown.trim();
}
|