@j0hanz/fetch-url-mcp 1.12.7 → 1.12.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145)
  1. package/dist/http/auth.d.ts +2 -2
  2. package/dist/http/auth.d.ts.map +1 -1
  3. package/dist/http/auth.js +4 -5
  4. package/dist/http/index.d.ts +6 -0
  5. package/dist/http/index.d.ts.map +1 -0
  6. package/dist/http/index.js +5 -0
  7. package/dist/http/native.d.ts +73 -0
  8. package/dist/http/native.d.ts.map +1 -1
  9. package/dist/http/native.js +554 -10
  10. package/dist/http/rate-limit.d.ts +1 -1
  11. package/dist/http/rate-limit.d.ts.map +1 -1
  12. package/dist/http/rate-limit.js +3 -4
  13. package/dist/index.d.ts +17 -0
  14. package/dist/index.d.ts.map +1 -1
  15. package/dist/index.js +67 -6
  16. package/dist/lib/config.js +2 -2
  17. package/dist/lib/core.d.ts +56 -4
  18. package/dist/lib/core.d.ts.map +1 -1
  19. package/dist/lib/core.js +155 -4
  20. package/dist/lib/error/classes.d.ts +19 -0
  21. package/dist/lib/error/classes.d.ts.map +1 -0
  22. package/dist/lib/error/classes.js +107 -0
  23. package/dist/lib/error/classify.d.ts +4 -0
  24. package/dist/lib/error/classify.d.ts.map +1 -0
  25. package/dist/lib/error/classify.js +154 -0
  26. package/dist/lib/error/codes.d.ts +23 -0
  27. package/dist/lib/error/codes.d.ts.map +1 -0
  28. package/dist/lib/error/codes.js +22 -0
  29. package/dist/lib/error/index.d.ts +6 -0
  30. package/dist/lib/error/index.d.ts.map +1 -0
  31. package/dist/lib/error/index.js +5 -0
  32. package/dist/lib/{error-messages.d.ts → error/messages.d.ts} +2 -2
  33. package/dist/lib/error/messages.d.ts.map +1 -0
  34. package/dist/lib/{error-messages.js → error/messages.js} +2 -2
  35. package/dist/lib/{tool-errors.d.ts → error/payload.d.ts} +7 -13
  36. package/dist/lib/error/payload.d.ts.map +1 -0
  37. package/dist/lib/error/payload.js +108 -0
  38. package/dist/lib/mcp-interop.d.ts.map +1 -1
  39. package/dist/lib/mcp-interop.js +4 -6
  40. package/dist/lib/net/http.d.ts.map +1 -0
  41. package/dist/lib/{http.js → net/http.js} +4 -7
  42. package/dist/lib/net/index.d.ts +4 -0
  43. package/dist/lib/net/index.d.ts.map +1 -0
  44. package/dist/lib/net/index.js +3 -0
  45. package/dist/lib/{fetch-pipeline.d.ts → net/pipeline.d.ts} +3 -3
  46. package/dist/lib/net/pipeline.d.ts.map +1 -0
  47. package/dist/lib/{fetch-pipeline.js → net/pipeline.js} +3 -5
  48. package/dist/lib/{url.d.ts → net/url.d.ts} +1 -1
  49. package/dist/lib/net/url.d.ts.map +1 -0
  50. package/dist/lib/{url.js → net/url.js} +3 -5
  51. package/dist/lib/utils.d.ts +2 -18
  52. package/dist/lib/utils.d.ts.map +1 -1
  53. package/dist/lib/utils.js +29 -104
  54. package/dist/resources/index.d.ts.map +1 -1
  55. package/dist/resources/index.js +8 -5
  56. package/dist/schemas.d.ts +1 -1
  57. package/dist/server.d.ts.map +1 -1
  58. package/dist/server.js +7 -9
  59. package/dist/tasks/index.d.ts +2 -0
  60. package/dist/tasks/index.d.ts.map +1 -0
  61. package/dist/tasks/index.js +1 -0
  62. package/dist/tasks/manager.d.ts +123 -1
  63. package/dist/tasks/manager.d.ts.map +1 -1
  64. package/dist/tasks/manager.js +745 -10
  65. package/dist/tools/{fetch-url.d.ts → index.d.ts} +4 -5
  66. package/dist/tools/index.d.ts.map +1 -0
  67. package/dist/tools/{fetch-url.js → index.js} +6 -8
  68. package/dist/transform/index.d.ts +279 -0
  69. package/dist/transform/index.d.ts.map +1 -0
  70. package/dist/transform/index.js +5234 -0
  71. package/package.json +2 -2
  72. package/dist/cli.d.ts +0 -19
  73. package/dist/cli.d.ts.map +0 -1
  74. package/dist/cli.js +0 -65
  75. package/dist/http/health.d.ts +0 -8
  76. package/dist/http/health.d.ts.map +0 -1
  77. package/dist/http/health.js +0 -152
  78. package/dist/http/helpers.d.ts +0 -68
  79. package/dist/http/helpers.d.ts.map +0 -1
  80. package/dist/http/helpers.js +0 -402
  81. package/dist/lib/error-codes.d.ts +0 -13
  82. package/dist/lib/error-codes.d.ts.map +0 -1
  83. package/dist/lib/error-codes.js +0 -12
  84. package/dist/lib/error-messages.d.ts.map +0 -1
  85. package/dist/lib/fetch-pipeline.d.ts.map +0 -1
  86. package/dist/lib/http.d.ts.map +0 -1
  87. package/dist/lib/logger-names.d.ts +0 -16
  88. package/dist/lib/logger-names.d.ts.map +0 -1
  89. package/dist/lib/logger-names.js +0 -15
  90. package/dist/lib/session.d.ts +0 -44
  91. package/dist/lib/session.d.ts.map +0 -1
  92. package/dist/lib/session.js +0 -137
  93. package/dist/lib/tool-errors.d.ts.map +0 -1
  94. package/dist/lib/tool-errors.js +0 -253
  95. package/dist/lib/url.d.ts.map +0 -1
  96. package/dist/lib/zod.d.ts +0 -3
  97. package/dist/lib/zod.d.ts.map +0 -1
  98. package/dist/lib/zod.js +0 -27
  99. package/dist/tasks/call-contract.d.ts +0 -25
  100. package/dist/tasks/call-contract.d.ts.map +0 -1
  101. package/dist/tasks/call-contract.js +0 -59
  102. package/dist/tasks/execution.d.ts +0 -16
  103. package/dist/tasks/execution.d.ts.map +0 -1
  104. package/dist/tasks/execution.js +0 -241
  105. package/dist/tasks/handlers.d.ts +0 -11
  106. package/dist/tasks/handlers.d.ts.map +0 -1
  107. package/dist/tasks/handlers.js +0 -157
  108. package/dist/tasks/owner.d.ts +0 -43
  109. package/dist/tasks/owner.d.ts.map +0 -1
  110. package/dist/tasks/owner.js +0 -144
  111. package/dist/tasks/registry.d.ts +0 -20
  112. package/dist/tasks/registry.d.ts.map +0 -1
  113. package/dist/tasks/registry.js +0 -40
  114. package/dist/tasks/waiters.d.ts +0 -27
  115. package/dist/tasks/waiters.d.ts.map +0 -1
  116. package/dist/tasks/waiters.js +0 -114
  117. package/dist/tools/fetch-url.d.ts.map +0 -1
  118. package/dist/transform/dom-prep.d.ts +0 -16
  119. package/dist/transform/dom-prep.d.ts.map +0 -1
  120. package/dist/transform/dom-prep.js +0 -1287
  121. package/dist/transform/html-translators.d.ts +0 -5
  122. package/dist/transform/html-translators.d.ts.map +0 -1
  123. package/dist/transform/html-translators.js +0 -697
  124. package/dist/transform/markdown-cleanup.d.ts +0 -10
  125. package/dist/transform/markdown-cleanup.d.ts.map +0 -1
  126. package/dist/transform/markdown-cleanup.js +0 -542
  127. package/dist/transform/metadata.d.ts +0 -18
  128. package/dist/transform/metadata.d.ts.map +0 -1
  129. package/dist/transform/metadata.js +0 -462
  130. package/dist/transform/next-flight.d.ts +0 -2
  131. package/dist/transform/next-flight.d.ts.map +0 -1
  132. package/dist/transform/next-flight.js +0 -374
  133. package/dist/transform/shared.d.ts +0 -8
  134. package/dist/transform/shared.d.ts.map +0 -1
  135. package/dist/transform/shared.js +0 -137
  136. package/dist/transform/transform.d.ts +0 -38
  137. package/dist/transform/transform.d.ts.map +0 -1
  138. package/dist/transform/transform.js +0 -1042
  139. package/dist/transform/types.d.ts +0 -124
  140. package/dist/transform/types.d.ts.map +0 -1
  141. package/dist/transform/types.js +0 -5
  142. package/dist/transform/worker-pool.d.ts +0 -76
  143. package/dist/transform/worker-pool.d.ts.map +0 -1
  144. package/dist/transform/worker-pool.js +0 -725
  145. /package/dist/lib/{http.d.ts → net/http.d.ts} +0 -0
@@ -1,5 +0,0 @@
1
- export declare function extractLanguageFromClassName(className: string): string | undefined;
2
- export declare function resolveLanguageFromAttributes(className: string, dataLang: string): string | undefined;
3
- export declare function detectLanguageFromCode(code: string): string | undefined;
4
- export declare function translateHtmlFragmentToMarkdown(html: string): string;
5
- //# sourceMappingURL=html-translators.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"html-translators.d.ts","sourceRoot":"","sources":["../../src/transform/html-translators.ts"],"names":[],"mappings":"AAkVA,wBAAgB,4BAA4B,CAC1C,SAAS,EAAE,MAAM,GAChB,MAAM,GAAG,SAAS,CAkCpB;AAOD,wBAAgB,6BAA6B,CAC3C,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,MAAM,GACf,MAAM,GAAG,SAAS,CAKpB;AACD,wBAAgB,sBAAsB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAKvE;AA0cD,wBAAgB,+BAA+B,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEpE"}
@@ -1,697 +0,0 @@
1
- import { NodeHtmlMarkdown, } from 'node-html-markdown';
2
- import { isHtmlNode, isObject } from '../lib/utils.js';
3
- import { WP_PHOTON_HOST_PATTERN } from './dom-prep.js';
4
- // ---------------------------------------------------------------------------
5
- // Shared constant
6
- // ---------------------------------------------------------------------------
7
- const CODE_BLOCK = {
8
- fence: '```',
9
- format: (code, language = '') => `\`\`\`${language}\n${code}\n\`\`\``,
10
- };
11
- const MERMAID_POSTPROCESS = ({ content }) => `\n\n\`\`\`mermaid\n${content.trim()}\n\`\`\`\n\n`;
12
- const MERMAID_TRANSLATOR_CONFIG = {
13
- noEscape: true,
14
- preserveWhitespace: true,
15
- postprocess: MERMAID_POSTPROCESS,
16
- };
17
- // ---------------------------------------------------------------------------
18
- // DOM helpers (translator-only)
19
- // ---------------------------------------------------------------------------
20
- function getTagName(node) {
21
- if (!isHtmlNode(node))
22
- return '';
23
- const raw = node.tagName;
24
- return typeof raw === 'string' ? raw.toUpperCase() : '';
25
- }
26
- function getNode(ctx) {
27
- return isObject(ctx) ? ctx['node'] : undefined;
28
- }
29
- function getParent(ctx) {
30
- return isObject(ctx) ? ctx['parent'] : undefined;
31
- }
32
- function getNodeAttr(node) {
33
- if (!isHtmlNode(node) || typeof node.getAttribute !== 'function')
34
- return undefined;
35
- return node.getAttribute.bind(node);
36
- }
37
- // ---------------------------------------------------------------------------
38
- // Code translators
39
- // ---------------------------------------------------------------------------
40
- class DetectionContext {
41
- code;
42
- _lower;
43
- _lines;
44
- _trimmedStart;
45
- constructor(code) {
46
- this.code = code;
47
- }
48
- get lower() {
49
- return (this._lower ??= this.code.toLowerCase());
50
- }
51
- get lines() {
52
- return (this._lines ??= this.code.split(/\r?\n/));
53
- }
54
- get trimmedStart() {
55
- return (this._trimmedStart ??= this.code.trimStart());
56
- }
57
- }
58
- const BASH_COMMANDS = new Set([
59
- 'sudo',
60
- 'chmod',
61
- 'mkdir',
62
- 'cd',
63
- 'ls',
64
- 'cat',
65
- 'echo',
66
- ]);
67
- const BASH_PACKAGE_MANAGERS = [
68
- 'npm',
69
- 'yarn',
70
- 'pnpm',
71
- 'npx',
72
- 'brew',
73
- 'apt',
74
- 'pip',
75
- 'cargo',
76
- 'go',
77
- ];
78
- const TYPESCRIPT_HINTS = [
79
- ': string',
80
- ':string',
81
- ': number',
82
- ':number',
83
- ': boolean',
84
- ':boolean',
85
- ': void',
86
- ':void',
87
- ': any',
88
- ':any',
89
- ': unknown',
90
- ':unknown',
91
- ': never',
92
- ':never',
93
- ];
94
- const HTML_TAGS = [
95
- '<!doctype',
96
- '<html',
97
- '<head',
98
- '<body',
99
- '<div',
100
- '<span',
101
- '<p',
102
- '<a',
103
- '<script',
104
- '<style',
105
- ];
106
- function isBashLine(line) {
107
- const trimmed = line.trimStart();
108
- if (!trimmed)
109
- return false;
110
- if (trimmed.startsWith('#!') ||
111
- trimmed.startsWith('$ ') ||
112
- /^\s*\.\.\.\\?>\s+\S/m.test(trimmed)) {
113
- return true;
114
- }
115
- const spaceIdx = trimmed.indexOf(' ');
116
- const firstWord = spaceIdx === -1 ? trimmed : trimmed.slice(0, spaceIdx);
117
- if (BASH_COMMANDS.has(firstWord))
118
- return true;
119
- return (spaceIdx !== -1 &&
120
- BASH_PACKAGE_MANAGERS.includes(firstWord));
121
- }
122
- function detectBashIndicators(lines) {
123
- return lines.some(isBashLine);
124
- }
125
- function detectCssStructure(lines) {
126
- for (const line of lines) {
127
- const trimmed = line.trimStart();
128
- if (!trimmed || trimmed.startsWith('# ') || trimmed.startsWith('//')) {
129
- continue;
130
- }
131
- if (/^[.#][A-Za-z_-][\w-]*\s*\{/.test(trimmed))
132
- return true;
133
- if (trimmed.includes(';') &&
134
- /^\s*[a-z][\w-]*\s*:/.test(trimmed) &&
135
- !trimmed.includes('(')) {
136
- return true;
137
- }
138
- }
139
- return false;
140
- }
141
- function detectYamlStructure(lines) {
142
- for (const line of lines) {
143
- const trimmed = line.trim();
144
- const colonIdx = trimmed.indexOf(':');
145
- if (colonIdx > 0) {
146
- const after = trimmed[colonIdx + 1];
147
- if (after === ' ' || after === '\t')
148
- return true;
149
- }
150
- }
151
- return false;
152
- }
153
- const LANGUAGES = [
154
- {
155
- lang: 'rust',
156
- weight: 25,
157
- match: (ctx) => ctx.lower.includes('let mut') ||
158
- /\b(?:fn|impl|struct|enum)\b/.test(ctx.lower) ||
159
- (ctx.lower.includes('use ') && ctx.lower.includes('::')),
160
- },
161
- {
162
- lang: 'go',
163
- weight: 22,
164
- match: (ctx) => ctx.lower.includes('import "') || /\b(?:package|func)\b/.test(ctx.lower),
165
- },
166
- {
167
- lang: 'jsx',
168
- weight: 22,
169
- match: (ctx) => {
170
- const l = ctx.lower;
171
- if (l.includes('classname=') ||
172
- l.includes('jsx:') ||
173
- l.includes("from 'react'") ||
174
- l.includes('from "react"')) {
175
- return true;
176
- }
177
- return /<\/?[A-Z][A-Za-z0-9]*(?:\s+[A-Za-z_:][\w:.-]*(?:\s*=\s*(?:"[^"]*"|'[^']*'|\{[^}]*\}))?)*\s*\/?>/m.test(ctx.code);
178
- },
179
- },
180
- {
181
- lang: 'typescript',
182
- weight: 20,
183
- match: (ctx) => /\b(?:interface|type)\b/.test(ctx.lower) ||
184
- TYPESCRIPT_HINTS.some((hint) => ctx.lower.includes(hint)),
185
- },
186
- {
187
- lang: 'sql',
188
- weight: 20,
189
- match: (ctx) => /\b(?:select\s+(?:.+?\s+from|[\d*@])|insert\s+into|update\s+.+?\s+set|delete\s+from|create\s+(?:table|database|index|view|function|procedure|trigger|user|role)|alter\s+(?:table|database|index|view))\b/.test(ctx.lower),
190
- },
191
- {
192
- lang: 'html',
193
- weight: 19,
194
- match: (ctx) => HTML_TAGS.some((tag) => ctx.lower.includes(tag)),
195
- },
196
- {
197
- lang: 'python',
198
- weight: 18,
199
- match: (ctx) => {
200
- if (HTML_TAGS.some((tag) => ctx.lower.includes(tag)))
201
- return false;
202
- const l = ctx.lower;
203
- const c = ctx.code;
204
- if (/^\s*(?:>>>|\.\.\.)\s/m.test(c) ||
205
- /<(?:QuerySet|[A-Z][A-Za-z0-9_]*:\s)|\bdatetime\.datetime\(|\bDoesNotExist:/.test(c) ||
206
- /^\s*[A-Za-z_][\w.]*\s*=\s*[A-Z][\w.]*\(/m.test(c) ||
207
- /^\s*[A-Za-z_][\w.]*\.[A-Za-z_][\w]*\s*$/m.test(c) ||
208
- c.includes('None') ||
209
- c.includes('True') ||
210
- c.includes('False') ||
211
- l.includes('print(') ||
212
- l.includes('__name__') ||
213
- l.includes('self.') ||
214
- l.includes('elif ') ||
215
- /\b(?:def |elif |except |finally:|yield |lambda |raise |pass$)/m.test(l)) {
216
- return true;
217
- }
218
- const hasJsSignals = /\b(?:const |let |var |function |require\(|=>|===|!==|console\.)/.test(l) ||
219
- l.includes('{') ||
220
- l.includes("from '");
221
- return /\b(?:import|from|class)\b/.test(l) && !hasJsSignals;
222
- },
223
- },
224
- {
225
- lang: 'css',
226
- weight: 18,
227
- match: (ctx) => /@media|@import|@keyframes|@theme\b|@utility\b|@layer\b|@apply\b|@variant\b|@custom-variant\b|@reference\b|@source\b/.test(ctx.lower) || detectCssStructure(ctx.lines),
228
- },
229
- { lang: 'bash', weight: 15, match: (ctx) => detectBashIndicators(ctx.lines) },
230
- { lang: 'yaml', weight: 15, match: (ctx) => detectYamlStructure(ctx.lines) },
231
- {
232
- lang: 'javascript',
233
- weight: 15,
234
- match: (ctx) => /\b(?:const|let|var|function|class|async|await|export|import)\b/.test(ctx.lower),
235
- },
236
- {
237
- lang: 'json',
238
- weight: 10,
239
- match: (ctx) => ctx.trimmedStart.startsWith('{') || ctx.trimmedStart.startsWith('['),
240
- },
241
- ];
242
- const KNOWN_LANG_PREFIXES = new Set([
243
- 'css',
244
- 'javascript',
245
- 'js',
246
- 'typescript',
247
- 'ts',
248
- 'python',
249
- 'py',
250
- 'html',
251
- 'xml',
252
- 'sql',
253
- 'bash',
254
- 'sh',
255
- 'yaml',
256
- 'json',
257
- 'ruby',
258
- 'go',
259
- 'rust',
260
- 'java',
261
- 'php',
262
- 'c',
263
- 'cpp',
264
- 'swift',
265
- 'kotlin',
266
- 'scss',
267
- 'sass',
268
- 'less',
269
- 'graphql',
270
- 'markdown',
271
- 'md',
272
- ]);
273
- export function extractLanguageFromClassName(className) {
274
- if (!className)
275
- return undefined;
276
- // Split by whitespace and check for language indicators
277
- const tokens = className.match(/\S+/g);
278
- if (!tokens)
279
- return undefined;
280
- // Fast path: check for prefixes
281
- for (const token of tokens) {
282
- const lower = token.toLowerCase();
283
- if (lower.startsWith('language-'))
284
- return token.slice(9);
285
- if (lower.startsWith('lang-'))
286
- return token.slice(5);
287
- if (lower.startsWith('highlight-'))
288
- return token.slice(10);
289
- }
290
- // Special handling for hljs which often appears with a separate language class
291
- if (tokens.includes('hljs')) {
292
- const langClass = tokens.find((t) => {
293
- const l = t.toLowerCase();
294
- return l !== 'hljs' && !l.startsWith('hljs-');
295
- });
296
- if (langClass)
297
- return langClass;
298
- }
299
- // Last resort: look for any known language prefix followed by a dash
300
- for (const token of tokens) {
301
- const dashIdx = token.indexOf('-');
302
- if (dashIdx > 0) {
303
- const prefix = token.slice(0, dashIdx).toLowerCase();
304
- if (KNOWN_LANG_PREFIXES.has(prefix))
305
- return prefix;
306
- }
307
- }
308
- return undefined;
309
- }
310
- function resolveLanguageFromDataAttribute(dataLang) {
311
- const trimmed = dataLang.trim();
312
- return /^\w+$/.test(trimmed) ? trimmed : undefined;
313
- }
314
- export function resolveLanguageFromAttributes(className, dataLang) {
315
- return (extractLanguageFromClassName(className) ??
316
- resolveLanguageFromDataAttribute(dataLang));
317
- }
318
- export function detectLanguageFromCode(code) {
319
- if (!code || !/\S/.test(code))
320
- return undefined;
321
- const ctx = new DetectionContext(code);
322
- return LANGUAGES.find((def) => def.match(ctx))?.lang;
323
- }
324
- function buildInlineCode(content) {
325
- const trimmed = content.trim();
326
- if (!trimmed)
327
- return '``';
328
- const matches = trimmed.match(/`+/g);
329
- const maxBackticks = matches ? Math.max(...matches.map((m) => m.length)) : 0;
330
- const delimiter = '`'.repeat(maxBackticks + 1);
331
- const padding = trimmed.startsWith('`') || trimmed.endsWith('`') ? ' ' : '';
332
- return `${delimiter}${padding}${trimmed}${padding}${delimiter}`;
333
- }
334
- function isCodeBlock(parent) {
335
- const tagName = getTagName(parent);
336
- return tagName === 'PRE' || tagName === 'WRAPPED-PRE';
337
- }
338
- function resolveAttributeLanguage(node) {
339
- const getAttribute = getNodeAttr(node);
340
- const className = getAttribute?.('class') ?? '';
341
- const dataLanguage = getAttribute?.('data-language') ?? '';
342
- return resolveLanguageFromAttributes(className, dataLanguage);
343
- }
344
- function findLanguageFromCodeChild(node) {
345
- if (!isHtmlNode(node))
346
- return undefined;
347
- const childNodes = Array.from(node.childNodes ?? []);
348
- for (const child of childNodes) {
349
- if (!isHtmlNode(child))
350
- continue;
351
- const raw = child.rawTagName;
352
- const tagName = typeof raw === 'string' ? raw.toUpperCase() : '';
353
- if (tagName === 'CODE')
354
- return resolveAttributeLanguage(child);
355
- }
356
- return undefined;
357
- }
358
- function createCodeBlockPostprocessor(language) {
359
- return ({ content }) => {
360
- const trimmed = content.trim();
361
- if (!trimmed)
362
- return '';
363
- const resolvedLanguage = language ?? detectLanguageFromCode(trimmed) ?? '';
364
- return CODE_BLOCK.format(trimmed, resolvedLanguage);
365
- };
366
- }
367
- function buildInlineCodeTranslator() {
368
- return {
369
- spaceIfRepeatingChar: true,
370
- noEscape: true,
371
- postprocess: ({ content }) => buildInlineCode(content),
372
- };
373
- }
374
- function buildCodeTranslator(ctx) {
375
- const inlineCodeTranslator = buildInlineCodeTranslator();
376
- if (!isCodeBlock(getParent(ctx)))
377
- return inlineCodeTranslator;
378
- return { noEscape: true, preserveWhitespace: true };
379
- }
380
- // ---------------------------------------------------------------------------
381
- // Image translators
382
- // ---------------------------------------------------------------------------
383
- function extractFirstSrcsetUrl(srcset) {
384
- return srcset.split(',')[0]?.trim().split(/\s+/)[0] ?? '';
385
- }
386
- const LAZY_SRC_ATTRIBUTES = [
387
- 'data-src',
388
- 'data-lazy-src',
389
- 'data-lazy',
390
- 'data-original',
391
- 'data-echo',
392
- 'data-srcset',
393
- ];
394
- function isDataUri(value) {
395
- return value.startsWith('data:');
396
- }
397
- const PLACEHOLDER_FILENAME_PATTERN = /(?:^|\/)(?:blank|spacer|placeholder|grey|gray|pixel|loading|lazy|transparent|empty|dummy)\.[a-z]{3,4}$/i;
398
- function isPlaceholderSrc(value) {
399
- if (isDataUri(value))
400
- return true;
401
- const parsed = URL.parse(value) ?? URL.parse(value, 'http://localhost');
402
- if (!parsed)
403
- return false;
404
- return PLACEHOLDER_FILENAME_PATTERN.test(parsed.pathname);
405
- }
406
- function extractNonDataSrcsetUrl(value) {
407
- const url = extractFirstSrcsetUrl(value);
408
- return url && !isDataUri(url) ? url : undefined;
409
- }
410
- function resolveLazySrc(getAttribute) {
411
- for (const attr of LAZY_SRC_ATTRIBUTES) {
412
- const lazy = getAttribute(attr);
413
- if (!lazy || isDataUri(lazy))
414
- continue;
415
- if (attr === 'data-srcset') {
416
- const url = extractNonDataSrcsetUrl(lazy);
417
- if (url)
418
- return url;
419
- continue;
420
- }
421
- return lazy;
422
- }
423
- return undefined;
424
- }
425
- // Some sites (notably WordPress with Photon CDN) use a CDN proxy URL in img src while keeping the original same-domain URL in srcset.
426
- // Since the converter prefers srcset URLs for CDN-hosted images, we need to detect this pattern and extract the canonical URL from srcset to ensure images are correctly resolved, especially when migrating content to a new domain.
427
- function isWpPhotonUrl(src) {
428
- const parsed = URL.parse(src);
429
- return parsed !== null && WP_PHOTON_HOST_PATTERN.test(parsed.hostname);
430
- }
431
- function resolveImageSrc(getAttribute) {
432
- if (!getAttribute)
433
- return '';
434
- const srcRaw = getAttribute('src') ?? '';
435
- const srcsetUrl = extractNonDataSrcsetUrl(getAttribute('srcset') ?? '');
436
- // When src is a CDN proxy URL, prefer srcset which usually has the
437
- // canonical same-domain URL that survives domain migrations.
438
- if (srcRaw && isWpPhotonUrl(srcRaw) && srcsetUrl)
439
- return srcsetUrl;
440
- if (srcRaw && !isPlaceholderSrc(srcRaw))
441
- return srcRaw;
442
- // First check common lazy-loading attributes that may contain non-data URLs before falling back to the native srcset, as some sites use data URIs in lazy attributes while still providing valid URLs in srcset.
443
- const lazySrc = resolveLazySrc(getAttribute);
444
- if (lazySrc)
445
- return lazySrc;
446
- // If the src is a data URI or missing, check srcset for a valid URL. Some sites use srcset with data URIs in src and actual URLs in srcset for responsive images.
447
- if (srcsetUrl)
448
- return srcsetUrl;
449
- return '';
450
- }
451
- function deriveAltFromImageUrl(src) {
452
- if (!src)
453
- return '';
454
- const absoluteParsed = URL.parse(src);
455
- const parsed = absoluteParsed ?? URL.parse(src, 'http://localhost');
456
- if (!parsed)
457
- return '';
458
- if (absoluteParsed &&
459
- parsed.protocol !== 'http:' &&
460
- parsed.protocol !== 'https:') {
461
- return '';
462
- }
463
- const match = /\/([^/]+?)(?:\.[^/.]+)?$/.exec(parsed.pathname);
464
- if (!match?.[1])
465
- return '';
466
- return match[1].replace(/[_-]+/g, ' ').trim();
467
- }
468
- function buildImageTranslator(ctx) {
469
- const getAttribute = getNodeAttr(getNode(ctx));
470
- const src = resolveImageSrc(getAttribute);
471
- const existingAlt = getAttribute?.('alt') ?? '';
472
- if (!src) {
473
- return { content: existingAlt.trim() };
474
- }
475
- const alt = existingAlt.trim() || deriveAltFromImageUrl(src);
476
- return { content: `![${alt}](${src})` };
477
- }
478
- // ---------------------------------------------------------------------------
479
- // Pre / Mermaid translators
480
- // ---------------------------------------------------------------------------
481
- function buildPreTranslator(ctx) {
482
- const node = getNode(ctx);
483
- if (!node)
484
- return {};
485
- const attributeLanguage = resolveAttributeLanguage(node) ?? findLanguageFromCodeChild(node);
486
- return {
487
- noEscape: true,
488
- preserveWhitespace: true,
489
- postprocess: createCodeBlockPostprocessor(attributeLanguage),
490
- };
491
- }
492
- function buildMermaidPreTranslator(ctx) {
493
- const node = getNode(ctx);
494
- const getAttribute = getNodeAttr(node);
495
- const className = getAttribute?.('class') ?? '';
496
- if (className.includes('mermaid'))
497
- return MERMAID_TRANSLATOR_CONFIG;
498
- return buildPreTranslator(ctx);
499
- }
500
- // ---------------------------------------------------------------------------
501
- // Block-level translators (div, section, span, table, dl, etc.)
502
- // ---------------------------------------------------------------------------
503
- const GFM_ALERT_MAP = new Map([
504
- ['note', 'NOTE'],
505
- ['info', 'NOTE'],
506
- ['tip', 'TIP'],
507
- ['hint', 'TIP'],
508
- ['warning', 'WARNING'],
509
- ['warn', 'WARNING'],
510
- ['caution', 'CAUTION'],
511
- ['danger', 'CAUTION'],
512
- ['important', 'IMPORTANT'],
513
- ]);
514
- function resolveGfmAlertType(className) {
515
- const tokens = className.toLowerCase().split(/\s+/);
516
- for (const token of tokens) {
517
- const mapped = GFM_ALERT_MAP.get(token);
518
- if (mapped)
519
- return mapped;
520
- }
521
- return undefined;
522
- }
523
- function buildAdmonitionConfig(className, alertType, getAttribute) {
524
- const isAdmonition = className.includes('admonition') ||
525
- className.includes('callout') ||
526
- className.includes('custom-block') ||
527
- getAttribute('role') === 'alert' ||
528
- alertType !== undefined;
529
- if (!isAdmonition)
530
- return undefined;
531
- return {
532
- postprocess: ({ content }) => {
533
- const lines = content.trim().split('\n');
534
- const header = alertType ? `> [!${alertType}]\n` : '';
535
- return `\n\n${header}> ${lines.join('\n> ')}\n\n`;
536
- },
537
- };
538
- }
539
- function buildTypeSpacingConfig() {
540
- return {
541
- postprocess: ({ content }) => {
542
- const lines = content.split('\n');
543
- const separated = [];
544
- for (let i = 0; i < lines.length; i++) {
545
- const line = lines[i] ?? '';
546
- separated.push(line);
547
- const nextLine = lines[i + 1];
548
- if (nextLine !== undefined &&
549
- line.trim() &&
550
- nextLine.trim() &&
551
- line.includes(':') &&
552
- nextLine.includes(':') &&
553
- !line.startsWith(' ') &&
554
- !nextLine.startsWith(' ')) {
555
- separated.push('');
556
- }
557
- }
558
- return separated.join('\n');
559
- },
560
- };
561
- }
562
- function buildDivTranslator(ctx) {
563
- const getAttribute = getNodeAttr(getNode(ctx));
564
- if (!getAttribute)
565
- return {};
566
- const className = getAttribute('class') ?? '';
567
- if (className.includes('mermaid'))
568
- return MERMAID_TRANSLATOR_CONFIG;
569
- const alertType = resolveGfmAlertType(className);
570
- const admonition = buildAdmonitionConfig(className, alertType, getAttribute);
571
- if (admonition)
572
- return admonition;
573
- if (!className.includes('type'))
574
- return {};
575
- return buildTypeSpacingConfig();
576
- }
577
- function buildSectionTranslator(ctx) {
578
- const getAttribute = getNodeAttr(getNode(ctx));
579
- if (getAttribute?.('class')?.includes('tsd-member')) {
580
- return {
581
- postprocess: ({ content }) => `\n\n&nbsp;\n\n${content}\n\n`,
582
- };
583
- }
584
- return {
585
- postprocess: ({ content }) => `\n\n${content}\n\n`,
586
- };
587
- }
588
- function buildSpanTranslator(ctx) {
589
- const getAttribute = getNodeAttr(getNode(ctx));
590
- if (getAttribute?.('data-as') === 'p') {
591
- return {
592
- postprocess: ({ content }) => `\n\n${content.trim()}\n\n`,
593
- };
594
- }
595
- return {};
596
- }
597
- // ---------------------------------------------------------------------------
598
- // DL helpers
599
- // ---------------------------------------------------------------------------
600
- function normalizeDefinitionListContent(content) {
601
- const lines = content
602
- .split('\n')
603
- .map((line) => line.trim())
604
- .filter(Boolean);
605
- if (lines.length === 0)
606
- return '';
607
- const normalized = [];
608
- for (const line of lines) {
609
- const isDefinition = line.startsWith(': ');
610
- const previous = normalized[normalized.length - 1];
611
- if (previous &&
612
- previous.length > 0 &&
613
- !previous.startsWith(': ') &&
614
- !isDefinition) {
615
- normalized.push('');
616
- }
617
- normalized.push(line);
618
- }
619
- return normalized.join('\n');
620
- }
621
- // ---------------------------------------------------------------------------
622
- // Simple tag translators
623
- // ---------------------------------------------------------------------------
624
- function buildDlTranslator() {
625
- return {
626
- postprocess: ({ content }) => {
627
- const normalized = normalizeDefinitionListContent(content);
628
- return normalized ? `\n\n${normalized}\n\n` : '';
629
- },
630
- };
631
- }
632
- function buildDtTranslator() {
633
- return {
634
- postprocess: ({ content }) => `${content.trim()}\n`,
635
- };
636
- }
637
- function buildDdTranslator() {
638
- return {
639
- postprocess: ({ content }) => content.trim() ? `: ${content.trim()}\n` : '',
640
- };
641
- }
642
- function wrapTranslator(prefix, suffix) {
643
- return () => ({
644
- postprocess: ({ content }) => `${prefix}${content}${suffix}`,
645
- });
646
- }
647
- function buildDetailsTranslator() {
648
- return {
649
- postprocess: ({ content }) => {
650
- const trimmed = content.trim();
651
- if (!trimmed)
652
- return '';
653
- return `\n\n${trimmed}\n\n`;
654
- },
655
- };
656
- }
657
- function buildSummaryTranslator() {
658
- return {
659
- postprocess: ({ content }) => `${content.trim()}\n\n`,
660
- };
661
- }
662
- // ---------------------------------------------------------------------------
663
- // Translator registry + converter singleton
664
- // ---------------------------------------------------------------------------
665
- function createCustomTranslators() {
666
- return {
667
- code: buildCodeTranslator,
668
- img: buildImageTranslator,
669
- dl: buildDlTranslator,
670
- dt: buildDtTranslator,
671
- dd: buildDdTranslator,
672
- div: buildDivTranslator,
673
- kbd: wrapTranslator('`', '`'),
674
- mark: wrapTranslator('==', '=='),
675
- sub: wrapTranslator('~', '~'),
676
- sup: wrapTranslator('^', '^'),
677
- section: buildSectionTranslator,
678
- details: buildDetailsTranslator,
679
- summary: buildSummaryTranslator,
680
- span: buildSpanTranslator,
681
- pre: buildMermaidPreTranslator,
682
- };
683
- }
684
- let markdownConverter = null;
685
- function getMarkdownConverter() {
686
- markdownConverter ??= new NodeHtmlMarkdown({
687
- codeFence: CODE_BLOCK.fence,
688
- codeBlockStyle: 'fenced',
689
- emDelimiter: '_',
690
- bulletMarker: '-',
691
- globalEscape: [/[\\`*_~]/gm, '\\$&'],
692
- }, createCustomTranslators());
693
- return markdownConverter;
694
- }
695
- export function translateHtmlFragmentToMarkdown(html) {
696
- return getMarkdownConverter().translate(html).trim();
697
- }