@j0hanz/fetch-url-mcp 1.9.3 → 1.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
1
/**
 * Reads a language name out of a whitespace-separated class attribute value.
 * Tokens prefixed with `language-`, `lang-` or `highlight-` are recognised;
 * otherwise, when an `hljs` token is present, the first token that is not
 * `hljs` / `hljs-*` is used.
 *
 * @param className - Raw class attribute value.
 * @returns The language name, or `undefined` when none is found.
 */
export declare function extractLanguageFromClassName(className: string): string | undefined;

/**
 * Resolves a language from element attributes: the class name is consulted
 * first, then the data-lang value (accepted only when it consists solely of
 * ASCII letters, digits and underscores).
 *
 * @param className - Raw class attribute value.
 * @param dataLang - Raw data-lang attribute value.
 * @returns The language name, or `undefined` when neither attribute yields one.
 */
export declare function resolveLanguageFromAttributes(className: string, dataLang: string): string | undefined;

/**
 * Guesses the language of a code sample by running an ordered list of
 * heuristics and returning the first one that matches.
 *
 * @param code - Source text to classify.
 * @returns A language identifier, or `undefined` for empty/whitespace-only
 *   input or when no heuristic matches.
 */
export declare function detectLanguageFromCode(code: string): string | undefined;
4
+ //# sourceMappingURL=code-lang.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"code-lang.d.ts","sourceRoot":"","sources":["../../src/lib/code-lang.ts"],"names":[],"mappings":"AAgRA,wBAAgB,4BAA4B,CAC1C,SAAS,EAAE,MAAM,GAChB,MAAM,GAAG,SAAS,CAuBpB;AAqBD,wBAAgB,6BAA6B,CAC3C,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,MAAM,GACf,MAAM,GAAG,SAAS,CAKpB;AACD,wBAAgB,sBAAsB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAqBvE"}
@@ -0,0 +1,317 @@
1
// ASCII code points used by the character-level scanners in this module.

// Whitespace and punctuation.
const ASCII_TAB = 9,
  ASCII_SPACE = 32,
  ASCII_LT = 60,
  ASCII_UNDERSCORE = 95;

// Inclusive bounds of the digit and letter ranges.
const ASCII_DIGIT_0 = 48,
  ASCII_DIGIT_9 = 57;
const ASCII_UPPER_A = 65,
  ASCII_UPPER_Z = 90;
const ASCII_LOWER_A = 97,
  ASCII_LOWER_Z = 122;
11
/**
 * Wraps a code sample and exposes derived views of it (lowercased text,
 * individual lines, text with leading whitespace removed). Each view is
 * computed on first access and cached on the instance afterwards.
 */
class DetectionContext {
  code;
  _lower;
  _lines;
  _trimmedStart;

  /**
   * @param code - The raw code sample to analyse.
   */
  constructor(code) {
    this.code = code;
  }

  /** Lowercased copy of the code. */
  get lower() {
    if (this._lower == null) {
      this._lower = this.code.toLowerCase();
    }
    return this._lower;
  }

  /** The code split on LF or CRLF line breaks. */
  get lines() {
    if (this._lines == null) {
      this._lines = this.code.split(/\r?\n/);
    }
    return this._lines;
  }

  /** The code with leading whitespace stripped. */
  get trimmedStart() {
    if (this._trimmedStart == null) {
      this._trimmedStart = this.code.trimStart();
    }
    return this._trimmedStart;
  }
}
32
// ---- Shell heuristics -----------------------------------------------------

// A line whose first word is one of these is treated as a shell command.
const BASH_COMMANDS = new Set(['sudo', 'chmod', 'mkdir', 'cd', 'ls', 'cat', 'echo']);

// Tools that only count as shell when followed by one of BASH_VERBS.
const BASH_PACKAGE_MANAGERS = ['npm', 'yarn', 'pnpm', 'npx', 'brew', 'apt', 'pip', 'cargo', 'go'];

const BASH_VERBS = new Set(['install', 'add', 'run', 'build', 'start']);

// ---- TypeScript heuristics ------------------------------------------------

// Type annotations, with and without a space after the colon.
const TYPESCRIPT_HINTS = [
  ': string', ':string',
  ': number', ':number',
  ': boolean', ':boolean',
  ': void', ':void',
  ': any', ':any',
  ': unknown', ':unknown',
  ': never', ':never',
];

// ---- HTML heuristics ------------------------------------------------------

// Lowercase tag openers searched for as plain substrings.
const HTML_TAGS = [
  '<!doctype', '<html', '<head', '<body',
  '<div', '<span', '<p', '<a',
  '<script', '<style',
];

// ---- Keyword patterns -----------------------------------------------------

const RUST_REGEX = /\b(?:fn|impl|struct|enum)\b/;

const JS_REGEX = /\b(?:const|let|var|function|class|async|await|export|import)\b/;

// Multiline so that `pass$` can match at the end of any line.
const PYTHON_UNIQUE_REGEX = /\b(?:def |elif |except |finally:|yield |lambda |raise |pass$)/m;

const JS_SIGNAL_REGEX = /\b(?:const |let |var |function |require\(|=>|===|!==|console\.)/;

// CSS at-rules.
const CSS_REGEX = /@media|@import|@keyframes|@theme\b|@utility\b|@layer\b|@apply\b|@variant\b|@custom-variant\b|@reference\b|@source\b/;

// A `name:` pair at the start of a line, optionally indented.
const CSS_PROPERTY_REGEX = /^\s*[a-z][\w-]*\s*:/;
87
/**
 * Reports whether the text contains a `<` immediately followed by an ASCII
 * uppercase letter (e.g. `<Button`).
 *
 * @param code - Original-case source text.
 * @returns `true` when such a pair exists.
 */
function containsJsxTag(code) {
  const lastUsable = code.length - 1;
  let pos = code.indexOf('<');
  // A `<` in the final position has no following character to inspect.
  while (pos !== -1 && pos < lastUsable) {
    const following = code.charCodeAt(pos + 1);
    if (following >= ASCII_UPPER_A && following <= ASCII_UPPER_Z) {
      return true;
    }
    pos = code.indexOf('<', pos + 1);
  }
  return false;
}
98
/**
 * Decides whether a single line looks like shell input.
 *
 * A line qualifies when, after removing leading whitespace, it
 *  - starts with `#!`, `$ ` or `# `; or
 *  - has a first word listed in BASH_COMMANDS; or
 *  - has a first word listed in BASH_PACKAGE_MANAGERS and a second word
 *    listed in BASH_VERBS.
 *
 * Words are delimited by single space characters only.
 *
 * @param line - One line of the code sample.
 * @returns `true` when the line matches one of the rules above.
 */
function isBashLine(line) {
  const text = line.trimStart();
  if (text === '') {
    return false;
  }

  const shellPrefixes = ['#!', '$ ', '# '];
  if (shellPrefixes.some((prefix) => text.startsWith(prefix))) {
    return true;
  }

  // `subcommand` is undefined when the line holds no space at all.
  const [command, subcommand] = text.split(' ', 2);
  if (BASH_COMMANDS.has(command)) {
    return true;
  }

  return (
    subcommand !== undefined &&
    BASH_PACKAGE_MANAGERS.includes(command) &&
    BASH_VERBS.has(subcommand)
  );
}
123
/**
 * Reports whether at least one of the given lines is recognised by isBashLine.
 *
 * @param lines - Lines of the code sample.
 * @returns `true` as soon as one line matches, otherwise `false`.
 */
function detectBashIndicators(lines) {
  for (const candidate of lines) {
    if (isBashLine(candidate)) {
      return true;
    }
  }
  return false;
}
126
/**
 * Looks for a line shaped like CSS. A line counts when, ignoring leading
 * whitespace, it either
 *  - begins with `.` or `#` and contains `{` (class/id selector), or
 *  - contains `;`, matches CSS_PROPERTY_REGEX and contains no `(`.
 *
 * @param lines - Lines of the code sample.
 * @returns `true` when any line satisfies one of the rules.
 */
function detectCssStructure(lines) {
  return lines.some((rawLine) => {
    const text = rawLine.trimStart();
    if (!text) {
      return false;
    }

    const startsLikeSelector = text.startsWith('.') || text.startsWith('#');
    if (startsLikeSelector && text.includes('{')) {
      return true;
    }

    // Declarations containing `(` are rejected.
    return (
      text.includes(';') &&
      CSS_PROPERTY_REGEX.test(text) &&
      !text.includes('(')
    );
  });
}
143
/**
 * Looks for a `key: value` shaped line: after trimming, the first colon must
 * not be the first character and must be directly followed by a space or tab.
 *
 * @param lines - Lines of the code sample.
 * @returns `true` when any line has that shape.
 */
function detectYamlStructure(lines) {
  return lines.some((rawLine) => {
    const text = rawLine.trim();
    const colonAt = text.indexOf(':');
    // Covers blank lines, lines without a colon and lines starting with one.
    if (colonAt <= 0) {
      return false;
    }
    const next = text.charCodeAt(colonAt + 1);
    return next === ASCII_SPACE || next === ASCII_TAB;
  });
}
157
/**
 * Rust heuristic: `let mut`, any keyword in RUST_REGEX, or a `use ` together
 * with a `::` anywhere in the lowercased code.
 *
 * @param ctx - Detection context for the sample.
 * @returns `true` when any of the signals is present.
 */
function matchRust(ctx) {
  const src = ctx.lower;
  return (
    src.includes('let mut') ||
    RUST_REGEX.test(src) ||
    (src.includes('use ') && src.includes('::'))
  );
}
164
/**
 * Go heuristic: an `import "` substring or a standalone `package` / `func`
 * word in the lowercased code.
 *
 * @param ctx - Detection context for the sample.
 * @returns `true` when either signal is present.
 */
function matchGo(ctx) {
  const src = ctx.lower;
  return src.includes('import "') || /\b(?:package|func)\b/.test(src);
}
169
/**
 * JSX heuristic: one of a few lowercase markers (`classname=`, `jsx:`, an
 * import from react) or, failing that, a `<` followed by an uppercase letter
 * in the original-case code.
 *
 * @param ctx - Detection context for the sample.
 * @returns `true` when any signal is present.
 */
function matchJsx(ctx) {
  const lowered = ctx.lower;
  const markers = ['classname=', 'jsx:', "from 'react'", 'from "react"'];
  if (markers.some((marker) => lowered.includes(marker))) {
    return true;
  }
  // Component tags are case-sensitive, so scan the untouched source.
  return containsJsxTag(ctx.code);
}
179
/**
 * TypeScript heuristic: a standalone `interface` / `type` word, or any of the
 * annotation fragments in TYPESCRIPT_HINTS, in the lowercased code.
 *
 * @param ctx - Detection context for the sample.
 * @returns `true` when any signal is present.
 */
function matchTypeScript(ctx) {
  const lowered = ctx.lower;
  return (
    /\b(?:interface|type)\b/.test(lowered) ||
    TYPESCRIPT_HINTS.some((hint) => lowered.includes(hint))
  );
}
189
/**
 * SQL heuristic: tests the lowercased code for common statement openers
 * (select, insert into, update ... set, delete from, create ..., alter ...).
 *
 * @param ctx - Detection context for the sample.
 * @returns `true` when the statement pattern matches.
 */
function matchSql(ctx) {
  const statementPattern = /\b(?:select\s+(?:.+?\s+from|[\d*@])|insert\s+into|update\s+.+?\s+set|delete\s+from|create\s+(?:table|database|index|view|function|procedure|trigger|user|role)|alter\s+(?:table|database|index|view))\b/;
  const lowered = ctx.lower;
  return statementPattern.test(lowered);
}
192
/**
 * Reports whether lowercased code carries JavaScript-looking traits: a match
 * of JS_SIGNAL_REGEX, a `{`, or a `from '` substring.
 *
 * @param lowerCode - Lowercased source text.
 * @returns `true` when any trait is found.
 */
function hasJsSignals(lowerCode) {
  if (JS_SIGNAL_REGEX.test(lowerCode)) {
    return true;
  }
  return lowerCode.includes('{') || lowerCode.includes("from '");
}
197
/**
 * Python heuristic. Samples that already look like HTML are rejected up front.
 * After that, any of these signals is sufficient:
 *  - `print(`, `__name__`, `self.` or `elif ` in the lowercased code;
 *  - `None`, `True` or `False` in the original-case code;
 *  - a match of PYTHON_UNIQUE_REGEX;
 *  - a standalone `import` / `from` / `class` word, provided hasJsSignals
 *    finds nothing JavaScript-like.
 *
 * @param ctx - Detection context for the sample.
 * @returns `true` when the sample is judged to be Python.
 */
function matchPython(ctx) {
  if (matchHtml(ctx)) {
    return false;
  }

  const lowered = ctx.lower;
  const lowercaseMarkers = ['print(', '__name__', 'self.', 'elif '];
  if (lowercaseMarkers.some((marker) => lowered.includes(marker))) {
    return true;
  }

  // These literals are capitalised in Python, so search the original text.
  const original = ctx.code;
  const capitalisedLiterals = ['None', 'True', 'False'];
  if (capitalisedLiterals.some((literal) => original.includes(literal))) {
    return true;
  }

  if (PYTHON_UNIQUE_REGEX.test(lowered)) {
    return true;
  }

  // Keywords shared with JavaScript only count when no JS traits are present.
  return /\b(?:import|from|class)\b/.test(lowered) && !hasJsSignals(lowered);
}
219
/**
 * HTML heuristic: the lowercased code contains one of the HTML_TAGS openers.
 *
 * @param ctx - Detection context for the sample.
 * @returns `true` when any tag opener is found.
 */
function matchHtml(ctx) {
  const lowered = ctx.lower;
  return HTML_TAGS.some((opener) => lowered.includes(opener));
}
227
// Detector table as [lang, weight, match] rows, expanded into
// { lang, weight, match } records below.
// Pre-sorted by weight descending — first match wins in detectLanguageFromCode
const LANGUAGES = [
  ['rust', 25, matchRust],
  ['go', 22, matchGo],
  ['jsx', 22, matchJsx],
  ['typescript', 20, matchTypeScript],
  ['sql', 20, matchSql],
  ['html', 19, matchHtml],
  ['python', 18, matchPython],
  [
    'css',
    18,
    (ctx) => CSS_REGEX.test(ctx.lower) || detectCssStructure(ctx.lines),
  ],
  ['bash', 15, (ctx) => detectBashIndicators(ctx.lines)],
  ['yaml', 15, (ctx) => detectYamlStructure(ctx.lines)],
  ['javascript', 15, (ctx) => JS_REGEX.test(ctx.lower)],
  [
    'json',
    10,
    (ctx) => ctx.trimmedStart.startsWith('{') || ctx.trimmedStart.startsWith('['),
  ],
].map(([lang, weight, match]) => ({ lang, weight, match }));
250
/**
 * Extracts a language name from a class attribute value.
 *
 * Resolution order:
 *  1. The first token prefixed (case-insensitively) with `language-`, `lang-`
 *     or `highlight-`; the text after the prefix is returned in its original
 *     case.
 *  2. Otherwise, when an `hljs` token is present, the first token that is
 *     neither `hljs` nor `hljs-*`.
 *
 * A bare prefix such as `language-` carries no language and is ignored rather
 * than reported as an empty string. An empty string is not nullish, so it
 * would stop callers using `??` from falling back to another source. The
 * `hljs` marker is matched case-insensitively, consistent with how the
 * remaining tokens are filtered.
 *
 * @param className - Raw, whitespace-separated class attribute value.
 * @returns The language name, or `undefined` when none can be determined.
 */
export function extractLanguageFromClassName(className) {
  if (!className) return undefined;

  const tokens = className.match(/\S+/g);
  if (!tokens) return undefined;

  const prefixes = ['language-', 'lang-', 'highlight-'];
  let sawHljs = false;
  let fallback;

  for (const token of tokens) {
    const lower = token.toLowerCase();

    const prefix = prefixes.find((candidate) => lower.startsWith(candidate));
    if (prefix !== undefined) {
      const lang = token.slice(prefix.length);
      // A prefixed token always wins over the hljs fallback.
      if (lang) return lang;
      // Bare prefix: nothing to report, and not a fallback candidate either.
      continue;
    }

    if (lower === 'hljs') {
      sawHljs = true;
    } else if (fallback === undefined && !lower.startsWith('hljs-')) {
      // Remember only the first plain token; used if `hljs` is present.
      fallback = token;
    }
  }

  return sawHljs ? fallback : undefined;
}
276
/**
 * Validates a data-lang attribute value. The trimmed value is accepted only
 * when it is non-empty and made up entirely of ASCII letters, digits and
 * underscores.
 *
 * @param dataLang - Raw attribute value.
 * @returns The trimmed value, or `undefined` when it is empty or contains any
 *   other character.
 */
function resolveLanguageFromDataAttribute(dataLang) {
  const candidate = dataLang.trim();
  // Without the `u`/`i` flags, `\w` is exactly [A-Za-z0-9_].
  return /^\w+$/.test(candidate) ? candidate : undefined;
}
293
/**
 * Resolves a language from element attributes, preferring the class name and
 * falling back to the data-lang value when the class name yields nothing.
 *
 * @param className - Raw class attribute value.
 * @param dataLang - Raw data-lang attribute value.
 * @returns The language name, or `undefined` when neither source provides one.
 */
export function resolveLanguageFromAttributes(className, dataLang) {
  const fromClass = extractLanguageFromClassName(className);
  if (fromClass == null) {
    return resolveLanguageFromDataAttribute(dataLang);
  }
  return fromClass;
}
297
/**
 * Guesses the language of a code sample.
 *
 * Input that is empty, or holds only characters with a code unit of 32 or
 * below, yields `undefined`. Otherwise the LANGUAGES detectors run in table
 * order and the language of the first one that matches is returned.
 *
 * @param code - Source text to classify.
 * @returns The detected language identifier, or `undefined` when nothing matches.
 */
export function detectLanguageFromCode(code) {
  if (!code) {
    return undefined;
  }

  // Stop scanning at the first character above the space code point.
  let hasVisibleChar = false;
  for (let idx = 0; idx < code.length && !hasVisibleChar; idx++) {
    hasVisibleChar = code.charCodeAt(idx) > ASCII_SPACE;
  }
  if (!hasVisibleChar) {
    return undefined;
  }

  const context = new DetectionContext(code);
  // LANGUAGES is pre-sorted by weight descending — first match is highest confidence
  return LANGUAGES.find((entry) => entry.match(context))?.lang;
}
@@ -0,0 +1,4 @@
1
+ export declare function serializeDocumentForMarkdown(document: Document, fallback: string): string;
2
+ export declare function prepareDocumentForMarkdown(document: Document, baseUrl?: string, signal?: AbortSignal): void;
3
+ export declare function removeNoiseFromHtml(html: string, document?: Document, baseUrl?: string, signal?: AbortSignal): string;
4
+ //# sourceMappingURL=dom-prep.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"dom-prep.d.ts","sourceRoot":"","sources":["../../src/lib/dom-prep.ts"],"names":[],"mappings":"AAwlBA,wBAAgB,4BAA4B,CAC1C,QAAQ,EAAE,QAAQ,EAClB,QAAQ,EAAE,MAAM,GACf,MAAM,CAQR;AAmPD,wBAAgB,0BAA0B,CACxC,QAAQ,EAAE,QAAQ,EAClB,OAAO,CAAC,EAAE,MAAM,EAChB,MAAM,CAAC,EAAE,WAAW,GACnB,IAAI,CAkBN;AAiED,wBAAgB,mBAAmB,CACjC,IAAI,EAAE,MAAM,EACZ,QAAQ,CAAC,EAAE,QAAQ,EACnB,OAAO,CAAC,EAAE,MAAM,EAChB,MAAM,CAAC,EAAE,WAAW,GACnB,MAAM,CAcR"}