@j0hanz/superfetch 2.5.3 → 2.6.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/README.md +350 -226
  2. package/dist/assets/logo.svg +24837 -24835
  3. package/dist/cache.d.ts +28 -20
  4. package/dist/cache.js +292 -514
  5. package/dist/config.d.ts +41 -7
  6. package/dist/config.js +298 -148
  7. package/dist/crypto.js +25 -12
  8. package/dist/dom-noise-removal.js +379 -421
  9. package/dist/errors.d.ts +2 -2
  10. package/dist/errors.js +25 -8
  11. package/dist/fetch.d.ts +18 -16
  12. package/dist/fetch.js +1132 -526
  13. package/dist/host-normalization.js +40 -10
  14. package/dist/http-native.js +628 -287
  15. package/dist/index.js +67 -7
  16. package/dist/instructions.md +44 -31
  17. package/dist/ip-blocklist.d.ts +8 -0
  18. package/dist/ip-blocklist.js +65 -0
  19. package/dist/json.js +14 -9
  20. package/dist/language-detection.d.ts +2 -11
  21. package/dist/language-detection.js +289 -280
  22. package/dist/markdown-cleanup.d.ts +0 -1
  23. package/dist/markdown-cleanup.js +391 -429
  24. package/dist/mcp-validator.js +4 -2
  25. package/dist/mcp.js +184 -135
  26. package/dist/observability.js +89 -21
  27. package/dist/resources.js +16 -6
  28. package/dist/server-tuning.d.ts +2 -0
  29. package/dist/server-tuning.js +25 -23
  30. package/dist/session.d.ts +1 -0
  31. package/dist/session.js +41 -33
  32. package/dist/tasks.d.ts +2 -0
  33. package/dist/tasks.js +91 -9
  34. package/dist/timer-utils.d.ts +5 -0
  35. package/dist/timer-utils.js +20 -0
  36. package/dist/tools.d.ts +28 -5
  37. package/dist/tools.js +317 -183
  38. package/dist/transform-types.d.ts +5 -1
  39. package/dist/transform.d.ts +3 -2
  40. package/dist/transform.js +1138 -421
  41. package/dist/type-guards.d.ts +1 -0
  42. package/dist/type-guards.js +7 -0
  43. package/dist/workers/transform-child.d.ts +1 -0
  44. package/dist/workers/transform-child.js +118 -0
  45. package/dist/workers/transform-worker.js +87 -78
  46. package/package.json +14 -6
@@ -1,331 +1,340 @@
1
- /**
2
- * Language detection for code blocks.
3
- * Detects programming languages from code content and HTML attributes.
4
- */
5
- function createCodeSample(code) {
6
- return {
7
- code,
8
- lower: code.toLowerCase(),
9
- lines: code.split('\n'),
10
- trimmedStart: code.trimStart(),
11
- };
12
- }
13
- /* -------------------------------------------------------------------------------------------------
14
- * Word boundary matcher (cached)
15
- * ------------------------------------------------------------------------------------------------- */
16
- class WordBoundaryMatcher {
17
- cache = new Map();
18
- containsWord(source, word) {
19
- return this.getRegex(word).test(source);
1
+ // This module provides a heuristic-based language detection mechanism for code snippets.
2
+ class DetectionContext {
3
+ code;
4
+ _lower;
5
+ _lines;
6
+ _trimmedStart;
7
+ constructor(code) {
8
+ this.code = code;
20
9
  }
21
- getRegex(word) {
22
- const cached = this.cache.get(word);
23
- if (cached)
24
- return cached;
25
- // Keep behavior: compile `\b${word}\b` without escaping (words are controlled by patterns).
26
- const compiled = new RegExp(`\\b${word}\\b`);
27
- this.cache.set(word, compiled);
28
- return compiled;
10
+ get lower() {
11
+ this._lower ??= this.code.toLowerCase();
12
+ return this._lower;
29
13
  }
30
- }
31
- const wordMatcher = new WordBoundaryMatcher();
32
- /* -------------------------------------------------------------------------------------------------
33
- * Attribute-based language resolution
34
- * ------------------------------------------------------------------------------------------------- */
35
- class LanguageAttributeResolver {
36
- resolve(className, dataLang) {
37
- const classMatch = this.extractFromClassName(className);
38
- return classMatch ?? this.resolveFromDataAttribute(dataLang);
14
+ get lines() {
15
+ this._lines ??= this.code.split(/\r?\n/);
16
+ return this._lines;
39
17
  }
40
- /**
41
- * Extract language from class name (e.g., "language-typescript", "lang-js", "hljs javascript").
42
- * Note: preserves current behavior by returning the sliced original token casing.
43
- */
44
- extractFromClassName(className) {
45
- const tokens = className.match(/\S+/g);
46
- if (!tokens)
47
- return undefined;
48
- for (const token of tokens) {
49
- const lower = token.toLowerCase();
50
- if (lower.startsWith('language-'))
51
- return token.slice('language-'.length);
52
- if (lower.startsWith('lang-'))
53
- return token.slice('lang-'.length);
54
- if (lower.startsWith('highlight-'))
55
- return token.slice('highlight-'.length);
18
+ get trimmedStart() {
19
+ this._trimmedStart ??= this.code.trimStart();
20
+ return this._trimmedStart;
21
+ }
22
+ }
23
+ const BASH_COMMANDS = new Set([
24
+ 'sudo',
25
+ 'chmod',
26
+ 'mkdir',
27
+ 'cd',
28
+ 'ls',
29
+ 'cat',
30
+ 'echo',
31
+ ]);
32
+ const BASH_PACKAGE_MANAGERS = [
33
+ 'npm',
34
+ 'yarn',
35
+ 'pnpm',
36
+ 'npx',
37
+ 'brew',
38
+ 'apt',
39
+ 'pip',
40
+ 'cargo',
41
+ 'go',
42
+ ];
43
+ const BASH_VERBS = new Set(['install', 'add', 'run', 'build', 'start']);
44
+ const TYPESCRIPT_HINTS = [
45
+ ': string',
46
+ ':string',
47
+ ': number',
48
+ ':number',
49
+ ': boolean',
50
+ ':boolean',
51
+ ': void',
52
+ ':void',
53
+ ': any',
54
+ ':any',
55
+ ': unknown',
56
+ ':unknown',
57
+ ': never',
58
+ ':never',
59
+ ];
60
+ const HTML_TAGS = [
61
+ '<!doctype',
62
+ '<html',
63
+ '<head',
64
+ '<body',
65
+ '<div',
66
+ '<span',
67
+ '<p',
68
+ '<a',
69
+ '<script',
70
+ '<style',
71
+ ];
72
+ const RUST_REGEX = /\b(?:fn|impl|struct|enum)\b/;
73
+ const JS_REGEX = /\b(?:const|let|var|function|class|async|await|export|import)\b/;
74
+ const PYTHON_REGEX = /\b(?:def|class|import|from)\b/;
75
+ const CSS_REGEX = /@media|@import|@keyframes/;
76
+ function containsJsxTag(code) {
77
+ const len = code.length;
78
+ for (let i = 0; i < len - 1; i++) {
79
+ if (code.charCodeAt(i) === 60 /* < */) {
80
+ const next = code.charCodeAt(i + 1);
81
+ if (next >= 65 && next <= 90)
82
+ return true; // A-Z
56
83
  }
57
- if (tokens.includes('hljs')) {
58
- const langClass = tokens.find((t) => t !== 'hljs' && !t.startsWith('hljs-'));
59
- if (langClass)
60
- return langClass;
84
+ }
85
+ return false;
86
+ }
87
+ function isBashLine(line) {
88
+ const trimmed = line.trimStart();
89
+ if (trimmed.length === 0)
90
+ return false;
91
+ // Shell Prefix
92
+ if (trimmed.startsWith('#!') ||
93
+ trimmed.startsWith('$ ') ||
94
+ trimmed.startsWith('# ')) {
95
+ return true;
96
+ }
97
+ const spaceIdx = trimmed.indexOf(' ');
98
+ const firstWord = spaceIdx === -1 ? trimmed : trimmed.slice(0, spaceIdx);
99
+ if (BASH_COMMANDS.has(firstWord))
100
+ return true;
101
+ // Package Managers
102
+ let isPkgMgr = false;
103
+ for (const mgr of BASH_PACKAGE_MANAGERS) {
104
+ if (firstWord === mgr) {
105
+ isPkgMgr = true;
106
+ break;
61
107
  }
62
- return undefined;
63
108
  }
64
- /**
65
- * Resolve language from data-language attribute.
66
- * Only allows word characters (alphanumeric + underscore).
67
- */
68
- resolveFromDataAttribute(dataLang) {
69
- const trimmed = dataLang.trim();
70
- if (!trimmed)
71
- return undefined;
72
- return /^\w+$/.test(trimmed) ? trimmed : undefined;
109
+ if (isPkgMgr && spaceIdx !== -1) {
110
+ const rest = trimmed.slice(spaceIdx + 1);
111
+ const secondSpaceIdx = rest.indexOf(' ');
112
+ const secondWord = secondSpaceIdx === -1 ? rest : rest.slice(0, secondSpaceIdx);
113
+ if (BASH_VERBS.has(secondWord))
114
+ return true;
73
115
  }
116
+ return false;
74
117
  }
75
- const attributeResolver = new LanguageAttributeResolver();
76
- /* -------------------------------------------------------------------------------------------------
77
- * Heuristics
78
- * ------------------------------------------------------------------------------------------------- */
79
- const Heuristics = {
80
- containsJsxTag(code) {
81
- // Preserve original behavior (scan for `<` followed by A-Z).
82
- for (let i = 0; i < code.length - 1; i += 1) {
83
- if (code[i] !== '<')
84
- continue;
85
- const next = code[i + 1];
86
- if (!next)
87
- continue;
88
- if (next >= 'A' && next <= 'Z')
89
- return true;
118
+ function detectBashIndicators(lines) {
119
+ for (const line of lines) {
120
+ if (isBashLine(line))
121
+ return true;
122
+ }
123
+ return false;
124
+ }
125
+ function detectCssStructure(lines) {
126
+ for (const line of lines) {
127
+ const trimmed = line.trimStart();
128
+ if (trimmed.length === 0)
129
+ continue;
130
+ const hasSelector = (trimmed.startsWith('.') || trimmed.startsWith('#')) &&
131
+ trimmed.includes('{');
132
+ if (hasSelector || (trimmed.includes(':') && trimmed.includes(';'))) {
133
+ return true;
90
134
  }
91
- return false;
92
- },
93
- bash: {
94
- commands: ['sudo', 'chmod', 'mkdir', 'cd', 'ls', 'cat', 'echo'],
95
- pkgManagers: [
96
- 'npm',
97
- 'yarn',
98
- 'pnpm',
99
- 'npx',
100
- 'brew',
101
- 'apt',
102
- 'pip',
103
- 'cargo',
104
- 'go',
105
- ],
106
- verbs: ['install', 'add', 'run', 'build', 'start'],
107
- isShellPrefix(line) {
108
- return (line.startsWith('#!') || line.startsWith('$ ') || line.startsWith('# '));
109
- },
110
- matchesCommand(line) {
111
- return Heuristics.bash.commands.some((cmd) => line === cmd || line.startsWith(`${cmd} `));
112
- },
113
- matchesPackageManagerVerb(line) {
114
- for (const mgr of Heuristics.bash.pkgManagers) {
115
- if (!line.startsWith(`${mgr} `))
116
- continue;
117
- const rest = line.slice(mgr.length + 1);
118
- if (Heuristics.bash.verbs.some((v) => rest === v || rest.startsWith(`${v} `))) {
119
- return true;
120
- }
121
- }
122
- return false;
123
- },
124
- detectIndicators(lines) {
125
- for (const line of lines) {
126
- const trimmed = line.trimStart();
127
- if (trimmed &&
128
- (Heuristics.bash.isShellPrefix(trimmed) ||
129
- Heuristics.bash.matchesCommand(trimmed) ||
130
- Heuristics.bash.matchesPackageManagerVerb(trimmed))) {
131
- return true;
132
- }
133
- }
134
- return false;
135
- },
136
- },
137
- css: {
138
- detectStructure(lines) {
139
- for (const line of lines) {
140
- const trimmed = line.trimStart();
141
- if (!trimmed)
142
- continue;
143
- const hasSelector = (trimmed.startsWith('.') || trimmed.startsWith('#')) &&
144
- trimmed.includes('{');
145
- if (hasSelector || (trimmed.includes(':') && trimmed.includes(';'))) {
146
- return true;
147
- }
148
- }
149
- return false;
150
- },
151
- },
152
- yaml: {
153
- detectStructure(lines) {
154
- for (const line of lines) {
155
- const trimmed = line.trim();
156
- if (!trimmed)
157
- continue;
158
- const colonIdx = trimmed.indexOf(':');
159
- if (colonIdx > 0) {
160
- const after = trimmed[colonIdx + 1];
161
- if (after === ' ' || after === '\t')
162
- return true;
163
- }
164
- }
165
- return false;
166
- },
167
- },
168
- };
169
- /* -------------------------------------------------------------------------------------------------
170
- * Pattern engine
171
- * ------------------------------------------------------------------------------------------------- */
172
- const LANGUAGE_PATTERNS = [
135
+ }
136
+ return false;
137
+ }
138
+ function detectYamlStructure(lines) {
139
+ for (const line of lines) {
140
+ const trimmed = line.trim();
141
+ if (trimmed.length === 0)
142
+ continue;
143
+ const colonIdx = trimmed.indexOf(':');
144
+ if (colonIdx <= 0)
145
+ continue;
146
+ const after = trimmed.charCodeAt(colonIdx + 1);
147
+ // space (32) or tab (9)
148
+ if (after === 32 || after === 9)
149
+ return true;
150
+ }
151
+ return false;
152
+ }
153
+ const LANGUAGES = [
173
154
  {
174
- language: 'jsx',
175
- pattern: {
176
- keywords: ['classname=', 'jsx:', "from 'react'", 'from "react"'],
177
- custom: (code) => Heuristics.containsJsxTag(code),
155
+ lang: 'rust',
156
+ weight: 25,
157
+ match: (ctx) => {
158
+ if (ctx.lower.includes('let mut'))
159
+ return true;
160
+ if (RUST_REGEX.test(ctx.lower))
161
+ return true;
162
+ return ctx.lower.includes('use ') && ctx.lower.includes('::');
178
163
  },
179
164
  },
180
165
  {
181
- language: 'typescript',
182
- pattern: {
183
- wordBoundary: ['interface', 'type'],
184
- custom: (_code, lower) => [
185
- ': string',
186
- ':string',
187
- ': number',
188
- ':number',
189
- ': boolean',
190
- ':boolean',
191
- ': void',
192
- ':void',
193
- ': any',
194
- ':any',
195
- ': unknown',
196
- ':unknown',
197
- ': never',
198
- ':never',
199
- ].some((hint) => lower.includes(hint)),
166
+ lang: 'go',
167
+ weight: 22,
168
+ match: (ctx) => {
169
+ if (ctx.lower.includes('import "'))
170
+ return true;
171
+ return /\b(?:package|func)\b/.test(ctx.lower);
200
172
  },
201
173
  },
202
174
  {
203
- language: 'rust',
204
- pattern: {
205
- regex: /\b(?:fn|impl|struct|enum)\b/,
206
- keywords: ['let mut'],
207
- custom: (_code, lower) => lower.includes('use ') && lower.includes('::'),
175
+ lang: 'jsx',
176
+ weight: 22,
177
+ match: (ctx) => {
178
+ const l = ctx.lower;
179
+ if (l.includes('classname=') ||
180
+ l.includes('jsx:') ||
181
+ l.includes("from 'react'") ||
182
+ l.includes('from "react"')) {
183
+ return true;
184
+ }
185
+ return containsJsxTag(ctx.code);
208
186
  },
209
187
  },
210
188
  {
211
- language: 'javascript',
212
- pattern: {
213
- regex: /\b(?:const|let|var|function|class|async|await|export|import)\b/,
189
+ lang: 'typescript',
190
+ weight: 20,
191
+ match: (ctx) => {
192
+ if (/\b(?:interface|type)\b/.test(ctx.lower))
193
+ return true;
194
+ const l = ctx.lower;
195
+ for (const hint of TYPESCRIPT_HINTS) {
196
+ if (l.includes(hint))
197
+ return true;
198
+ }
199
+ return false;
214
200
  },
215
201
  },
216
202
  {
217
- language: 'python',
218
- pattern: {
219
- regex: /\b(?:def|class|import|from)\b/,
220
- keywords: ['print(', '__name__'],
203
+ lang: 'sql',
204
+ weight: 20,
205
+ match: (ctx) => {
206
+ const l = ctx.lower;
207
+ return /\b(?:select|insert|update|delete|create|alter|drop)\b/.test(l);
221
208
  },
222
209
  },
223
210
  {
224
- language: 'bash',
225
- pattern: {
226
- custom: (_code, _lower, lines) => Heuristics.bash.detectIndicators(lines),
211
+ lang: 'python',
212
+ weight: 18,
213
+ match: (ctx) => {
214
+ const l = ctx.lower;
215
+ if (l.includes('print(') || l.includes('__name__'))
216
+ return true;
217
+ return PYTHON_REGEX.test(l);
227
218
  },
228
219
  },
229
220
  {
230
- language: 'css',
231
- pattern: {
232
- regex: /@media|@import|@keyframes/,
233
- custom: (_code, _lower, lines) => Heuristics.css.detectStructure(lines),
221
+ lang: 'css',
222
+ weight: 18,
223
+ match: (ctx) => {
224
+ if (CSS_REGEX.test(ctx.lower))
225
+ return true;
226
+ return detectCssStructure(ctx.lines);
234
227
  },
235
228
  },
236
229
  {
237
- language: 'html',
238
- pattern: {
239
- keywords: [
240
- '<!doctype',
241
- '<html',
242
- '<head',
243
- '<body',
244
- '<div',
245
- '<span',
246
- '<p',
247
- '<a',
248
- '<script',
249
- '<style',
250
- ],
251
- },
230
+ lang: 'bash',
231
+ weight: 15,
232
+ match: (ctx) => detectBashIndicators(ctx.lines),
252
233
  },
253
234
  {
254
- language: 'json',
255
- pattern: {
256
- startsWith: ['{', '['],
257
- },
235
+ lang: 'yaml',
236
+ weight: 15,
237
+ match: (ctx) => detectYamlStructure(ctx.lines),
258
238
  },
259
239
  {
260
- language: 'yaml',
261
- pattern: {
262
- custom: (_code, _lower, lines) => Heuristics.yaml.detectStructure(lines),
263
- },
240
+ lang: 'javascript',
241
+ weight: 12,
242
+ match: (ctx) => JS_REGEX.test(ctx.lower),
264
243
  },
265
244
  {
266
- language: 'sql',
267
- pattern: {
268
- wordBoundary: [
269
- 'select',
270
- 'insert',
271
- 'update',
272
- 'delete',
273
- 'create',
274
- 'alter',
275
- 'drop',
276
- ],
245
+ lang: 'html',
246
+ weight: 12,
247
+ match: (ctx) => {
248
+ const l = ctx.lower;
249
+ for (const tag of HTML_TAGS) {
250
+ if (l.includes(tag))
251
+ return true;
252
+ }
253
+ return false;
277
254
  },
278
255
  },
279
256
  {
280
- language: 'go',
281
- pattern: {
282
- wordBoundary: ['package', 'func'],
283
- keywords: ['import "'],
257
+ lang: 'json',
258
+ weight: 10,
259
+ match: (ctx) => {
260
+ const s = ctx.trimmedStart;
261
+ return s.startsWith('{') || s.startsWith('[');
284
262
  },
285
263
  },
286
264
  ];
287
- class PatternEngine {
288
- matches(sample, pattern) {
289
- if (pattern.keywords?.some((kw) => sample.lower.includes(kw)))
290
- return true;
291
- if (pattern.wordBoundary?.some((w) => wordMatcher.containsWord(sample.lower, w)))
292
- return true;
293
- if (pattern.regex?.test(sample.lower))
294
- return true;
295
- if (pattern.startsWith?.some((prefix) => sample.trimmedStart.startsWith(prefix))) {
296
- return true;
297
- }
298
- if (pattern.custom?.(sample.code, sample.lower, sample.lines))
299
- return true;
300
- return false;
265
+ export function extractLanguageFromClassName(className) {
266
+ if (!className)
267
+ return undefined;
268
+ // Split by whitespace and check for language indicators
269
+ const tokens = className.match(/\S+/g);
270
+ if (!tokens)
271
+ return undefined;
272
+ // Fast path: check for prefixes
273
+ for (const token of tokens) {
274
+ const lower = token.toLowerCase();
275
+ if (lower.startsWith('language-'))
276
+ return token.slice(9);
277
+ if (lower.startsWith('lang-'))
278
+ return token.slice(5);
279
+ if (lower.startsWith('highlight-'))
280
+ return token.slice(10);
301
281
  }
282
+ // Fallback: check for hljs context
283
+ if (!tokens.includes('hljs'))
284
+ return undefined;
285
+ const langClass = tokens.find((t) => {
286
+ const l = t.toLowerCase();
287
+ return l !== 'hljs' && !l.startsWith('hljs-');
288
+ });
289
+ return langClass;
302
290
  }
303
- class LanguageDetector {
304
- engine = new PatternEngine();
305
- detect(code) {
306
- const sample = createCodeSample(code);
307
- for (const { language, pattern } of LANGUAGE_PATTERNS) {
308
- if (this.engine.matches(sample, pattern))
309
- return language;
310
- }
291
+ function resolveLanguageFromDataAttribute(dataLang) {
292
+ const trimmed = dataLang.trim();
293
+ if (!trimmed)
311
294
  return undefined;
295
+ // Check if \w+
296
+ for (let i = 0; i < trimmed.length; i++) {
297
+ const c = trimmed.charCodeAt(i);
298
+ // valid: A-Z, a-z, 0-9, _
299
+ const isUpper = c >= 65 && c <= 90;
300
+ const isLower = c >= 97 && c <= 122;
301
+ const isDigit = c >= 48 && c <= 57;
302
+ const isUnder = c === 95;
303
+ if (!isUpper && !isLower && !isDigit && !isUnder) {
304
+ return undefined;
305
+ }
312
306
  }
307
+ return trimmed;
308
+ }
309
+ export function resolveLanguageFromAttributes(className, dataLang) {
310
+ return (extractLanguageFromClassName(className) ??
311
+ resolveLanguageFromDataAttribute(dataLang));
313
312
  }
314
- const detector = new LanguageDetector();
315
- /* -------------------------------------------------------------------------------------------------
316
- * Public API
317
- * ------------------------------------------------------------------------------------------------- */
318
- /**
319
- * Detect programming language from code content using heuristics.
320
- */
321
313
  export function detectLanguageFromCode(code) {
322
- if (!code || code.trim().length === 0)
314
+ if (!code)
323
315
  return undefined;
324
- return detector.detect(code);
325
- }
326
- /**
327
- * Resolve language from HTML attributes (class name and data-language).
328
- */
329
- export function resolveLanguageFromAttributes(className, dataLang) {
330
- return attributeResolver.resolve(className, dataLang);
316
+ // Fast path for empty/whitespace only
317
+ let empty = true;
318
+ for (let i = 0; i < code.length; i++) {
319
+ if (code.charCodeAt(i) > 32) {
320
+ empty = false;
321
+ break;
322
+ }
323
+ }
324
+ if (empty)
325
+ return undefined;
326
+ const ctx = new DetectionContext(code);
327
+ let bestLang;
328
+ let bestScore = -1;
329
+ for (const def of LANGUAGES) {
330
+ if (def.match(ctx)) {
331
+ if (def.weight > bestScore) {
332
+ bestScore = def.weight;
333
+ bestLang = def.lang;
334
+ if (bestScore >= 25)
335
+ break;
336
+ }
337
+ }
338
+ }
339
+ return bestLang;
331
340
  }
@@ -3,5 +3,4 @@ export declare function cleanupMarkdownArtifacts(content: string): string;
3
3
  export declare function extractTitleFromRawMarkdown(content: string): string | undefined;
4
4
  export declare function addSourceToMarkdown(content: string, url: string): string;
5
5
  export declare function isRawTextContent(content: string): boolean;
6
- export declare function isLikelyHtmlContent(content: string): boolean;
7
6
  export declare function buildMetadataFooter(metadata?: MetadataBlock, fallbackUrl?: string): string;