@j0hanz/superfetch 2.4.1 → 2.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cache.d.ts CHANGED
@@ -36,7 +36,7 @@ export declare function get(cacheKey: string | null): CacheEntry | undefined;
36
36
  export declare function set(cacheKey: string | null, content: string, metadata: CacheEntryMetadata): void;
37
37
  export declare function keys(): readonly string[];
38
38
  export declare function isEnabled(): boolean;
39
- export declare function registerCachedContentResource(server: McpServer): void;
39
+ export declare function registerCachedContentResource(server: McpServer, serverIcon?: string): void;
40
40
  export declare function generateSafeFilename(url: string, title?: string, hashFallback?: string, extension?: string): string;
41
41
  export declare function handleDownload(res: ServerResponse, namespace: string, hash: string): void;
42
42
  export {};
package/dist/cache.js CHANGED
@@ -112,6 +112,7 @@ class NativeLruCache {
112
112
  max;
113
113
  ttlMs;
114
114
  entries = new Map();
115
+ nextPurgeAtMs = 0;
115
116
  constructor({ max, ttlMs }) {
116
117
  this.max = max;
117
118
  this.ttlMs = ttlMs;
@@ -132,12 +133,13 @@ class NativeLruCache {
132
133
  set(key, value) {
133
134
  if (this.max <= 0 || this.ttlMs <= 0)
134
135
  return;
136
+ const now = Date.now();
135
137
  this.entries.delete(key);
136
138
  this.entries.set(key, {
137
139
  value,
138
- expiresAtMs: Date.now() + this.ttlMs,
140
+ expiresAtMs: now + this.ttlMs,
139
141
  });
140
- this.purgeExpired(Date.now());
142
+ this.maybePurge(now);
141
143
  while (this.entries.size > this.max) {
142
144
  const oldestKey = this.entries.keys().next().value;
143
145
  if (oldestKey === undefined)
@@ -146,9 +148,15 @@ class NativeLruCache {
146
148
  }
147
149
  }
148
150
  keys() {
149
- this.purgeExpired(Date.now());
151
+ this.maybePurge(Date.now());
150
152
  return [...this.entries.keys()];
151
153
  }
154
+ maybePurge(now) {
155
+ if (this.entries.size > this.max || now >= this.nextPurgeAtMs) {
156
+ this.purgeExpired(now);
157
+ this.nextPurgeAtMs = now + this.ttlMs;
158
+ }
159
+ }
152
160
  purgeExpired(now) {
153
161
  for (const [key, entry] of this.entries) {
154
162
  if (this.isExpired(entry, now)) {
@@ -387,23 +395,34 @@ function notifyResourceUpdate(server, uri, subscriptions) {
387
395
  });
388
396
  });
389
397
  }
390
- export function registerCachedContentResource(server) {
398
+ export function registerCachedContentResource(server, serverIcon) {
391
399
  const isInitialized = attachInitializedGate(server);
392
400
  const subscriptions = registerResourceSubscriptionHandlers(server);
393
- registerCacheContentResource(server);
401
+ registerCacheContentResource(server, serverIcon);
394
402
  registerCacheUpdateSubscription(server, subscriptions, isInitialized);
395
403
  }
396
404
  function buildCachedContentResponse(uri, cacheKey) {
397
405
  const cached = requireCacheEntry(cacheKey);
398
406
  return buildMarkdownContentResponse(uri, cached.content);
399
407
  }
400
- function registerCacheContentResource(server) {
408
+ function registerCacheContentResource(server, serverIcon) {
401
409
  server.registerResource('cached-content', new ResourceTemplate('superfetch://cache/{namespace}/{urlHash}', {
402
410
  list: listCachedResources,
403
411
  }), {
404
412
  title: 'Cached Content',
405
413
  description: 'Access previously fetched web content from cache. Namespace: markdown. UrlHash: SHA-256 hash of the URL.',
406
414
  mimeType: 'text/markdown',
415
+ ...(serverIcon
416
+ ? {
417
+ icons: [
418
+ {
419
+ src: serverIcon,
420
+ mimeType: 'image/svg+xml',
421
+ sizes: ['any'],
422
+ },
423
+ ],
424
+ }
425
+ : {}),
407
426
  }, (uri, params) => {
408
427
  const { namespace, urlHash } = resolveCacheParams(params);
409
428
  const cacheKey = `${namespace}:${urlHash}`;
@@ -1,5 +1 @@
1
- /**
2
- * Remove noise elements from HTML and resolve relative URLs.
3
- * Used as a preprocessing step before markdown conversion.
4
- */
5
1
  export declare function removeNoiseFromHtml(html: string, document?: Document, baseUrl?: string): string;
@@ -331,7 +331,6 @@ function removeNoiseNodes(nodes, shouldCheckNoise = true) {
331
331
  removeNoiseFromNodeListLike(nodes, shouldCheckNoise);
332
332
  return;
333
333
  }
334
- // Generic iterable: copy to avoid iteration issues while removing.
335
334
  const nodeList = Array.from(nodes);
336
335
  for (const node of nodeList) {
337
336
  if (isElement(node) && (!shouldCheckNoise || isNoiseElement(node))) {
@@ -340,21 +339,15 @@ function removeNoiseNodes(nodes, shouldCheckNoise = true) {
340
339
  }
341
340
  }
342
341
  function stripNoiseNodes(document) {
343
- // Pass 1: Trusted selectors (Common noise)
344
- // We trust these selectors match actual noise, so we skip the expensive isNoiseElement check
345
- // Add user-configured extra selectors
346
342
  const targetSelectors = buildNoiseSelector(config.noiseRemoval.extraSelectors);
347
343
  const potentialNoiseNodes = document.querySelectorAll(targetSelectors);
348
344
  removeNoiseNodes(potentialNoiseNodes, false);
349
- // Second pass: check remaining elements for noise patterns (promo, fixed positioning, etc.)
350
345
  const allElements = document.querySelectorAll(CANDIDATE_NOISE_SELECTOR);
351
346
  removeNoiseNodes(allElements, true);
352
347
  }
353
348
  // ─────────────────────────────────────────────────────────────────────────────
354
349
  // URL Resolution
355
350
  // ─────────────────────────────────────────────────────────────────────────────
356
- // Protocol patterns to skip during URL resolution (fragment, mailto, tel, blob, data, javascript)
357
- // JavaScript protocol is detected to skip it for XSS prevention, not to evaluate it
358
351
  const SKIP_URL_PREFIXES = [
359
352
  '#',
360
353
  'java' + 'script:',
@@ -363,11 +356,6 @@ const SKIP_URL_PREFIXES = [
363
356
  'data:',
364
357
  'blob:',
365
358
  ];
366
- /**
367
- * Check if a URL scheme should be skipped during resolution.
368
- * These schemes are either fragment-only (#), protocol handlers (mailto, tel),
369
- * inline data (data, blob), or javascript: which we skip to avoid XSS.
370
- */
371
359
  function shouldSkipUrlResolution(url) {
372
360
  const normalized = url.trim().toLowerCase();
373
361
  return SKIP_URL_PREFIXES.some((prefix) => normalized.startsWith(prefix));
@@ -383,79 +371,61 @@ function tryResolveUrl(relativeUrl, baseUrl) {
383
371
  return null;
384
372
  }
385
373
  }
386
- /**
387
- * Resolve anchor hrefs to absolute URLs.
388
- */
389
- function resolveAnchorUrls(document, baseUrl) {
390
- for (const anchor of document.querySelectorAll('a[href]')) {
391
- const href = anchor.getAttribute('href');
392
- if (href && !shouldSkipUrlResolution(href)) {
393
- const resolved = tryResolveUrl(href, baseUrl);
394
- if (resolved)
395
- anchor.setAttribute('href', resolved);
396
- }
374
+ function resolveAnchorElement(element, base) {
375
+ const href = element.getAttribute('href');
376
+ if (href && !shouldSkipUrlResolution(href)) {
377
+ const resolved = tryResolveUrl(href, base);
378
+ if (resolved)
379
+ element.setAttribute('href', resolved);
397
380
  }
398
381
  }
399
- /**
400
- * Resolve image srcs to absolute URLs.
401
- */
402
- function resolveImageUrls(document, baseUrl) {
403
- for (const img of document.querySelectorAll('img[src]')) {
404
- const src = img.getAttribute('src');
405
- if (src && !shouldSkipUrlResolution(src)) {
406
- const resolved = tryResolveUrl(src, baseUrl);
407
- if (resolved)
408
- img.setAttribute('src', resolved);
409
- }
382
+ function resolveImageElement(element, base) {
383
+ const src = element.getAttribute('src');
384
+ if (src && !shouldSkipUrlResolution(src)) {
385
+ const resolved = tryResolveUrl(src, base);
386
+ if (resolved)
387
+ element.setAttribute('src', resolved);
410
388
  }
411
389
  }
412
- /**
413
- * Resolve source srcset to absolute URLs (for picture elements).
414
- */
415
- function resolveSrcsetUrls(document, baseUrl) {
416
- for (const source of document.querySelectorAll('source[srcset]')) {
417
- const srcset = source.getAttribute('srcset');
418
- if (!srcset)
419
- continue;
420
- // srcset can have multiple URLs with descriptors like "url 1x, url 2x"
421
- const resolved = srcset
422
- .split(',')
423
- .map((entry) => {
424
- const parts = entry.trim().split(/\s+/);
425
- const url = parts[0];
426
- if (url) {
427
- const resolvedUrl = tryResolveUrl(url, baseUrl);
428
- if (resolvedUrl)
429
- parts[0] = resolvedUrl;
430
- }
431
- return parts.join(' ');
432
- })
433
- .join(', ');
434
- source.setAttribute('srcset', resolved);
435
- }
390
+ function resolveSourceElement(element, base) {
391
+ const srcset = element.getAttribute('srcset');
392
+ if (!srcset)
393
+ return;
394
+ const resolved = srcset
395
+ .split(',')
396
+ .map((entry) => {
397
+ const parts = entry.trim().split(/\s+/);
398
+ const url = parts[0];
399
+ if (url) {
400
+ const resolvedUrl = tryResolveUrl(url, base);
401
+ if (resolvedUrl)
402
+ parts[0] = resolvedUrl;
403
+ }
404
+ return parts.join(' ');
405
+ })
406
+ .join(', ');
407
+ element.setAttribute('srcset', resolved);
436
408
  }
437
- /**
438
- * Resolve relative URLs in anchor and image elements to absolute URLs.
439
- * Fixes broken links/images in markdown output when the source uses relative paths.
440
- */
441
409
  function resolveRelativeUrls(document, baseUrl) {
442
410
  try {
443
411
  const base = new URL(baseUrl);
444
- resolveAnchorUrls(document, base);
445
- resolveImageUrls(document, base);
446
- resolveSrcsetUrls(document, base);
412
+ for (const element of document.querySelectorAll('a[href], img[src], source[srcset]')) {
413
+ const tag = element.tagName.toLowerCase();
414
+ if (tag === 'a') {
415
+ resolveAnchorElement(element, base);
416
+ }
417
+ else if (tag === 'img') {
418
+ resolveImageElement(element, base);
419
+ }
420
+ else if (tag === 'source') {
421
+ resolveSourceElement(element, base);
422
+ }
423
+ }
447
424
  }
448
425
  catch {
449
426
  /* invalid base URL - skip resolution */
450
427
  }
451
428
  }
452
- // ─────────────────────────────────────────────────────────────────────────────
453
- // Main Export
454
- // ─────────────────────────────────────────────────────────────────────────────
455
- /**
456
- * Remove noise elements from HTML and resolve relative URLs.
457
- * Used as a preprocessing step before markdown conversion.
458
- */
459
429
  export function removeNoiseFromHtml(html, document, baseUrl) {
460
430
  const shouldParse = isFullDocumentHtml(html) || mayContainNoise(html);
461
431
  if (!shouldParse)
@@ -98,8 +98,8 @@ function matchesPackageManagerVerb(line) {
98
98
  }
99
99
  return false;
100
100
  }
101
- function detectBashIndicators(code) {
102
- for (const line of code.split('\n')) {
101
+ function detectBashIndicators(lines) {
102
+ for (const line of lines) {
103
103
  const trimmed = line.trimStart();
104
104
  if (trimmed &&
105
105
  (isShellPrefix(trimmed) ||
@@ -110,8 +110,8 @@ function detectBashIndicators(code) {
110
110
  }
111
111
  return false;
112
112
  }
113
- function detectCssStructure(code) {
114
- for (const line of code.split('\n')) {
113
+ function detectCssStructure(lines) {
114
+ for (const line of lines) {
115
115
  const trimmed = line.trimStart();
116
116
  if (!trimmed)
117
117
  continue;
@@ -123,8 +123,8 @@ function detectCssStructure(code) {
123
123
  }
124
124
  return false;
125
125
  }
126
- function detectYamlStructure(code) {
127
- for (const line of code.split('\n')) {
126
+ function detectYamlStructure(lines) {
127
+ for (const line of lines) {
128
128
  const trimmed = line.trim();
129
129
  if (!trimmed)
130
130
  continue;
@@ -194,14 +194,14 @@ const LANGUAGE_PATTERNS = [
194
194
  {
195
195
  language: 'bash',
196
196
  pattern: {
197
- custom: (code) => detectBashIndicators(code),
197
+ custom: (_code, _lower, lines) => detectBashIndicators(lines),
198
198
  },
199
199
  },
200
200
  {
201
201
  language: 'css',
202
202
  pattern: {
203
203
  regex: /@media|@import|@keyframes/,
204
- custom: (code) => detectCssStructure(code),
204
+ custom: (_code, _lower, lines) => detectCssStructure(lines),
205
205
  },
206
206
  },
207
207
  {
@@ -230,7 +230,7 @@ const LANGUAGE_PATTERNS = [
230
230
  {
231
231
  language: 'yaml',
232
232
  pattern: {
233
- custom: (code) => detectYamlStructure(code),
233
+ custom: (_code, _lower, lines) => detectYamlStructure(lines),
234
234
  },
235
235
  },
236
236
  {
@@ -255,7 +255,7 @@ const LANGUAGE_PATTERNS = [
255
255
  },
256
256
  },
257
257
  ];
258
- function matchesLanguagePattern(code, lower, pattern) {
258
+ function matchesLanguagePattern(code, lower, lines, pattern) {
259
259
  if (pattern.keywords?.some((kw) => lower.includes(kw)))
260
260
  return true;
261
261
  if (pattern.wordBoundary?.some((w) => containsWord(lower, w)))
@@ -267,7 +267,7 @@ function matchesLanguagePattern(code, lower, pattern) {
267
267
  if (pattern.startsWith.some((prefix) => trimmed.startsWith(prefix)))
268
268
  return true;
269
269
  }
270
- if (pattern.custom?.(code, lower))
270
+ if (pattern.custom?.(code, lower, lines))
271
271
  return true;
272
272
  return false;
273
273
  }
@@ -276,8 +276,9 @@ function matchesLanguagePattern(code, lower, pattern) {
276
276
  */
277
277
  export function detectLanguageFromCode(code) {
278
278
  const lower = code.toLowerCase();
279
+ const lines = code.split('\n');
279
280
  for (const { language, pattern } of LANGUAGE_PATTERNS) {
280
- if (matchesLanguagePattern(code, lower, pattern))
281
+ if (matchesLanguagePattern(code, lower, lines, pattern))
281
282
  return language;
282
283
  }
283
284
  return undefined;
package/dist/mcp.js CHANGED
@@ -69,8 +69,9 @@ export function createMcpServer() {
69
69
  instructions,
70
70
  });
71
71
  setMcpServer(server);
72
- registerTools(server, getLocalIconData());
73
- registerCachedContentResource(server);
72
+ const localIcon = getLocalIconData();
73
+ registerTools(server, localIcon);
74
+ registerCachedContentResource(server, localIcon);
74
75
  registerInstructionsResource(server, instructions);
75
76
  return server;
76
77
  }
package/dist/tools.d.ts CHANGED
@@ -1,4 +1,5 @@
1
1
  import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
2
+ import type { CallToolResult, ContentBlock } from '@modelcontextprotocol/sdk/types.js';
2
3
  import type { MarkdownTransformResult } from './transform-types.js';
3
4
  export interface FetchUrlInput {
4
5
  url: string;
@@ -23,20 +24,15 @@ export interface ToolContentResourceBlock {
23
24
  text: string;
24
25
  };
25
26
  }
26
- export type ToolContentBlockUnion = ToolContentBlock | ToolContentResourceLinkBlock | ToolContentResourceBlock;
27
- export interface ToolErrorResponse {
28
- content: ToolContentBlockUnion[];
27
+ export type ToolContentBlockUnion = ContentBlock;
28
+ export type ToolErrorResponse = CallToolResult & {
29
29
  structuredContent: {
30
30
  error: string;
31
31
  url: string;
32
32
  };
33
33
  isError: true;
34
- }
35
- export interface ToolResponseBase {
36
- content: ToolContentBlockUnion[];
37
- structuredContent?: Record<string, unknown>;
38
- isError?: boolean;
39
- }
34
+ };
35
+ export type ToolResponseBase = CallToolResult;
40
36
  export interface FetchPipelineOptions<T> {
41
37
  /** URL to fetch */
42
38
  url: string;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@j0hanz/superfetch",
3
- "version": "2.4.1",
3
+ "version": "2.4.2",
4
4
  "mcpName": "io.github.j0hanz/superfetch",
5
5
  "description": "Intelligent web content fetcher MCP server that converts HTML to clean, AI-readable Markdown",
6
6
  "type": "module",