npm - @j0hanz/superfetch - Versions diffs - 2.4.1 → 2.4.2 - Mend

@j0hanz/superfetch 2.4.1 → 2.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/dist/cache.d.ts +1 -1
package/dist/cache.js +25 -6
package/dist/dom-noise-removal.d.ts +0 -4
package/dist/dom-noise-removal.js +42 -72
package/dist/language-detection.js +13 -12
package/dist/mcp.js +3 -2
package/dist/tools.d.ts +5 -9
package/package.json +1 -1

package/dist/cache.d.ts CHANGED Viewed

@@ -36,7 +36,7 @@ export declare function get(cacheKey: string | null): CacheEntry | undefined;
 export declare function set(cacheKey: string | null, content: string, metadata: CacheEntryMetadata): void;
 export declare function keys(): readonly string[];
 export declare function isEnabled(): boolean;
-export declare function registerCachedContentResource(server: McpServer): void;
+export declare function registerCachedContentResource(server: McpServer, serverIcon?: string): void;
 export declare function generateSafeFilename(url: string, title?: string, hashFallback?: string, extension?: string): string;
 export declare function handleDownload(res: ServerResponse, namespace: string, hash: string): void;
 export {};

package/dist/cache.js CHANGED Viewed

@@ -112,6 +112,7 @@ class NativeLruCache {
     max;
     ttlMs;
     entries = new Map();
+    nextPurgeAtMs = 0;
     constructor({ max, ttlMs }) {
         this.max = max;
         this.ttlMs = ttlMs;
@@ -132,12 +133,13 @@ class NativeLruCache {
     set(key, value) {
         if (this.max <= 0 || this.ttlMs <= 0)
             return;
+        const now = Date.now();
         this.entries.delete(key);
         this.entries.set(key, {
             value,
-            expiresAtMs: Date.now() + this.ttlMs,
+            expiresAtMs: now + this.ttlMs,
         });
-        this.purgeExpired(Date.now());
+        this.maybePurge(now);
         while (this.entries.size > this.max) {
             const oldestKey = this.entries.keys().next().value;
             if (oldestKey === undefined)
@@ -146,9 +148,15 @@ class NativeLruCache {
         }
     }
     keys() {
-        this.purgeExpired(Date.now());
+        this.maybePurge(Date.now());
         return [...this.entries.keys()];
     }
+    maybePurge(now) {
+        if (this.entries.size > this.max || now >= this.nextPurgeAtMs) {
+            this.purgeExpired(now);
+            this.nextPurgeAtMs = now + this.ttlMs;
+        }
+    }
     purgeExpired(now) {
         for (const [key, entry] of this.entries) {
             if (this.isExpired(entry, now)) {
@@ -387,23 +395,34 @@ function notifyResourceUpdate(server, uri, subscriptions) {
         });
     });
 }
-export function registerCachedContentResource(server) {
+export function registerCachedContentResource(server, serverIcon) {
     const isInitialized = attachInitializedGate(server);
     const subscriptions = registerResourceSubscriptionHandlers(server);
-    registerCacheContentResource(server);
+    registerCacheContentResource(server, serverIcon);
     registerCacheUpdateSubscription(server, subscriptions, isInitialized);
 }
 function buildCachedContentResponse(uri, cacheKey) {
     const cached = requireCacheEntry(cacheKey);
     return buildMarkdownContentResponse(uri, cached.content);
 }
-function registerCacheContentResource(server) {
+function registerCacheContentResource(server, serverIcon) {
     server.registerResource('cached-content', new ResourceTemplate('superfetch://cache/{namespace}/{urlHash}', {
         list: listCachedResources,
     }), {
         title: 'Cached Content',
         description: 'Access previously fetched web content from cache. Namespace: markdown. UrlHash: SHA-256 hash of the URL.',
         mimeType: 'text/markdown',
+        ...(serverIcon
+            ? {
+                icons: [
+                    {
+                        src: serverIcon,
+                        mimeType: 'image/svg+xml',
+                        sizes: ['any'],
+                    },
+                ],
+            }
+            : {}),
     }, (uri, params) => {
         const { namespace, urlHash } = resolveCacheParams(params);
         const cacheKey = `${namespace}:${urlHash}`;

package/dist/dom-noise-removal.d.ts CHANGED Viewed

@@ -1,5 +1 @@
-/**
- * Remove noise elements from HTML and resolve relative URLs.
- * Used as a preprocessing step before markdown conversion.
- */
 export declare function removeNoiseFromHtml(html: string, document?: Document, baseUrl?: string): string;

package/dist/dom-noise-removal.js CHANGED Viewed

@@ -331,7 +331,6 @@ function removeNoiseNodes(nodes, shouldCheckNoise = true) {
         removeNoiseFromNodeListLike(nodes, shouldCheckNoise);
         return;
     }
-    // Generic iterable: copy to avoid iteration issues while removing.
     const nodeList = Array.from(nodes);
     for (const node of nodeList) {
         if (isElement(node) && (!shouldCheckNoise || isNoiseElement(node))) {
@@ -340,21 +339,15 @@ function removeNoiseNodes(nodes, shouldCheckNoise = true) {
     }
 }
 function stripNoiseNodes(document) {
-    // Pass 1: Trusted selectors (Common noise)
-    // We trust these selectors match actual noise, so we skip the expensive isNoiseElement check
-    // Add user-configured extra selectors
     const targetSelectors = buildNoiseSelector(config.noiseRemoval.extraSelectors);
     const potentialNoiseNodes = document.querySelectorAll(targetSelectors);
     removeNoiseNodes(potentialNoiseNodes, false);
-    // Second pass: check remaining elements for noise patterns (promo, fixed positioning, etc.)
     const allElements = document.querySelectorAll(CANDIDATE_NOISE_SELECTOR);
     removeNoiseNodes(allElements, true);
 }
 // ─────────────────────────────────────────────────────────────────────────────
 // URL Resolution
 // ─────────────────────────────────────────────────────────────────────────────
-// Protocol patterns to skip during URL resolution (fragment, mailto, tel, blob, data, javascript)
-// JavaScript protocol is detected to skip it for XSS prevention, not to evaluate it
 const SKIP_URL_PREFIXES = [
     '#',
     'java' + 'script:',
@@ -363,11 +356,6 @@ const SKIP_URL_PREFIXES = [
     'data:',
     'blob:',
 ];
-/**
- * Check if a URL scheme should be skipped during resolution.
- * These schemes are either fragment-only (#), protocol handlers (mailto, tel),
- * inline data (data, blob), or javascript: which we skip to avoid XSS.
- */
 function shouldSkipUrlResolution(url) {
     const normalized = url.trim().toLowerCase();
     return SKIP_URL_PREFIXES.some((prefix) => normalized.startsWith(prefix));
@@ -383,79 +371,61 @@ function tryResolveUrl(relativeUrl, baseUrl) {
         return null;
     }
 }
-/**
- * Resolve anchor hrefs to absolute URLs.
- */
-function resolveAnchorUrls(document, baseUrl) {
-    for (const anchor of document.querySelectorAll('a[href]')) {
-        const href = anchor.getAttribute('href');
-        if (href && !shouldSkipUrlResolution(href)) {
-            const resolved = tryResolveUrl(href, baseUrl);
-            if (resolved)
-                anchor.setAttribute('href', resolved);
-        }
+function resolveAnchorElement(element, base) {
+    const href = element.getAttribute('href');
+    if (href && !shouldSkipUrlResolution(href)) {
+        const resolved = tryResolveUrl(href, base);
+        if (resolved)
+            element.setAttribute('href', resolved);
     }
 }
-/**
- * Resolve image srcs to absolute URLs.
- */
-function resolveImageUrls(document, baseUrl) {
-    for (const img of document.querySelectorAll('img[src]')) {
-        const src = img.getAttribute('src');
-        if (src && !shouldSkipUrlResolution(src)) {
-            const resolved = tryResolveUrl(src, baseUrl);
-            if (resolved)
-                img.setAttribute('src', resolved);
-        }
+function resolveImageElement(element, base) {
+    const src = element.getAttribute('src');
+    if (src && !shouldSkipUrlResolution(src)) {
+        const resolved = tryResolveUrl(src, base);
+        if (resolved)
+            element.setAttribute('src', resolved);
     }
 }
-/**
- * Resolve source srcset to absolute URLs (for picture elements).
- */
-function resolveSrcsetUrls(document, baseUrl) {
-    for (const source of document.querySelectorAll('source[srcset]')) {
-        const srcset = source.getAttribute('srcset');
-        if (!srcset)
-            continue;
-        // srcset can have multiple URLs with descriptors like "url 1x, url 2x"
-        const resolved = srcset
-            .split(',')
-            .map((entry) => {
-            const parts = entry.trim().split(/\s+/);
-            const url = parts[0];
-            if (url) {
-                const resolvedUrl = tryResolveUrl(url, baseUrl);
-                if (resolvedUrl)
-                    parts[0] = resolvedUrl;
-            }
-            return parts.join(' ');
-        })
-            .join(', ');
-        source.setAttribute('srcset', resolved);
-    }
+function resolveSourceElement(element, base) {
+    const srcset = element.getAttribute('srcset');
+    if (!srcset)
+        return;
+    const resolved = srcset
+        .split(',')
+        .map((entry) => {
+        const parts = entry.trim().split(/\s+/);
+        const url = parts[0];
+        if (url) {
+            const resolvedUrl = tryResolveUrl(url, base);
+            if (resolvedUrl)
+                parts[0] = resolvedUrl;
+        }
+        return parts.join(' ');
+    })
+        .join(', ');
+    element.setAttribute('srcset', resolved);
 }
-/**
- * Resolve relative URLs in anchor and image elements to absolute URLs.
- * Fixes broken links/images in markdown output when the source uses relative paths.
- */
 function resolveRelativeUrls(document, baseUrl) {
     try {
         const base = new URL(baseUrl);
-        resolveAnchorUrls(document, base);
-        resolveImageUrls(document, base);
-        resolveSrcsetUrls(document, base);
+        for (const element of document.querySelectorAll('a[href], img[src], source[srcset]')) {
+            const tag = element.tagName.toLowerCase();
+            if (tag === 'a') {
+                resolveAnchorElement(element, base);
+            }
+            else if (tag === 'img') {
+                resolveImageElement(element, base);
+            }
+            else if (tag === 'source') {
+                resolveSourceElement(element, base);
+            }
+        }
     }
     catch {
         /* invalid base URL - skip resolution */
     }
 }
-// ─────────────────────────────────────────────────────────────────────────────
-// Main Export
-// ─────────────────────────────────────────────────────────────────────────────
-/**
- * Remove noise elements from HTML and resolve relative URLs.
- * Used as a preprocessing step before markdown conversion.
- */
 export function removeNoiseFromHtml(html, document, baseUrl) {
     const shouldParse = isFullDocumentHtml(html) || mayContainNoise(html);
     if (!shouldParse)

package/dist/language-detection.js CHANGED Viewed

@@ -98,8 +98,8 @@ function matchesPackageManagerVerb(line) {
     }
     return false;
 }
-function detectBashIndicators(code) {
-    for (const line of code.split('\n')) {
+function detectBashIndicators(lines) {
+    for (const line of lines) {
         const trimmed = line.trimStart();
         if (trimmed &&
             (isShellPrefix(trimmed) ||
@@ -110,8 +110,8 @@ function detectBashIndicators(code) {
     }
     return false;
 }
-function detectCssStructure(code) {
-    for (const line of code.split('\n')) {
+function detectCssStructure(lines) {
+    for (const line of lines) {
         const trimmed = line.trimStart();
         if (!trimmed)
             continue;
@@ -123,8 +123,8 @@ function detectCssStructure(code) {
     }
     return false;
 }
-function detectYamlStructure(code) {
-    for (const line of code.split('\n')) {
+function detectYamlStructure(lines) {
+    for (const line of lines) {
         const trimmed = line.trim();
         if (!trimmed)
             continue;
@@ -194,14 +194,14 @@ const LANGUAGE_PATTERNS = [
     {
         language: 'bash',
         pattern: {
-            custom: (code) => detectBashIndicators(code),
+            custom: (_code, _lower, lines) => detectBashIndicators(lines),
         },
     },
     {
         language: 'css',
         pattern: {
             regex: /@media|@import|@keyframes/,
-            custom: (code) => detectCssStructure(code),
+            custom: (_code, _lower, lines) => detectCssStructure(lines),
         },
     },
     {
@@ -230,7 +230,7 @@ const LANGUAGE_PATTERNS = [
     {
         language: 'yaml',
         pattern: {
-            custom: (code) => detectYamlStructure(code),
+            custom: (_code, _lower, lines) => detectYamlStructure(lines),
         },
     },
     {
@@ -255,7 +255,7 @@ const LANGUAGE_PATTERNS = [
         },
     },
 ];
-function matchesLanguagePattern(code, lower, pattern) {
+function matchesLanguagePattern(code, lower, lines, pattern) {
     if (pattern.keywords?.some((kw) => lower.includes(kw)))
         return true;
     if (pattern.wordBoundary?.some((w) => containsWord(lower, w)))
@@ -267,7 +267,7 @@ function matchesLanguagePattern(code, lower, pattern) {
         if (pattern.startsWith.some((prefix) => trimmed.startsWith(prefix)))
             return true;
     }
-    if (pattern.custom?.(code, lower))
+    if (pattern.custom?.(code, lower, lines))
         return true;
     return false;
 }
@@ -276,8 +276,9 @@ function matchesLanguagePattern(code, lower, pattern) {
  */
 export function detectLanguageFromCode(code) {
     const lower = code.toLowerCase();
+    const lines = code.split('\n');
     for (const { language, pattern } of LANGUAGE_PATTERNS) {
-        if (matchesLanguagePattern(code, lower, pattern))
+        if (matchesLanguagePattern(code, lower, lines, pattern))
             return language;
     }
     return undefined;

package/dist/mcp.js CHANGED Viewed

@@ -69,8 +69,9 @@ export function createMcpServer() {
         instructions,
     });
     setMcpServer(server);
-    registerTools(server, getLocalIconData());
-    registerCachedContentResource(server);
+    const localIcon = getLocalIconData();
+    registerTools(server, localIcon);
+    registerCachedContentResource(server, localIcon);
     registerInstructionsResource(server, instructions);
     return server;
 }

package/dist/tools.d.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
+import type { CallToolResult, ContentBlock } from '@modelcontextprotocol/sdk/types.js';
 import type { MarkdownTransformResult } from './transform-types.js';
 export interface FetchUrlInput {
     url: string;
@@ -23,20 +24,15 @@ export interface ToolContentResourceBlock {
         text: string;
     };
 }
-export type ToolContentBlockUnion = ToolContentBlock | ToolContentResourceLinkBlock | ToolContentResourceBlock;
-export interface ToolErrorResponse {
-    content: ToolContentBlockUnion[];
+export type ToolContentBlockUnion = ContentBlock;
+export type ToolErrorResponse = CallToolResult & {
     structuredContent: {
         error: string;
         url: string;
     };
     isError: true;
-}
-export interface ToolResponseBase {
-    content: ToolContentBlockUnion[];
-    structuredContent?: Record<string, unknown>;
-    isError?: boolean;
-}
+};
+export type ToolResponseBase = CallToolResult;
 export interface FetchPipelineOptions<T> {
     /** URL to fetch */
     url: string;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@j0hanz/superfetch",
-  "version": "2.4.1",
+  "version": "2.4.2",
   "mcpName": "io.github.j0hanz/superfetch",
   "description": "Intelligent web content fetcher MCP server that converts HTML to clean, AI-readable Markdown",
   "type": "module",