npm - portapack - Versions diffs - 0.3.0 → 0.3.1 - Mend

portapack 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

package/.github/workflows/ci.yml +5 -4
package/CHANGELOG.md +8 -0
package/README.md +8 -13
package/dist/cli/cli-entry.cjs +17 -38
package/dist/cli/cli-entry.cjs.map +1 -1
package/dist/index.js +17 -38
package/dist/index.js.map +1 -1
package/docs/.vitepress/config.ts +0 -1
package/docs/cli.md +14 -67
package/docs/configuration.md +101 -116
package/docs/getting-started.md +74 -44
package/package.json +1 -1
package/src/core/extractor.ts +295 -248
package/tests/unit/cli/cli.test.ts +1 -1
package/tests/unit/core/extractor.test.ts +412 -208
package/tests/unit/core/web-fetcher.test.ts +67 -67
package/tsconfig.jest.json +1 -0
package/docs/demo.md +0 -46

package/src/core/extractor.ts CHANGED Viewed

@@ -2,21 +2,19 @@
  * @file src/core/extractor.ts
  * @description Handles discovery, resolution, fetching, and optional embedding of assets
  * linked from HTML and recursively within CSS (@import, url()). This is the heart of finding EVERYTHING.
- * @version 1.1.4 - Added console logs for debugging path/URL resolution. Refined determineBaseUrl.
+ * @version 1.1.6 - Revised fetchAsset error handling logic for Axios errors.
  */
 // === Node.js Core Imports ===
 import { readFile } from 'fs/promises';
 import * as fs from 'fs'; // Required for statSync for sync directory check
-import type { FileHandle } from 'fs/promises';
+import type { FileHandle } from 'fs/promises'; // Import specific type if needed elsewhere
 import path from 'path';
 import { fileURLToPath, URL } from 'url'; // Crucial for file path/URL conversion
 // === External Dependencies ===
-// Using requireNamespace avoids potential ESM/CJS interop issues with mocks if they arise
-// const axios = require('axios'); // Alternative if import * causes issues with mocks
 import * as axiosNs from 'axios'; // Using namespace import for clarity
-import type { AxiosError, AxiosRequestConfig, AxiosResponse, InternalAxiosRequestConfig } from 'axios';
+import type { AxiosError, AxiosRequestConfig, AxiosResponse, InternalAxiosRequestConfig } from 'axios'; // Import necessary types
 // === Project Imports ===
 import type { Asset, ParsedHTML } from '../types'; // Adjust path if needed
@@ -46,10 +44,12 @@ type NodeJSErrnoException = Error & { code?: string };
  */
 function isUtf8DecodingLossy(originalBuffer: Buffer, decodedString: string): boolean {
     try {
+        // Re-encode the decoded string back to a buffer using UTF-8
         const reEncodedBuffer = Buffer.from(decodedString, 'utf-8');
+        // Compare the re-encoded buffer with the original buffer
         return !originalBuffer.equals(reEncodedBuffer);
     } catch (e) {
-        // Error during re-encoding likely means original wasn't valid UTF-8
+        // If an error occurs during re-encoding, it implies the original wasn't valid UTF-8
         return true;
     }
 }
@@ -62,9 +62,11 @@ function isUtf8DecodingLossy(originalBuffer: Buffer, decodedString: string): boo
  * @returns {string | undefined} The absolute base URL string ending in '/', or undefined if determination fails.
  */
 function determineBaseUrl(inputPathOrUrl: string, logger?: Logger): string | undefined {
-    // [DEBUG LOG] Added for diagnostics
-    console.log(`[DEBUG determineBaseUrl] Input: "${inputPathOrUrl}"`);
+    // Log the input for debugging purposes
+    // console.log(`[DEBUG determineBaseUrl] Input: "${inputPathOrUrl}"`); // Keep debug log commented unless needed
     logger?.debug(`Determining base URL for input: ${inputPathOrUrl}`);
+    // Handle invalid or empty input
     if (!inputPathOrUrl) {
         logger?.warn('Cannot determine base URL: inputPathOrUrl is empty or invalid.');
         return undefined;
@@ -74,20 +76,20 @@ function determineBaseUrl(inputPathOrUrl: string, logger?: Logger): string | und
         // Handle non-file URLs (HTTP, HTTPS)
         if (/^https?:\/\//i.test(inputPathOrUrl)) {
             const url = new URL(inputPathOrUrl);
-            // Get URL up to the last slash in the path
+            // Construct the base URL by taking the path up to the last '/'
             url.pathname = url.pathname.substring(0, url.pathname.lastIndexOf('/') + 1);
-            url.search = ''; url.hash = ''; // Clear query params/fragments
+            url.search = ''; // Remove query parameters
+            url.hash = ''; // Remove fragments
             const baseUrl = url.href;
             logger?.debug(`Determined remote base URL: ${baseUrl}`);
-            // [DEBUG LOG] Added for diagnostics
-            console.log(`[DEBUG determineBaseUrl] Determined Remote URL: "${baseUrl}"`);
-            return baseUrl; // URLs from constructor usually end in '/' if path ends in '/'
+            // console.log(`[DEBUG determineBaseUrl] Determined Remote URL: "${baseUrl}"`); // Keep debug log commented unless needed
+            // Return the constructed base URL (usually ends in '/')
+            return baseUrl;
         }
         // Handle other protocols (warn and return undefined)
         else if (inputPathOrUrl.includes('://') && !inputPathOrUrl.startsWith('file:')) {
             logger?.warn(`Input "${inputPathOrUrl}" looks like a URL but uses an unsupported protocol. Cannot determine base URL.`);
-             // [DEBUG LOG] Added for diagnostics
-             console.log(`[DEBUG determineBaseUrl] Unsupported protocol.`);
+            // console.log(`[DEBUG determineBaseUrl] Unsupported protocol.`); // Keep debug log commented unless needed
             return undefined;
         }
         // Handle file paths and file: URLs
@@ -97,32 +99,31 @@ function determineBaseUrl(inputPathOrUrl: string, logger?: Logger): string | und
             // Convert input to an absolute path
             if (inputPathOrUrl.startsWith('file:')) {
+                // Convert file URL to path
                 resourcePath = fileURLToPath(inputPathOrUrl);
                 // file: URLs ending in / strongly suggest a directory
                 isInputLikelyDirectory = inputPathOrUrl.endsWith('/');
             } else {
-                resourcePath = path.resolve(inputPathOrUrl); // Resolve relative/absolute file paths
-                 // Check if the resolved path *actually* exists and is a directory
-                 // This distinguishes 'C:\path\to\dir' from 'C:\path\to\file.html'
-                 try {
-                     // Use statSync carefully - assumes it's available and works (or mocked)
-                     isInputLikelyDirectory = fs.statSync(resourcePath).isDirectory();
-                 } catch {
-                     // If stat fails (ENOENT, EACCES), assume it refers to a file path
-                     isInputLikelyDirectory = false;
-                 }
+                // Resolve relative/absolute file paths
+                resourcePath = path.resolve(inputPathOrUrl);
+                // Check if the resolved path *actually* exists and is a directory
+                try {
+                    // Use statSync carefully - assumes it's available and works (or mocked)
+                    isInputLikelyDirectory = fs.statSync(resourcePath).isDirectory();
+                } catch {
+                    // If stat fails (ENOENT, EACCES), assume it refers to a file path
+                    isInputLikelyDirectory = false;
+                }
             }
-             // [DEBUG LOG] Added for diagnostics
-             console.log(`[DEBUG determineBaseUrl] resourcePath: "${resourcePath}", isInputLikelyDirectory: ${isInputLikelyDirectory}`);
+            // console.log(`[DEBUG determineBaseUrl] resourcePath: "${resourcePath}", isInputLikelyDirectory: ${isInputLikelyDirectory}`); // Keep debug log commented unless needed
-             // The base directory is the directory containing the resourcePath,
-             // OR resourcePath itself if it was identified as a directory.
+            // The base directory is the directory containing the resourcePath,
+            // OR resourcePath itself if it was identified as a directory.
             const baseDirPath = isInputLikelyDirectory ? resourcePath : path.dirname(resourcePath);
-            // [DEBUG LOG] Added for diagnostics
-            console.log(`[DEBUG determineBaseUrl] Calculated baseDirPath: "${baseDirPath}"`);
+            // console.log(`[DEBUG determineBaseUrl] Calculated baseDirPath: "${baseDirPath}"`); // Keep debug log commented unless needed
             // Convert base directory path back to a file URL ending in '/'
-            let normalizedPathForURL = baseDirPath.replace(/\\/g, '/'); // Use forward slashes
+            let normalizedPathForURL = baseDirPath.replace(/\\/g, '/'); // Use forward slashes for URL consistency
             // Ensure leading slash for Windows file URLs (e.g., /C:/...)
             if (/^[A-Z]:\//i.test(normalizedPathForURL) && !normalizedPathForURL.startsWith('/')) {
                 normalizedPathForURL = '/' + normalizedPathForURL;
@@ -132,19 +133,18 @@ function determineBaseUrl(inputPathOrUrl: string, logger?: Logger): string | und
                 normalizedPathForURL += '/';
             }
+            // Create the final file URL object and get its string representation
             const fileUrl = new URL('file://' + normalizedPathForURL);
             const fileUrlString = fileUrl.href;
             logger?.debug(`Determined base URL: ${fileUrlString} (from: ${inputPathOrUrl}, resolved base dir: ${baseDirPath})`);
-             // [DEBUG LOG] Added for diagnostics
-             console.log(`[DEBUG determineBaseUrl] Determined File URL: "${fileUrlString}"`);
+            // console.log(`[DEBUG determineBaseUrl] Determined File URL: "${fileUrlString}"`); // Keep debug log commented unless needed
             return fileUrlString;
         }
     } catch (error: unknown) {
+        // Handle any errors during base URL determination
         const message = error instanceof Error ? error.message : String(error);
-        // [DEBUG LOG] Added for diagnostics
-        console.error(`[DEBUG determineBaseUrl] Error determining base URL: ${message}`);
+        // console.error(`[DEBUG determineBaseUrl] Error determining base URL: ${message}`); // Keep debug log commented unless needed
         logger?.error(`💀 Failed to determine base URL for "${inputPathOrUrl}": ${message}${error instanceof Error && error.stack ? ` - Stack: ${error.stack}` : ''}`);
         return undefined;
     }
@@ -159,8 +159,10 @@ function determineBaseUrl(inputPathOrUrl: string, logger?: Logger): string | und
  * @returns {URL | null} A validated, absolute URL object, or null if invalid/ignorable.
  */
 function resolveAssetUrl(assetUrl: string, baseContextUrl?: string, logger?: Logger): URL | null {
+    // Trim whitespace from the URL
     const trimmedUrl = assetUrl?.trim();
-    // Ignore empty, data URIs, or fragment-only URLs
+    // Ignore empty URLs, data URIs, or fragment-only URLs
     if (!trimmedUrl || trimmedUrl.startsWith('data:') || trimmedUrl.startsWith('#')) {
         return null;
     }
@@ -170,34 +172,39 @@ function resolveAssetUrl(assetUrl: string, baseContextUrl?: string, logger?: Log
     // Handle protocol-relative URLs (e.g., //example.com/image.png)
     if (resolvableUrl.startsWith('//') && baseContextUrl) {
         try {
+            // Prepend the protocol from the base context URL
             const base = new URL(baseContextUrl);
-            resolvableUrl = base.protocol + resolvableUrl; // Prepend the base protocol (http: or https:)
+            resolvableUrl = base.protocol + resolvableUrl;
         } catch (e) {
+            // Log a warning if the base protocol cannot be determined
             logger?.warn(`Could not extract protocol from base "${baseContextUrl}" for protocol-relative URL "${trimmedUrl}". Skipping.`);
             return null;
         }
     }
     try {
-        // Use URL constructor for resolution. Handles absolute, relative paths, ../ etc.
-        // baseContextUrl provides the context for resolving relative URLs.
+        // Use URL constructor for resolution. Handles absolute paths, relative paths, ../ etc.
         const resolved = new URL(resolvableUrl, baseContextUrl);
-        // Don't attempt to fetch ws://, mailto:, etc. Add protocols as needed.
+        // Skip assets with unsupported protocols (e.g., mailto:, ws:)
         if (!['http:', 'https:', 'file:'].includes(resolved.protocol)) {
-             logger?.debug(`Skipping asset with unsupported protocol: ${resolved.href}`);
-             return null;
+            logger?.debug(`Skipping asset with unsupported protocol: ${resolved.href}`);
+            return null;
         }
+        // Return the resolved URL object
         return resolved;
     } catch (error: unknown) {
-        // Log errors during URL parsing/resolution but don't halt the process
+        // Log errors during URL parsing/resolution
         const message = error instanceof Error ? error.message : String(error);
-        // Avoid warning for relative paths when no base was provided (e.g., direct HTML string input)
+        // Avoid redundant warnings for relative paths when no base context was provided (expected failure)
         if (!/^[a-z]+:/i.test(resolvableUrl) && !resolvableUrl.startsWith('/') && !baseContextUrl) {
             logger?.warn(`Cannot resolve relative URL "${resolvableUrl}" - Base context URL was not provided or determined.`);
         } else {
+            // Log other resolution failures
             logger?.warn(`⚠️ Failed to parse/resolve URL "${resolvableUrl}" ${baseContextUrl ? 'against base "' + baseContextUrl + '"' : '(no base provided)'}: ${message}`);
         }
-        return null; // Return null if resolution fails
+        // Return null if resolution fails
+        return null;
     }
 }
@@ -214,29 +221,27 @@ function resolveCssRelativeUrl(
     cssBaseContextUrl: string, // e.g., file:///C:/mock/base/dir/css/deep.css or https://.../style.css
     logger?: Logger
 ): string | null {
-     // [DEBUG LOG] Added for diagnostics
-     console.log(`[DEBUG resolveCssRelativeUrl] Input: relative="${relativeUrl}", base="${cssBaseContextUrl}"`);
+     // console.log(`[DEBUG resolveCssRelativeUrl] Input: relative="${relativeUrl}", base="${cssBaseContextUrl}"`); // Keep debug log commented unless needed
+    // Ignore empty, data URIs, or fragments
     if (!relativeUrl || relativeUrl.startsWith('data:') || relativeUrl.startsWith('#')) {
-        return null; // Ignore empty, data URIs, or fragments
+        return null;
     }
     try {
         // Use the URL constructor which correctly handles relative paths including ../
-        // relative to the base URL provided.
+        // relative to the base URL provided (the CSS file's URL).
         const resolvedUrl = new URL(relativeUrl, cssBaseContextUrl);
-        // [DEBUG LOG] Added for diagnostics
-        console.log(`[DEBUG resolveCssRelativeUrl] Resolved URL object href: "${resolvedUrl.href}"`);
-        return resolvedUrl.href; // Return the resolved absolute URL string
+        // console.log(`[DEBUG resolveCssRelativeUrl] Resolved URL object href: "${resolvedUrl.href}"`); // Keep debug log commented unless needed
+        // Return the resolved absolute URL string
+        return resolvedUrl.href;
     } catch (error) {
-        // Log warning if URL resolution fails for some reason
+        // Log warning if URL resolution fails
         logger?.warn(
             `Failed to resolve CSS URL: "${relativeUrl}" relative to "${cssBaseContextUrl}": ${String(error)}`
         );
-         // [DEBUG LOG] Added for diagnostics
-         console.error(`[DEBUG resolveCssRelativeUrl] Error resolving: ${String(error)}`);
+        // console.error(`[DEBUG resolveCssRelativeUrl] Error resolving: ${String(error)}`); // Keep debug log commented unless needed
         return null;
     }
 }
@@ -251,92 +256,92 @@ function resolveCssRelativeUrl(
  * @returns {Promise<Buffer | null>} Asset content as a Buffer, or null on failure.
  */
 async function fetchAsset(resolvedUrl: URL, logger?: Logger, timeout: number = 10000): Promise<Buffer | null> {
-    // [DEBUG LOG] Added for diagnostics
-    console.log(`[DEBUG fetchAsset] Attempting fetch for URL: ${resolvedUrl.href}`);
+    // console.log(`[DEBUG fetchAsset] Attempting fetch for URL: ${resolvedUrl.href}`); // Keep debug log commented unless needed
     logger?.debug(`Attempting to fetch asset: ${resolvedUrl.href}`);
     const protocol = resolvedUrl.protocol;
     try {
+        // Handle HTTP and HTTPS protocols
         if (protocol === 'http:' || protocol === 'https:') {
-            // Use axios namespace import's default property
+            // Use axios to fetch remote content as an ArrayBuffer
             const response: AxiosResponse<ArrayBuffer> = await axiosNs.default.get(resolvedUrl.href, {
-                responseType: 'arraybuffer', timeout: timeout,
+                responseType: 'arraybuffer', // Fetch as binary data
+                timeout: timeout,            // Apply network timeout
             });
-             logger?.debug(`Workspaceed remote asset ${resolvedUrl.href} (Status: ${response.status}, Type: ${response.headers['content-type'] || 'N/A'}, Size: ${response.data?.byteLength ?? 0} bytes)`);
-             // [DEBUG LOG] Added for diagnostics
-             console.log(`[DEBUG fetchAsset] HTTP fetch SUCCESS for: ${resolvedUrl.href}, Status: ${response.status}`);
-             return Buffer.from(response.data);
-        } else if (protocol === 'file:') {
+            logger?.debug(`Workspaceed remote asset ${resolvedUrl.href} (Status: ${response.status}, Type: ${response.headers['content-type'] || 'N/A'}, Size: ${response.data?.byteLength ?? 0} bytes)`);
+            // console.log(`[DEBUG fetchAsset] HTTP fetch SUCCESS for: ${resolvedUrl.href}, Status: ${response.status}`); // Keep debug log commented unless needed
+            // Return the fetched data as a Node.js Buffer
+            return Buffer.from(response.data);
+        }
+        // Handle file protocol
+        else if (protocol === 'file:') {
             let filePath: string;
             try {
-                // Convert file URL to path. IMPORTANT: This strips query params and fragments.
+                // Convert file URL to a system file path
+                // IMPORTANT: This strips query params and fragments from the URL
                 filePath = fileURLToPath(resolvedUrl);
             } catch (e: any) {
-                 // [DEBUG LOG] Added for diagnostics
-                 console.error(`[DEBUG fetchAsset] fileURLToPath FAILED for: ${resolvedUrl.href}`, e);
-                logger?.error(`Could not convert file URL to path: ${resolvedUrl.href}. Error: ${e.message}`);
-                return null;
+                 // console.error(`[DEBUG fetchAsset] fileURLToPath FAILED for: ${resolvedUrl.href}`, e); // Keep debug log commented unless needed
+                 logger?.error(`Could not convert file URL to path: ${resolvedUrl.href}. Error: ${e.message}`);
+                 return null; // Return null if conversion fails
             }
             const normalizedForLog = path.normalize(filePath);
-             // [DEBUG LOG] Added for diagnostics
-             console.log(`[DEBUG fetchAsset] Attempting readFile with path: "${normalizedForLog}" (Original from URL: "${filePath}")`);
+            // console.log(`[DEBUG fetchAsset] Attempting readFile with path: "${normalizedForLog}" (Original from URL: "${filePath}")`); // Keep debug log commented unless needed
-             // Read file using fs/promises
+            // Read file content using fs/promises
             const data = await readFile(filePath); // This call uses the mock in tests
-             // [DEBUG LOG] Added for diagnostics
-             console.log(`[DEBUG fetchAsset] readFile call SUCCEEDED for path: "${normalizedForLog}". Data length: ${data?.byteLength}`);
-             logger?.debug(`Read local file ${filePath} (${data.byteLength} bytes)`);
+            // console.log(`[DEBUG fetchAsset] readFile call SUCCEEDED for path: "${normalizedForLog}". Data length: ${data?.byteLength}`); // Keep debug log commented unless needed
+            logger?.debug(`Read local file ${filePath} (${data.byteLength} bytes)`);
+            // Return the file content as a Buffer
             return data;
-        } else {
-             // [DEBUG LOG] Added for diagnostics
-             console.log(`[DEBUG fetchAsset] Unsupported protocol: ${protocol}`);
+        }
+        // Handle unsupported protocols
+        else {
+             // console.log(`[DEBUG fetchAsset] Unsupported protocol: ${protocol}`); // Keep debug log commented unless needed
              logger?.warn(`Unsupported protocol "${protocol}" in URL: ${resolvedUrl.href}`);
-            return null;
+             return null;
         }
     } catch (error: unknown) {
-         // [DEBUG LOG] Added for diagnostics
-         const failedId = protocol === 'file:' ? path.normalize(fileURLToPath(resolvedUrl)) : resolvedUrl.href;
-         console.error(`[DEBUG fetchAsset] fetch/read FAILED for: "${failedId}". Error:`, error);
-        // --- Handle Errors Based on Protocol/Context ---
-        // Use the imported namespace directly for isAxiosError check
-        if ((protocol === 'http:' || protocol === 'https:') && axiosNs.isAxiosError(error)) {
-            const status = error.response?.status ?? 'N/A';
-            const statusText = error.response?.statusText ?? 'Error';
-            const code = error.code ?? 'N/A';
-            const message = error.message;
-            // Format consistent with test expectations
-            const logMessage = `⚠️ Failed to fetch remote asset ${resolvedUrl.href}: Status ${status} - ${statusText}. Code: ${code}, Message: ${message}`;
+        // --- Handle Errors During Fetch/Read ---
+        const failedId = protocol === 'file:' ? path.normalize(fileURLToPath(resolvedUrl)) : resolvedUrl.href;
+        // console.error(`[DEBUG fetchAsset] CAUGHT Error for ${failedId}. Type: ${Object.prototype.toString.call(error)}, Constructor: ${error?.constructor?.name}, isAxiosError property: ${(error as any)?.isAxiosError}, Code: ${(error as any)?.code}`); // Keep for debugging if needed
+        // *** FIXED LOGIC: Check for AxiosError using its property *before* generic instanceof Error ***
+        if ((protocol === 'http:' || protocol === 'https:') && (error as any)?.isAxiosError === true) {
+            const axiosError = error as AxiosError; // Cast for easier property access
+            const status = axiosError.response?.status ?? 'N/A';
+            const code = axiosError.code ?? 'N/A'; // e.g., ECONNABORTED for timeout
+            // Use the specific log format
+            const logMessage = `⚠️ Failed to fetch remote asset ${resolvedUrl.href}: ${axiosError.message} (Code: ${code})`;
             logger?.warn(logMessage);
         }
-        // Check for specific FS errors (only relevant if protocol was file:)
-            if (error instanceof Error && (error as { code?: string }).code === 'ENOENT') {
-            let failedPath = resolvedUrl.href; // Fallback path for logging if conversion fails
+        // Check for file system errors *next*
+        else if (protocol === 'file:' && error instanceof Error) {
+            let failedPath = resolvedUrl.href;
             try { failedPath = fileURLToPath(resolvedUrl); } catch { /* ignore */ }
-            failedPath = path.normalize(failedPath); // Normalize for consistent logging
+            failedPath = path.normalize(failedPath);
-            if (error instanceof Error && (error as NodeJSErrnoException).code === 'ENOENT') {
+            if ((error as NodeJSErrnoException).code === 'ENOENT') {
                 logger?.warn(`⚠️ File not found (ENOENT) for asset: ${failedPath}.`);
-            } else if (error instanceof Error && (error as NodeJSErrnoException).code === 'EACCES') {
-                // Log EACCES specifically for tests to catch if needed
-                 logger?.warn(`⚠️ Permission denied (EACCES) reading asset: ${failedPath}.`);
-                 // Also log the more generic message that the test currently expects
-                 logger?.warn(`⚠️ Failed to read local asset ${failedPath}: ${error.message}`);
-            } else if (error instanceof Error) {
-                logger?.warn(`⚠️ Failed to read local asset ${failedPath}: ${error.message}`);
+            } else if ((error as NodeJSErrnoException).code === 'EACCES') {
+                 // Log ONLY the specific EACCES message
+                logger?.warn(`⚠️ Permission denied (EACCES) reading asset: ${failedPath}.`);
             } else {
-                logger?.warn(`⚠️ An unknown error occurred while reading local asset ${failedPath}: ${String(error)}`);
+                logger?.warn(`⚠️ Failed to read local asset ${failedPath}: ${error.message}`);
             }
         }
-        // Generic fallback for truly unexpected errors during fetch/read
+        // Generic fallback for *other* types of Errors (that are not Axios or known FS errors)
         else if (error instanceof Error) {
             logger?.warn(`⚠️ An unexpected error occurred processing asset ${resolvedUrl.href}: ${error.message}`);
-        } else {
+        }
+        // Fallback for non-Error throws (e.g., strings, numbers)
+        else {
             logger?.warn(`⚠️ An unknown and unexpected error occurred processing asset ${resolvedUrl.href}: ${String(error)}`);
         }
-        return null; // Return null on ANY fetch/read error caught here
+        // Return null on ANY error
+        return null;
     }
 }
@@ -353,50 +358,57 @@ function extractUrlsFromCSS(
     cssBaseContextUrl: string,
     logger?: Logger
 ): Asset[] {
+    // Array to hold assets discovered within this CSS content
     const newlyDiscovered: Asset[] = [];
-    // Track URLs processed within this specific CSS file to avoid adding duplicates from the same file
+    // Set to track URLs processed within this specific CSS file to avoid adding duplicates from the same file
     const processedInThisParse = new Set<string>();
-    // Regex for url(...) patterns, handling optional quotes
+    // Regex for url(...) patterns, handling optional quotes (non-greedy match for URL)
     const urlRegex = /url\(\s*(['"]?)(.*?)\1\s*\)/gi;
-    // Regex for @import rules, handling url() or bare string, optional quotes
+    // Regex for @import rules, handling url() or bare string, optional quotes (non-greedy match for URL)
     const importRegex = /@import\s+(?:url\(\s*(['"]?)(.*?)\1\s*\)|(['"])(.*?)\3)\s*;/gi;
     /** Internal helper to process a found URL string */
     const processFoundUrl = (rawUrl: string | undefined, ruleType: '@import' | 'url()') => {
-        if (!rawUrl || rawUrl.trim() === '' || rawUrl.startsWith('data:')) return;
+        // Skip if URL is empty, undefined, a data URI, or only a fragment
+        if (!rawUrl || rawUrl.trim() === '' || rawUrl.startsWith('data:') || rawUrl.startsWith('#')) return;
+        // Resolve the potentially relative URL against the CSS file's base URL
         const resolvedUrl = resolveCssRelativeUrl(rawUrl, cssBaseContextUrl, logger);
-        // If successfully resolved and not already found in *this* CSS file
+        // If successfully resolved and not already found *in this specific CSS file*
         if (resolvedUrl && !processedInThisParse.has(resolvedUrl)) {
+            // Mark this resolved URL as processed for this CSS file
             processedInThisParse.add(resolvedUrl);
-            const { assetType } = guessMimeType(resolvedUrl); // Guess type based on resolved URL
+            // Guess the asset type (css, image, font, etc.) based on the resolved URL
+            const { assetType } = guessMimeType(resolvedUrl);
-            // Add to the list of assets discovered in this pass
+            // Add the discovered asset to the list for this CSS file
             newlyDiscovered.push({
                 type: assetType,
-                url: resolvedUrl, // The resolved absolute URL string
+                url: resolvedUrl, // Store the resolved absolute URL string
                 content: undefined // Content will be fetched later if needed
             });
-             logger?.debug(`Discovered nested ${assetType} asset (${ruleType}) in CSS ${cssBaseContextUrl}: ${resolvedUrl}`);
+            logger?.debug(`Discovered nested ${assetType} asset (${ruleType}) in CSS ${cssBaseContextUrl}: ${resolvedUrl}`);
         }
     };
-    // Execute regex for url(...)
+    // Find all url(...) matches in the CSS content
     let match;
     while ((match = urlRegex.exec(cssContent)) !== null) {
-        processFoundUrl(match[2], 'url()'); // Group 2 captures the URL part
+        // Group 2 captures the URL part inside url()
+        processFoundUrl(match[2], 'url()');
     }
-    // Execute regex for @import
-    // Reset lastIndex as we're using the same regex instance implicitly if defined outside loop
-    importRegex.lastIndex = 0; // Explicitly reset
+    // Find all @import matches in the CSS content
+    // Reset lastIndex as we're reusing the regex object implicitly
+    importRegex.lastIndex = 0;
     while ((match = importRegex.exec(cssContent)) !== null) {
         // Group 2 captures url('...'), Group 4 captures bare "..."
         processFoundUrl(match[2] || match[4], '@import');
     }
+    // Return the list of assets discovered within this CSS content
     return newlyDiscovered;
 }
@@ -422,59 +434,65 @@ export async function extractAssets(
 ): Promise<ParsedHTML> {
     logger?.info(`🚀 Starting asset extraction! Embed: ${embedAssets}. Input: ${inputPathOrUrl || '(HTML content only)'}`);
+    // Get the initial list of assets found directly in the HTML
     const initialAssets: Asset[] = parsed.assets || [];
-    // Stores the final result: Map<resolved URL string, Asset object>
+    // Stores the final result: Map<resolved URL string, Asset object> to ensure uniqueness
     const finalAssetsMap = new Map<string, Asset>();
-    // Queue holds assets to be processed: { url: string (resolved), type: ..., content?: ... }
+    // Queue holds assets whose content needs to be processed (fetched/analyzed)
     let assetsToProcess: Asset[] = [];
-    // Set to track URLs that are already processed (in finalAssetsMap) OR currently in the queue (assetsToProcess)
+    // Set to track URLs that are either already fully processed (in finalAssetsMap)
+    // OR currently in the processing queue (assetsToProcess) to prevent reprocessing/loops.
     const processedOrQueuedUrls = new Set<string>();
-    // --- Determine Base URL Context ---
+    // --- Determine Base URL Context for the HTML ---
     const htmlBaseContextUrl = determineBaseUrl(inputPathOrUrl || '', logger);
+    // Warn if no base URL could be found and there are relative paths in the initial assets
     if (!htmlBaseContextUrl && initialAssets.some(a => !/^[a-z]+:/i.test(a.url) && !a.url.startsWith('data:') && !a.url.startsWith('#') && !a.url.startsWith('/'))) {
         logger?.warn("🚨 No valid base path/URL determined for the HTML source! Resolution of relative asset paths from HTML may fail.");
     } else if (htmlBaseContextUrl) {
         logger?.debug(`Using HTML base context URL: ${htmlBaseContextUrl}`);
     }
-    // --- Initial Queue Population ---
+    // --- Initial Queue Population from HTML assets ---
     logger?.debug(`Queueing ${initialAssets.length} initial assets parsed from HTML...`);
     for (const asset of initialAssets) {
         // Resolve the initial asset URL against the HTML base context
         const resolvedUrlObj = resolveAssetUrl(asset.url, htmlBaseContextUrl, logger);
+        // Skip if URL is invalid, data URI, fragment, or unsupported protocol
         if (!resolvedUrlObj) {
-             logger?.debug(` -> Skipping initial asset with unresolvable/ignorable URL: ${asset.url}`);
-             continue; // Skip if URL is invalid or data URI etc.
+            logger?.debug(` -> Skipping initial asset with unresolvable/ignorable URL: ${asset.url}`);
+            continue;
         }
-        const urlToQueue = resolvedUrlObj.href; // Use the resolved absolute URL string
+        // Get the resolved absolute URL string
+        const urlToQueue = resolvedUrlObj.href;
-        // Skip data URIs and check if this URL is already tracked
-        if (!urlToQueue.startsWith('data:') && !processedOrQueuedUrls.has(urlToQueue)) {
-            processedOrQueuedUrls.add(urlToQueue); // Mark as queued
+        // Check if this URL is already tracked (processed or queued)
+        if (!processedOrQueuedUrls.has(urlToQueue)) {
+            // Mark as queued (add to set *before* adding to array)
+            processedOrQueuedUrls.add(urlToQueue);
             // Guess type from the resolved/original URL if not provided initially
             const { assetType: guessedType } = guessMimeType(urlToQueue);
-            const initialType = asset.type ?? guessedType;
+            const initialType = asset.type ?? guessedType; // Use provided type or fallback to guessed type
-            // Add to the processing queue
+            // Add the resolved asset to the processing queue
             assetsToProcess.push({
                 url: urlToQueue, // Use the resolved URL
                 type: initialType,
-                content: undefined
+                content: undefined // Content is initially undefined
             });
-             logger?.debug(` -> Queued initial asset: ${urlToQueue} (Original raw: ${asset.url})`);
-        } else if (urlToQueue.startsWith('data:')) {
-            logger?.debug(` -> Skipping data URI: ${urlToQueue.substring(0, 50)}...`);
+            logger?.debug(` -> Queued initial asset: ${urlToQueue} (Original raw: ${asset.url})`);
         } else {
-             logger?.debug(` -> Skipping already processed/queued initial asset: ${urlToQueue}`);
+            logger?.debug(` -> Skipping already processed/queued initial asset: ${urlToQueue}`);
         }
     }
-    // --- Main processing loop ---
+    // --- Main processing loop (continues as long as there are assets to process) ---
     let iterationCount = 0;
     while (assetsToProcess.length > 0) {
         iterationCount++;
+        // Prevent potential infinite loops
         if (iterationCount > MAX_ASSET_EXTRACTION_ITERATIONS) {
             logger?.error(`🛑 Asset extraction loop limit hit (${MAX_ASSET_EXTRACTION_ITERATIONS})! Aborting.`);
             const remainingUrls = assetsToProcess.map(a => a.url).slice(0, 10).join(', ');
@@ -482,175 +500,204 @@ export async function extractAssets(
             // Add assets remaining in queue to final map without content before breaking
             assetsToProcess.forEach(asset => {
                 if (!finalAssetsMap.has(asset.url)) {
-                     finalAssetsMap.set(asset.url, { ...asset, content: undefined });
+                    finalAssetsMap.set(asset.url, { ...asset, content: undefined });
                 }
             });
-            assetsToProcess = []; // Clear queue
+            assetsToProcess = []; // Clear queue to stop the loop
             break; // Exit loop
         }
-        // Process assets in batches for clarity in logs
+        // Take a snapshot of the current queue to process in this iteration
         const currentBatch = [...assetsToProcess];
-        assetsToProcess = []; // Clear queue for the next batch discovered in this iteration
+        // Clear the main queue; new assets found in this batch will be added here for the *next* iteration
+        assetsToProcess = [];
         logger?.debug(`--- Processing batch ${iterationCount}: ${currentBatch.length} asset(s) ---`);
+        // Process each asset in the current batch
         for (const asset of currentBatch) {
-            // Skip if already fully processed (e.g., added in a previous batch)
+            // Double-check: Skip if this asset somehow got fully processed in a previous iteration (shouldn't happen with current logic, but safe check)
             if (finalAssetsMap.has(asset.url)) {
-                 logger?.debug(`Skipping asset already in final map: ${asset.url}`);
+                logger?.debug(`Skipping asset already in final map: ${asset.url}`);
                 continue;
             }
-            let assetContentBuffer: Buffer | null = null;
-            let finalContent: string | undefined = undefined; // For embedding
-            let cssContentForParsing: string | undefined = undefined; // For CSS parsing
+            let assetContentBuffer: Buffer | null = null; // To store fetched binary content
+            let finalContent: string | undefined = undefined; // Final content (text or data URI) for the Asset object
+            let cssContentForParsing: string | undefined = undefined; // Text content specifically for parsing CSS
             // --- Determine if fetching is needed ---
-            // Fetch if embedding everything OR if it's CSS (need content for parsing)
+            // Fetch if we need to embed all assets OR if it's CSS (we need content to parse for nested assets)
             const needsFetching = embedAssets || asset.type === 'css';
             let assetUrlObj: URL | null = null; // URL object needed for fetchAsset
             if (needsFetching) {
                 // --- Create URL object for fetching ---
                 try {
-                    assetUrlObj = new URL(asset.url); // Asset URL should be absolute here
+                    // Asset URL should be absolute at this point
+                    assetUrlObj = new URL(asset.url);
                 } catch (urlError) {
-                     logger?.warn(`Cannot create URL object for "${asset.url}", skipping fetch. Error: ${urlError instanceof Error ? urlError.message : String(urlError)}`);
-                    finalAssetsMap.set(asset.url, { ...asset, content: undefined }); // Store asset without content
-                    continue; // Skip to next asset in batch
+                    // Log error if creating URL object fails
+                    logger?.warn(`Cannot create URL object for "${asset.url}", skipping fetch. Error: ${urlError instanceof Error ? urlError.message : String(urlError)}`);
+                    // Store asset without content in the final map
+                    finalAssetsMap.set(asset.url, { ...asset, content: undefined });
+                    // Skip to next asset in the current batch
+                    continue;
                 }
                 // --- Fetch Asset ---
                 if (assetUrlObj) {
+                    // Call fetchAsset (which handles http/https/file and errors)
                     assetContentBuffer = await fetchAsset(assetUrlObj, logger);
                     // fetchAsset returns null on failure
                 }
             } // End if(needsFetching)
-            // --- If fetching was needed but failed, store asset without content and skip ---
+            // --- If fetching was required but failed, store asset without content and continue ---
             if (needsFetching && assetContentBuffer === null) {
-                 logger?.debug(`Storing asset ${asset.url} without content due to fetch failure.`);
+                logger?.debug(`Storing asset ${asset.url} without content due to fetch failure.`);
+                // Add to final map with undefined content
                 finalAssetsMap.set(asset.url, { ...asset, content: undefined });
-                continue; // Skip to next asset in batch
+                // Skip to the next asset in the current batch
+                continue;
             }
             // --- Prepare Content for Storing/Embedding (if fetched successfully) ---
             if (assetContentBuffer) { // Only proceed if content was fetched
-                 const mimeInfo = guessMimeType(asset.url); // Guess MIME based on URL extension
-                 const effectiveMime = mimeInfo.mime || 'application/octet-stream'; // Fallback MIME
-                 // Try to decode TEXT types as UTF-8
-                 if (TEXT_ASSET_TYPES.has(asset.type)) {
-                     let textContent: string | undefined;
-                     let wasLossy = false;
-                     try {
-                         textContent = assetContentBuffer.toString('utf-8');
-                         wasLossy = isUtf8DecodingLossy(assetContentBuffer, textContent);
-                     } catch (e) { textContent = undefined; wasLossy = true; }
-                     if (!wasLossy && textContent !== undefined) {
-                         // If embedding, store the text content
-                         if (embedAssets) {
-                             finalContent = textContent;
-                         } else {
-                             finalContent = undefined; // Not embedding text
-                         }
-                         // If it's CSS, store its text content for parsing regardless of embedding
-                         if (asset.type === 'css') {
-                             cssContentForParsing = textContent;
-                         }
-                     } else {
-                         // Decoding failed or was lossy
-                         logger?.warn(`Could not decode ${asset.type} asset ${asset.url} as valid UTF-8 text.${embedAssets ? ' Falling back to base64 data URI.' : ''}`);
-                         cssContentForParsing = undefined; // Cannot parse if decoding failed
-                         // Embed as base64 data URI if requested
-                         if (embedAssets) {
-                             finalContent = `data:${effectiveMime};base64,${assetContentBuffer.toString('base64')}`;
-                         } else {
-                             finalContent = undefined;
-                         }
-                     }
-                 }
-                 // Embed BINARY types as base64 data URI if requested
-                 else if (BINARY_ASSET_TYPES.has(asset.type)) {
-                     if (embedAssets) {
-                         finalContent = `data:${effectiveMime};base64,${assetContentBuffer.toString('base64')}`;
-                     } else {
-                         finalContent = undefined; // Not embedding
-                     }
-                     cssContentForParsing = undefined; // Not CSS
-                 }
-                 // Handle 'other' types: attempt text decode, fallback to base64 if embedding
-                 else { // asset.type === 'other' or unknown
-                      cssContentForParsing = undefined; // Not CSS
-                     if (embedAssets) {
-                         try {
-                             const attemptedTextContent = assetContentBuffer.toString('utf-8');
-                             if (isUtf8DecodingLossy(assetContentBuffer, attemptedTextContent)) {
-                                 logger?.warn(`Couldn't embed unclassified asset ${asset.url} as text due to invalid UTF-8 sequences. Falling back to base64 (octet-stream).`);
-                                 finalContent = `data:application/octet-stream;base64,${assetContentBuffer.toString('base64')}`;
-                             } else {
-                                 finalContent = attemptedTextContent;
-                                 logger?.debug(`Successfully embedded unclassified asset ${asset.url} as text.`);
-                             }
-                         } catch (decodeError) {
-                              logger?.warn(`Error during text decoding for unclassified asset ${asset.url}: ${decodeError instanceof Error ? decodeError.message : String(decodeError)}. Falling back to base64.`);
-                              finalContent = `data:application/octet-stream;base64,${assetContentBuffer.toString('base64')}`;
-                         }
-                     } else {
-                         finalContent = undefined; // Not embedding
-                     }
-                 }
+                // Guess MIME type based on the asset's URL extension
+                const mimeInfo = guessMimeType(asset.url);
+                // Use the guessed MIME type or fallback to a generic binary type
+                const effectiveMime = mimeInfo.mime || 'application/octet-stream';
+                // Handle TEXT types (CSS, JS)
+                if (TEXT_ASSET_TYPES.has(asset.type)) {
+                    let textContent: string | undefined;
+                    let wasLossy = false;
+                    try {
+                        // Try decoding the buffer as UTF-8
+                        textContent = assetContentBuffer.toString('utf-8');
+                        // Check if the decoding process lost information (e.g., invalid sequences replaced)
+                        wasLossy = isUtf8DecodingLossy(assetContentBuffer, textContent);
+                    } catch (e) {
+                        // Decoding itself failed
+                        textContent = undefined;
+                        wasLossy = true;
+                    }
+                    // If decoding was successful and not lossy
+                    if (!wasLossy && textContent !== undefined) {
+                        // If embedding, store the text content
+                        if (embedAssets) {
+                            finalContent = textContent;
+                        } else {
+                            finalContent = undefined; // Not embedding text, store undefined
+                        }
+                        // If it's CSS, store its text content for parsing regardless of embedding option
+                        if (asset.type === 'css') {
+                            cssContentForParsing = textContent;
+                        }
+                    } else {
+                        // Decoding failed or was lossy
+                        // Fixed log message: Added "asset" after type.
+                        logger?.warn(`Could not decode ${asset.type} asset ${asset.url} as valid UTF-8 text.${embedAssets ? ' Falling back to base64 data URI.' : ''}`);
+                        cssContentForParsing = undefined; // Cannot parse CSS if decoding failed
+                        // Embed as base64 data URI if requested, using the effective MIME type
+                        if (embedAssets) {
+                            finalContent = `data:${effectiveMime};base64,${assetContentBuffer.toString('base64')}`;
+                        } else {
+                            finalContent = undefined; // Not embedding
+                        }
+                    }
+                }
+                // Handle BINARY types (image, font, video, audio)
+                else if (BINARY_ASSET_TYPES.has(asset.type)) {
+                    // Embed as base64 data URI if requested
+                    if (embedAssets) {
+                        finalContent = `data:${effectiveMime};base64,${assetContentBuffer.toString('base64')}`;
+                    } else {
+                        finalContent = undefined; // Not embedding
+                    }
+                    cssContentForParsing = undefined; // Not CSS, so no parsing needed
+                }
+                // Handle 'other' or unknown types
+                else {
+                    cssContentForParsing = undefined; // Assume not parseable as CSS
+                    // If embedding, attempt to store as text, fallback to base64 if invalid UTF-8
+                    if (embedAssets) {
+                        try {
+                            const attemptedTextContent = assetContentBuffer.toString('utf-8');
+                            if (isUtf8DecodingLossy(assetContentBuffer, attemptedTextContent)) {
+                                // If text decoding is lossy, warn and use base64
+                                logger?.warn(`Couldn't embed unclassified asset ${asset.url} as text due to invalid UTF-8 sequences. Falling back to base64 (octet-stream).`);
+                                finalContent = `data:application/octet-stream;base64,${assetContentBuffer.toString('base64')}`;
+                            } else {
+                                // Store as text if decoding worked
+                                finalContent = attemptedTextContent;
+                                logger?.debug(`Successfully embedded unclassified asset ${asset.url} as text.`);
+                            }
+                        } catch (decodeError) {
+                            // If toString fails, warn and use base64
+                            logger?.warn(`Error during text decoding for unclassified asset ${asset.url}: ${decodeError instanceof Error ? decodeError.message : String(decodeError)}. Falling back to base64.`);
+                            finalContent = `data:application/octet-stream;base64,${assetContentBuffer.toString('base64')}`;
+                        }
+                    } else {
+                        finalContent = undefined; // Not embedding
+                    }
+                }
             } else { // Content was not fetched (e.g., embedAssets=false and not CSS)
-                 finalContent = undefined;
-                 cssContentForParsing = undefined;
+                finalContent = undefined;
+                cssContentForParsing = undefined;
             }
-            // --- Store the final asset ---
-            // Use the resolved URL as the key and in the asset object itself
+            // --- Store the final processed asset in the map ---
+            // Use the resolved URL as the key and ensure the asset object also uses the resolved URL
             finalAssetsMap.set(asset.url, { ...asset, url: asset.url, content: finalContent });
-            // Note: URL was already added to processedOrQueuedUrls when initially queued or discovered
+            // Note: URL was already added to processedOrQueuedUrls when initially queued or discovered in CSS
             // --- Process CSS for nested assets ---
             // Only if it's CSS and we successfully decoded its content for parsing
             if (asset.type === 'css' && cssContentForParsing) {
                 // Determine the base URL *for this specific CSS file* to resolve its relative links
-                 const cssBaseContextUrl = determineBaseUrl(asset.url, logger); // CSS URL is absolute here
-                 logger?.debug(`CSS base context for resolving nested assets within ${asset.url}: ${cssBaseContextUrl}`);
+                const cssBaseContextUrl = determineBaseUrl(asset.url, logger); // CSS URL is absolute here
+                logger?.debug(`CSS base context for resolving nested assets within ${asset.url}: ${cssBaseContextUrl}`);
                 if (cssBaseContextUrl) {
-                    // Get the list of *potentially* new assets discovered in this CSS file's content
+                    // Extract URLs found within this CSS content
                     const newlyDiscoveredAssets = extractUrlsFromCSS(
                         cssContentForParsing,
-                        cssBaseContextUrl, // Use CSS file's base URL
+                        cssBaseContextUrl, // Use the CSS file's own URL as the base
                         logger
                     );
+                    // If new assets were found in the CSS
                     if (newlyDiscoveredAssets.length > 0) {
-                         logger?.debug(`Discovered ${newlyDiscoveredAssets.length} nested assets in CSS ${asset.url}. Checking against queue...`);
+                        logger?.debug(`Discovered ${newlyDiscoveredAssets.length} nested assets in CSS ${asset.url}. Checking against queue...`);
+                        // Process each newly discovered asset
                         for (const newAsset of newlyDiscoveredAssets) {
-                             // CHECK: Add to queue only if this resolved URL hasn't been processed OR queued before.
-                             if (!processedOrQueuedUrls.has(newAsset.url)) {
-                                 processedOrQueuedUrls.add(newAsset.url); // Mark as queued now
-                                 assetsToProcess.push(newAsset); // Add to the main queue for the *next* iteration
-                                 logger?.debug(` -> Queued new nested asset: ${newAsset.url}`);
-                             } else {
-                                  logger?.debug(` -> Skipping already processed/queued nested asset: ${newAsset.url}`);
-                             }
+                            // CHECK: Add to the main processing queue only if this resolved URL hasn't been processed OR queued before.
+                            if (!processedOrQueuedUrls.has(newAsset.url)) {
+                                processedOrQueuedUrls.add(newAsset.url); // Mark as queued now
+                                assetsToProcess.push(newAsset); // Add to the queue for the *next* iteration
+                                logger?.debug(` -> Queued new nested asset: ${newAsset.url}`);
+                            } else {
+                                // Skip if already handled
+                                logger?.debug(` -> Skipping already processed/queued nested asset: ${newAsset.url}`);
+                            }
                         }
                     }
                 } else {
-                     logger?.warn(`Could not determine base URL context for CSS file ${asset.url}. Cannot resolve nested relative paths within it.`);
+                    // Warn if the base URL for the CSS file couldn't be determined (shouldn't happen if asset.url was valid)
+                    logger?.warn(`Could not determine base URL context for CSS file ${asset.url}. Cannot resolve nested relative paths within it.`);
                 }
             } // End if(asset.type === 'css' && cssContentForParsing)
         } // End for loop over currentBatch
-    } // End while loop
+    } // End while loop (assetsToProcess.length > 0)
-    const finalIterationCount = iterationCount > MAX_ASSET_EXTRACTION_ITERATIONS ? 'MAX+' : iterationCount;
+    // Log completion summary
+    const finalIterationCount = iterationCount > MAX_ASSET_EXTRACTION_ITERATIONS ? `${MAX_ASSET_EXTRACTION_ITERATIONS}+ (limit hit)` : iterationCount;
     logger?.info(`✅ Asset extraction COMPLETE! Found ${finalAssetsMap.size} unique assets in ${finalIterationCount} iterations.`);
-    // Return the original HTML content and the final list of processed assets
+    // Return the original HTML content and the final list of processed assets from the map
     return {
         htmlContent: parsed.htmlContent,
         assets: Array.from(finalAssetsMap.values())