portapack 0.2.1 → 0.3.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,6 +1,129 @@
1
- // src/core/parser.ts
1
+ // src/core/web-fetcher.ts
2
+ import * as puppeteer from "puppeteer";
3
+ import * as fs2 from "fs/promises";
4
+
5
+ // src/types.ts
6
+ var LogLevel = /* @__PURE__ */ ((LogLevel2) => {
7
+ LogLevel2[LogLevel2["NONE"] = 0] = "NONE";
8
+ LogLevel2[LogLevel2["ERROR"] = 1] = "ERROR";
9
+ LogLevel2[LogLevel2["WARN"] = 2] = "WARN";
10
+ LogLevel2[LogLevel2["INFO"] = 3] = "INFO";
11
+ LogLevel2[LogLevel2["DEBUG"] = 4] = "DEBUG";
12
+ return LogLevel2;
13
+ })(LogLevel || {});
14
+
15
+ // src/utils/logger.ts
16
+ var Logger = class _Logger {
17
+ /** The current minimum log level required for a message to be output. */
18
+ level;
19
+ /**
20
+ * Creates a new Logger instance.
21
+ * Defaults to LogLevel.INFO if no level is provided.
22
+ *
23
+ * @param {LogLevel} [level=LogLevel.INFO] - The initial log level for this logger instance.
24
+ * Must be one of the values from the LogLevel enum.
25
+ */
26
+ constructor(level = 3 /* INFO */) {
27
+ this.level = level !== void 0 && LogLevel[level] !== void 0 ? level : 3 /* INFO */;
28
+ }
29
+ /**
30
+ * Updates the logger's current level. Messages below this level will be suppressed.
31
+ *
32
+ * @param {LogLevel} level - The new log level to set. Must be a LogLevel enum member.
33
+ */
34
+ setLevel(level) {
35
+ this.level = level;
36
+ }
37
+ /**
38
+ * Logs a debug message if the current log level is DEBUG or higher.
39
+ *
40
+ * @param {string} message - The debug message string.
41
+ */
42
+ debug(message) {
43
+ if (this.level >= 4 /* DEBUG */) {
44
+ console.debug(`[DEBUG] ${message}`);
45
+ }
46
+ }
47
+ /**
48
+ * Logs an informational message if the current log level is INFO or higher.
49
+ *
50
+ * @param {string} message - The informational message string.
51
+ */
52
+ info(message) {
53
+ if (this.level >= 3 /* INFO */) {
54
+ console.info(`[INFO] ${message}`);
55
+ }
56
+ }
57
+ /**
58
+ * Logs a warning message if the current log level is WARN or higher.
59
+ *
60
+ * @param {string} message - The warning message string.
61
+ */
62
+ warn(message) {
63
+ if (this.level >= 2 /* WARN */) {
64
+ console.warn(`[WARN] ${message}`);
65
+ }
66
+ }
67
+ /**
68
+ * Logs an error message if the current log level is ERROR or higher.
69
+ *
70
+ * @param {string} message - The error message string.
71
+ */
72
+ error(message) {
73
+ if (this.level >= 1 /* ERROR */) {
74
+ console.error(`[ERROR] ${message}`);
75
+ }
76
+ }
77
+ /**
78
+ * Static factory method to create a Logger instance based on a simple boolean `verbose` flag.
79
+ *
80
+ * @static
81
+ * @param {{ verbose?: boolean }} [options={}] - An object potentially containing a `verbose` flag.
82
+ * @returns {Logger} A new Logger instance set to LogLevel.DEBUG if options.verbose is true,
83
+ * otherwise set to LogLevel.INFO.
84
+ */
85
+ static fromVerboseFlag(options = {}) {
86
+ return new _Logger(options.verbose ? 4 /* DEBUG */ : 3 /* INFO */);
87
+ }
88
+ /**
89
+ * Static factory method to create a Logger instance based on a LogLevel string name.
90
+ * Useful for creating a logger from config files or environment variables.
91
+ *
92
+ * @static
93
+ * @param {string | undefined} levelName - The name of the log level (e.g., 'debug', 'info', 'warn', 'error', 'silent'/'none'). Case-insensitive.
94
+ * @param {LogLevel} [defaultLevel=LogLevel.INFO] - The level to use if levelName is invalid or undefined.
95
+ * @returns {Logger} A new Logger instance set to the corresponding LogLevel.
96
+ */
97
+ static fromLevelName(levelName, defaultLevel = 3 /* INFO */) {
98
+ if (!levelName) {
99
+ return new _Logger(defaultLevel);
100
+ }
101
+ switch (levelName.toLowerCase()) {
102
+ // Return enum members
103
+ case "debug":
104
+ return new _Logger(4 /* DEBUG */);
105
+ case "info":
106
+ return new _Logger(3 /* INFO */);
107
+ case "warn":
108
+ return new _Logger(2 /* WARN */);
109
+ case "error":
110
+ return new _Logger(1 /* ERROR */);
111
+ case "silent":
112
+ case "none":
113
+ return new _Logger(0 /* NONE */);
114
+ default:
115
+ console.warn(`[Logger] Invalid log level name "${levelName}". Defaulting to ${LogLevel[defaultLevel]}.`);
116
+ return new _Logger(defaultLevel);
117
+ }
118
+ }
119
+ };
120
+
121
+ // src/core/extractor.ts
2
122
  import { readFile } from "fs/promises";
3
- import * as cheerio from "cheerio";
123
+ import * as fs from "fs";
124
+ import path2 from "path";
125
+ import { fileURLToPath, URL as URL2 } from "url";
126
+ import * as axiosNs from "axios";
4
127
 
5
128
  // src/utils/mime.ts
6
129
  import path from "path";
@@ -58,76 +181,7 @@ function guessMimeType(urlOrPath) {
58
181
  return MIME_MAP[ext] || DEFAULT_MIME_TYPE;
59
182
  }
60
183
 
61
- // src/core/parser.ts
62
- async function parseHTML(entryFilePath, logger) {
63
- logger?.debug(`Parsing HTML file: ${entryFilePath}`);
64
- let htmlContent;
65
- try {
66
- htmlContent = await readFile(entryFilePath, "utf-8");
67
- logger?.debug(`Successfully read HTML file (${Buffer.byteLength(htmlContent)} bytes).`);
68
- } catch (err) {
69
- logger?.error(`Failed to read HTML file "${entryFilePath}": ${err.message}`);
70
- throw new Error(`Could not read input HTML file: ${entryFilePath}`, { cause: err });
71
- }
72
- const $ = cheerio.load(htmlContent);
73
- const assets = [];
74
- const addedUrls = /* @__PURE__ */ new Set();
75
- const addAsset = (url, forcedType) => {
76
- if (!url || url.trim() === "" || url.startsWith("data:")) {
77
- return;
78
- }
79
- if (!addedUrls.has(url)) {
80
- addedUrls.add(url);
81
- const mimeInfo = guessMimeType(url);
82
- const type = forcedType ?? mimeInfo.assetType;
83
- assets.push({ type, url });
84
- logger?.debug(`Discovered asset: Type='${type}', URL='${url}'`);
85
- } else {
86
- logger?.debug(`Skipping duplicate asset URL: ${url}`);
87
- }
88
- };
89
- logger?.debug("Extracting assets from HTML tags...");
90
- $('link[rel="stylesheet"][href]').each((_, el) => {
91
- addAsset($(el).attr("href"), "css");
92
- });
93
- $("script[src]").each((_, el) => {
94
- addAsset($(el).attr("src"), "js");
95
- });
96
- $("img[src]").each((_, el) => addAsset($(el).attr("src"), "image"));
97
- $('input[type="image"][src]').each((_, el) => addAsset($(el).attr("src"), "image"));
98
- $("img[srcset], picture source[srcset]").each((_, el) => {
99
- const srcset = $(el).attr("srcset");
100
- srcset?.split(",").forEach((entry) => {
101
- const [url] = entry.trim().split(/\s+/);
102
- addAsset(url, "image");
103
- });
104
- });
105
- $("video[src]").each((_, el) => addAsset($(el).attr("src"), "video"));
106
- $("video[poster]").each((_, el) => addAsset($(el).attr("poster"), "image"));
107
- $("audio[src]").each((_, el) => addAsset($(el).attr("src"), "audio"));
108
- $("video > source[src]").each((_, el) => addAsset($(el).attr("src"), "video"));
109
- $("audio > source[src]").each((_, el) => addAsset($(el).attr("src"), "audio"));
110
- $("link[href]").filter((_, el) => {
111
- const rel = $(el).attr("rel")?.toLowerCase() ?? "";
112
- return ["icon", "shortcut icon", "apple-touch-icon", "manifest"].includes(rel);
113
- }).each((_, el) => {
114
- const rel = $(el).attr("rel")?.toLowerCase() ?? "";
115
- const isIcon = ["icon", "shortcut icon", "apple-touch-icon"].includes(rel);
116
- addAsset($(el).attr("href"), isIcon ? "image" : void 0);
117
- });
118
- $('link[rel="preload"][as="font"][href]').each((_, el) => {
119
- addAsset($(el).attr("href"), "font");
120
- });
121
- logger?.info(`HTML parsing complete. Discovered ${assets.length} unique asset links.`);
122
- return { htmlContent, assets };
123
- }
124
-
125
184
  // src/core/extractor.ts
126
- import { readFile as readFile2 } from "fs/promises";
127
- import * as fs from "fs";
128
- import path2 from "path";
129
- import { fileURLToPath, URL as URL2 } from "url";
130
- import * as axios from "axios";
131
185
  var TEXT_ASSET_TYPES = /* @__PURE__ */ new Set(["css", "js"]);
132
186
  var BINARY_ASSET_TYPES = /* @__PURE__ */ new Set(["image", "font", "video", "audio"]);
133
187
  var MAX_ASSET_EXTRACTION_ITERATIONS = 1e3;
@@ -140,6 +194,7 @@ function isUtf8DecodingLossy(originalBuffer, decodedString) {
140
194
  }
141
195
  }
142
196
  function determineBaseUrl(inputPathOrUrl, logger) {
197
+ console.log(`[DEBUG determineBaseUrl] Input: "${inputPathOrUrl}"`);
143
198
  logger?.debug(`Determining base URL for input: ${inputPathOrUrl}`);
144
199
  if (!inputPathOrUrl) {
145
200
  logger?.warn("Cannot determine base URL: inputPathOrUrl is empty or invalid.");
@@ -153,49 +208,46 @@ function determineBaseUrl(inputPathOrUrl, logger) {
153
208
  url.hash = "";
154
209
  const baseUrl = url.href;
155
210
  logger?.debug(`Determined remote base URL: ${baseUrl}`);
211
+ console.log(`[DEBUG determineBaseUrl] Determined Remote URL: "${baseUrl}"`);
156
212
  return baseUrl;
157
213
  } else if (inputPathOrUrl.includes("://") && !inputPathOrUrl.startsWith("file:")) {
158
214
  logger?.warn(`Input "${inputPathOrUrl}" looks like a URL but uses an unsupported protocol. Cannot determine base URL.`);
215
+ console.log(`[DEBUG determineBaseUrl] Unsupported protocol.`);
159
216
  return void 0;
160
217
  } else {
161
- let absolutePath;
218
+ let resourcePath;
219
+ let isInputLikelyDirectory = false;
162
220
  if (inputPathOrUrl.startsWith("file:")) {
163
- try {
164
- absolutePath = fileURLToPath(inputPathOrUrl);
165
- } catch (e) {
166
- logger?.error(`\u{1F480} Failed to convert file URL "${inputPathOrUrl}" to path: ${e.message}`);
167
- return void 0;
168
- }
221
+ resourcePath = fileURLToPath(inputPathOrUrl);
222
+ isInputLikelyDirectory = inputPathOrUrl.endsWith("/");
169
223
  } else {
170
- absolutePath = path2.resolve(inputPathOrUrl);
171
- }
172
- let isDirectory = false;
173
- try {
174
- isDirectory = fs.statSync(absolutePath).isDirectory();
175
- } catch (statError) {
176
- if (statError instanceof Error && statError.code === "ENOENT") {
177
- logger?.debug(`Path "${absolutePath}" not found. Assuming input represents a file, using its parent directory as base.`);
178
- } else {
179
- logger?.warn(`Could not stat local path "${absolutePath}" during base URL determination: ${statError instanceof Error ? statError.message : String(statError)}. Assuming input represents a file.`);
224
+ resourcePath = path2.resolve(inputPathOrUrl);
225
+ try {
226
+ isInputLikelyDirectory = fs.statSync(resourcePath).isDirectory();
227
+ } catch {
228
+ isInputLikelyDirectory = false;
180
229
  }
181
- isDirectory = false;
182
230
  }
183
- const dirPath = isDirectory ? absolutePath : path2.dirname(absolutePath);
184
- let normalizedPathForURL = dirPath.replace(/\\/g, "/");
231
+ console.log(`[DEBUG determineBaseUrl] resourcePath: "${resourcePath}", isInputLikelyDirectory: ${isInputLikelyDirectory}`);
232
+ const baseDirPath = isInputLikelyDirectory ? resourcePath : path2.dirname(resourcePath);
233
+ console.log(`[DEBUG determineBaseUrl] Calculated baseDirPath: "${baseDirPath}"`);
234
+ let normalizedPathForURL = baseDirPath.replace(/\\/g, "/");
185
235
  if (/^[A-Z]:\//i.test(normalizedPathForURL) && !normalizedPathForURL.startsWith("/")) {
186
236
  normalizedPathForURL = "/" + normalizedPathForURL;
187
237
  }
188
- const fileUrl = new URL2("file://" + normalizedPathForURL);
189
- let fileUrlString = fileUrl.href;
190
- if (!fileUrlString.endsWith("/")) {
191
- fileUrlString += "/";
238
+ if (!normalizedPathForURL.endsWith("/")) {
239
+ normalizedPathForURL += "/";
192
240
  }
193
- logger?.debug(`Determined local base URL: ${fileUrlString} (from: ${inputPathOrUrl}, resolved dir: ${dirPath}, isDir: ${isDirectory})`);
241
+ const fileUrl = new URL2("file://" + normalizedPathForURL);
242
+ const fileUrlString = fileUrl.href;
243
+ logger?.debug(`Determined base URL: ${fileUrlString} (from: ${inputPathOrUrl}, resolved base dir: ${baseDirPath})`);
244
+ console.log(`[DEBUG determineBaseUrl] Determined File URL: "${fileUrlString}"`);
194
245
  return fileUrlString;
195
246
  }
196
247
  } catch (error) {
197
248
  const message = error instanceof Error ? error.message : String(error);
198
- logger?.error(`\u{1F480} Failed to determine base URL for "${inputPathOrUrl}": ${message}${error instanceof Error ? ` - Stack: ${error.stack}` : ""}`);
249
+ console.error(`[DEBUG determineBaseUrl] Error determining base URL: ${message}`);
250
+ logger?.error(`\u{1F480} Failed to determine base URL for "${inputPathOrUrl}": ${message}${error instanceof Error && error.stack ? ` - Stack: ${error.stack}` : ""}`);
199
251
  return void 0;
200
252
  }
201
253
  }
@@ -216,6 +268,10 @@ function resolveAssetUrl(assetUrl, baseContextUrl, logger) {
216
268
  }
217
269
  try {
218
270
  const resolved = new URL2(resolvableUrl, baseContextUrl);
271
+ if (!["http:", "https:", "file:"].includes(resolved.protocol)) {
272
+ logger?.debug(`Skipping asset with unsupported protocol: ${resolved.href}`);
273
+ return null;
274
+ }
219
275
  return resolved;
220
276
  } catch (error) {
221
277
  const message = error instanceof Error ? error.message : String(error);
@@ -228,83 +284,78 @@ function resolveAssetUrl(assetUrl, baseContextUrl, logger) {
228
284
  }
229
285
  }
230
286
  function resolveCssRelativeUrl(relativeUrl, cssBaseContextUrl, logger) {
231
- if (!relativeUrl || relativeUrl.startsWith("data:")) {
287
+ console.log(`[DEBUG resolveCssRelativeUrl] Input: relative="${relativeUrl}", base="${cssBaseContextUrl}"`);
288
+ if (!relativeUrl || relativeUrl.startsWith("data:") || relativeUrl.startsWith("#")) {
232
289
  return null;
233
290
  }
234
291
  try {
235
- if (cssBaseContextUrl.startsWith("file:")) {
236
- const basePath = fileURLToPath(cssBaseContextUrl);
237
- let cssDir;
238
- try {
239
- const stat = fs.statSync(basePath);
240
- if (stat.isDirectory()) {
241
- cssDir = basePath;
242
- } else {
243
- cssDir = path2.dirname(basePath);
244
- }
245
- } catch {
246
- cssDir = path2.dirname(basePath);
247
- }
248
- let resolvedPath = path2.resolve(cssDir, relativeUrl);
249
- resolvedPath = resolvedPath.replace(/\\/g, "/");
250
- if (/^[A-Z]:/i.test(resolvedPath) && !resolvedPath.startsWith("/")) {
251
- resolvedPath = "/" + resolvedPath;
252
- }
253
- return `file://${resolvedPath}`;
254
- } else {
255
- return new URL2(relativeUrl, cssBaseContextUrl).href;
256
- }
292
+ const resolvedUrl = new URL2(relativeUrl, cssBaseContextUrl);
293
+ console.log(`[DEBUG resolveCssRelativeUrl] Resolved URL object href: "${resolvedUrl.href}"`);
294
+ return resolvedUrl.href;
257
295
  } catch (error) {
258
296
  logger?.warn(
259
- `Failed to resolve CSS URL: "${relativeUrl}" against "${cssBaseContextUrl}": ${String(error)}`
297
+ `Failed to resolve CSS URL: "${relativeUrl}" relative to "${cssBaseContextUrl}": ${String(error)}`
260
298
  );
299
+ console.error(`[DEBUG resolveCssRelativeUrl] Error resolving: ${String(error)}`);
261
300
  return null;
262
301
  }
263
302
  }
264
303
  async function fetchAsset(resolvedUrl, logger, timeout = 1e4) {
304
+ console.log(`[DEBUG fetchAsset] Attempting fetch for URL: ${resolvedUrl.href}`);
265
305
  logger?.debug(`Attempting to fetch asset: ${resolvedUrl.href}`);
266
306
  const protocol = resolvedUrl.protocol;
267
307
  try {
268
308
  if (protocol === "http:" || protocol === "https:") {
269
- const response = await axios.default.get(resolvedUrl.href, {
309
+ const response = await axiosNs.default.get(resolvedUrl.href, {
270
310
  responseType: "arraybuffer",
271
311
  timeout
272
312
  });
273
- logger?.debug(`Workspaceed remote asset ${resolvedUrl.href} (Status: ${response.status}, Type: ${response.headers["content-type"] || "N/A"}, Size: ${response.data.byteLength} bytes)`);
313
+ logger?.debug(`Workspaceed remote asset ${resolvedUrl.href} (Status: ${response.status}, Type: ${response.headers["content-type"] || "N/A"}, Size: ${response.data?.byteLength ?? 0} bytes)`);
314
+ console.log(`[DEBUG fetchAsset] HTTP fetch SUCCESS for: ${resolvedUrl.href}, Status: ${response.status}`);
274
315
  return Buffer.from(response.data);
275
316
  } else if (protocol === "file:") {
276
317
  let filePath;
277
318
  try {
278
319
  filePath = fileURLToPath(resolvedUrl);
279
320
  } catch (e) {
321
+ console.error(`[DEBUG fetchAsset] fileURLToPath FAILED for: ${resolvedUrl.href}`, e);
280
322
  logger?.error(`Could not convert file URL to path: ${resolvedUrl.href}. Error: ${e.message}`);
281
323
  return null;
282
324
  }
283
- const data = await readFile2(filePath);
325
+ const normalizedForLog = path2.normalize(filePath);
326
+ console.log(`[DEBUG fetchAsset] Attempting readFile with path: "${normalizedForLog}" (Original from URL: "${filePath}")`);
327
+ const data = await readFile(filePath);
328
+ console.log(`[DEBUG fetchAsset] readFile call SUCCEEDED for path: "${normalizedForLog}". Data length: ${data?.byteLength}`);
284
329
  logger?.debug(`Read local file ${filePath} (${data.byteLength} bytes)`);
285
330
  return data;
286
331
  } else {
332
+ console.log(`[DEBUG fetchAsset] Unsupported protocol: ${protocol}`);
287
333
  logger?.warn(`Unsupported protocol "${protocol}" in URL: ${resolvedUrl.href}`);
288
334
  return null;
289
335
  }
290
336
  } catch (error) {
291
- if ((protocol === "http:" || protocol === "https:") && axios.default.isAxiosError(error)) {
337
+ const failedId = protocol === "file:" ? path2.normalize(fileURLToPath(resolvedUrl)) : resolvedUrl.href;
338
+ console.error(`[DEBUG fetchAsset] fetch/read FAILED for: "${failedId}". Error:`, error);
339
+ if ((protocol === "http:" || protocol === "https:") && axiosNs.isAxiosError(error)) {
292
340
  const status = error.response?.status ?? "N/A";
293
341
  const statusText = error.response?.statusText ?? "Error";
294
342
  const code = error.code ?? "N/A";
295
343
  const message = error.message;
296
344
  const logMessage = `\u26A0\uFE0F Failed to fetch remote asset ${resolvedUrl.href}: Status ${status} - ${statusText}. Code: ${code}, Message: ${message}`;
297
345
  logger?.warn(logMessage);
298
- } else if (protocol === "file:") {
346
+ }
347
+ if (error instanceof Error && error.code === "ENOENT") {
299
348
  let failedPath = resolvedUrl.href;
300
349
  try {
301
350
  failedPath = fileURLToPath(resolvedUrl);
302
351
  } catch {
303
352
  }
353
+ failedPath = path2.normalize(failedPath);
304
354
  if (error instanceof Error && error.code === "ENOENT") {
305
355
  logger?.warn(`\u26A0\uFE0F File not found (ENOENT) for asset: ${failedPath}.`);
306
356
  } else if (error instanceof Error && error.code === "EACCES") {
307
357
  logger?.warn(`\u26A0\uFE0F Permission denied (EACCES) reading asset: ${failedPath}.`);
358
+ logger?.warn(`\u26A0\uFE0F Failed to read local asset ${failedPath}: ${error.message}`);
308
359
  } else if (error instanceof Error) {
309
360
  logger?.warn(`\u26A0\uFE0F Failed to read local asset ${failedPath}: ${error.message}`);
310
361
  } else {
@@ -332,14 +383,13 @@ function extractUrlsFromCSS(cssContent, cssBaseContextUrl, logger) {
332
383
  newlyDiscovered.push({
333
384
  type: assetType,
334
385
  url: resolvedUrl,
335
- // The resolved URL string
386
+ // The resolved absolute URL string
336
387
  content: void 0
388
+ // Content will be fetched later if needed
337
389
  });
338
390
  logger?.debug(`Discovered nested ${assetType} asset (${ruleType}) in CSS ${cssBaseContextUrl}: ${resolvedUrl}`);
339
391
  }
340
392
  };
341
- urlRegex.lastIndex = 0;
342
- importRegex.lastIndex = 0;
343
393
  let match;
344
394
  while ((match = urlRegex.exec(cssContent)) !== null) {
345
395
  processFoundUrl(match[2], "url()");
@@ -355,23 +405,28 @@ async function extractAssets(parsed, embedAssets = true, inputPathOrUrl, logger)
355
405
  const initialAssets = parsed.assets || [];
356
406
  const finalAssetsMap = /* @__PURE__ */ new Map();
357
407
  let assetsToProcess = [];
408
+ const processedOrQueuedUrls = /* @__PURE__ */ new Set();
358
409
  const htmlBaseContextUrl = determineBaseUrl(inputPathOrUrl || "", logger);
359
410
  if (!htmlBaseContextUrl && initialAssets.some((a) => !/^[a-z]+:/i.test(a.url) && !a.url.startsWith("data:") && !a.url.startsWith("#") && !a.url.startsWith("/"))) {
360
411
  logger?.warn("\u{1F6A8} No valid base path/URL determined for the HTML source! Resolution of relative asset paths from HTML may fail.");
361
412
  } else if (htmlBaseContextUrl) {
362
413
  logger?.debug(`Using HTML base context URL: ${htmlBaseContextUrl}`);
363
414
  }
364
- const processedOrQueuedUrls = /* @__PURE__ */ new Set();
365
415
  logger?.debug(`Queueing ${initialAssets.length} initial assets parsed from HTML...`);
366
416
  for (const asset of initialAssets) {
367
417
  const resolvedUrlObj = resolveAssetUrl(asset.url, htmlBaseContextUrl, logger);
368
- const urlToQueue = resolvedUrlObj ? resolvedUrlObj.href : asset.url;
418
+ if (!resolvedUrlObj) {
419
+ logger?.debug(` -> Skipping initial asset with unresolvable/ignorable URL: ${asset.url}`);
420
+ continue;
421
+ }
422
+ const urlToQueue = resolvedUrlObj.href;
369
423
  if (!urlToQueue.startsWith("data:") && !processedOrQueuedUrls.has(urlToQueue)) {
370
424
  processedOrQueuedUrls.add(urlToQueue);
371
425
  const { assetType: guessedType } = guessMimeType(urlToQueue);
372
426
  const initialType = asset.type ?? guessedType;
373
427
  assetsToProcess.push({
374
428
  url: urlToQueue,
429
+ // Use the resolved URL
375
430
  type: initialType,
376
431
  content: void 0
377
432
  });
@@ -379,7 +434,7 @@ async function extractAssets(parsed, embedAssets = true, inputPathOrUrl, logger)
379
434
  } else if (urlToQueue.startsWith("data:")) {
380
435
  logger?.debug(` -> Skipping data URI: ${urlToQueue.substring(0, 50)}...`);
381
436
  } else {
382
- logger?.debug(` -> Skipping already queued initial asset: ${urlToQueue}`);
437
+ logger?.debug(` -> Skipping already processed/queued initial asset: ${urlToQueue}`);
383
438
  }
384
439
  }
385
440
  let iterationCount = 0;
@@ -450,7 +505,7 @@ async function extractAssets(parsed, embedAssets = true, inputPathOrUrl, logger)
450
505
  cssContentForParsing = textContent;
451
506
  }
452
507
  } else {
453
- logger?.warn(`Could not decode ${asset.type} ${asset.url} as valid UTF-8 text.${embedAssets ? " Falling back to base64 data URI." : ""}`);
508
+ logger?.warn(`Could not decode ${asset.type} asset ${asset.url} as valid UTF-8 text.${embedAssets ? " Falling back to base64 data URI." : ""}`);
454
509
  cssContentForParsing = void 0;
455
510
  if (embedAssets) {
456
511
  finalContent = `data:${effectiveMime};base64,${assetContentBuffer.toString("base64")}`;
@@ -497,6 +552,7 @@ async function extractAssets(parsed, embedAssets = true, inputPathOrUrl, logger)
497
552
  const newlyDiscoveredAssets = extractUrlsFromCSS(
498
553
  cssContentForParsing,
499
554
  cssBaseContextUrl,
555
+ // Use CSS file's base URL
500
556
  logger
501
557
  );
502
558
  if (newlyDiscoveredAssets.length > 0) {
@@ -671,7 +727,7 @@ async function minifyAssets(parsed, options = {}, logger) {
671
727
  }
672
728
 
673
729
  // src/core/packer.ts
674
- import * as cheerio2 from "cheerio";
730
+ import * as cheerio from "cheerio";
675
731
  function escapeScriptContent(code) {
676
732
  return code.replace(/<\/(script)/gi, "<\\/$1");
677
733
  }
@@ -784,7 +840,7 @@ function packHTML(parsed, logger) {
784
840
  return '<!DOCTYPE html><html><head><base href="./"></head><body></body></html>';
785
841
  }
786
842
  logger?.debug("Loading HTML content into Cheerio for packing...");
787
- const $ = cheerio2.load(htmlContent);
843
+ const $ = cheerio.load(htmlContent);
788
844
  logger?.debug("Ensuring <base> tag exists...");
789
845
  ensureBaseTag($, logger);
790
846
  logger?.debug("Starting asset inlining...");
@@ -795,126 +851,6 @@ function packHTML(parsed, logger) {
795
851
  return finalHtml;
796
852
  }
797
853
 
798
- // src/core/web-fetcher.ts
799
- import * as puppeteer from "puppeteer";
800
- import * as fs2 from "fs/promises";
801
-
802
- // src/types.ts
803
- var LogLevel = /* @__PURE__ */ ((LogLevel2) => {
804
- LogLevel2[LogLevel2["NONE"] = 0] = "NONE";
805
- LogLevel2[LogLevel2["ERROR"] = 1] = "ERROR";
806
- LogLevel2[LogLevel2["WARN"] = 2] = "WARN";
807
- LogLevel2[LogLevel2["INFO"] = 3] = "INFO";
808
- LogLevel2[LogLevel2["DEBUG"] = 4] = "DEBUG";
809
- return LogLevel2;
810
- })(LogLevel || {});
811
-
812
- // src/utils/logger.ts
813
- var Logger = class _Logger {
814
- /** The current minimum log level required for a message to be output. */
815
- level;
816
- /**
817
- * Creates a new Logger instance.
818
- * Defaults to LogLevel.INFO if no level is provided.
819
- *
820
- * @param {LogLevel} [level=LogLevel.INFO] - The initial log level for this logger instance.
821
- * Must be one of the values from the LogLevel enum.
822
- */
823
- constructor(level = 3 /* INFO */) {
824
- this.level = level !== void 0 && LogLevel[level] !== void 0 ? level : 3 /* INFO */;
825
- }
826
- /**
827
- * Updates the logger's current level. Messages below this level will be suppressed.
828
- *
829
- * @param {LogLevel} level - The new log level to set. Must be a LogLevel enum member.
830
- */
831
- setLevel(level) {
832
- this.level = level;
833
- }
834
- /**
835
- * Logs a debug message if the current log level is DEBUG or higher.
836
- *
837
- * @param {string} message - The debug message string.
838
- */
839
- debug(message) {
840
- if (this.level >= 4 /* DEBUG */) {
841
- console.debug(`[DEBUG] ${message}`);
842
- }
843
- }
844
- /**
845
- * Logs an informational message if the current log level is INFO or higher.
846
- *
847
- * @param {string} message - The informational message string.
848
- */
849
- info(message) {
850
- if (this.level >= 3 /* INFO */) {
851
- console.info(`[INFO] ${message}`);
852
- }
853
- }
854
- /**
855
- * Logs a warning message if the current log level is WARN or higher.
856
- *
857
- * @param {string} message - The warning message string.
858
- */
859
- warn(message) {
860
- if (this.level >= 2 /* WARN */) {
861
- console.warn(`[WARN] ${message}`);
862
- }
863
- }
864
- /**
865
- * Logs an error message if the current log level is ERROR or higher.
866
- *
867
- * @param {string} message - The error message string.
868
- */
869
- error(message) {
870
- if (this.level >= 1 /* ERROR */) {
871
- console.error(`[ERROR] ${message}`);
872
- }
873
- }
874
- /**
875
- * Static factory method to create a Logger instance based on a simple boolean `verbose` flag.
876
- *
877
- * @static
878
- * @param {{ verbose?: boolean }} [options={}] - An object potentially containing a `verbose` flag.
879
- * @returns {Logger} A new Logger instance set to LogLevel.DEBUG if options.verbose is true,
880
- * otherwise set to LogLevel.INFO.
881
- */
882
- static fromVerboseFlag(options = {}) {
883
- return new _Logger(options.verbose ? 4 /* DEBUG */ : 3 /* INFO */);
884
- }
885
- /**
886
- * Static factory method to create a Logger instance based on a LogLevel string name.
887
- * Useful for creating a logger from config files or environment variables.
888
- *
889
- * @static
890
- * @param {string | undefined} levelName - The name of the log level (e.g., 'debug', 'info', 'warn', 'error', 'silent'/'none'). Case-insensitive.
891
- * @param {LogLevel} [defaultLevel=LogLevel.INFO] - The level to use if levelName is invalid or undefined.
892
- * @returns {Logger} A new Logger instance set to the corresponding LogLevel.
893
- */
894
- static fromLevelName(levelName, defaultLevel = 3 /* INFO */) {
895
- if (!levelName) {
896
- return new _Logger(defaultLevel);
897
- }
898
- switch (levelName.toLowerCase()) {
899
- // Return enum members
900
- case "debug":
901
- return new _Logger(4 /* DEBUG */);
902
- case "info":
903
- return new _Logger(3 /* INFO */);
904
- case "warn":
905
- return new _Logger(2 /* WARN */);
906
- case "error":
907
- return new _Logger(1 /* ERROR */);
908
- case "silent":
909
- case "none":
910
- return new _Logger(0 /* NONE */);
911
- default:
912
- console.warn(`[Logger] Invalid log level name "${levelName}". Defaulting to ${LogLevel[defaultLevel]}.`);
913
- return new _Logger(defaultLevel);
914
- }
915
- }
916
- };
917
-
918
854
  // src/utils/slugify.ts
919
855
  function slugify(url) {
920
856
  if (!url || typeof url !== "string") return "index";
@@ -946,9 +882,11 @@ function bundleMultiPageHTML(pages, logger) {
946
882
  throw new Error(errorMsg);
947
883
  }
948
884
  logger?.info(`Bundling ${pages.length} pages into a multi-page HTML document.`);
885
+ let pageIndex = 0;
949
886
  const validPages = pages.filter((page) => {
950
887
  const isValid = page && typeof page === "object" && typeof page.url === "string" && typeof page.html === "string";
951
- if (!isValid) logger?.warn("Skipping invalid page entry");
888
+ if (!isValid) logger?.warn(`Skipping invalid page entry at index ${pageIndex}`);
889
+ pageIndex++;
952
890
  return isValid;
953
891
  });
954
892
  if (validPages.length === 0) {
@@ -958,70 +896,137 @@ function bundleMultiPageHTML(pages, logger) {
958
896
  }
959
897
  const slugMap = /* @__PURE__ */ new Map();
960
898
  const usedSlugs = /* @__PURE__ */ new Set();
899
+ let firstValidSlug = void 0;
900
+ let pageCounterForFallback = 1;
961
901
  for (const page of validPages) {
962
- const baseSlug = sanitizeSlug(page.url);
902
+ let baseSlug = sanitizeSlug(page.url);
903
+ const isRootIndex = page.url === "/" || page.url === "index.html" || page.url.endsWith("/index.html");
904
+ if (baseSlug === "index" && !isRootIndex) {
905
+ logger?.debug(`URL "${page.url}" sanitized to "index", attempting to find alternative slug.`);
906
+ const pathParts = page.url.replace(/\/$/, "").split("/").filter((p) => p && p.toLowerCase() !== "index.html" && p.toLowerCase() !== "index");
907
+ if (pathParts.length > 0) {
908
+ const lastPartSlug = sanitizeSlug(pathParts[pathParts.length - 1]);
909
+ if (lastPartSlug && lastPartSlug !== "index") {
910
+ baseSlug = lastPartSlug;
911
+ logger?.debug(`Using last path part slug "${baseSlug}" instead.`);
912
+ } else {
913
+ baseSlug = "page";
914
+ logger?.debug(`Last path part invalid ("${lastPartSlug}"), using fallback slug "page".`);
915
+ }
916
+ } else {
917
+ baseSlug = "page";
918
+ logger?.debug(`No valid path parts found, using fallback slug "page".`);
919
+ }
920
+ } else if (!baseSlug) {
921
+ if (isRootIndex) {
922
+ baseSlug = "index";
923
+ logger?.debug(`URL "${page.url}" sanitized to empty string, using "index" as it is a root index.`);
924
+ } else {
925
+ baseSlug = "page";
926
+ logger?.debug(`URL "${page.url}" sanitized to empty string, using fallback slug "page".`);
927
+ }
928
+ }
929
+ if (!baseSlug) {
930
+ baseSlug = `page-${pageCounterForFallback++}`;
931
+ logger?.warn(`Could not determine a valid base slug for "${page.url}", using generated fallback "${baseSlug}".`);
932
+ }
963
933
  let slug = baseSlug;
964
- let counter = 1;
934
+ let collisionCounter = 1;
935
+ const originalBaseSlugForLog = baseSlug;
965
936
  while (usedSlugs.has(slug)) {
966
- slug = `${baseSlug}-${counter++}`;
967
- logger?.warn(`Slug collision detected for "${page.url}". Using "${slug}" instead.`);
937
+ const newSlug = `${originalBaseSlugForLog}-${collisionCounter++}`;
938
+ logger?.warn(`Slug collision detected for "${page.url}" (intended slug: '${originalBaseSlugForLog}'). Using "${newSlug}" instead.`);
939
+ slug = newSlug;
968
940
  }
969
941
  usedSlugs.add(slug);
970
942
  slugMap.set(page.url, slug);
943
+ if (firstValidSlug === void 0) {
944
+ firstValidSlug = slug;
945
+ }
971
946
  }
972
- const defaultPageSlug = slugMap.get(validPages[0].url);
947
+ const defaultPageSlug = usedSlugs.has("index") ? "index" : firstValidSlug || "page";
973
948
  let output = `<!DOCTYPE html>
974
949
  <html lang="en">
975
950
  <head>
976
951
  <meta charset="UTF-8">
977
952
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
978
953
  <title>Multi-Page Bundle</title>
954
+ <style>
955
+ body { font-family: sans-serif; margin: 0; }
956
+ #main-nav { background-color: #f0f0f0; padding: 10px; border-bottom: 1px solid #ccc; }
957
+ #main-nav a { margin-right: 15px; text-decoration: none; color: #007bff; }
958
+ #main-nav a.active { font-weight: bold; text-decoration: underline; }
959
+ #page-container { padding: 20px; }
960
+ template { display: none; }
961
+ </style>
979
962
  </head>
980
963
  <body>
981
964
  <nav id="main-nav">
982
965
  ${validPages.map((p) => {
983
966
  const slug = slugMap.get(p.url);
984
- const label = p.url.split("/").pop()?.split(".")[0] || "Page";
967
+ const label = slug;
985
968
  return `<a href="#${slug}" data-page="${slug}">${label}</a>`;
986
- }).join("\n")}
969
+ }).join("\n ")}
987
970
  </nav>
988
971
  <div id="page-container"></div>
989
972
  ${validPages.map((p) => {
990
973
  const slug = slugMap.get(p.url);
991
974
  return `<template id="page-${slug}">${p.html}</template>`;
992
- }).join("\n")}
975
+ }).join("\n ")}
993
976
  <script id="router-script">
994
977
  document.addEventListener('DOMContentLoaded', function() {
978
+ const pageContainer = document.getElementById('page-container');
979
+ const navLinks = document.querySelectorAll('#main-nav a');
980
+
995
981
  function navigateTo(slug) {
996
982
  const template = document.getElementById('page-' + slug);
997
- const container = document.getElementById('page-container');
998
- if (!template || !container) return;
999
- container.innerHTML = '';
1000
- container.appendChild(template.content.cloneNode(true));
1001
- document.querySelectorAll('#main-nav a').forEach(link => {
1002
- if (link.getAttribute('data-page') === slug) link.classList.add('active');
1003
- else link.classList.remove('active');
983
+ if (!template || !pageContainer) {
984
+ console.warn('Navigation failed: Template or container not found for slug:', slug);
985
+ // Maybe try navigating to default page? Or just clear container?
986
+ if (pageContainer) pageContainer.innerHTML = '<p>Page not found.</p>';
987
+ return;
988
+ }
989
+ // Clear previous content and append new content
990
+ pageContainer.innerHTML = ''; // Clear reliably
991
+ pageContainer.appendChild(template.content.cloneNode(true));
992
+
993
+ // Update active link styling
994
+ navLinks.forEach(link => {
995
+ link.classList.toggle('active', link.getAttribute('data-page') === slug);
1004
996
  });
997
+
998
+ // Update URL hash without triggering hashchange if already correct
1005
999
  if (window.location.hash.substring(1) !== slug) {
1006
- history.pushState(null, '', '#' + slug);
1000
+ // Use pushState for cleaner history
1001
+ history.pushState({ slug: slug }, '', '#' + slug);
1007
1002
  }
1008
1003
  }
1009
1004
 
1010
- window.addEventListener('hashchange', () => {
1011
- const slug = window.location.hash.substring(1);
1012
- if (document.getElementById('page-' + slug)) navigateTo(slug);
1005
+ // Handle back/forward navigation
1006
+ window.addEventListener('popstate', (event) => {
1007
+ let slug = window.location.hash.substring(1);
1008
+ // If popstate event has state use it, otherwise fallback to hash or default
1009
+ if (event && event.state && event.state.slug) { // Check event exists
1010
+ slug = event.state.slug;
1011
+ }
1012
+ // Ensure the target page exists before navigating, fallback to default slug
1013
+ const targetSlug = document.getElementById('page-' + slug) ? slug : '${defaultPageSlug}';
1014
+ navigateTo(targetSlug);
1013
1015
  });
1014
1016
 
1015
- document.querySelectorAll('#main-nav a').forEach(link => {
1017
+ // Handle direct link clicks
1018
+ navLinks.forEach(link => {
1016
1019
  link.addEventListener('click', function(e) {
1017
1020
  e.preventDefault();
1018
1021
  const slug = this.getAttribute('data-page');
1019
- navigateTo(slug);
1022
+ if (slug) navigateTo(slug);
1020
1023
  });
1021
1024
  });
1022
1025
 
1023
- const initial = window.location.hash.substring(1);
1024
- navigateTo(document.getElementById('page-' + initial) ? initial : '${defaultPageSlug}');
1026
+ // Initial page load
1027
+ const initialHash = window.location.hash.substring(1);
1028
+ const initialSlug = document.getElementById('page-' + initialHash) ? initialHash : '${defaultPageSlug}';
1029
+ navigateTo(initialSlug);
1025
1030
  });
1026
1031
  </script>
1027
1032
  </body>
@@ -1031,51 +1036,74 @@ function bundleMultiPageHTML(pages, logger) {
1031
1036
  }
1032
1037
 
1033
1038
  // src/core/web-fetcher.ts
1034
- async function fetchAndPackWebPage(url, logger, timeout = 3e4) {
1039
+ var PUPPETEER_LAUNCH_OPTIONS = {
1040
+ headless: true,
1041
+ args: [
1042
+ "--no-sandbox",
1043
+ // Often required in containerized environments
1044
+ "--disable-setuid-sandbox",
1045
+ "--disable-dev-shm-usage"
1046
+ // Recommended for Docker/CI
1047
+ ]
1048
+ };
1049
+ var DEFAULT_PAGE_TIMEOUT = 3e4;
1050
+ async function fetchAndPackWebPage(url, logger, timeout = DEFAULT_PAGE_TIMEOUT, userAgent) {
1035
1051
  let browser = null;
1036
1052
  const start = Date.now();
1037
- logger?.debug(`Initiating fetch for single page: ${url}`);
1053
+ logger?.info(`Initiating fetch for single page: ${url}`);
1038
1054
  try {
1039
- browser = await puppeteer.launch({ headless: true });
1040
- logger?.debug(`Browser launched for ${url}`);
1055
+ logger?.debug("Launching browser...");
1056
+ browser = await puppeteer.launch(PUPPETEER_LAUNCH_OPTIONS);
1057
+ logger?.debug(`Browser launched successfully (PID: ${browser.process()?.pid}).`);
1041
1058
  const page = await browser.newPage();
1042
- logger?.debug(`Page created for ${url}`);
1059
+ logger?.debug(`New page created for ${url}`);
1060
+ if (userAgent) {
1061
+ await page.setUserAgent(userAgent);
1062
+ logger?.debug(`User-Agent set to: "${userAgent}"`);
1063
+ }
1043
1064
  try {
1044
1065
  logger?.debug(`Navigating to ${url} with timeout ${timeout}ms`);
1045
1066
  await page.goto(url, { waitUntil: "networkidle2", timeout });
1046
1067
  logger?.debug(`Navigation successful for ${url}`);
1047
1068
  const html = await page.content();
1048
- logger?.debug(`Content retrieved for ${url}`);
1069
+ logger?.debug(`Content retrieved for ${url} (${Buffer.byteLength(html, "utf-8")} bytes)`);
1049
1070
  const metadata = {
1050
1071
  input: url,
1051
1072
  outputSize: Buffer.byteLength(html, "utf-8"),
1052
1073
  assetCount: 0,
1053
- // Basic fetch doesn't track assets
1074
+ // Basic fetch doesn't track assets processed by *this* tool
1054
1075
  buildTimeMs: Date.now() - start,
1055
1076
  errors: []
1056
1077
  // No errors if we reached this point
1057
1078
  };
1058
1079
  await page.close();
1059
1080
  logger?.debug(`Page closed for ${url}`);
1081
+ await browser.close();
1060
1082
  logger?.debug(`Browser closed for ${url}`);
1061
1083
  browser = null;
1062
1084
  return { html, metadata };
1063
1085
  } catch (pageError) {
1064
1086
  logger?.error(`Error during page processing for ${url}: ${pageError.message}`);
1065
- try {
1066
- await page.close();
1067
- } catch (closeErr) {
1068
- throw closeErr;
1087
+ if (page && !page.isClosed()) {
1088
+ try {
1089
+ await page.close();
1090
+ logger?.debug(`Page closed after error for ${url}`);
1091
+ } catch (closeErr) {
1092
+ logger?.error(`Failed to close page after error for ${url}: ${closeErr.message}`);
1093
+ }
1069
1094
  }
1070
1095
  throw pageError;
1071
1096
  }
1072
1097
  } catch (launchError) {
1073
- logger?.error(`Critical error during browser launch or page creation for ${url}: ${launchError.message}`);
1098
+ logger?.error(`Critical error during browser launch or page setup for ${url}: ${launchError.message}`);
1074
1099
  if (browser) {
1075
1100
  try {
1076
1101
  await browser.close();
1102
+ logger?.debug("Browser closed after launch/setup error.");
1077
1103
  } catch (closeErr) {
1104
+ logger?.warn(`Failed to close browser after launch/setup error: ${closeErr.message}`);
1078
1105
  }
1106
+ browser = null;
1079
1107
  }
1080
1108
  throw launchError;
1081
1109
  } finally {
@@ -1088,99 +1116,123 @@ async function fetchAndPackWebPage(url, logger, timeout = 3e4) {
1088
1116
  }
1089
1117
  }
1090
1118
  }
1091
- async function crawlWebsite(startUrl, maxDepth, logger) {
1119
+ async function crawlWebsite(startUrl, options) {
1120
+ const {
1121
+ maxDepth = 1,
1122
+ timeout = DEFAULT_PAGE_TIMEOUT,
1123
+ // include = ['**'], // TODO: Implement glob filtering
1124
+ // exclude = [],
1125
+ userAgent,
1126
+ logger
1127
+ } = options;
1092
1128
  logger?.info(`Starting crawl for ${startUrl} with maxDepth ${maxDepth}`);
1093
1129
  if (maxDepth <= 0) {
1094
- logger?.info("maxDepth is 0 or negative, no pages will be crawled.");
1130
+ logger?.warn("maxDepth is 0 or negative, no pages will be crawled.");
1095
1131
  return [];
1096
1132
  }
1097
- const browser = await puppeteer.launch({ headless: true });
1133
+ let browser = null;
1098
1134
  const visited = /* @__PURE__ */ new Set();
1099
1135
  const results = [];
1100
1136
  const queue = [];
1101
1137
  let startOrigin;
1102
1138
  try {
1103
- startOrigin = new URL(startUrl).origin;
1104
- } catch (e) {
1105
- logger?.error(`Invalid start URL: ${startUrl}. ${e.message}`);
1106
- await browser.close();
1107
- return [];
1108
- }
1109
- let normalizedStartUrl;
1110
- try {
1111
- const parsedStartUrl = new URL(startUrl);
1112
- parsedStartUrl.hash = "";
1113
- normalizedStartUrl = parsedStartUrl.href;
1114
- } catch (e) {
1115
- logger?.error(`Invalid start URL: ${startUrl}. ${e.message}`);
1116
- await browser.close();
1117
- return [];
1118
- }
1119
- visited.add(normalizedStartUrl);
1120
- queue.push({ url: normalizedStartUrl, depth: 1 });
1121
- logger?.debug(`Queued initial URL: ${normalizedStartUrl} (depth 1)`);
1122
- while (queue.length > 0) {
1123
- const { url, depth } = queue.shift();
1124
- logger?.info(`Processing: ${url} (depth ${depth})`);
1125
- let page = null;
1126
1139
  try {
1127
- page = await browser.newPage();
1128
- await page.setViewport({ width: 1280, height: 800 });
1129
- await page.goto(url, { waitUntil: "networkidle2", timeout: 3e4 });
1130
- const html = await page.content();
1131
- results.push({ url, html });
1132
- logger?.debug(`Successfully fetched content for ${url}`);
1133
- if (depth < maxDepth) {
1134
- logger?.debug(`Discovering links on ${url} (current depth ${depth}, maxDepth ${maxDepth})`);
1135
- const hrefs = await page.evaluate(
1136
- () => Array.from(document.querySelectorAll("a[href]"), (a) => a.getAttribute("href"))
1137
- );
1138
- logger?.debug(`Found ${hrefs.length} potential hrefs on ${url}`);
1139
- let linksAdded = 0;
1140
- for (const href of hrefs) {
1141
- if (!href) continue;
1142
- let absoluteUrl;
1143
- try {
1144
- const resolved = new URL(href, url);
1145
- resolved.hash = "";
1146
- absoluteUrl = resolved.href;
1147
- } catch (e) {
1148
- logger?.debug(`Ignoring invalid URL syntax: "${href}" on page ${url}`);
1149
- continue;
1150
- }
1151
- if (absoluteUrl.startsWith(startOrigin) && !visited.has(absoluteUrl)) {
1152
- visited.add(absoluteUrl);
1153
- queue.push({ url: absoluteUrl, depth: depth + 1 });
1154
- linksAdded++;
1155
- } else {
1140
+ startOrigin = new URL(startUrl).origin;
1141
+ } catch (e) {
1142
+ logger?.error(`Invalid start URL: ${startUrl}. ${e.message}`);
1143
+ throw new Error(`Invalid start URL: ${startUrl}`);
1144
+ }
1145
+ let normalizedStartUrl;
1146
+ try {
1147
+ const parsedStartUrl = new URL(startUrl);
1148
+ parsedStartUrl.hash = "";
1149
+ normalizedStartUrl = parsedStartUrl.href;
1150
+ } catch (e) {
1151
+ logger?.error(`Invalid start URL: ${startUrl}. ${e.message}`);
1152
+ throw new Error(`Invalid start URL: ${startUrl}`);
1153
+ }
1154
+ logger?.debug("Launching browser for crawl...");
1155
+ browser = await puppeteer.launch(PUPPETEER_LAUNCH_OPTIONS);
1156
+ logger?.debug(`Browser launched for crawl (PID: ${browser.process()?.pid}).`);
1157
+ visited.add(normalizedStartUrl);
1158
+ queue.push({ url: normalizedStartUrl, depth: 1 });
1159
+ logger?.debug(`Queued initial URL: ${normalizedStartUrl} (depth 1)`);
1160
+ while (queue.length > 0) {
1161
+ const { url, depth } = queue.shift();
1162
+ logger?.info(`Processing: ${url} (depth ${depth})`);
1163
+ let page = null;
1164
+ try {
1165
+ page = await browser.newPage();
1166
+ if (userAgent) {
1167
+ await page.setUserAgent(userAgent);
1168
+ }
1169
+ await page.goto(url, { waitUntil: "networkidle2", timeout });
1170
+ const html = await page.content();
1171
+ results.push({ url, html });
1172
+ logger?.debug(`Successfully fetched content for ${url}`);
1173
+ if (depth < maxDepth) {
1174
+ logger?.debug(`Discovering links on ${url} (depth ${depth}/${maxDepth})`);
1175
+ const hrefs = await page.evaluate(
1176
+ () => Array.from(document.querySelectorAll("a[href]"), (a) => a.getAttribute("href"))
1177
+ );
1178
+ logger?.debug(`Found ${hrefs.length} potential hrefs on ${url}`);
1179
+ let linksAdded = 0;
1180
+ for (const href of hrefs) {
1181
+ if (!href) continue;
1182
+ let absoluteUrl;
1183
+ try {
1184
+ const resolved = new URL(href, url);
1185
+ resolved.hash = "";
1186
+ absoluteUrl = resolved.href;
1187
+ } catch (e) {
1188
+ logger?.debug(`Ignoring invalid URL syntax: "${href}" on page ${url}`);
1189
+ continue;
1190
+ }
1191
+ if (absoluteUrl.startsWith(startOrigin) && !visited.has(absoluteUrl)) {
1192
+ visited.add(absoluteUrl);
1193
+ queue.push({ url: absoluteUrl, depth: depth + 1 });
1194
+ linksAdded++;
1195
+ }
1156
1196
  }
1197
+ logger?.debug(`Added ${linksAdded} new unique internal links to queue from ${url}`);
1198
+ } else {
1199
+ logger?.debug(`Max depth (${maxDepth}) reached, not discovering links on ${url}`);
1157
1200
  }
1158
- logger?.debug(`Added ${linksAdded} new unique internal links to queue from ${url}`);
1159
- } else {
1160
- logger?.debug(`Max depth (${maxDepth}) reached, not discovering links on ${url}`);
1161
- }
1162
- } catch (err) {
1163
- logger?.warn(`\u274C Failed to process ${url}: ${err.message}`);
1164
- } finally {
1165
- if (page) {
1166
- try {
1167
- await page.close();
1168
- } catch (pageCloseError) {
1169
- logger?.error(`Failed to close page for ${url}: ${pageCloseError.message}`);
1201
+ } catch (err) {
1202
+ logger?.warn(`\u274C Failed to process ${url}: ${err.message}`);
1203
+ } finally {
1204
+ if (page && !page.isClosed()) {
1205
+ try {
1206
+ await page.close();
1207
+ } catch (pageCloseError) {
1208
+ logger?.error(`Failed to close page for ${url}: ${pageCloseError.message}`);
1209
+ }
1170
1210
  }
1171
1211
  }
1172
1212
  }
1213
+ } catch (error) {
1214
+ logger?.error(`Critical crawl error: ${error instanceof Error ? error.message : error}`);
1215
+ throw error;
1216
+ } finally {
1217
+ if (browser) {
1218
+ logger?.info(`Crawl finished or errored. Closing browser.`);
1219
+ await browser.close();
1220
+ logger?.debug(`Browser closed after crawl.`);
1221
+ }
1173
1222
  }
1174
- logger?.info(`Crawl finished. Closing browser.`);
1175
- await browser.close();
1176
- logger?.info(`Found ${results.length} pages.`);
1223
+ logger?.info(`Crawl found ${results.length} pages.`);
1177
1224
  return results;
1178
1225
  }
1179
- async function recursivelyBundleSite(startUrl, outputFile, maxDepth = 1) {
1180
- const logger = new Logger();
1226
+ async function recursivelyBundleSite(startUrl, outputFile, maxDepth = 1, loggerInstance) {
1227
+ const logger = loggerInstance || new Logger();
1181
1228
  logger.info(`Starting recursive site bundle for ${startUrl} to ${outputFile} (maxDepth: ${maxDepth})`);
1182
1229
  try {
1183
- const pages = await crawlWebsite(startUrl, maxDepth, logger);
1230
+ const crawlOptions = {
1231
+ maxDepth,
1232
+ logger
1233
+ /* Add other options like timeout, userAgent if needed */
1234
+ };
1235
+ const pages = await crawlWebsite(startUrl, crawlOptions);
1184
1236
  if (pages.length === 0) {
1185
1237
  logger.warn("Crawl completed but found 0 pages. Output file may be empty or reflect an empty bundle.");
1186
1238
  } else {
@@ -1204,6 +1256,72 @@ async function recursivelyBundleSite(startUrl, outputFile, maxDepth = 1) {
1204
1256
  }
1205
1257
  }
1206
1258
 
1259
+ // src/core/parser.ts
1260
+ import { readFile as readFile2 } from "fs/promises";
1261
+ import * as cheerio2 from "cheerio";
1262
+ async function parseHTML(entryFilePath, logger) {
1263
+ logger?.debug(`Parsing HTML file: ${entryFilePath}`);
1264
+ let htmlContent;
1265
+ try {
1266
+ htmlContent = await readFile2(entryFilePath, "utf-8");
1267
+ logger?.debug(`Successfully read HTML file (${Buffer.byteLength(htmlContent)} bytes).`);
1268
+ } catch (err) {
1269
+ logger?.error(`Failed to read HTML file "${entryFilePath}": ${err.message}`);
1270
+ throw new Error(`Could not read input HTML file: ${entryFilePath}`, { cause: err });
1271
+ }
1272
+ const $ = cheerio2.load(htmlContent);
1273
+ const assets = [];
1274
+ const addedUrls = /* @__PURE__ */ new Set();
1275
+ const addAsset = (url, forcedType) => {
1276
+ if (!url || url.trim() === "" || url.startsWith("data:")) {
1277
+ return;
1278
+ }
1279
+ if (!addedUrls.has(url)) {
1280
+ addedUrls.add(url);
1281
+ const mimeInfo = guessMimeType(url);
1282
+ const type = forcedType ?? mimeInfo.assetType;
1283
+ assets.push({ type, url });
1284
+ logger?.debug(`Discovered asset: Type='${type}', URL='${url}'`);
1285
+ } else {
1286
+ logger?.debug(`Skipping duplicate asset URL: ${url}`);
1287
+ }
1288
+ };
1289
+ logger?.debug("Extracting assets from HTML tags...");
1290
+ $('link[rel="stylesheet"][href]').each((_, el) => {
1291
+ addAsset($(el).attr("href"), "css");
1292
+ });
1293
+ $("script[src]").each((_, el) => {
1294
+ addAsset($(el).attr("src"), "js");
1295
+ });
1296
+ $("img[src]").each((_, el) => addAsset($(el).attr("src"), "image"));
1297
+ $('input[type="image"][src]').each((_, el) => addAsset($(el).attr("src"), "image"));
1298
+ $("img[srcset], picture source[srcset]").each((_, el) => {
1299
+ const srcset = $(el).attr("srcset");
1300
+ srcset?.split(",").forEach((entry) => {
1301
+ const [url] = entry.trim().split(/\s+/);
1302
+ addAsset(url, "image");
1303
+ });
1304
+ });
1305
+ $("video[src]").each((_, el) => addAsset($(el).attr("src"), "video"));
1306
+ $("video[poster]").each((_, el) => addAsset($(el).attr("poster"), "image"));
1307
+ $("audio[src]").each((_, el) => addAsset($(el).attr("src"), "audio"));
1308
+ $("video > source[src]").each((_, el) => addAsset($(el).attr("src"), "video"));
1309
+ $("audio > source[src]").each((_, el) => addAsset($(el).attr("src"), "audio"));
1310
+ $("link[href]").filter((_, el) => {
1311
+ const rel = $(el).attr("rel")?.toLowerCase() ?? "";
1312
+ return ["icon", "shortcut icon", "apple-touch-icon", "manifest"].includes(rel);
1313
+ }).each((_, el) => {
1314
+ const rel = $(el).attr("rel")?.toLowerCase() ?? "";
1315
+ const isIcon = ["icon", "shortcut icon", "apple-touch-icon"].includes(rel);
1316
+ addAsset($(el).attr("href"), isIcon ? "image" : void 0);
1317
+ });
1318
+ $('link[rel="preload"][as="font"][href]').each((_, el) => {
1319
+ addAsset($(el).attr("href"), "font");
1320
+ });
1321
+ logger?.info(`HTML parsing complete. Discovered ${assets.length} unique asset links.`);
1322
+ return { htmlContent, assets };
1323
+ }
1324
+
1207
1325
  // src/utils/meta.ts
1208
1326
  var BuildTimer = class {
1209
1327
  startTime;
@@ -1284,122 +1402,84 @@ var BuildTimer = class {
1284
1402
  };
1285
1403
 
1286
1404
  // src/index.ts
1405
+ async function pack(input, options = {}) {
1406
+ const logger = options.loggerInstance || new Logger(options.logLevel);
1407
+ const isHttp = /^https?:\/\//i.test(input);
1408
+ if (!isHttp && /:\/\//.test(input) && !input.startsWith("file://")) {
1409
+ const errorMsg = `Unsupported protocol or input type: ${input}`;
1410
+ logger.error(errorMsg);
1411
+ throw new Error(errorMsg);
1412
+ }
1413
+ const isRemote = /^https?:\/\//i.test(input);
1414
+ const recursive = options.recursive === true || typeof options.recursive === "number";
1415
+ if (isRemote && recursive) {
1416
+ const depth = typeof options.recursive === "number" ? options.recursive : 1;
1417
+ logger.info(`Starting recursive fetch for ${input} up to depth ${depth}`);
1418
+ return generateRecursivePortableHTML(input, depth, options, logger);
1419
+ }
1420
+ logger.info(`Starting single page processing for: ${input}`);
1421
+ return generatePortableHTML(input, options, logger);
1422
+ }
1287
1423
  async function generatePortableHTML(input, options = {}, loggerInstance) {
1288
1424
  const logger = loggerInstance || new Logger(options.logLevel);
1289
- logger.info(`Generating portable HTML for: ${input}`);
1290
1425
  const timer = new BuildTimer(input);
1291
- const isRemote = /^https?:\/\//i.test(input);
1292
- if (isRemote) {
1293
- logger.info(`Input is a remote URL. Fetching page content directly...`);
1426
+ if (/^https?:\/\//i.test(input)) {
1427
+ logger.info(`Workspaceing remote page: ${input}`);
1294
1428
  try {
1295
- const result = await fetchAndPackWebPage2(input, options, logger);
1296
- logger.info(`Remote fetch complete. Input: ${input}, Size: ${result.metadata.outputSize} bytes, Time: ${result.metadata.buildTimeMs}ms`);
1297
- return result;
1429
+ const result = await fetchAndPackWebPage(input, logger);
1430
+ const metadata = timer.finish(result.html, result.metadata);
1431
+ logger.info(`Finished fetching and packing remote page: ${input}`);
1432
+ return { html: result.html, metadata };
1298
1433
  } catch (error) {
1299
- logger.error(`Failed to fetch remote URL ${input}: ${error.message}`);
1434
+ logger.error(`Error fetching remote page ${input}: ${error.message}`);
1300
1435
  throw error;
1301
1436
  }
1302
1437
  }
1303
- logger.info(`Input is a local file path. Starting local processing pipeline...`);
1304
- const basePath = options.baseUrl || input;
1305
- logger.debug(`Using base path for asset resolution: ${basePath}`);
1438
+ logger.info(`Processing local file: ${input}`);
1306
1439
  try {
1440
+ const baseUrl = options.baseUrl || input;
1307
1441
  const parsed = await parseHTML(input, logger);
1308
- const enriched = await extractAssets(parsed, options.embedAssets ?? true, basePath, logger);
1442
+ const enriched = await extractAssets(parsed, options.embedAssets ?? true, baseUrl, logger);
1309
1443
  const minified = await minifyAssets(enriched, options, logger);
1310
1444
  const finalHtml = packHTML(minified, logger);
1311
1445
  const metadata = timer.finish(finalHtml, {
1312
1446
  assetCount: minified.assets.length
1313
- // FIX: Removed incorrect attempt to get errors from logger
1314
- // Errors collected by the timer itself (via timer.addError) will be included automatically.
1315
1447
  });
1316
- logger.info(`Local processing complete. Input: ${input}, Size: ${metadata.outputSize} bytes, Assets: ${metadata.assetCount}, Time: ${metadata.buildTimeMs}ms`);
1317
- if (metadata.errors && metadata.errors.length > 0) {
1318
- logger.warn(`Completed with ${metadata.errors.length} warning(s) logged in metadata.`);
1319
- }
1448
+ logger.info(`Finished processing local file: ${input}`);
1320
1449
  return { html: finalHtml, metadata };
1321
1450
  } catch (error) {
1322
- logger.error(`Error during local processing for ${input}: ${error.message}`);
1451
+ logger.error(`Error processing local file ${input}: ${error.message}`);
1323
1452
  throw error;
1324
1453
  }
1325
1454
  }
1326
1455
  async function generateRecursivePortableHTML(url, depth = 1, options = {}, loggerInstance) {
1327
1456
  const logger = loggerInstance || new Logger(options.logLevel);
1328
- logger.info(`Generating recursive portable HTML for: ${url}, Max Depth: ${depth}`);
1329
1457
  const timer = new BuildTimer(url);
1330
1458
  if (!/^https?:\/\//i.test(url)) {
1331
- const errMsg = `Invalid input URL for recursive bundling: ${url}. Must start with http(s)://`;
1332
- logger.error(errMsg);
1333
- throw new Error(errMsg);
1459
+ const errorMsg = `Invalid URL for recursive bundling. Must start with http:// or https://. Received: ${url}`;
1460
+ logger.error(errorMsg);
1461
+ throw new Error(errorMsg);
1334
1462
  }
1335
- const internalOutputPathPlaceholder = `${new URL(url).hostname}_recursive.html`;
1463
+ logger.info(`Starting recursive bundle for ${url} up to depth ${depth}`);
1336
1464
  try {
1337
- const { html, pages } = await recursivelyBundleSite(url, internalOutputPathPlaceholder, depth);
1338
- logger.info(`Recursive crawl complete. Discovered and bundled ${pages} pages.`);
1465
+ const { html, pages } = await recursivelyBundleSite(url, "output.html", depth, logger);
1339
1466
  timer.setPageCount(pages);
1340
1467
  const metadata = timer.finish(html, {
1341
1468
  assetCount: 0,
1342
- // NOTE: Asset count across multiple pages is not currently aggregated.
1343
1469
  pagesBundled: pages
1344
- // TODO: Potentially collect errors from the core function if it returns them
1345
1470
  });
1346
- logger.info(`Recursive bundling complete. Input: ${url}, Size: ${metadata.outputSize} bytes, Pages: ${metadata.pagesBundled}, Time: ${metadata.buildTimeMs}ms`);
1347
- if (metadata.errors && metadata.errors.length > 0) {
1348
- logger.warn(`Completed with ${metadata.errors.length} warning(s) logged in metadata.`);
1349
- }
1471
+ logger.info(`Finished recursive bundle for ${url}. Bundled ${pages} pages.`);
1350
1472
  return { html, metadata };
1351
1473
  } catch (error) {
1352
- logger.error(`Error during recursive generation for ${url}: ${error.message}`);
1353
- if (error.cause instanceof Error) {
1354
- logger.error(`Cause: ${error.cause.message}`);
1355
- }
1356
- throw error;
1357
- }
1358
- }
1359
- async function fetchAndPackWebPage2(url, options = {}, loggerInstance) {
1360
- const logger = loggerInstance || new Logger(options.logLevel);
1361
- logger.info(`Workspaceing single remote page: ${url}`);
1362
- const timer = new BuildTimer(url);
1363
- if (!/^https?:\/\//i.test(url)) {
1364
- const errMsg = `Invalid input URL for fetchAndPackWebPage: ${url}. Must start with http(s)://`;
1365
- logger.error(errMsg);
1366
- throw new Error(errMsg);
1367
- }
1368
- try {
1369
- const result = await fetchAndPackWebPage(url, logger);
1370
- const metadata = timer.finish(result.html, {
1371
- // Use assetCount and errors from core metadata if available
1372
- assetCount: result.metadata?.assetCount ?? 0,
1373
- errors: result.metadata?.errors ?? []
1374
- // Ensure errors array exists
1375
- });
1376
- logger.info(`Single page fetch complete. Input: ${url}, Size: ${metadata.outputSize} bytes, Assets: ${metadata.assetCount}, Time: ${metadata.buildTimeMs}ms`);
1377
- if (metadata.errors && metadata.errors.length > 0) {
1378
- logger.warn(`Completed with ${metadata.errors.length} warning(s) logged in metadata.`);
1379
- }
1380
- return { html: result.html, metadata };
1381
- } catch (error) {
1382
- logger.error(`Error during single page fetch for ${url}: ${error.message}`);
1383
- throw error;
1384
- }
1385
- }
1386
- function bundleMultiPageHTML2(pages, options = {}, loggerInstance) {
1387
- const logger = loggerInstance || new Logger(options.logLevel);
1388
- logger.info(`Bundling ${pages.length} provided pages into multi-page HTML...`);
1389
- try {
1390
- const bundledHtml = bundleMultiPageHTML(pages, logger);
1391
- logger.info(`Multi-page bundling complete.`);
1392
- return bundledHtml;
1393
- } catch (error) {
1394
- logger.error(`Error during multi-page bundling: ${error.message}`);
1474
+ logger.error(`Error during recursive bundle for ${url}: ${error.message}`);
1395
1475
  throw error;
1396
1476
  }
1397
1477
  }
1398
1478
  export {
1399
- LogLevel,
1400
- bundleMultiPageHTML2 as bundleMultiPageHTML,
1401
- fetchAndPackWebPage2 as fetchAndPackWebPage,
1479
+ Logger,
1480
+ bundleMultiPageHTML,
1402
1481
  generatePortableHTML,
1403
- generateRecursivePortableHTML
1482
+ generateRecursivePortableHTML,
1483
+ pack
1404
1484
  };
1405
1485
  //# sourceMappingURL=index.js.map