portapack 0.3.1 → 0.3.3

This diff compares the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
Files changed (74)
  1. package/.eslintrc.json +67 -8
  2. package/.releaserc.js +25 -27
  3. package/CHANGELOG.md +14 -22
  4. package/LICENSE.md +21 -0
  5. package/README.md +22 -53
  6. package/commitlint.config.js +30 -34
  7. package/dist/cli/cli-entry.cjs +183 -98
  8. package/dist/cli/cli-entry.cjs.map +1 -1
  9. package/dist/index.d.ts +0 -3
  10. package/dist/index.js +178 -97
  11. package/dist/index.js.map +1 -1
  12. package/docs/.vitepress/config.ts +38 -33
  13. package/docs/.vitepress/sidebar-generator.ts +89 -38
  14. package/docs/architecture.md +186 -0
  15. package/docs/cli.md +23 -23
  16. package/docs/code-of-conduct.md +7 -1
  17. package/docs/configuration.md +12 -11
  18. package/docs/contributing.md +6 -2
  19. package/docs/deployment.md +10 -5
  20. package/docs/development.md +8 -5
  21. package/docs/getting-started.md +13 -13
  22. package/docs/index.md +1 -1
  23. package/docs/public/android-chrome-192x192.png +0 -0
  24. package/docs/public/android-chrome-512x512.png +0 -0
  25. package/docs/public/apple-touch-icon.png +0 -0
  26. package/docs/public/favicon-16x16.png +0 -0
  27. package/docs/public/favicon-32x32.png +0 -0
  28. package/docs/public/favicon.ico +0 -0
  29. package/docs/roadmap.md +233 -0
  30. package/docs/site.webmanifest +1 -0
  31. package/docs/troubleshooting.md +12 -1
  32. package/examples/main.ts +5 -30
  33. package/examples/sample-project/script.js +1 -1
  34. package/jest.config.ts +8 -13
  35. package/nodemon.json +5 -10
  36. package/package.json +2 -5
  37. package/src/cli/cli-entry.ts +2 -2
  38. package/src/cli/cli.ts +21 -16
  39. package/src/cli/options.ts +127 -113
  40. package/src/core/bundler.ts +253 -222
  41. package/src/core/extractor.ts +632 -565
  42. package/src/core/minifier.ts +173 -162
  43. package/src/core/packer.ts +141 -137
  44. package/src/core/parser.ts +74 -73
  45. package/src/core/web-fetcher.ts +270 -258
  46. package/src/index.ts +18 -17
  47. package/src/types.ts +9 -11
  48. package/src/utils/font.ts +12 -6
  49. package/src/utils/logger.ts +110 -105
  50. package/src/utils/meta.ts +75 -76
  51. package/src/utils/mime.ts +50 -50
  52. package/src/utils/slugify.ts +33 -34
  53. package/tests/unit/cli/cli-entry.test.ts +72 -70
  54. package/tests/unit/cli/cli.test.ts +314 -278
  55. package/tests/unit/cli/options.test.ts +294 -301
  56. package/tests/unit/core/bundler.test.ts +426 -329
  57. package/tests/unit/core/extractor.test.ts +793 -549
  58. package/tests/unit/core/minifier.test.ts +374 -274
  59. package/tests/unit/core/packer.test.ts +298 -264
  60. package/tests/unit/core/parser.test.ts +538 -150
  61. package/tests/unit/core/web-fetcher.test.ts +389 -359
  62. package/tests/unit/index.test.ts +238 -197
  63. package/tests/unit/utils/font.test.ts +26 -21
  64. package/tests/unit/utils/logger.test.ts +267 -260
  65. package/tests/unit/utils/meta.test.ts +29 -28
  66. package/tests/unit/utils/mime.test.ts +73 -74
  67. package/tests/unit/utils/slugify.test.ts +14 -12
  68. package/tsconfig.build.json +9 -10
  69. package/tsconfig.jest.json +1 -1
  70. package/tsconfig.json +2 -2
  71. package/tsup.config.ts +8 -9
  72. package/typedoc.json +5 -9
  73. /package/docs/{portapack-transparent.png → public/portapack-transparent.png} +0 -0
  74. /package/docs/{portapack.jpg → public/portapack.jpg} +0 -0
package/src/core/web-fetcher.ts CHANGED
@@ -12,12 +12,12 @@ import { bundleMultiPageHTML } from './bundler'; // Assuming bundler is here
 
 // Puppeteer Launch Options (Consider making configurable)
 const PUPPETEER_LAUNCH_OPTIONS: puppeteer.LaunchOptions = {
-    headless: true,
-    args: [
-        '--no-sandbox', // Often required in containerized environments
-        '--disable-setuid-sandbox',
-        '--disable-dev-shm-usage', // Recommended for Docker/CI
-    ],
+  headless: true,
+  args: [
+    '--no-sandbox', // Often required in containerized environments
+    '--disable-setuid-sandbox',
+    '--disable-dev-shm-usage', // Recommended for Docker/CI
+  ],
 };
 
 // Default Page Navigation Options (Consider making configurable)
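For readers unfamiliar with these flags, the sketch below shows how such options feed into `puppeteer.launch()`. It is a minimal illustration, not code from the package; the target URL is a placeholder.

```ts
import * as puppeteer from 'puppeteer';

// Same launch options as the hunk above: sandbox flags for containers/CI,
// plus the /dev/shm workaround commonly needed under Docker.
const PUPPETEER_LAUNCH_OPTIONS: puppeteer.LaunchOptions = {
  headless: true,
  args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage'],
};

async function main(): Promise<void> {
  const browser = await puppeteer.launch(PUPPETEER_LAUNCH_OPTIONS);
  try {
    const page = await browser.newPage();
    await page.goto('https://example.com', { waitUntil: 'networkidle2' });
    console.log(await page.title());
  } finally {
    await browser.close(); // Always release the Chromium process.
  }
}

main().catch(console.error);
```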
@@ -36,88 +36,94 @@ const DEFAULT_PAGE_TIMEOUT = 30000; // 30 seconds
  * @throws {Error} Throws errors from Puppeteer launch, page creation, or navigation failures.
  */
 export async function fetchAndPackWebPage(
-    url: string,
-    logger?: Logger,
-    timeout: number = DEFAULT_PAGE_TIMEOUT,
-    userAgent?: string,
+  url: string,
+  logger?: Logger,
+  timeout: number = DEFAULT_PAGE_TIMEOUT,
+  userAgent?: string
 ): Promise<BuildResult> {
-    let browser: puppeteer.Browser | null = null;
-    const start = Date.now();
-    logger?.info(`Initiating fetch for single page: ${url}`);
+  let browser: puppeteer.Browser | null = null;
+  const start = Date.now();
+  logger?.info(`Initiating fetch for single page: ${url}`);
+
+  try {
+    logger?.debug('Launching browser...');
+    browser = await puppeteer.launch(PUPPETEER_LAUNCH_OPTIONS);
+    logger?.debug(`Browser launched successfully (PID: ${browser.process()?.pid}).`);
+    const page = await browser.newPage();
+    logger?.debug(`New page created for ${url}`);
+
+    // Set User-Agent if provided
+    if (userAgent) {
+      await page.setUserAgent(userAgent);
+      logger?.debug(`User-Agent set to: "${userAgent}"`);
+    }
 
     try {
-        logger?.debug('Launching browser...');
-        browser = await puppeteer.launch(PUPPETEER_LAUNCH_OPTIONS);
-        logger?.debug(`Browser launched successfully (PID: ${browser.process()?.pid}).`);
-        const page = await browser.newPage();
-        logger?.debug(`New page created for ${url}`);
-
-        // Set User-Agent if provided
-        if (userAgent) {
-            await page.setUserAgent(userAgent);
-            logger?.debug(`User-Agent set to: "${userAgent}"`);
-        }
-
+      logger?.debug(`Navigating to ${url} with timeout ${timeout}ms`);
+      await page.goto(url, { waitUntil: 'networkidle2', timeout: timeout });
+      logger?.debug(`Navigation successful for ${url}`);
+      const html = await page.content();
+      logger?.debug(`Content retrieved for ${url} (${Buffer.byteLength(html, 'utf-8')} bytes)`);
+
+      const metadata: BundleMetadata = {
+        input: url,
+        outputSize: Buffer.byteLength(html, 'utf-8'),
+        assetCount: 0, // Basic fetch doesn't track assets processed by *this* tool
+        buildTimeMs: Date.now() - start,
+        errors: [], // No errors if we reached this point
+      };
+
+      await page.close();
+      logger?.debug(`Page closed for ${url}`);
+      await browser.close();
+      logger?.debug(`Browser closed for ${url}`);
+      browser = null; // Ensure browser is marked as closed
+
+      return { html, metadata };
+    } catch (pageError: any) {
+      logger?.error(`Error during page processing for ${url}: ${pageError.message}`);
+      // Attempt to close the page even if processing failed
+      if (page && !page.isClosed()) {
         try {
-            logger?.debug(`Navigating to ${url} with timeout ${timeout}ms`);
-            await page.goto(url, { waitUntil: 'networkidle2', timeout: timeout });
-            logger?.debug(`Navigation successful for ${url}`);
-            const html = await page.content();
-            logger?.debug(`Content retrieved for ${url} (${Buffer.byteLength(html, 'utf-8')} bytes)`);
-
-            const metadata: BundleMetadata = {
-                input: url,
-                outputSize: Buffer.byteLength(html, 'utf-8'),
-                assetCount: 0, // Basic fetch doesn't track assets processed by *this* tool
-                buildTimeMs: Date.now() - start,
-                errors: [], // No errors if we reached this point
-            };
-
-            await page.close();
-            logger?.debug(`Page closed for ${url}`);
-            await browser.close();
-            logger?.debug(`Browser closed for ${url}`);
-            browser = null; // Ensure browser is marked as closed
-
-            return { html, metadata };
-
-        } catch (pageError: any) {
-            logger?.error(`Error during page processing for ${url}: ${pageError.message}`);
-            // Attempt to close the page even if processing failed
-            if (page && !page.isClosed()) {
-                try {
-                    await page.close();
-                    logger?.debug(`Page closed after error for ${url}`);
-                } catch (closeErr: any) {
-                    logger?.error(`Failed to close page after error for ${url}: ${closeErr.message}`);
-                    // Decide if this secondary error should be thrown or just logged
-                }
-            }
-            throw pageError; // Re-throw the original page processing error
-        }
-    } catch (launchError: any) {
-        logger?.error(`Critical error during browser launch or page setup for ${url}: ${launchError.message}`);
-        // Ensure browser is closed if launch succeeded partially but later failed
-        if (browser) {
-            try {
-                await browser.close();
-                logger?.debug('Browser closed after launch/setup error.');
-            } catch (closeErr: any) {
-                logger?.warn(`Failed to close browser after launch/setup error: ${closeErr.message}`);
-            }
-            browser = null;
-        }
-        throw launchError; // Re-throw the original launch/setup error
-    } finally {
-        // Final safety net: If browser somehow wasn't closed and isn't null, attempt closure.
-        if (browser) {
-            logger?.warn(`Closing browser in final cleanup for ${url}. This might indicate an unusual error path.`);
-            try { await browser.close(); } catch (closeErr) { /* Ignore final browser close error */ }
+          await page.close();
+          logger?.debug(`Page closed after error for ${url}`);
+        } catch (closeErr: any) {
+          logger?.error(`Failed to close page after error for ${url}: ${closeErr.message}`);
+          // Decide if this secondary error should be thrown or just logged
         }
+      }
+      throw pageError; // Re-throw the original page processing error
     }
+  } catch (launchError: any) {
+    logger?.error(
+      `Critical error during browser launch or page setup for ${url}: ${launchError.message}`
+    );
+    // Ensure browser is closed if launch succeeded partially but later failed
+    if (browser) {
+      try {
+        await browser.close();
+        logger?.debug('Browser closed after launch/setup error.');
+      } catch (closeErr: any) {
+        logger?.warn(`Failed to close browser after launch/setup error: ${closeErr.message}`);
+      }
+      browser = null;
+    }
+    throw launchError; // Re-throw the original launch/setup error
+  } finally {
+    // Final safety net: If browser somehow wasn't closed and isn't null, attempt closure.
+    if (browser) {
+      logger?.warn(
+        `Closing browser in final cleanup for ${url}. This might indicate an unusual error path.`
+      );
+      try {
+        await browser.close();
+      } catch (closeErr) {
+        /* Ignore final browser close error */
+      }
+    }
+  }
 }
 
-
 /**
  * @typedef {object} CrawlOptions
  * @property {number} [maxDepth=1] - Maximum crawl depth.
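For orientation, a minimal usage sketch of the reshaped function follows. The relative import path mirrors the in-repo module shown above; the demo values are placeholders.

```ts
import { fetchAndPackWebPage } from './core/web-fetcher';

async function demo(): Promise<void> {
  const { html, metadata } = await fetchAndPackWebPage(
    'https://example.com', // url
    undefined,             // logger: optional, omitted here
    30000,                 // timeout: same value as DEFAULT_PAGE_TIMEOUT
    'portapack-demo/1.0'   // userAgent: optional override
  );
  console.log(`Fetched ${metadata.outputSize} bytes in ${metadata.buildTimeMs} ms`);
  console.log(html.slice(0, 120)); // Peek at the captured markup.
}

demo().catch(console.error);
```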
@@ -141,160 +147,157 @@ export async function fetchAndPackWebPage(
  * containing the URL and HTML for each successfully crawled page.
  */
 async function crawlWebsite(
-    startUrl: string,
-    options: {
-        maxDepth?: number;
-        timeout?: number;
-        include?: string[]; // Add include/exclude/userAgent later if needed
-        exclude?: string[];
-        userAgent?: string;
-        logger?: Logger;
-    }
+  startUrl: string,
+  options: {
+    maxDepth?: number;
+    timeout?: number;
+    include?: string[]; // Add include/exclude/userAgent later if needed
+    exclude?: string[];
+    userAgent?: string;
+    logger?: Logger;
+  }
 ): Promise<PageEntry[]> {
-    const {
-        maxDepth = 1,
-        timeout = DEFAULT_PAGE_TIMEOUT,
-        // include = ['**'], // TODO: Implement glob filtering
-        // exclude = [],
-        userAgent,
-        logger,
-    } = options;
-
-    logger?.info(`Starting crawl for ${startUrl} with maxDepth ${maxDepth}`);
-
-    if (maxDepth <= 0) {
-        logger?.warn('maxDepth is 0 or negative, no pages will be crawled.');
-        return [];
+  const {
+    maxDepth = 1,
+    timeout = DEFAULT_PAGE_TIMEOUT,
+    // include = ['**'], // TODO: Implement glob filtering
+    // exclude = [],
+    userAgent,
+    logger,
+  } = options;
+
+  logger?.info(`Starting crawl for ${startUrl} with maxDepth ${maxDepth}`);
+
+  if (maxDepth <= 0) {
+    logger?.warn('maxDepth is 0 or negative, no pages will be crawled.');
+    return [];
+  }
+
+  let browser: puppeteer.Browser | null = null;
+  const visited = new Set<string>();
+  const results: PageEntry[] = [];
+  const queue: { url: string; depth: number }[] = [];
+  let startOrigin: string;
+
+  try {
+    // Validate start URL and get origin
+    try {
+      startOrigin = new URL(startUrl).origin;
+    } catch (e: any) {
+      logger?.error(`Invalid start URL: ${startUrl}. ${e.message}`);
+      throw new Error(`Invalid start URL: ${startUrl}`); // Propagate error
     }
 
-    let browser: puppeteer.Browser | null = null;
-    const visited = new Set<string>();
-    const results: PageEntry[] = [];
-    const queue: { url: string; depth: number }[] = [];
-    let startOrigin: string;
-
+    // Normalize start URL (remove fragment)
+    let normalizedStartUrl: string;
     try {
-        // Validate start URL and get origin
-        try {
-            startOrigin = new URL(startUrl).origin;
-        } catch (e: any) {
-            logger?.error(`Invalid start URL: ${startUrl}. ${e.message}`);
-            throw new Error(`Invalid start URL: ${startUrl}`); // Propagate error
-        }
+      const parsedStartUrl = new URL(startUrl);
+      parsedStartUrl.hash = '';
+      normalizedStartUrl = parsedStartUrl.href;
+    } catch (e: any) {
+      logger?.error(`Invalid start URL: ${startUrl}. ${e.message}`);
+      throw new Error(`Invalid start URL: ${startUrl}`); // Propagate error
+    }
 
-        // Normalize start URL (remove fragment)
-        let normalizedStartUrl: string;
-        try {
-            const parsedStartUrl = new URL(startUrl);
-            parsedStartUrl.hash = '';
-            normalizedStartUrl = parsedStartUrl.href;
-        } catch (e: any) {
-            logger?.error(`Invalid start URL: ${startUrl}. ${e.message}`);
-            throw new Error(`Invalid start URL: ${startUrl}`); // Propagate error
+    // Launch browser *after* validating URL
+    logger?.debug('Launching browser for crawl...');
+    browser = await puppeteer.launch(PUPPETEER_LAUNCH_OPTIONS);
+    logger?.debug(`Browser launched for crawl (PID: ${browser.process()?.pid}).`);
+
+    // Initial queue setup
+    visited.add(normalizedStartUrl);
+    queue.push({ url: normalizedStartUrl, depth: 1 });
+    logger?.debug(`Queued initial URL: ${normalizedStartUrl} (depth 1)`);
+
+    while (queue.length > 0) {
+      const { url, depth } = queue.shift()!;
+      logger?.info(`Processing: ${url} (depth ${depth})`);
+      let page: puppeteer.Page | null = null;
+
+      try {
+        page = await browser.newPage();
+
+        if (userAgent) {
+          await page.setUserAgent(userAgent);
         }
+        // Consider adding viewport setting if needed: await page.setViewport({ width: 1280, height: 800 });
+
+        await page.goto(url, { waitUntil: 'networkidle2', timeout: timeout });
+        const html = await page.content();
 
-        // Launch browser *after* validating URL
-        logger?.debug('Launching browser for crawl...');
-        browser = await puppeteer.launch(PUPPETEER_LAUNCH_OPTIONS);
-        logger?.debug(`Browser launched for crawl (PID: ${browser.process()?.pid}).`);
+        results.push({ url, html }); // Matches PageEntry type
+        logger?.debug(`Successfully fetched content for ${url}`);
 
-        // Initial queue setup
-        visited.add(normalizedStartUrl);
-        queue.push({ url: normalizedStartUrl, depth: 1 });
-        logger?.debug(`Queued initial URL: ${normalizedStartUrl} (depth 1)`);
+        // Link Discovery (only if not at max depth)
+        if (depth < maxDepth) {
+          logger?.debug(`Discovering links on ${url} (depth ${depth}/${maxDepth})`);
+          const hrefs = await page.evaluate(() =>
+            Array.from(document.querySelectorAll('a[href]'), a => a.getAttribute('href'))
+          );
+          logger?.debug(`Found ${hrefs.length} potential hrefs on ${url}`);
 
-        while (queue.length > 0) {
-            const { url, depth } = queue.shift()!;
-            logger?.info(`Processing: ${url} (depth ${depth})`);
-            let page: puppeteer.Page | null = null;
+          let linksAdded = 0;
+          for (const href of hrefs) {
+            if (!href) continue;
 
+            let absoluteUrl: string;
             try {
-                page = await browser.newPage();
-
-                if (userAgent) {
-                    await page.setUserAgent(userAgent);
-                }
-                // Consider adding viewport setting if needed: await page.setViewport({ width: 1280, height: 800 });
-
-                await page.goto(url, { waitUntil: 'networkidle2', timeout: timeout });
-                const html = await page.content();
-
-                results.push({ url, html }); // Matches PageEntry type
-                logger?.debug(`Successfully fetched content for ${url}`);
-
-                // Link Discovery (only if not at max depth)
-                if (depth < maxDepth) {
-                    logger?.debug(`Discovering links on ${url} (depth ${depth}/${maxDepth})`);
-                    const hrefs = await page.evaluate(() =>
-                        Array.from(document.querySelectorAll('a[href]'), a => a.getAttribute('href'))
-                    );
-                    logger?.debug(`Found ${hrefs.length} potential hrefs on ${url}`);
-
-                    let linksAdded = 0;
-                    for (const href of hrefs) {
-                        if (!href) continue;
-
-                        let absoluteUrl: string;
-                        try {
-                            const resolved = new URL(href, url);
-                            resolved.hash = ''; // Normalize
-                            absoluteUrl = resolved.href;
-                        } catch (e) {
-                            logger?.debug(`Ignoring invalid URL syntax: "${href}" on page ${url}`);
-                            continue;
-                        }
-
-                        // TODO: Implement include/exclude filtering here using micromatch or similar
-                        // if (!matchesInclude(absoluteUrl, include) || matchesExclude(absoluteUrl, exclude)) {
-                        //     logger?.debug(`Skipping due to include/exclude rules: ${absoluteUrl}`);
-                        //     continue;
-                        // }
-
-                        // Filter: same origin and not visited
-                        if (absoluteUrl.startsWith(startOrigin) && !visited.has(absoluteUrl)) {
-                            visited.add(absoluteUrl);
-                            queue.push({ url: absoluteUrl, depth: depth + 1 });
-                            linksAdded++;
-                        }
-                    }
-                    logger?.debug(`Added ${linksAdded} new unique internal links to queue from ${url}`);
-                } else {
-                    logger?.debug(`Max depth (${maxDepth}) reached, not discovering links on ${url}`);
-                }
-
-            } catch (err: any) {
-                logger?.warn(`❌ Failed to process ${url}: ${err.message}`);
-                // Continue crawl even if one page fails
-            } finally {
-                if (page && !page.isClosed()) {
-                    try {
-                        await page.close();
-                    } catch (pageCloseError: any) {
-                        logger?.error(`Failed to close page for ${url}: ${pageCloseError.message}`);
-                    }
-                }
+              const resolved = new URL(href, url);
+              resolved.hash = ''; // Normalize
+              absoluteUrl = resolved.href;
+            } catch (e) {
+              logger?.debug(`Ignoring invalid URL syntax: "${href}" on page ${url}`);
+              continue;
             }
-        } // End while loop
-
-    } catch (error) {
-        // Catch critical errors like invalid start URL or browser launch failure
-        logger?.error(`Critical crawl error: ${error instanceof Error ? error.message : error}`);
-        // Rethrow or handle appropriately
-        throw error;
-    } finally {
-        // Ensure browser is closed after crawl finishes or critical error occurs
-        if (browser) {
-            logger?.info(`Crawl finished or errored. Closing browser.`);
-            await browser.close();
-            logger?.debug(`Browser closed after crawl.`);
+
+            // TODO: Implement include/exclude filtering here using micromatch or similar
+            // if (!matchesInclude(absoluteUrl, include) || matchesExclude(absoluteUrl, exclude)) {
+            //   logger?.debug(`Skipping due to include/exclude rules: ${absoluteUrl}`);
+            //   continue;
+            // }
+
+            // Filter: same origin and not visited
+            if (absoluteUrl.startsWith(startOrigin) && !visited.has(absoluteUrl)) {
+              visited.add(absoluteUrl);
+              queue.push({ url: absoluteUrl, depth: depth + 1 });
+              linksAdded++;
+            }
+          }
+          logger?.debug(`Added ${linksAdded} new unique internal links to queue from ${url}`);
+        } else {
+          logger?.debug(`Max depth (${maxDepth}) reached, not discovering links on ${url}`);
+        }
+      } catch (err: any) {
+        logger?.warn(`❌ Failed to process ${url}: ${err.message}`);
+        // Continue crawl even if one page fails
+      } finally {
+        if (page && !page.isClosed()) {
+          try {
+            await page.close();
+          } catch (pageCloseError: any) {
+            logger?.error(`Failed to close page for ${url}: ${pageCloseError.message}`);
+          }
         }
+      }
+    } // End while loop
+  } catch (error) {
+    // Catch critical errors like invalid start URL or browser launch failure
+    logger?.error(`Critical crawl error: ${error instanceof Error ? error.message : error}`);
+    // Rethrow or handle appropriately
+    throw error;
+  } finally {
+    // Ensure browser is closed after crawl finishes or critical error occurs
+    if (browser) {
+      logger?.info(`Crawl finished or errored. Closing browser.`);
+      await browser.close();
+      logger?.debug(`Browser closed after crawl.`);
     }
+  }
 
-    logger?.info(`Crawl found ${results.length} pages.`);
-    return results;
+  logger?.info(`Crawl found ${results.length} pages.`);
+  return results;
 }
 
-
 /**
  * Fetches all internal pages of a website recursively starting from a given URL,
  * bundles them into a single HTML string using the bundler module, and writes
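The crawl loop above is a breadth-first traversal driven by a `visited` set and a FIFO queue. The fragment below isolates its link-normalization and dedup rule as a standalone sketch; the name `enqueueIfNew` is illustrative, not part of the package.

```ts
// Illustrative sketch of the crawl's queueing rules: resolve against the current
// page, strip the fragment, then enqueue only same-origin URLs not yet visited.
const visited = new Set<string>();
const queue: { url: string; depth: number }[] = [];

function enqueueIfNew(href: string, pageUrl: string, startOrigin: string, depth: number): void {
  let absoluteUrl: string;
  try {
    const resolved = new URL(href, pageUrl); // Resolves relative links too.
    resolved.hash = ''; // '#section' variants collapse to one page.
    absoluteUrl = resolved.href;
  } catch {
    return; // Ignore syntactically invalid hrefs, as the crawler does.
  }
  if (absoluteUrl.startsWith(startOrigin) && !visited.has(absoluteUrl)) {
    visited.add(absoluteUrl);
    queue.push({ url: absoluteUrl, depth: depth + 1 });
  }
}

// Both variants normalize to the same URL; only the first call queues it.
enqueueIfNew('/about#team', 'https://example.com/', 'https://example.com', 1);
enqueueIfNew('/about', 'https://example.com/', 'https://example.com', 1);
```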
@@ -310,47 +313,56 @@ async function crawlWebsite(
  * @throws {Error} Throws errors if the crawl initiation fails, bundling fails, or file writing fails.
  */
 export async function recursivelyBundleSite(
-    startUrl: string,
-    outputFile: string,
-    maxDepth = 1,
-    loggerInstance?: Logger // Added optional logger parameter
+  startUrl: string,
+  outputFile: string,
+  maxDepth = 1,
+  loggerInstance?: Logger // Added optional logger parameter
 ): Promise<{ pages: number; html: string }> {
-    // Use provided logger OR create a new default one
-    const logger = loggerInstance || new Logger();
-    logger.info(`Starting recursive site bundle for ${startUrl} to ${outputFile} (maxDepth: ${maxDepth})`);
-
-    try {
-        // Step 1: Crawl the website
-        // Pass necessary options down to crawlWebsite
-        const crawlOptions = { maxDepth, logger /* Add other options like timeout, userAgent if needed */ };
-        const pages: PageEntry[] = await crawlWebsite(startUrl, crawlOptions);
-
-        if (pages.length === 0) {
-            logger.warn("Crawl completed but found 0 pages. Output file may be empty or reflect an empty bundle.");
-        } else {
-            logger.info(`Crawl successful, found ${pages.length} pages. Starting bundling.`);
-        }
+  // Use provided logger OR create a new default one
+  const logger = loggerInstance || new Logger();
+  logger.info(
+    `Starting recursive site bundle for ${startUrl} to ${outputFile} (maxDepth: ${maxDepth})`
+  );
+
+  try {
+    // Step 1: Crawl the website
+    // Pass necessary options down to crawlWebsite
+    const crawlOptions = {
+      maxDepth,
+      logger /* Add other options like timeout, userAgent if needed */,
+    };
+    const pages: PageEntry[] = await crawlWebsite(startUrl, crawlOptions);
+
+    if (pages.length === 0) {
+      logger.warn(
+        'Crawl completed but found 0 pages. Output file may be empty or reflect an empty bundle.'
+      );
+    } else {
+      logger.info(`Crawl successful, found ${pages.length} pages. Starting bundling.`);
+    }
 
-        // Step 2: Bundle the HTML content
-        // Pass the same logger instance for consistent logging
-        const bundledHtml = bundleMultiPageHTML(pages, logger);
-        logger.info(`Bundling complete. Output size: ${Buffer.byteLength(bundledHtml, 'utf-8')} bytes.`);
-
-        // Step 3: Write the bundled HTML to the output file
-        logger.info(`Writing bundled HTML to ${outputFile}`);
-        await fs.writeFile(outputFile, bundledHtml, 'utf-8');
-        logger.info(`Successfully wrote bundled output to ${outputFile}`);
-
-        // Step 4: Return the results
-        return {
-            pages: pages.length,
-            html: bundledHtml
-        };
-    } catch (error: any) {
-        logger.error(`Error during recursive site bundle: ${error.message}`);
-        if (error.stack) {
-            logger.error(`Stack trace: ${error.stack}`);
-        }
-        throw error; // Re-throw the error
+    // Step 2: Bundle the HTML content
+    // Pass the same logger instance for consistent logging
+    const bundledHtml = bundleMultiPageHTML(pages, logger);
+    logger.info(
+      `Bundling complete. Output size: ${Buffer.byteLength(bundledHtml, 'utf-8')} bytes.`
+    );
+
+    // Step 3: Write the bundled HTML to the output file
+    logger.info(`Writing bundled HTML to ${outputFile}`);
+    await fs.writeFile(outputFile, bundledHtml, 'utf-8');
+    logger.info(`Successfully wrote bundled output to ${outputFile}`);
+
+    // Step 4: Return the results
+    return {
+      pages: pages.length,
+      html: bundledHtml,
+    };
+  } catch (error: any) {
+    logger.error(`Error during recursive site bundle: ${error.message}`);
+    if (error.stack) {
+      logger.error(`Stack trace: ${error.stack}`);
     }
-}
+    throw error; // Re-throw the error
+  }
+}
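A hypothetical end-to-end call, using the in-repo module path shown above (the output filename is a placeholder):

```ts
import { recursivelyBundleSite } from './core/web-fetcher';

async function demo(): Promise<void> {
  // Crawl https://example.com two levels deep and write one portable file.
  const { pages, html } = await recursivelyBundleSite('https://example.com', 'site.html', 2);
  console.log(`Bundled ${pages} page(s); output is ${html.length} characters`);
}

demo().catch(console.error);
```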
package/src/index.ts CHANGED
@@ -2,11 +2,12 @@
  * @file index.ts
  * @description Public API surface for PortaPack.
  * Exposes the unified `pack()` method and advanced helpers like recursive crawling and multi-page bundling.
- * @version 1.0.0 - (Add version if applicable)
- * @date 2025-04-11
  */
 
-import { fetchAndPackWebPage as coreFetchAndPack, recursivelyBundleSite as coreRecursivelyBundleSite } from './core/web-fetcher';
+import {
+  fetchAndPackWebPage as coreFetchAndPack,
+  recursivelyBundleSite as coreRecursivelyBundleSite,
+} from './core/web-fetcher';
 import { parseHTML } from './core/parser';
 import { extractAssets } from './core/extractor';
 import { minifyAssets } from './core/minifier';
@@ -56,10 +57,10 @@ export async function pack(
   // Check if it contains '://' but isn't http(s) -> likely unsupported protocol
   // Allow anything else (including relative/absolute paths without explicit protocols)
   if (!isHttp && /:\/\//.test(input) && !input.startsWith('file://')) {
-      const errorMsg = `Unsupported protocol or input type: ${input}`;
-      logger.error(errorMsg);
-      throw new Error(errorMsg);
-      }
+    const errorMsg = `Unsupported protocol or input type: ${input}`;
+    logger.error(errorMsg);
+    throw new Error(errorMsg);
+  }
 
   const isRemote = /^https?:\/\//i.test(input); // Check again after validation
   const recursive = options.recursive === true || typeof options.recursive === 'number';
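Spelled out, the validation above accepts http(s) URLs and local paths (including `file://`) and rejects any other explicit scheme. A standalone sketch of the same rules — `classifyInput` is a made-up name for illustration:

```ts
// Mirrors the two regex tests in the hunk above.
function classifyInput(input: string): 'remote' | 'local' {
  const isHttp = /^https?:\/\//i.test(input);
  if (!isHttp && /:\/\//.test(input) && !input.startsWith('file://')) {
    throw new Error(`Unsupported protocol or input type: ${input}`);
  }
  return isHttp ? 'remote' : 'local';
}

console.log(classifyInput('https://example.com'));   // 'remote'
console.log(classifyInput('./site/index.html'));     // 'local'
console.log(classifyInput('file:///tmp/page.html')); // 'local'
// classifyInput('ftp://host/file') would throw.
```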
@@ -101,8 +102,8 @@ export async function generatePortableHTML(
     logger.info(`Finished fetching and packing remote page: ${input}`);
     return { html: result.html, metadata };
   } catch (error: any) {
-      logger.error(`Error fetching remote page ${input}: ${error.message}`);
-      throw error;
+    logger.error(`Error fetching remote page ${input}: ${error.message}`);
+    throw error;
   }
 }
 
@@ -121,8 +122,8 @@ export async function generatePortableHTML(
     logger.info(`Finished processing local file: ${input}`);
     return { html: finalHtml, metadata };
   } catch (error: any) {
-      logger.error(`Error processing local file ${input}: ${error.message}`);
-      throw error;
+    logger.error(`Error processing local file ${input}: ${error.message}`);
+    throw error;
   }
 }
 
@@ -147,9 +148,9 @@ export async function generateRecursivePortableHTML(
   const timer = new BuildTimer(url);
 
   if (!/^https?:\/\//i.test(url)) {
-      const errorMsg = `Invalid URL for recursive bundling. Must start with http:// or https://. Received: ${url}`;
-      logger.error(errorMsg);
-      throw new Error(errorMsg);
+    const errorMsg = `Invalid URL for recursive bundling. Must start with http:// or https://. Received: ${url}`;
+    logger.error(errorMsg);
+    throw new Error(errorMsg);
   }
 
   logger.info(`Starting recursive bundle for ${url} up to depth ${depth}`);
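Taken together with the success path in the next hunk (which returns `{ html, metadata }`), a well-formed call looks like the sketch below. The import path and the position of the `depth` argument (inferred from the log line above) are assumptions:

```ts
import { generateRecursivePortableHTML } from './index';

async function demo(): Promise<void> {
  // Rejected immediately: no http(s) scheme, so the guard above throws.
  await generateRecursivePortableHTML('ftp://example.com').catch(err =>
    console.error(err.message)
  );

  // Accepted: crawled up to the requested depth, then bundled.
  const { html, metadata } = await generateRecursivePortableHTML('https://example.com', 2);
  console.log(`Bundled ${html.length} characters`, metadata);
}

demo().catch(console.error);
```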
@@ -166,8 +167,8 @@ export async function generateRecursivePortableHTML(
     logger.info(`Finished recursive bundle for ${url}. Bundled ${pages} pages.`);
     return { html, metadata };
   } catch (error: any) {
-      logger.error(`Error during recursive bundle for ${url}: ${error.message}`);
-      throw error;
+    logger.error(`Error during recursive bundle for ${url}: ${error.message}`);
+    throw error;
   }
 }
 
@@ -196,4 +197,4 @@ export type {
   ParsedHTML,
   Asset,
   PageEntry,
-  };
+};
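For completeness, a sketch of consuming one of the types re-exported above. The `{ url, html }` shape is taken from the web-fetcher diff (`results.push({ url, html }); // Matches PageEntry type`); the import path is an assumption:

```ts
import type { PageEntry } from './index';

// Minimal PageEntry values, shaped like the crawler's results.
const pages: PageEntry[] = [
  { url: 'https://example.com/', html: '<!doctype html><title>Home</title>' },
  { url: 'https://example.com/about', html: '<!doctype html><title>About</title>' },
];

console.log(`Prepared ${pages.length} page entries for bundling.`);
```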