portapack 0.2.1 → 0.3.1
This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/.github/workflows/ci.yml +5 -4
- package/CHANGELOG.md +20 -0
- package/README.md +81 -219
- package/dist/cli/{cli-entry.js → cli-entry.cjs} +620 -513
- package/dist/cli/cli-entry.cjs.map +1 -0
- package/dist/index.d.ts +51 -56
- package/dist/index.js +517 -458
- package/dist/index.js.map +1 -1
- package/docs/.vitepress/config.ts +0 -1
- package/docs/cli.md +108 -45
- package/docs/configuration.md +101 -116
- package/docs/getting-started.md +74 -44
- package/jest.config.ts +18 -8
- package/jest.setup.cjs +66 -146
- package/package.json +5 -5
- package/src/cli/cli-entry.ts +15 -15
- package/src/cli/cli.ts +130 -119
- package/src/core/bundler.ts +174 -63
- package/src/core/extractor.ts +364 -277
- package/src/core/web-fetcher.ts +205 -141
- package/src/index.ts +161 -224
- package/tests/unit/cli/cli-entry.test.ts +66 -77
- package/tests/unit/cli/cli.test.ts +243 -145
- package/tests/unit/core/bundler.test.ts +334 -258
- package/tests/unit/core/extractor.test.ts +608 -1064
- package/tests/unit/core/minifier.test.ts +130 -221
- package/tests/unit/core/packer.test.ts +255 -106
- package/tests/unit/core/parser.test.ts +89 -458
- package/tests/unit/core/web-fetcher.test.ts +310 -265
- package/tests/unit/index.test.ts +206 -300
- package/tests/unit/utils/logger.test.ts +32 -28
- package/tsconfig.jest.json +8 -7
- package/tsup.config.ts +34 -29
- package/dist/cli/cli-entry.js.map +0 -1
- package/docs/demo.md +0 -46
- package/output.html +0 -1
- package/site-packed.html +0 -1
- package/test-output.html +0 -0
package/src/core/web-fetcher.ts
CHANGED
@@ -7,14 +7,21 @@
 import * as puppeteer from 'puppeteer';
 import * as fs from 'fs/promises';
 import { Logger } from '../utils/logger'; // Assuming logger is in ../utils
-import { BuildResult, PageEntry } from '../types'; // Assuming types are defined here
+import { BuildResult, PageEntry, BundleMetadata } from '../types'; // Assuming types are defined here
 import { bundleMultiPageHTML } from './bundler'; // Assuming bundler is here
 
-
-
-
-
-
+// Puppeteer Launch Options (Consider making configurable)
+const PUPPETEER_LAUNCH_OPTIONS: puppeteer.LaunchOptions = {
+  headless: true,
+  args: [
+    '--no-sandbox', // Often required in containerized environments
+    '--disable-setuid-sandbox',
+    '--disable-dev-shm-usage', // Recommended for Docker/CI
+  ],
+};
+
+// Default Page Navigation Options (Consider making configurable)
+const DEFAULT_PAGE_TIMEOUT = 30000; // 30 seconds
 
 /**
  * Fetches the rendered HTML content and basic metadata for a single web page URL.
@@ -22,7 +29,8 @@ import { bundleMultiPageHTML } from './bundler'; // Assuming bundler is here
  *
  * @param {string} url - The fully qualified URL to fetch.
  * @param {Logger} [logger] - Optional logger instance for debug/info messages.
- * @param {number} [timeout=
+ * @param {number} [timeout=DEFAULT_PAGE_TIMEOUT] - Navigation timeout in milliseconds.
+ * @param {string} [userAgent] - Optional custom User-Agent string.
  * @returns {Promise<BuildResult>} A promise that resolves with the fetched HTML
  * and metadata, or rejects on critical errors.
  * @throws {Error} Throws errors from Puppeteer launch, page creation, or navigation failures.
@@ -30,36 +38,44 @@ import { bundleMultiPageHTML } from './bundler'; // Assuming bundler is here
 export async function fetchAndPackWebPage(
   url: string,
   logger?: Logger,
-  timeout: number =
+  timeout: number = DEFAULT_PAGE_TIMEOUT,
+  userAgent?: string,
 ): Promise<BuildResult> {
-  let browser: puppeteer.Browser | null = null;
+  let browser: puppeteer.Browser | null = null;
   const start = Date.now();
-  logger?.
+  logger?.info(`Initiating fetch for single page: ${url}`);
 
   try {
-
-
+    logger?.debug('Launching browser...');
+    browser = await puppeteer.launch(PUPPETEER_LAUNCH_OPTIONS);
+    logger?.debug(`Browser launched successfully (PID: ${browser.process()?.pid}).`);
     const page = await browser.newPage();
-    logger?.debug(`
+    logger?.debug(`New page created for ${url}`);
+
+    // Set User-Agent if provided
+    if (userAgent) {
+      await page.setUserAgent(userAgent);
+      logger?.debug(`User-Agent set to: "${userAgent}"`);
+    }
 
     try {
       logger?.debug(`Navigating to ${url} with timeout ${timeout}ms`);
       await page.goto(url, { waitUntil: 'networkidle2', timeout: timeout });
       logger?.debug(`Navigation successful for ${url}`);
       const html = await page.content();
-      logger?.debug(`Content retrieved for ${url}`);
+      logger?.debug(`Content retrieved for ${url} (${Buffer.byteLength(html, 'utf-8')} bytes)`);
 
-      const metadata:
+      const metadata: BundleMetadata = {
         input: url,
         outputSize: Buffer.byteLength(html, 'utf-8'),
-        assetCount: 0, // Basic fetch doesn't track assets
+        assetCount: 0, // Basic fetch doesn't track assets processed by *this* tool
         buildTimeMs: Date.now() - start,
         errors: [], // No errors if we reached this point
       };
 
-      await page.close();
+      await page.close();
       logger?.debug(`Page closed for ${url}`);
-
+      await browser.close();
       logger?.debug(`Browser closed for ${url}`);
       browser = null; // Ensure browser is marked as closed
 
@@ -67,25 +83,33 @@ export async function fetchAndPackWebPage(
 
     } catch (pageError: any) {
       logger?.error(`Error during page processing for ${url}: ${pageError.message}`);
-      //
-
-
-
-
+      // Attempt to close the page even if processing failed
+      if (page && !page.isClosed()) {
+        try {
+          await page.close();
+          logger?.debug(`Page closed after error for ${url}`);
+        } catch (closeErr: any) {
+          logger?.error(`Failed to close page after error for ${url}: ${closeErr.message}`);
+          // Decide if this secondary error should be thrown or just logged
+        }
       }
       throw pageError; // Re-throw the original page processing error
     }
   } catch (launchError: any) {
-    logger?.error(`Critical error during browser launch or page
-    // Ensure browser is closed if launch succeeded but
-    // Although if launch fails, browser might be null.
+    logger?.error(`Critical error during browser launch or page setup for ${url}: ${launchError.message}`);
+    // Ensure browser is closed if launch succeeded partially but later failed
     if (browser) {
-      try {
+      try {
+        await browser.close();
+        logger?.debug('Browser closed after launch/setup error.');
+      } catch (closeErr: any) {
+        logger?.warn(`Failed to close browser after launch/setup error: ${closeErr.message}`);
+      }
+      browser = null;
     }
     throw launchError; // Re-throw the original launch/setup error
   } finally {
-    // Final
-    // This handles edge cases where errors might bypass earlier closes.
+    // Final safety net: If browser somehow wasn't closed and isn't null, attempt closure.
     if (browser) {
       logger?.warn(`Closing browser in final cleanup for ${url}. This might indicate an unusual error path.`);
       try { await browser.close(); } catch (closeErr) { /* Ignore final browser close error */ }
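Taken together, the hunks above give fetchAndPackWebPage an optional userAgent argument and a shared DEFAULT_PAGE_TIMEOUT default. A minimal calling sketch against the new signature follows; the relative import paths, the example URL, and the User-Agent string are illustrative assumptions, and the diff does not show how (or whether) the function is re-exported from the package entry point:

import { fetchAndPackWebPage } from './src/core/web-fetcher';
import { Logger } from './src/utils/logger';

const logger = new Logger();

// Fetch one rendered page with a 45s navigation timeout and a custom User-Agent.
// The fourth argument is new in 0.3.1; omit it to keep Puppeteer's default UA.
const result = await fetchAndPackWebPage(
  'https://example.com',     // hypothetical URL
  logger,
  45_000,                    // overrides DEFAULT_PAGE_TIMEOUT (30000 ms)
  'portapack-example/0.3.1', // hypothetical User-Agent string
);

// The BuildResult shape is not expanded in this diff beyond the metadata fields
// (input, outputSize, assetCount, buildTimeMs, errors), so the sketch just logs it.
console.log(result);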
@@ -93,156 +117,194 @@ export async function fetchAndPackWebPage(
   }
 }
 
+
+/**
+ * @typedef {object} CrawlOptions
+ * @property {number} [maxDepth=1] - Maximum crawl depth.
+ * @property {number} [timeout=DEFAULT_PAGE_TIMEOUT] - Navigation timeout per page.
+ * @property {string[]} [include=[]] - Glob patterns for URLs to include.
+ * @property {string[]} [exclude=[]] - Glob patterns for URLs to exclude.
+ * @property {string} [userAgent] - Custom User-Agent string.
+ * @property {Logger} [logger] - Optional logger instance.
+ */
+
 /**
  * Internal function to recursively crawl a website starting from a given URL.
  * Uses a single browser instance and manages pages for efficiency during crawl.
  * Implements Breadth-First Search (BFS) using a queue.
+ * Respects same-origin policy and visited URLs.
  *
  * @private
  * @param {string} startUrl - The initial URL to start crawling from.
- * @param {
- * @param {Logger} [logger] - Optional logger instance.
+ * @param {CrawlOptions} options - Crawling configuration options.
 * @returns {Promise<PageEntry[]>} A promise resolving to an array of PageEntry objects
 * containing the URL and HTML for each successfully crawled page.
 */
 async function crawlWebsite(
   startUrl: string,
-
-
+  options: {
+    maxDepth?: number;
+    timeout?: number;
+    include?: string[]; // Add include/exclude/userAgent later if needed
+    exclude?: string[];
+    userAgent?: string;
+    logger?: Logger;
+  }
 ): Promise<PageEntry[]> {
+  const {
+    maxDepth = 1,
+    timeout = DEFAULT_PAGE_TIMEOUT,
+    // include = ['**'], // TODO: Implement glob filtering
+    // exclude = [],
+    userAgent,
+    logger,
+  } = options;
+
   logger?.info(`Starting crawl for ${startUrl} with maxDepth ${maxDepth}`);
-
-  // Don't even start a browser if maxDepth is 0
+
   if (maxDepth <= 0) {
-    logger?.
+    logger?.warn('maxDepth is 0 or negative, no pages will be crawled.');
     return [];
   }
-
-
+
+  let browser: puppeteer.Browser | null = null;
   const visited = new Set<string>();
   const results: PageEntry[] = [];
-  // Queue stores URLs to visit and their corresponding depth
   const queue: { url: string; depth: number }[] = [];
-
-  // Initialize startOrigin for same-origin check
   let startOrigin: string;
-  try {
-    startOrigin = new URL(startUrl).origin;
-  } catch (e: any) {
-    logger?.error(`Invalid start URL: ${startUrl}. ${e.message}`);
-    await browser.close();
-    return []; // Cannot start crawl with invalid URL
-  }
 
-  // Normalize start URL (remove fragment) and add to queue/visited if depth allows
-  let normalizedStartUrl: string;
 try {
-
-
-
-
-
-
-
-    }
+    // Validate start URL and get origin
+    try {
+      startOrigin = new URL(startUrl).origin;
+    } catch (e: any) {
+      logger?.error(`Invalid start URL: ${startUrl}. ${e.message}`);
+      throw new Error(`Invalid start URL: ${startUrl}`); // Propagate error
+    }
 
-
-
-
+    // Normalize start URL (remove fragment)
+    let normalizedStartUrl: string;
+    try {
+      const parsedStartUrl = new URL(startUrl);
+      parsedStartUrl.hash = '';
+      normalizedStartUrl = parsedStartUrl.href;
+    } catch (e: any) {
+      logger?.error(`Invalid start URL: ${startUrl}. ${e.message}`);
+      throw new Error(`Invalid start URL: ${startUrl}`); // Propagate error
+    }
 
-
-
-
-
+    // Launch browser *after* validating URL
+    logger?.debug('Launching browser for crawl...');
+    browser = await puppeteer.launch(PUPPETEER_LAUNCH_OPTIONS);
+    logger?.debug(`Browser launched for crawl (PID: ${browser.process()?.pid}).`);
 
-
-
-
-
-      await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
-      const html = await page.content();
+    // Initial queue setup
+    visited.add(normalizedStartUrl);
+    queue.push({ url: normalizedStartUrl, depth: 1 });
+    logger?.debug(`Queued initial URL: ${normalizedStartUrl} (depth 1)`);
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        continue;
-
+    while (queue.length > 0) {
+      const { url, depth } = queue.shift()!;
+      logger?.info(`Processing: ${url} (depth ${depth})`);
+      let page: puppeteer.Page | null = null;
+
+      try {
+        page = await browser.newPage();
+
+        if (userAgent) {
+          await page.setUserAgent(userAgent);
+        }
+        // Consider adding viewport setting if needed: await page.setViewport({ width: 1280, height: 800 });
+
+        await page.goto(url, { waitUntil: 'networkidle2', timeout: timeout });
+        const html = await page.content();
+
+        results.push({ url, html }); // Matches PageEntry type
+        logger?.debug(`Successfully fetched content for ${url}`);
+
+        // Link Discovery (only if not at max depth)
+        if (depth < maxDepth) {
+          logger?.debug(`Discovering links on ${url} (depth ${depth}/${maxDepth})`);
+          const hrefs = await page.evaluate(() =>
+            Array.from(document.querySelectorAll('a[href]'), a => a.getAttribute('href'))
+          );
+          logger?.debug(`Found ${hrefs.length} potential hrefs on ${url}`);
+
+          let linksAdded = 0;
+          for (const href of hrefs) {
+            if (!href) continue;
+
+            let absoluteUrl: string;
+            try {
+              const resolved = new URL(href, url);
+              resolved.hash = ''; // Normalize
+              absoluteUrl = resolved.href;
+            } catch (e) {
+              logger?.debug(`Ignoring invalid URL syntax: "${href}" on page ${url}`);
+              continue;
+            }
+
+            // TODO: Implement include/exclude filtering here using micromatch or similar
+            // if (!matchesInclude(absoluteUrl, include) || matchesExclude(absoluteUrl, exclude)) {
+            //   logger?.debug(`Skipping due to include/exclude rules: ${absoluteUrl}`);
+            //   continue;
+            // }
 
-
-
-
-
-
-
-            linksAdded++;
-            // logger?.debug(`Queueing: ${absoluteUrl} (depth ${depth + 1})`); // Verbose
-          } else {
-            // logger?.debug(`Skipping (external, visited, or invalid): ${absoluteUrl}`); // Verbose
+            // Filter: same origin and not visited
+            if (absoluteUrl.startsWith(startOrigin) && !visited.has(absoluteUrl)) {
+              visited.add(absoluteUrl);
+              queue.push({ url: absoluteUrl, depth: depth + 1 });
+              linksAdded++;
+            }
           }
+          logger?.debug(`Added ${linksAdded} new unique internal links to queue from ${url}`);
+        } else {
+          logger?.debug(`Max depth (${maxDepth}) reached, not discovering links on ${url}`);
         }
-      logger?.debug(`Added ${linksAdded} new unique internal links to queue from ${url}`);
-    } else {
-      logger?.debug(`Max depth (${maxDepth}) reached, not discovering links on ${url}`);
-    }
 
-
-
-
-
-
-
-
-
-
-
-      // Log if closing the page fails, but don't let it stop the crawl
-      logger?.error(`Failed to close page for ${url}: ${pageCloseError.message}`);
+      } catch (err: any) {
+        logger?.warn(`❌ Failed to process ${url}: ${err.message}`);
+        // Continue crawl even if one page fails
+      } finally {
+        if (page && !page.isClosed()) {
+          try {
+            await page.close();
+          } catch (pageCloseError: any) {
+            logger?.error(`Failed to close page for ${url}: ${pageCloseError.message}`);
+          }
         }
       }
+    } // End while loop
+
+  } catch (error) {
+    // Catch critical errors like invalid start URL or browser launch failure
+    logger?.error(`Critical crawl error: ${error instanceof Error ? error.message : error}`);
+    // Rethrow or handle appropriately
+    throw error;
+  } finally {
+    // Ensure browser is closed after crawl finishes or critical error occurs
+    if (browser) {
+      logger?.info(`Crawl finished or errored. Closing browser.`);
+      await browser.close();
+      logger?.debug(`Browser closed after crawl.`);
     }
-  }
+  }
 
-  logger?.info(`Crawl
-  await browser.close();
-  logger?.info(`Found ${results.length} pages.`);
+  logger?.info(`Crawl found ${results.length} pages.`);
   return results;
 }
 
+
 /**
  * Fetches all internal pages of a website recursively starting from a given URL,
  * bundles them into a single HTML string using the bundler module, and writes
- * the result to a file.
+ * the result to a file. Creates its own logger unless `loggerInstance` is provided.
  *
  * @export
 * @param {string} startUrl - The fully qualified URL to begin crawling from.
 * @param {string} outputFile - The path where the bundled HTML file should be saved.
 * @param {number} [maxDepth=1] - The maximum depth to crawl links (default: 1, only the start page).
+ * @param {Logger} [loggerInstance] - Optional external logger instance to use.
 * @returns {Promise<{ pages: number; html: string }>} A promise resolving to an object containing
 * the number of pages successfully crawled and the final bundled HTML string.
 * @throws {Error} Throws errors if the crawl initiation fails, bundling fails, or file writing fails.
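The link-discovery loop above leaves include/exclude filtering as a TODO. One possible shape for that check, sketched here purely as an illustration: micromatch is only named in the TODO comment and is not a dependency added by this release, and the helper name is hypothetical.

import micromatch from 'micromatch';

// Hypothetical helper for the commented-out filter in the crawl loop:
// keep a URL only if it matches an include pattern and matches no exclude pattern.
function passesUrlFilters(
  absoluteUrl: string,
  include: string[] = ['**'],
  exclude: string[] = []
): boolean {
  const included = micromatch.isMatch(absoluteUrl, include);
  const excluded = exclude.length > 0 && micromatch.isMatch(absoluteUrl, exclude);
  return included && !excluded;
}

// Inside the for (const href of hrefs) loop, before the same-origin check:
// if (!passesUrlFilters(absoluteUrl, include, exclude)) continue;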
@@ -250,15 +312,18 @@ async function crawlWebsite(
 export async function recursivelyBundleSite(
   startUrl: string,
   outputFile: string,
-  maxDepth = 1
+  maxDepth = 1,
+  loggerInstance?: Logger // Added optional logger parameter
 ): Promise<{ pages: number; html: string }> {
-  //
-  const logger = new Logger();
+  // Use provided logger OR create a new default one
+  const logger = loggerInstance || new Logger();
   logger.info(`Starting recursive site bundle for ${startUrl} to ${outputFile} (maxDepth: ${maxDepth})`);
 
   try {
     // Step 1: Crawl the website
-
+    // Pass necessary options down to crawlWebsite
+    const crawlOptions = { maxDepth, logger /* Add other options like timeout, userAgent if needed */ };
+    const pages: PageEntry[] = await crawlWebsite(startUrl, crawlOptions);
 
     if (pages.length === 0) {
       logger.warn("Crawl completed but found 0 pages. Output file may be empty or reflect an empty bundle.");
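The new loggerInstance parameter lets a caller (for example the CLI) thread one Logger through the crawl, the bundler, and its own messages instead of having recursivelyBundleSite create a second one. A usage sketch against the updated signature; the import paths, example URL, and output path are illustrative assumptions:

import { recursivelyBundleSite } from './src/core/web-fetcher';
import { Logger } from './src/utils/logger';

const logger = new Logger();

// Crawl https://example.com two levels deep, bundle the result into one HTML file,
// and reuse the caller's logger for all crawl and bundling messages.
const { pages, html } = await recursivelyBundleSite(
  'https://example.com',
  './site-packed.html',
  2,
  logger,
);

logger.info(`Bundled ${pages} page(s), ${Buffer.byteLength(html, 'utf-8')} bytes of HTML.`);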
@@ -267,7 +332,8 @@ export async function recursivelyBundleSite(
     }
 
     // Step 2: Bundle the HTML content
-
+    // Pass the same logger instance for consistent logging
+    const bundledHtml = bundleMultiPageHTML(pages, logger);
     logger.info(`Bundling complete. Output size: ${Buffer.byteLength(bundledHtml, 'utf-8')} bytes.`);
 
     // Step 3: Write the bundled HTML to the output file
@@ -282,11 +348,9 @@
     };
   } catch (error: any) {
     logger.error(`Error during recursive site bundle: ${error.message}`);
-    // Log the stack trace for better debugging if available
     if (error.stack) {
       logger.error(`Stack trace: ${error.stack}`);
     }
-    // Re-throw the error
-    throw error;
+    throw error; // Re-throw the error
   }
 }