portapack 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +67 -8
- package/.github/workflows/ci.yml +5 -4
- package/.releaserc.js +25 -27
- package/CHANGELOG.md +12 -19
- package/LICENSE.md +21 -0
- package/README.md +34 -36
- package/commitlint.config.js +30 -34
- package/dist/cli/cli-entry.cjs +199 -135
- package/dist/cli/cli-entry.cjs.map +1 -1
- package/dist/index.d.ts +0 -3
- package/dist/index.js +194 -134
- package/dist/index.js.map +1 -1
- package/docs/.vitepress/config.ts +36 -34
- package/docs/.vitepress/sidebar-generator.ts +89 -38
- package/docs/cli.md +29 -82
- package/docs/code-of-conduct.md +7 -1
- package/docs/configuration.md +103 -117
- package/docs/contributing.md +6 -2
- package/docs/deployment.md +10 -5
- package/docs/development.md +8 -5
- package/docs/getting-started.md +76 -45
- package/docs/index.md +1 -1
- package/docs/public/android-chrome-192x192.png +0 -0
- package/docs/public/android-chrome-512x512.png +0 -0
- package/docs/public/apple-touch-icon.png +0 -0
- package/docs/public/favicon-16x16.png +0 -0
- package/docs/public/favicon-32x32.png +0 -0
- package/docs/public/favicon.ico +0 -0
- package/docs/site.webmanifest +1 -0
- package/docs/troubleshooting.md +12 -1
- package/examples/main.ts +7 -10
- package/examples/sample-project/script.js +1 -1
- package/jest.config.ts +8 -13
- package/nodemon.json +5 -10
- package/package.json +2 -5
- package/src/cli/cli-entry.ts +2 -2
- package/src/cli/cli.ts +21 -16
- package/src/cli/options.ts +127 -113
- package/src/core/bundler.ts +254 -221
- package/src/core/extractor.ts +639 -520
- package/src/core/minifier.ts +173 -162
- package/src/core/packer.ts +141 -137
- package/src/core/parser.ts +74 -73
- package/src/core/web-fetcher.ts +270 -258
- package/src/index.ts +18 -17
- package/src/types.ts +9 -11
- package/src/utils/font.ts +12 -6
- package/src/utils/logger.ts +110 -105
- package/src/utils/meta.ts +75 -76
- package/src/utils/mime.ts +50 -50
- package/src/utils/slugify.ts +33 -34
- package/tests/unit/cli/cli-entry.test.ts +72 -70
- package/tests/unit/cli/cli.test.ts +314 -278
- package/tests/unit/cli/options.test.ts +294 -301
- package/tests/unit/core/bundler.test.ts +426 -329
- package/tests/unit/core/extractor.test.ts +828 -380
- package/tests/unit/core/minifier.test.ts +374 -274
- package/tests/unit/core/packer.test.ts +298 -264
- package/tests/unit/core/parser.test.ts +538 -150
- package/tests/unit/core/web-fetcher.test.ts +389 -359
- package/tests/unit/index.test.ts +238 -197
- package/tests/unit/utils/font.test.ts +26 -21
- package/tests/unit/utils/logger.test.ts +267 -260
- package/tests/unit/utils/meta.test.ts +29 -28
- package/tests/unit/utils/mime.test.ts +73 -74
- package/tests/unit/utils/slugify.test.ts +14 -12
- package/tsconfig.build.json +9 -10
- package/tsconfig.jest.json +2 -1
- package/tsconfig.json +2 -2
- package/tsup.config.ts +8 -8
- package/typedoc.json +5 -9
- package/docs/demo.md +0 -46
- package/docs/{portapack-transparent.png → public/portapack-transparent.png} +0 -0
- package/docs/{portapack.jpg → public/portapack.jpg} +0 -0
package/src/core/web-fetcher.ts
CHANGED
@@ -12,12 +12,12 @@ import { bundleMultiPageHTML } from './bundler'; // Assuming bundler is here
 
 // Puppeteer Launch Options (Consider making configurable)
 const PUPPETEER_LAUNCH_OPTIONS: puppeteer.LaunchOptions = {
-… (old lines 15-20 not captured in this view)
+  headless: true,
+  args: [
+    '--no-sandbox', // Often required in containerized environments
+    '--disable-setuid-sandbox',
+    '--disable-dev-shm-usage', // Recommended for Docker/CI
+  ],
 };
 
 // Default Page Navigation Options (Consider making configurable)
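Side note on the hunk above: the three Chromium flags added to PUPPETEER_LAUNCH_OPTIONS are the usual prerequisites for running Puppeteer inside Docker or CI sandboxes, as the inline comments say. A minimal sketch of how web-fetcher.ts consumes this constant (the standalone-script framing is illustrative, not part of the package):

import * as puppeteer from 'puppeteer';

// Mirrors the constant introduced in the hunk above.
const PUPPETEER_LAUNCH_OPTIONS: puppeteer.LaunchOptions = {
  headless: true,
  args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage'],
};

async function main(): Promise<void> {
  // web-fetcher.ts passes this shared options object to every puppeteer.launch() call.
  const browser = await puppeteer.launch(PUPPETEER_LAUNCH_OPTIONS);
  console.log(`Chromium launched (PID: ${browser.process()?.pid})`);
  await browser.close();
}

main().catch(console.error);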
@@ -36,88 +36,94 @@ const DEFAULT_PAGE_TIMEOUT = 30000; // 30 seconds
  * @throws {Error} Throws errors from Puppeteer launch, page creation, or navigation failures.
  */
 export async function fetchAndPackWebPage(
-… (old lines 39-42 not captured in this view)
+  url: string,
+  logger?: Logger,
+  timeout: number = DEFAULT_PAGE_TIMEOUT,
+  userAgent?: string
 ): Promise<BuildResult> {
-… (old lines 44-46 not captured in this view)
+  let browser: puppeteer.Browser | null = null;
+  const start = Date.now();
+  logger?.info(`Initiating fetch for single page: ${url}`);
+
+  try {
+    logger?.debug('Launching browser...');
+    browser = await puppeteer.launch(PUPPETEER_LAUNCH_OPTIONS);
+    logger?.debug(`Browser launched successfully (PID: ${browser.process()?.pid}).`);
+    const page = await browser.newPage();
+    logger?.debug(`New page created for ${url}`);
+
+    // Set User-Agent if provided
+    if (userAgent) {
+      await page.setUserAgent(userAgent);
+      logger?.debug(`User-Agent set to: "${userAgent}"`);
+    }
 
     try {
-… (old lines 49-60 not captured in this view)
+      logger?.debug(`Navigating to ${url} with timeout ${timeout}ms`);
+      await page.goto(url, { waitUntil: 'networkidle2', timeout: timeout });
+      logger?.debug(`Navigation successful for ${url}`);
+      const html = await page.content();
+      logger?.debug(`Content retrieved for ${url} (${Buffer.byteLength(html, 'utf-8')} bytes)`);
+
+      const metadata: BundleMetadata = {
+        input: url,
+        outputSize: Buffer.byteLength(html, 'utf-8'),
+        assetCount: 0, // Basic fetch doesn't track assets processed by *this* tool
+        buildTimeMs: Date.now() - start,
+        errors: [], // No errors if we reached this point
+      };
+
+      await page.close();
+      logger?.debug(`Page closed for ${url}`);
+      await browser.close();
+      logger?.debug(`Browser closed for ${url}`);
+      browser = null; // Ensure browser is marked as closed
+
+      return { html, metadata };
+    } catch (pageError: any) {
+      logger?.error(`Error during page processing for ${url}: ${pageError.message}`);
+      // Attempt to close the page even if processing failed
+      if (page && !page.isClosed()) {
         try {
-… (old lines 62-67 not captured in this view)
-      const metadata: BundleMetadata = {
-        input: url,
-        outputSize: Buffer.byteLength(html, 'utf-8'),
-        assetCount: 0, // Basic fetch doesn't track assets processed by *this* tool
-        buildTimeMs: Date.now() - start,
-        errors: [], // No errors if we reached this point
-      };
-
-      await page.close();
-      logger?.debug(`Page closed for ${url}`);
-      await browser.close();
-      logger?.debug(`Browser closed for ${url}`);
-      browser = null; // Ensure browser is marked as closed
-
-      return { html, metadata };
-
-    } catch (pageError: any) {
-      logger?.error(`Error during page processing for ${url}: ${pageError.message}`);
-      // Attempt to close the page even if processing failed
-      if (page && !page.isClosed()) {
-        try {
-          await page.close();
-          logger?.debug(`Page closed after error for ${url}`);
-        } catch (closeErr: any) {
-          logger?.error(`Failed to close page after error for ${url}: ${closeErr.message}`);
-          // Decide if this secondary error should be thrown or just logged
-        }
-      }
-      throw pageError; // Re-throw the original page processing error
-    }
-  } catch (launchError: any) {
-    logger?.error(`Critical error during browser launch or page setup for ${url}: ${launchError.message}`);
-    // Ensure browser is closed if launch succeeded partially but later failed
-    if (browser) {
-      try {
-        await browser.close();
-        logger?.debug('Browser closed after launch/setup error.');
-      } catch (closeErr: any) {
-        logger?.warn(`Failed to close browser after launch/setup error: ${closeErr.message}`);
-      }
-      browser = null;
-    }
-    throw launchError; // Re-throw the original launch/setup error
-  } finally {
-    // Final safety net: If browser somehow wasn't closed and isn't null, attempt closure.
-    if (browser) {
-      logger?.warn(`Closing browser in final cleanup for ${url}. This might indicate an unusual error path.`);
-      try { await browser.close(); } catch (closeErr) { /* Ignore final browser close error */ }
+          await page.close();
+          logger?.debug(`Page closed after error for ${url}`);
+        } catch (closeErr: any) {
+          logger?.error(`Failed to close page after error for ${url}: ${closeErr.message}`);
+          // Decide if this secondary error should be thrown or just logged
         }
+      }
+      throw pageError; // Re-throw the original page processing error
     }
+  } catch (launchError: any) {
+    logger?.error(
+      `Critical error during browser launch or page setup for ${url}: ${launchError.message}`
+    );
+    // Ensure browser is closed if launch succeeded partially but later failed
+    if (browser) {
+      try {
+        await browser.close();
+        logger?.debug('Browser closed after launch/setup error.');
+      } catch (closeErr: any) {
+        logger?.warn(`Failed to close browser after launch/setup error: ${closeErr.message}`);
+      }
+      browser = null;
+    }
+    throw launchError; // Re-throw the original launch/setup error
+  } finally {
+    // Final safety net: If browser somehow wasn't closed and isn't null, attempt closure.
+    if (browser) {
+      logger?.warn(
+        `Closing browser in final cleanup for ${url}. This might indicate an unusual error path.`
+      );
+      try {
+        await browser.close();
+      } catch (closeErr) {
+        /* Ignore final browser close error */
+      }
+    }
+  }
 }
 
-
 /**
  * @typedef {object} CrawlOptions
  * @property {number} [maxDepth=1] - Maximum crawl depth.
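The user-visible part of this hunk is the expanded signature: fetchAndPackWebPage now accepts an optional Logger, a timeout defaulting to DEFAULT_PAGE_TIMEOUT (30 s), and an optional User-Agent string. A usage sketch against the new signature (the relative import paths are assumptions inferred from the file listing at the top of this diff):

import { fetchAndPackWebPage } from './src/core/web-fetcher';
import { Logger } from './src/utils/logger';

async function demo(): Promise<void> {
  const logger = new Logger();
  const { html, metadata } = await fetchAndPackWebPage(
    'https://example.com', // url
    logger,                // optional logger for info/debug output
    45_000,                // timeout in ms, overriding the 30 s default
    'PortaPackDemo/0.3'    // optional User-Agent, new in this version
  );
  logger.info(`Fetched ${html.length} chars (${metadata.outputSize} bytes) in ${metadata.buildTimeMs} ms`);
}

demo().catch(console.error);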
@@ -141,160 +147,157 @@ export async function fetchAndPackWebPage(
  * containing the URL and HTML for each successfully crawled page.
  */
 async function crawlWebsite(
-… (old lines 144-152 not captured in this view)
+  startUrl: string,
+  options: {
+    maxDepth?: number;
+    timeout?: number;
+    include?: string[]; // Add include/exclude/userAgent later if needed
+    exclude?: string[];
+    userAgent?: string;
+    logger?: Logger;
+  }
 ): Promise<PageEntry[]> {
-… (old lines 154-167 not captured in this view)
+  const {
+    maxDepth = 1,
+    timeout = DEFAULT_PAGE_TIMEOUT,
+    // include = ['**'], // TODO: Implement glob filtering
+    // exclude = [],
+    userAgent,
+    logger,
+  } = options;
+
+  logger?.info(`Starting crawl for ${startUrl} with maxDepth ${maxDepth}`);
+
+  if (maxDepth <= 0) {
+    logger?.warn('maxDepth is 0 or negative, no pages will be crawled.');
+    return [];
+  }
+
+  let browser: puppeteer.Browser | null = null;
+  const visited = new Set<string>();
+  const results: PageEntry[] = [];
+  const queue: { url: string; depth: number }[] = [];
+  let startOrigin: string;
+
+  try {
+    // Validate start URL and get origin
+    try {
+      startOrigin = new URL(startUrl).origin;
+    } catch (e: any) {
+      logger?.error(`Invalid start URL: ${startUrl}. ${e.message}`);
+      throw new Error(`Invalid start URL: ${startUrl}`); // Propagate error
     }
 
-… (old lines 170-171 not captured in this view)
-  const results: PageEntry[] = [];
-  const queue: { url: string; depth: number }[] = [];
-  let startOrigin: string;
-
+    // Normalize start URL (remove fragment)
+    let normalizedStartUrl: string;
     try {
-… (old lines 177-183 not captured in this view)
+      const parsedStartUrl = new URL(startUrl);
+      parsedStartUrl.hash = '';
+      normalizedStartUrl = parsedStartUrl.href;
+    } catch (e: any) {
+      logger?.error(`Invalid start URL: ${startUrl}. ${e.message}`);
+      throw new Error(`Invalid start URL: ${startUrl}`); // Propagate error
+    }
 
-… (old lines 185-193 not captured in this view)
+    // Launch browser *after* validating URL
+    logger?.debug('Launching browser for crawl...');
+    browser = await puppeteer.launch(PUPPETEER_LAUNCH_OPTIONS);
+    logger?.debug(`Browser launched for crawl (PID: ${browser.process()?.pid}).`);
+
+    // Initial queue setup
+    visited.add(normalizedStartUrl);
+    queue.push({ url: normalizedStartUrl, depth: 1 });
+    logger?.debug(`Queued initial URL: ${normalizedStartUrl} (depth 1)`);
+
+    while (queue.length > 0) {
+      const { url, depth } = queue.shift()!;
+      logger?.info(`Processing: ${url} (depth ${depth})`);
+      let page: puppeteer.Page | null = null;
+
+      try {
+        page = await browser.newPage();
+
+        if (userAgent) {
+          await page.setUserAgent(userAgent);
         }
+        // Consider adding viewport setting if needed: await page.setViewport({ width: 1280, height: 800 });
+
+        await page.goto(url, { waitUntil: 'networkidle2', timeout: timeout });
+        const html = await page.content();
 
-
-      logger?.debug(
-      browser = await puppeteer.launch(PUPPETEER_LAUNCH_OPTIONS);
-      logger?.debug(`Browser launched for crawl (PID: ${browser.process()?.pid}).`);
+        results.push({ url, html }); // Matches PageEntry type
+        logger?.debug(`Successfully fetched content for ${url}`);
 
-      //
-… (old lines 202-204 not captured in this view)
+        // Link Discovery (only if not at max depth)
+        if (depth < maxDepth) {
+          logger?.debug(`Discovering links on ${url} (depth ${depth}/${maxDepth})`);
+          const hrefs = await page.evaluate(() =>
+            Array.from(document.querySelectorAll('a[href]'), a => a.getAttribute('href'))
+          );
+          logger?.debug(`Found ${hrefs.length} potential hrefs on ${url}`);
 
-… (old lines 206-208 not captured in this view)
-      let page: puppeteer.Page | null = null;
+          let linksAdded = 0;
+          for (const href of hrefs) {
+            if (!href) continue;
 
+            let absoluteUrl: string;
             try {
-… (old lines 212-218 not captured in this view)
-      await page.goto(url, { waitUntil: 'networkidle2', timeout: timeout });
-      const html = await page.content();
-
-      results.push({ url, html }); // Matches PageEntry type
-      logger?.debug(`Successfully fetched content for ${url}`);
-
-      // Link Discovery (only if not at max depth)
-      if (depth < maxDepth) {
-        logger?.debug(`Discovering links on ${url} (depth ${depth}/${maxDepth})`);
-        const hrefs = await page.evaluate(() =>
-          Array.from(document.querySelectorAll('a[href]'), a => a.getAttribute('href'))
-        );
-        logger?.debug(`Found ${hrefs.length} potential hrefs on ${url}`);
-
-        let linksAdded = 0;
-        for (const href of hrefs) {
-          if (!href) continue;
-
-          let absoluteUrl: string;
-          try {
-            const resolved = new URL(href, url);
-            resolved.hash = ''; // Normalize
-            absoluteUrl = resolved.href;
-          } catch (e) {
-            logger?.debug(`Ignoring invalid URL syntax: "${href}" on page ${url}`);
-            continue;
-          }
-
-          // TODO: Implement include/exclude filtering here using micromatch or similar
-          // if (!matchesInclude(absoluteUrl, include) || matchesExclude(absoluteUrl, exclude)) {
-          //   logger?.debug(`Skipping due to include/exclude rules: ${absoluteUrl}`);
-          //   continue;
-          // }
-
-          // Filter: same origin and not visited
-          if (absoluteUrl.startsWith(startOrigin) && !visited.has(absoluteUrl)) {
-            visited.add(absoluteUrl);
-            queue.push({ url: absoluteUrl, depth: depth + 1 });
-            linksAdded++;
-          }
-        }
-        logger?.debug(`Added ${linksAdded} new unique internal links to queue from ${url}`);
-      } else {
-        logger?.debug(`Max depth (${maxDepth}) reached, not discovering links on ${url}`);
-      }
-
-    } catch (err: any) {
-      logger?.warn(`❌ Failed to process ${url}: ${err.message}`);
-      // Continue crawl even if one page fails
-    } finally {
-      if (page && !page.isClosed()) {
-        try {
-          await page.close();
-        } catch (pageCloseError: any) {
-          logger?.error(`Failed to close page for ${url}: ${pageCloseError.message}`);
-        }
-      }
+              const resolved = new URL(href, url);
+              resolved.hash = ''; // Normalize
+              absoluteUrl = resolved.href;
+            } catch (e) {
+              logger?.debug(`Ignoring invalid URL syntax: "${href}" on page ${url}`);
+              continue;
             }
-… (old lines 277-289 not captured in this view)
+
+            // TODO: Implement include/exclude filtering here using micromatch or similar
+            // if (!matchesInclude(absoluteUrl, include) || matchesExclude(absoluteUrl, exclude)) {
+            //   logger?.debug(`Skipping due to include/exclude rules: ${absoluteUrl}`);
+            //   continue;
+            // }
+
+            // Filter: same origin and not visited
+            if (absoluteUrl.startsWith(startOrigin) && !visited.has(absoluteUrl)) {
+              visited.add(absoluteUrl);
+              queue.push({ url: absoluteUrl, depth: depth + 1 });
+              linksAdded++;
+            }
+          }
+          logger?.debug(`Added ${linksAdded} new unique internal links to queue from ${url}`);
+        } else {
+          logger?.debug(`Max depth (${maxDepth}) reached, not discovering links on ${url}`);
+        }
+      } catch (err: any) {
+        logger?.warn(`❌ Failed to process ${url}: ${err.message}`);
+        // Continue crawl even if one page fails
+      } finally {
+        if (page && !page.isClosed()) {
+          try {
+            await page.close();
+          } catch (pageCloseError: any) {
+            logger?.error(`Failed to close page for ${url}: ${pageCloseError.message}`);
+          }
         }
+      }
+    } // End while loop
+  } catch (error) {
+    // Catch critical errors like invalid start URL or browser launch failure
+    logger?.error(`Critical crawl error: ${error instanceof Error ? error.message : error}`);
+    // Rethrow or handle appropriately
+    throw error;
+  } finally {
+    // Ensure browser is closed after crawl finishes or critical error occurs
+    if (browser) {
+      logger?.info(`Crawl finished or errored. Closing browser.`);
+      await browser.close();
+      logger?.debug(`Browser closed after crawl.`);
     }
+  }
 
-… (old lines 293-294 not captured in this view)
+  logger?.info(`Crawl found ${results.length} pages.`);
+  return results;
 }
 
-
 /**
  * Fetches all internal pages of a website recursively starting from a given URL,
  * bundles them into a single HTML string using the bundler module, and writes
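The rewritten crawlWebsite above is a breadth-first traversal: a queue of { url, depth } pairs, a visited set keyed on fragment-stripped URLs, and a same-origin gate via absoluteUrl.startsWith(startOrigin). A self-contained sketch of just that normalize-and-filter step, matching the logic in the hunk (shouldEnqueue is a hypothetical helper name, not part of the source):

const startOrigin = new URL('https://example.com/docs/').origin; // 'https://example.com'
const visited = new Set<string>(['https://example.com/docs/']);

function shouldEnqueue(href: string, baseUrl: string): string | null {
  let absoluteUrl: string;
  try {
    const resolved = new URL(href, baseUrl); // resolves relative links against the current page
    resolved.hash = '';                      // '#section' variants collapse to one visited entry
    absoluteUrl = resolved.href;
  } catch {
    return null; // invalid URL syntax is skipped, as the crawler does
  }
  // Same origin and not yet visited, mirroring the filter in the hunk above.
  return absoluteUrl.startsWith(startOrigin) && !visited.has(absoluteUrl) ? absoluteUrl : null;
}

console.log(shouldEnqueue('/pricing#top', 'https://example.com/docs/')); // 'https://example.com/pricing'
console.log(shouldEnqueue('https://other.org/page', 'https://example.com/docs/')); // null (different origin)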
@@ -310,47 +313,56 @@ async function crawlWebsite(
  * @throws {Error} Throws errors if the crawl initiation fails, bundling fails, or file writing fails.
  */
 export async function recursivelyBundleSite(
-… (old lines 313-316 not captured in this view)
+  startUrl: string,
+  outputFile: string,
+  maxDepth = 1,
+  loggerInstance?: Logger // Added optional logger parameter
 ): Promise<{ pages: number; html: string }> {
-… (old lines 318-332 not captured in this view)
+  // Use provided logger OR create a new default one
+  const logger = loggerInstance || new Logger();
+  logger.info(
+    `Starting recursive site bundle for ${startUrl} to ${outputFile} (maxDepth: ${maxDepth})`
+  );
+
+  try {
+    // Step 1: Crawl the website
+    // Pass necessary options down to crawlWebsite
+    const crawlOptions = {
+      maxDepth,
+      logger /* Add other options like timeout, userAgent if needed */,
+    };
+    const pages: PageEntry[] = await crawlWebsite(startUrl, crawlOptions);
+
+    if (pages.length === 0) {
+      logger.warn(
+        'Crawl completed but found 0 pages. Output file may be empty or reflect an empty bundle.'
+      );
+    } else {
+      logger.info(`Crawl successful, found ${pages.length} pages. Starting bundling.`);
+    }
 
-… (old lines 334-354 not captured in this view)
+    // Step 2: Bundle the HTML content
+    // Pass the same logger instance for consistent logging
+    const bundledHtml = bundleMultiPageHTML(pages, logger);
+    logger.info(
+      `Bundling complete. Output size: ${Buffer.byteLength(bundledHtml, 'utf-8')} bytes.`
+    );
+
+    // Step 3: Write the bundled HTML to the output file
+    logger.info(`Writing bundled HTML to ${outputFile}`);
+    await fs.writeFile(outputFile, bundledHtml, 'utf-8');
+    logger.info(`Successfully wrote bundled output to ${outputFile}`);
+
+    // Step 4: Return the results
+    return {
+      pages: pages.length,
+      html: bundledHtml,
+    };
+  } catch (error: any) {
+    logger.error(`Error during recursive site bundle: ${error.message}`);
+    if (error.stack) {
+      logger.error(`Stack trace: ${error.stack}`);
     }
-
+    throw error; // Re-throw the error
+  }
+}
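Taken together, recursivelyBundleSite is now crawl, then bundle, then write, with a single shared logger threaded through all three steps. A sketch of a direct call (import paths are again assumptions based on the file listing; bundleMultiPageHTML and fs are used internally by the module):

import { recursivelyBundleSite } from './src/core/web-fetcher';
import { Logger } from './src/utils/logger';

async function demo(): Promise<void> {
  const logger = new Logger();
  const { pages, html } = await recursivelyBundleSite(
    'https://example.com', // startUrl: the crawl stays on this origin
    'site-bundle.html',    // outputFile: bundled HTML is written here
    2,                     // maxDepth: start page plus same-origin links one hop out
    logger                 // loggerInstance: the new optional fourth parameter
  );
  logger.info(`Bundled ${pages} page(s), ${html.length} characters of HTML`);
}

demo().catch(console.error);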
package/src/index.ts
CHANGED
@@ -2,11 +2,12 @@
  * @file index.ts
  * @description Public API surface for PortaPack.
  * Exposes the unified `pack()` method and advanced helpers like recursive crawling and multi-page bundling.
- * @version 1.0.0 - (Add version if applicable)
- * @date 2025-04-11
  */
 
-import { … (rest of old import line not captured in this view)
+import {
+  fetchAndPackWebPage as coreFetchAndPack,
+  recursivelyBundleSite as coreRecursivelyBundleSite,
+} from './core/web-fetcher';
 import { parseHTML } from './core/parser';
 import { extractAssets } from './core/extractor';
 import { minifyAssets } from './core/minifier';
@@ -56,10 +57,10 @@ export async function pack(
   // Check if it contains '://' but isn't http(s) -> likely unsupported protocol
   // Allow anything else (including relative/absolute paths without explicit protocols)
   if (!isHttp && /:\/\//.test(input) && !input.startsWith('file://')) {
-… (old lines 59-62 not captured in this view)
+    const errorMsg = `Unsupported protocol or input type: ${input}`;
+    logger.error(errorMsg);
+    throw new Error(errorMsg);
+  }
 
   const isRemote = /^https?:\/\//i.test(input); // Check again after validation
   const recursive = options.recursive === true || typeof options.recursive === 'number';
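The net effect of the tightened check in pack(): any input containing '://' that is neither http(s) nor file:// now fails fast with a logged error instead of falling through to the local-file path. A behavioral sketch (importing from the published package name and calling pack() without an options argument are assumptions; only the validation logic itself is confirmed by the hunk):

import { pack } from 'portapack';

async function demo(): Promise<void> {
  await pack('./local/index.html');  // plain paths (hypothetical file) still pass validation
  await pack('https://example.com'); // http(s) inputs are treated as remote

  try {
    await pack('ftp://example.com/site'); // unsupported protocol
  } catch (err) {
    // "Unsupported protocol or input type: ftp://example.com/site"
    console.error((err as Error).message);
  }
}

demo().catch(console.error);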
@@ -101,8 +102,8 @@ export async function generatePortableHTML(
     logger.info(`Finished fetching and packing remote page: ${input}`);
     return { html: result.html, metadata };
   } catch (error: any) {
-… (old lines 104-105 not captured in this view)
+    logger.error(`Error fetching remote page ${input}: ${error.message}`);
+    throw error;
   }
 }
 
@@ -121,8 +122,8 @@ export async function generatePortableHTML(
     logger.info(`Finished processing local file: ${input}`);
     return { html: finalHtml, metadata };
   } catch (error: any) {
-… (old lines 124-125 not captured in this view)
+    logger.error(`Error processing local file ${input}: ${error.message}`);
+    throw error;
   }
 }
 
@@ -147,9 +148,9 @@ export async function generateRecursivePortableHTML(
   const timer = new BuildTimer(url);
 
   if (!/^https?:\/\//i.test(url)) {
-… (old lines 150-152 not captured in this view)
+    const errorMsg = `Invalid URL for recursive bundling. Must start with http:// or https://. Received: ${url}`;
+    logger.error(errorMsg);
+    throw new Error(errorMsg);
   }
 
   logger.info(`Starting recursive bundle for ${url} up to depth ${depth}`);
@@ -166,8 +167,8 @@ export async function generateRecursivePortableHTML(
     logger.info(`Finished recursive bundle for ${url}. Bundled ${pages} pages.`);
     return { html, metadata };
   } catch (error: any) {
-… (old lines 169-170 not captured in this view)
+    logger.error(`Error during recursive bundle for ${url}: ${error.message}`);
+    throw error;
   }
 }
 
@@ -196,4 +197,4 @@ export type {
   ParsedHTML,
   Asset,
   PageEntry,
-};
+};