portapack 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +9 -0
- package/.github/workflows/ci.yml +73 -0
- package/.github/workflows/deploy-pages.yml +56 -0
- package/.prettierrc +9 -0
- package/.releaserc.js +29 -0
- package/CHANGELOG.md +21 -0
- package/README.md +288 -0
- package/commitlint.config.js +36 -0
- package/dist/cli/cli-entry.js +1694 -0
- package/dist/cli/cli-entry.js.map +1 -0
- package/dist/index.d.ts +275 -0
- package/dist/index.js +1405 -0
- package/dist/index.js.map +1 -0
- package/docs/.vitepress/config.ts +89 -0
- package/docs/.vitepress/sidebar-generator.ts +73 -0
- package/docs/cli.md +117 -0
- package/docs/code-of-conduct.md +65 -0
- package/docs/configuration.md +151 -0
- package/docs/contributing.md +107 -0
- package/docs/demo.md +46 -0
- package/docs/deployment.md +132 -0
- package/docs/development.md +168 -0
- package/docs/getting-started.md +106 -0
- package/docs/index.md +40 -0
- package/docs/portapack-transparent.png +0 -0
- package/docs/portapack.jpg +0 -0
- package/docs/troubleshooting.md +107 -0
- package/examples/main.ts +118 -0
- package/examples/sample-project/index.html +12 -0
- package/examples/sample-project/logo.png +1 -0
- package/examples/sample-project/script.js +1 -0
- package/examples/sample-project/styles.css +1 -0
- package/jest.config.ts +124 -0
- package/jest.setup.cjs +211 -0
- package/nodemon.json +11 -0
- package/output.html +1 -0
- package/package.json +161 -0
- package/site-packed.html +1 -0
- package/src/cli/cli-entry.ts +28 -0
- package/src/cli/cli.ts +139 -0
- package/src/cli/options.ts +151 -0
- package/src/core/bundler.ts +201 -0
- package/src/core/extractor.ts +618 -0
- package/src/core/minifier.ts +233 -0
- package/src/core/packer.ts +191 -0
- package/src/core/parser.ts +115 -0
- package/src/core/web-fetcher.ts +292 -0
- package/src/index.ts +262 -0
- package/src/types.ts +163 -0
- package/src/utils/font.ts +41 -0
- package/src/utils/logger.ts +139 -0
- package/src/utils/meta.ts +100 -0
- package/src/utils/mime.ts +90 -0
- package/src/utils/slugify.ts +70 -0
- package/test-output.html +0 -0
- package/tests/__fixtures__/sample-project/index.html +5 -0
- package/tests/unit/cli/cli-entry.test.ts +104 -0
- package/tests/unit/cli/cli.test.ts +230 -0
- package/tests/unit/cli/options.test.ts +316 -0
- package/tests/unit/core/bundler.test.ts +287 -0
- package/tests/unit/core/extractor.test.ts +1129 -0
- package/tests/unit/core/minifier.test.ts +414 -0
- package/tests/unit/core/packer.test.ts +193 -0
- package/tests/unit/core/parser.test.ts +540 -0
- package/tests/unit/core/web-fetcher.test.ts +374 -0
- package/tests/unit/index.test.ts +339 -0
- package/tests/unit/utils/font.test.ts +81 -0
- package/tests/unit/utils/logger.test.ts +275 -0
- package/tests/unit/utils/meta.test.ts +70 -0
- package/tests/unit/utils/mime.test.ts +96 -0
- package/tests/unit/utils/slugify.test.ts +71 -0
- package/tsconfig.build.json +11 -0
- package/tsconfig.jest.json +17 -0
- package/tsconfig.json +20 -0
- package/tsup.config.ts +71 -0
- package/typedoc.json +28 -0
@@ -0,0 +1,292 @@
/**
 * @file src/core/web-fetcher.ts
 * @description Provides functions for fetching web page content using Puppeteer,
 * including recursive site crawling capabilities.
 */

import * as puppeteer from 'puppeteer';
import * as fs from 'fs/promises';
import { Logger } from '../utils/logger'; // Assuming logger is in ../utils
import { BuildResult, PageEntry } from '../types'; // Assuming types are defined here
import { bundleMultiPageHTML } from './bundler'; // Assuming bundler is here

/**
 * @typedef {object} CrawlResult
 * @property {string} url - The URL of the crawled page.
 * @property {string} html - The HTML content of the crawled page.
 */

/**
 * Fetches the rendered HTML content and basic metadata for a single web page URL.
 * Manages its own browser instance lifecycle (launch and close).
 *
 * @param {string} url - The fully qualified URL to fetch.
 * @param {Logger} [logger] - Optional logger instance for debug/info messages.
 * @param {number} [timeout=30000] - Navigation timeout in milliseconds.
 * @returns {Promise<BuildResult>} A promise that resolves with the fetched HTML
 *   and metadata, or rejects on critical errors.
 * @throws {Error} Throws errors from Puppeteer launch, page creation, or navigation failures.
 */
export async function fetchAndPackWebPage(
  url: string,
  logger?: Logger,
  timeout: number = 30000
): Promise<BuildResult> {
  let browser: puppeteer.Browser | null = null; // Initialize browser to null
  const start = Date.now();
  logger?.debug(`Initiating fetch for single page: ${url}`);

  try {
    browser = await puppeteer.launch({ headless: true });
    logger?.debug(`Browser launched for ${url}`);
    const page = await browser.newPage();
    logger?.debug(`Page created for ${url}`);

    try {
      logger?.debug(`Navigating to ${url} with timeout ${timeout}ms`);
      await page.goto(url, { waitUntil: 'networkidle2', timeout: timeout });
      logger?.debug(`Navigation successful for ${url}`);
      const html = await page.content();
      logger?.debug(`Content retrieved for ${url}`);

      const metadata: BuildResult['metadata'] = {
        input: url,
        outputSize: Buffer.byteLength(html, 'utf-8'),
        assetCount: 0, // Basic fetch doesn't track assets
        buildTimeMs: Date.now() - start,
        errors: [], // No errors if we reached this point
      };

      await page.close(); // Close the page specifically
      logger?.debug(`Page closed for ${url}`);
      await browser.close(); // Close the browser instance
      logger?.debug(`Browser closed for ${url}`);
      browser = null; // Ensure browser is marked as closed

      return { html, metadata };

    } catch (pageError: any) {
      logger?.error(`Error during page processing for ${url}: ${pageError.message}`);
      // Ensure page is closed even if an error occurred during processing
      try {
        await page.close();
      } catch (closeErr: any) {
        logger?.error(`Failed to close page for ${url}: ${closeErr.message}`);
      }
      throw pageError; // Re-throw the original page processing error
    }
  } catch (launchError: any) {
    logger?.error(`Critical error during browser launch or page creation for ${url}: ${launchError.message}`);
    // Ensure browser is closed if launch succeeded but newPage failed, etc.
    // Although if launch fails, browser might be null.
    if (browser) {
      try { await browser.close(); } catch (closeErr) { /* Ignore browser close error */ }
    }
    throw launchError; // Re-throw the original launch/setup error
  } finally {
    // Final check: If browser somehow wasn't closed and isn't null, attempt closure.
    // This handles edge cases where errors might bypass earlier closes.
    if (browser) {
      logger?.warn(`Closing browser in final cleanup for ${url}. This might indicate an unusual error path.`);
      try { await browser.close(); } catch (closeErr) { /* Ignore final browser close error */ }
    }
  }
}

/**
 * Internal function to recursively crawl a website starting from a given URL.
 * Uses a single browser instance and manages pages for efficiency during crawl.
 * Implements Breadth-First Search (BFS) using a queue.
 *
 * @private
 * @param {string} startUrl - The initial URL to start crawling from.
 * @param {number} maxDepth - The maximum depth of links to follow (1 means only the start URL).
 * @param {Logger} [logger] - Optional logger instance.
 * @returns {Promise<PageEntry[]>} A promise resolving to an array of PageEntry objects
 *   containing the URL and HTML for each successfully crawled page.
 */
async function crawlWebsite(
  startUrl: string,
  maxDepth: number,
  logger?: Logger
): Promise<PageEntry[]> {
  logger?.info(`Starting crawl for ${startUrl} with maxDepth ${maxDepth}`);

  // Don't even start a browser if maxDepth is 0
  if (maxDepth <= 0) {
    logger?.info('maxDepth is 0 or negative, no pages will be crawled.');
    return [];
  }

  const browser = await puppeteer.launch({ headless: true });
  const visited = new Set<string>();
  const results: PageEntry[] = [];
  // Queue stores URLs to visit and their corresponding depth
  const queue: { url: string; depth: number }[] = [];

  // Initialize startOrigin for same-origin check
  let startOrigin: string;
  try {
    startOrigin = new URL(startUrl).origin;
  } catch (e: any) {
    logger?.error(`Invalid start URL: ${startUrl}. ${e.message}`);
    await browser.close();
    return []; // Cannot start crawl with invalid URL
  }

  // Normalize start URL (remove fragment) and add to queue/visited if depth allows
  let normalizedStartUrl: string;
  try {
    const parsedStartUrl = new URL(startUrl);
    parsedStartUrl.hash = ''; // Remove fragment for consistent visited checks
    normalizedStartUrl = parsedStartUrl.href;
  } catch (e: any) {
    logger?.error(`Invalid start URL: ${startUrl}. ${e.message}`);
    await browser.close();
    return []; // Cannot start crawl with invalid URL
  }

  visited.add(normalizedStartUrl);
  queue.push({ url: normalizedStartUrl, depth: 1 });
  logger?.debug(`Queued initial URL: ${normalizedStartUrl} (depth 1)`);

  while (queue.length > 0) {
    const { url, depth } = queue.shift()!; // Non-null assertion ok due to queue.length check
    logger?.info(`Processing: ${url} (depth ${depth})`);
    let page: puppeteer.Page | null = null;

    try {
      page = await browser.newPage();
      // Set a reasonable viewport, sometimes helps with rendering/layout dependent scripts
      await page.setViewport({ width: 1280, height: 800 });
      await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
      const html = await page.content();

      // Add successfully fetched page to results
      // Ensure the object structure matches your PageEntry type definition
      results.push({ url, html });
      logger?.debug(`Successfully fetched content for ${url}`);

      // --- Link Discovery ---
      // Only look for more links if we haven't reached the maximum depth
      if (depth < maxDepth) {
        logger?.debug(`Discovering links on ${url} (current depth ${depth}, maxDepth ${maxDepth})`);
        // Use page.evaluate to get all href attributes directly from the DOM
        const hrefs = await page.evaluate(() =>
          Array.from(document.querySelectorAll('a[href]'), a => a.getAttribute('href'))
        );
        logger?.debug(`Found ${hrefs.length} potential hrefs on ${url}`);

        let linksAdded = 0;
        for (const href of hrefs) {
          if (!href) continue; // Skip empty hrefs like href=""

          let absoluteUrl: string;
          try {
            // Resolve the href relative to the current page's URL
            const resolved = new URL(href, url);
            // Remove fragment (#) for visited checks and queueing consistency
            resolved.hash = '';
            absoluteUrl = resolved.href;
          } catch (e) {
            // Ignore URLs that fail to parse (e.g., "javascript:void(0)")
            logger?.debug(`Ignoring invalid URL syntax: "${href}" on page ${url}`);
            continue;
          }

          // --- Filtering and Queueing ---
          // 1. Check if it belongs to the same origin as the start URL
          // 2. Check if it has already been visited (or is in the queue)
          if (absoluteUrl.startsWith(startOrigin) && !visited.has(absoluteUrl)) {
            visited.add(absoluteUrl); // Mark as visited *before* adding to queue
            queue.push({ url: absoluteUrl, depth: depth + 1 });
            linksAdded++;
            // logger?.debug(`Queueing: ${absoluteUrl} (depth ${depth + 1})`); // Verbose
          } else {
            // logger?.debug(`Skipping (external, visited, or invalid): ${absoluteUrl}`); // Verbose
          }
        }
        logger?.debug(`Added ${linksAdded} new unique internal links to queue from ${url}`);
      } else {
        logger?.debug(`Max depth (${maxDepth}) reached, not discovering links on ${url}`);
      }

    } catch (err: any) {
      // Log errors encountered during page processing (goto, content, evaluate)
      logger?.warn(`❌ Failed to process ${url}: ${err.message}`);
      // Optionally add error details to results or a separate error list if needed
    } finally {
      // Ensure the page is closed reliably after processing or error
      if (page) {
        try {
          await page.close();
        } catch (pageCloseError: any) {
          // Log if closing the page fails, but don't let it stop the crawl
          logger?.error(`Failed to close page for ${url}: ${pageCloseError.message}`);
        }
      }
    }
  } // End while loop

  logger?.info(`Crawl finished. Closing browser.`);
  await browser.close();
  logger?.info(`Found ${results.length} pages.`);
  return results;
}

/**
 * Fetches all internal pages of a website recursively starting from a given URL,
 * bundles them into a single HTML string using the bundler module, and writes
 * the result to a file.
 *
 * @export
 * @param {string} startUrl - The fully qualified URL to begin crawling from.
 * @param {string} outputFile - The path where the bundled HTML file should be saved.
 * @param {number} [maxDepth=1] - The maximum depth to crawl links (default: 1, only the start page).
 * @returns {Promise<{ pages: number; html: string }>} A promise resolving to an object containing
 *   the number of pages successfully crawled and the final bundled HTML string.
 * @throws {Error} Throws errors if the crawl initiation fails, bundling fails, or file writing fails.
 */
export async function recursivelyBundleSite(
  startUrl: string,
  outputFile: string,
  maxDepth = 1
): Promise<{ pages: number; html: string }> {
  // Create a logger instance specifically for this operation
  const logger = new Logger();
  logger.info(`Starting recursive site bundle for ${startUrl} to ${outputFile} (maxDepth: ${maxDepth})`);

  try {
    // Step 1: Crawl the website
    const pages: PageEntry[] = await crawlWebsite(startUrl, maxDepth, logger);

    if (pages.length === 0) {
      logger.warn("Crawl completed but found 0 pages. Output file may be empty or reflect an empty bundle.");
    } else {
      logger.info(`Crawl successful, found ${pages.length} pages. Starting bundling.`);
    }

    // Step 2: Bundle the HTML content
    const bundledHtml = bundleMultiPageHTML(pages, logger); // Passing logger for consistency
    logger.info(`Bundling complete. Output size: ${Buffer.byteLength(bundledHtml, 'utf-8')} bytes.`);

    // Step 3: Write the bundled HTML to the output file
    logger.info(`Writing bundled HTML to ${outputFile}`);
    await fs.writeFile(outputFile, bundledHtml, 'utf-8');
    logger.info(`Successfully wrote bundled output to ${outputFile}`);

    // Step 4: Return the results
    return {
      pages: pages.length,
      html: bundledHtml
    };
  } catch (error: any) {
    logger.error(`Error during recursive site bundle: ${error.message}`);
    // Log the stack trace for better debugging if available
    if (error.stack) {
      logger.error(`Stack trace: ${error.stack}`);
    }
    // Re-throw the error to signal failure to the caller
    throw error;
  }
}
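A minimal usage sketch for the two exported functions above (an editorial example, not part of the published package; the import paths assume a script compiled alongside `src/`):

// Example only: exercise fetchAndPackWebPage and recursivelyBundleSite directly.
import { fetchAndPackWebPage, recursivelyBundleSite } from './src/core/web-fetcher';
import { Logger } from './src/utils/logger';

async function main(): Promise<void> {
  // Single page: caller supplies an optional Logger and a navigation timeout in ms.
  const single = await fetchAndPackWebPage('https://example.com', new Logger(), 15000);
  console.log(`Fetched ${single.metadata.outputSize} bytes in ${single.metadata.buildTimeMs}ms`);

  // Recursive crawl: maxDepth 2 follows links one hop beyond the start page,
  // writes the bundle to site.html, and also returns it to the caller.
  const site = await recursivelyBundleSite('https://example.com', 'site.html', 2);
  console.log(`Bundled ${site.pages} pages (${site.html.length} characters).`);
}

main().catch(err => {
  console.error(err);
  process.exit(1);
});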
package/src/index.ts
ADDED
@@ -0,0 +1,262 @@
/**
 * @file src/index.ts
 * @description
 * Main public API for the PortaPack library.
 * Provides functions to create portable HTML files from local paths or URLs,
 * including single-page fetching, recursive site crawling, and multi-page bundling.
 * It coordinates calls to various core modules (parser, extractor, minifier, packer, web-fetcher, bundler).
 */

// Core processing modules
import { parseHTML } from './core/parser';
import { extractAssets } from './core/extractor';
import { minifyAssets } from './core/minifier';
import { packHTML } from './core/packer';
// Core web fetching modules (imported with aliases)
import {
  fetchAndPackWebPage as coreFetchAndPack,
  recursivelyBundleSite as coreRecursivelyBundleSite
} from './core/web-fetcher';
// Core bundler module (for multi-page)
import { bundleMultiPageHTML as coreBundleMultiPageHTML } from './core/bundler';
// Utilities
import { BuildTimer } from './utils/meta';
import { Logger } from './utils/logger';

// Types
import type {
  BundleOptions,
  BuildResult,
  PageEntry,
  BundleMetadata // Type used in return values
} from './types';

/**
 * Generates a single, portable HTML file from a local file path or a remote URL.
 *
 * - **For local files:** Reads the file, parses it, discovers linked assets (CSS, JS, images, fonts),
 *   fetches/reads asset content, optionally embeds assets as data URIs (default),
 *   optionally minifies HTML/CSS/JS (default), and packs everything into a single HTML string.
 * - **For remote URLs:** Fetches the HTML content of the single specified URL using the core web-fetcher.
 *   *Note: This does not process/embed assets for single remote URLs; it returns the fetched HTML as-is.*
 *
 * @export
 * @param {string} input - The local file path or remote http(s) URL of the HTML document.
 * @param {BundleOptions} [options={}] - Configuration options controlling embedding, minification,
 *   base URL, logging level, etc. See `BundleOptions` type for details.
 * @param {Logger} [loggerInstance] - Optional pre-configured logger instance to use.
 * @returns {Promise<BuildResult>} A promise resolving to an object containing the final HTML string
 *   and metadata (`BundleMetadata`) about the bundling process (input, size, time, assets, errors).
 * @throws {Error} Throws errors if file reading, parsing, required asset fetching, or processing fails critically.
 */
export async function generatePortableHTML(
  input: string,
  options: BundleOptions = {},
  loggerInstance?: Logger // Allow passing logger
): Promise<BuildResult> {
  // Use passed logger or create one based on options. Defaults to LogLevel.INFO.
  const logger = loggerInstance || new Logger(options.logLevel);
  logger.info(`Generating portable HTML for: ${input}`);
  const timer = new BuildTimer(input); // Start timing

  // --- Handle Remote URLs ---
  const isRemote = /^https?:\/\//i.test(input);
  if (isRemote) {
    logger.info(`Input is a remote URL. Fetching page content directly...`);
    try {
      // Call the specific public API wrapper for fetching, passing logger and options
      const result = await fetchAndPackWebPage(input, options, logger);
      logger.info(`Remote fetch complete. Input: ${input}, Size: ${result.metadata.outputSize} bytes, Time: ${result.metadata.buildTimeMs}ms`);
      // Forward the result (which includes metadata finalized by fetchAndPackWebPage)
      return result;
    } catch (error: any) {
      logger.error(`Failed to fetch remote URL ${input}: ${error.message}`);
      throw error; // Re-throw to signal failure
    }
  }

  // --- Handle Local Files ---
  logger.info(`Input is a local file path. Starting local processing pipeline...`);
  // Determine base path for resolving relative assets. Default to input file's path.
  const basePath = options.baseUrl || input;
  logger.debug(`Using base path for asset resolution: ${basePath}`);

  try {
    // Execute the core processing steps sequentially, passing the logger
    const parsed = await parseHTML(input, logger);
    const enriched = await extractAssets(parsed, options.embedAssets ?? true, basePath, logger);
    const minified = await minifyAssets(enriched, options, logger); // Pass full options
    const finalHtml = packHTML(minified, logger);

    // Finalize metadata using the timer.
    // Pass assetCount calculated from the final list of processed assets.
    const metadata = timer.finish(finalHtml, {
      assetCount: minified.assets.length
      // FIX: Removed incorrect attempt to get errors from logger
      // Errors collected by the timer itself (via timer.addError) will be included automatically.
    });
    logger.info(`Local processing complete. Input: ${input}, Size: ${metadata.outputSize} bytes, Assets: ${metadata.assetCount}, Time: ${metadata.buildTimeMs}ms`);
    if (metadata.errors && metadata.errors.length > 0) {
      logger.warn(`Completed with ${metadata.errors.length} warning(s) logged in metadata.`);
    }

    // Include any errors collected *by the timer* in the result
    return { html: finalHtml, metadata };

  } catch (error: any) {
    logger.error(`Error during local processing for ${input}: ${error.message}`);
    throw error; // Re-throw critical errors
  }
}

/**
 * Crawls a website starting from a given URL up to a specified depth,
 * bundles all discovered internal HTML pages into a single multi-page file,
 * and returns the result.
 *
 * @export
 * @param {string} url - The entry point URL to start crawling. Must be http or https.
 * @param {number} [depth=1] - The maximum link depth to crawl (1 means only the starting page).
 * @param {BundleOptions} [options={}] - Configuration options. Primarily used for `logLevel`.
 * @param {Logger} [loggerInstance] - Optional pre-configured logger instance to use.
 * @returns {Promise<BuildResult>} A promise resolving to an object containing the bundled multi-page HTML string
 *   and metadata (`BundleMetadata`) about the crawl and bundling process.
 * @throws {Error} Throws errors if the initial URL is invalid, crawling fails, or bundling fails.
 */
export async function generateRecursivePortableHTML(
  url: string,
  depth = 1,
  options: BundleOptions = {},
  loggerInstance?: Logger // Allow passing logger
): Promise<BuildResult> {
  // Use passed logger or create one
  const logger = loggerInstance || new Logger(options.logLevel);
  logger.info(`Generating recursive portable HTML for: ${url}, Max Depth: ${depth}`);
  const timer = new BuildTimer(url);

  if (!/^https?:\/\//i.test(url)) {
    const errMsg = `Invalid input URL for recursive bundling: ${url}. Must start with http(s)://`;
    logger.error(errMsg);
    throw new Error(errMsg);
  }

  // Placeholder output path for core function (consider removing if core doesn't need it)
  const internalOutputPathPlaceholder = `${new URL(url).hostname}_recursive.html`;

  try {
    // Call the CORE recursive site function
    // Assuming coreRecursivelyBundleSite accepts logger as an optional argument
    const { html, pages } = await coreRecursivelyBundleSite(url, internalOutputPathPlaceholder, depth); // Pass logger if accepted
    logger.info(`Recursive crawl complete. Discovered and bundled ${pages} pages.`);

    // Finalize metadata
    timer.setPageCount(pages); // Store page count
    const metadata = timer.finish(html, {
      assetCount: 0, // NOTE: Asset count across multiple pages is not currently aggregated.
      pagesBundled: pages
      // TODO: Potentially collect errors from the core function if it returns them
    });
    logger.info(`Recursive bundling complete. Input: ${url}, Size: ${metadata.outputSize} bytes, Pages: ${metadata.pagesBundled}, Time: ${metadata.buildTimeMs}ms`);
    if (metadata.errors && metadata.errors.length > 0) {
      logger.warn(`Completed with ${metadata.errors.length} warning(s) logged in metadata.`);
    }

    return { html, metadata };

  } catch (error: any) {
    logger.error(`Error during recursive generation for ${url}: ${error.message}`);
    if (error.cause instanceof Error) { // Log cause if it's an Error
      logger.error(`Cause: ${error.cause.message}`);
    }
    throw error; // Re-throw
  }
}

/**
 * Fetches the HTML content of a single remote URL using the core web-fetcher.
 * This function acts as a public wrapper, primarily adding standardized timing and metadata.
 * It does *not* process assets within the fetched HTML.
 *
 * @export
 * @param {string} url - The remote http(s) URL to fetch.
 * @param {BundleOptions} [options={}] - Configuration options, mainly for `logLevel`.
 * @param {Logger} [loggerInstance] - Optional pre-configured logger instance to use.
 * @returns {Promise<BuildResult>} A promise resolving to the BuildResult containing the fetched HTML
 *   and metadata from the fetch operation.
 * @throws {Error} Propagates errors directly from the core fetching function or if URL is invalid.
 */
export async function fetchAndPackWebPage(
  url: string,
  options: BundleOptions = {},
  loggerInstance?: Logger // Allow passing an existing logger
): Promise<BuildResult> {
  // Use the passed logger or create a new one based on options
  const logger = loggerInstance || new Logger(options.logLevel);
  logger.info(`Fetching single remote page: ${url}`);
  const timer = new BuildTimer(url);

  if (!/^https?:\/\//i.test(url)) {
    const errMsg = `Invalid input URL for fetchAndPackWebPage: ${url}. Must start with http(s)://`;
    logger.error(errMsg);
    throw new Error(errMsg);
  }

  try {
    // Call the CORE fetcher function, passing the logger
    // Assuming coreFetchAndPack accepts logger as an optional second argument
    const result = await coreFetchAndPack(url, logger);

    // Finalize metadata using timer and data from the core result
    const metadata = timer.finish(result.html, {
      // Use assetCount and errors from core metadata if available
      assetCount: result.metadata?.assetCount ?? 0,
      errors: result.metadata?.errors ?? [] // Ensure errors array exists
    });
    logger.info(`Single page fetch complete. Input: ${url}, Size: ${metadata.outputSize} bytes, Assets: ${metadata.assetCount}, Time: ${metadata.buildTimeMs}ms`);
    if (metadata.errors && metadata.errors.length > 0) {
      logger.warn(`Completed with ${metadata.errors.length} warning(s) logged in metadata.`);
    }

    // Return HTML from core result, but use metadata finalized by this wrapper
    return { html: result.html, metadata };
  } catch (error: any) {
    logger.error(`Error during single page fetch for ${url}: ${error.message}`);
    throw error; // Re-throw original error
  }
}

/**
 * Bundles an array of pre-fetched/generated HTML pages into a single static HTML file
 * using `<template>` tags and a simple client-side hash-based router.
 * This function does not perform any asset processing on the input HTML strings.
 *
 * @export
 * @param {PageEntry[]} pages - An array of page objects, where each object has a `url` (for slug generation)
 *   and `html` (the content for that page).
 * @param {BundleOptions} [options={}] - Configuration options, primarily used for `logLevel`.
 * @param {Logger} [loggerInstance] - Optional pre-configured logger instance.
 * @returns {string} A single HTML string representing the bundled multi-page document.
 */
export function bundleMultiPageHTML(
  pages: PageEntry[],
  options: BundleOptions = {},
  loggerInstance?: Logger // Allow passing an existing logger
): string {
  // Use passed logger or create a new one
  const logger = loggerInstance || new Logger(options.logLevel);
  logger.info(`Bundling ${pages.length} provided pages into multi-page HTML...`);

  try {
    // Directly call the CORE multi-page bundler function, passing the logger
    // Assuming coreBundleMultiPageHTML accepts logger as an optional second argument
    const bundledHtml = coreBundleMultiPageHTML(pages, logger);
    logger.info(`Multi-page bundling complete.`);
    return bundledHtml;
  } catch (error: any) {
    logger.error(`Error during multi-page bundling: ${error.message}`);
    throw error; // Re-throw error
  }
}

// Optional: Export core types directly from index for easier consumption?
export * from './types';
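A minimal usage sketch of the public API above (an editorial example, not shipped with the package; it assumes the published `portapack` entry point re-exports `src/index.ts`):

// Example only: bundle a local page, crawl a remote site, and bundle pre-fetched pages.
import {
  generatePortableHTML,
  generateRecursivePortableHTML,
  bundleMultiPageHTML,
} from 'portapack';
import { writeFile } from 'fs/promises';

async function main(): Promise<void> {
  // Local file: assets are embedded and minified by default.
  const local = await generatePortableHTML('./examples/sample-project/index.html');
  await writeFile('sample-packed.html', local.html, 'utf-8');

  // Remote site: crawl two levels deep and bundle every internal page found.
  const site = await generateRecursivePortableHTML('https://example.com', 2);
  console.log(`Bundled ${site.metadata.pagesBundled ?? 0} pages, ${site.metadata.outputSize} bytes.`);

  // Pre-fetched pages can also be bundled directly into the hash-routed template format.
  const manual = bundleMultiPageHTML([
    { url: 'https://example.com/', html: '<h1>Home</h1>' },
    { url: 'https://example.com/about', html: '<h1>About</h1>' },
  ]);
  console.log(`Manual bundle is ${manual.length} characters long.`);
}

main().catch(console.error);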