portapack 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/README.md +83 -216
- package/dist/cli/{cli-entry.js → cli-entry.cjs} +626 -498
- package/dist/cli/cli-entry.cjs.map +1 -0
- package/dist/index.d.ts +51 -56
- package/dist/index.js +523 -443
- package/dist/index.js.map +1 -1
- package/docs/cli.md +158 -42
- package/jest.config.ts +18 -8
- package/jest.setup.cjs +66 -146
- package/package.json +5 -5
- package/src/cli/cli-entry.ts +15 -15
- package/src/cli/cli.ts +130 -119
- package/src/core/bundler.ts +174 -63
- package/src/core/extractor.ts +243 -203
- package/src/core/web-fetcher.ts +205 -141
- package/src/index.ts +161 -224
- package/tests/unit/cli/cli-entry.test.ts +66 -77
- package/tests/unit/cli/cli.test.ts +243 -145
- package/tests/unit/core/bundler.test.ts +334 -258
- package/tests/unit/core/extractor.test.ts +391 -1051
- package/tests/unit/core/minifier.test.ts +130 -221
- package/tests/unit/core/packer.test.ts +255 -106
- package/tests/unit/core/parser.test.ts +89 -458
- package/tests/unit/core/web-fetcher.test.ts +330 -285
- package/tests/unit/index.test.ts +206 -300
- package/tests/unit/utils/logger.test.ts +32 -28
- package/tsconfig.jest.json +7 -7
- package/tsup.config.ts +34 -29
- package/dist/cli/cli-entry.js.map +0 -1
- package/output.html +0 -1
- package/site-packed.html +0 -1
- package/test-output.html +0 -0
@@ -1,8 +1,6 @@
|
|
1
1
|
/**
|
2
2
|
* @file tests/unit/core/web-fetcher.test.ts
|
3
3
|
* @description Unit tests for the web page fetching and crawling logic (`web-fetcher.ts`).
|
4
|
-
* Uses Jest mocks extensively to isolate the code under test from actual
|
5
|
-
* Puppeteer operations and filesystem access, compatible with ESM.
|
6
4
|
*/
|
7
5
|
|
8
6
|
// --- Type Imports ---
|
@@ -11,72 +9,56 @@ import type {
|
|
11
9
|
Browser,
|
12
10
|
HTTPResponse,
|
13
11
|
GoToOptions,
|
14
|
-
LaunchOptions
|
12
|
+
LaunchOptions,
|
13
|
+
Viewport,
|
14
|
+
EvaluateFunc,
|
15
|
+
ElementHandle,
|
16
|
+
// UserAgentMetadata
|
15
17
|
} from 'puppeteer';
|
16
|
-
import type { BuildResult, PageEntry } from '../../../src/types';
|
18
|
+
import type { BuildResult, PageEntry, BundleMetadata } from '../../../src/types';
|
17
19
|
import { Logger } from '../../../src/utils/logger';
|
18
20
|
import type { PathLike } from 'fs';
|
19
21
|
|
20
22
|
// --- Jest Imports ---
|
21
|
-
import { jest, describe, it, expect, beforeEach } from '@jest/globals';
|
22
|
-
|
23
|
-
//
|
24
|
-
|
25
|
-
|
26
|
-
const
|
27
|
-
const
|
28
|
-
const
|
29
|
-
const
|
30
|
-
const
|
31
|
-
const
|
32
|
-
const
|
33
|
-
const
|
34
|
-
const mockNewPage = jest.fn<()
|
35
|
-
const mockBrowserClose = jest.fn<()
|
23
|
+
import { jest, describe, it, expect, beforeEach, afterEach } from '@jest/globals';
|
24
|
+
|
25
|
+
// =================== MOCK SETUP ===================
|
26
|
+
const mockPageGoto = jest.fn<Page['goto']>();
|
27
|
+
const mockPageContent = jest.fn<Page['content']>();
|
28
|
+
const mockPageEvaluate = jest.fn<Page['evaluate']>();
|
29
|
+
const mockPageClose = jest.fn<Page['close']>();
|
30
|
+
const mockPageSetViewport = jest.fn<Page['setViewport']>();
|
31
|
+
const mockPageUrl = jest.fn<Page['url']>();
|
32
|
+
const mockPage$ = jest.fn<Page['$']>();
|
33
|
+
const mockPage$$ = jest.fn<Page['$$']>();
|
34
|
+
const mockPageIsClosed = jest.fn<Page['isClosed']>();
|
35
|
+
const mockPageSetUserAgent = jest.fn<Page['setUserAgent']>();
|
36
|
+
const mockNewPage = jest.fn<Browser['newPage']>();
|
37
|
+
const mockBrowserClose = jest.fn<Browser['close']>();
|
38
|
+
const mockBrowserProcess = jest.fn<Browser['process']>().mockReturnValue(null);
|
36
39
|
const mockLaunch = jest.fn<(options?: LaunchOptions) => Promise<Browser>>();
|
40
|
+
const mockWriteFile = jest.fn<typeof import('fs/promises').writeFile>();
|
41
|
+
const mockBundleMultiPageHTMLFn = jest.fn<(pages: PageEntry[], logger?: Logger) => string>();
|
37
42
|
|
38
|
-
|
39
|
-
|
43
|
+
jest.mock('puppeteer', () => ({ __esModule: true, launch: mockLaunch, }));
|
44
|
+
jest.mock('fs/promises', () => ({ __esModule: true, writeFile: mockWriteFile, }));
|
45
|
+
jest.mock('../../../src/core/bundler', () => ({ __esModule: true, bundleMultiPageHTML: mockBundleMultiPageHTMLFn, }));
|
46
|
+
// ====================================================
|
40
47
|
|
41
|
-
|
48
|
+
import { fetchAndPackWebPage, recursivelyBundleSite } from '../../../src/core/web-fetcher';
|
42
49
|
|
43
|
-
// Mock the 'puppeteer' module
|
44
|
-
jest.unstable_mockModule('puppeteer', () => ({
|
45
|
-
launch: mockLaunch,
|
46
|
-
}));
|
47
|
-
|
48
|
-
// Mock 'fs/promises' - providing only named exports
|
49
|
-
jest.unstable_mockModule('fs/promises', () => ({
|
50
|
-
writeFile: mockWriteFile,
|
51
|
-
// Add readFile, mkdir etc. mocks if web-fetcher.ts uses them
|
52
|
-
}));
|
53
|
-
|
54
|
-
// Mock the internal bundler module
|
55
|
-
jest.unstable_mockModule('../../../src/core/bundler', () => ({
|
56
|
-
bundleMultiPageHTML: mockBundleMultiPageHTMLFn,
|
57
|
-
}));
|
58
|
-
|
59
|
-
|
60
|
-
// --- Dynamic Import ---
|
61
|
-
// Import the module under test *after* all mocks are set up
|
62
|
-
// This should now work if the import in web-fetcher.ts is correct
|
63
|
-
const { fetchAndPackWebPage, recursivelyBundleSite } = await import('../../../src/core/web-fetcher');
|
64
|
-
|
65
|
-
|
66
|
-
// --- Test Suite Setup ---
|
67
50
|
jest.setTimeout(60000);
|
68
51
|
|
69
52
|
describe('🕸️ web-fetcher', () => {
|
70
|
-
|
71
|
-
let
|
72
|
-
let mockPageObject: Partial<Pick<Page, 'goto' | 'content' | 'close' | '$' | '$$' | 'evaluate' | 'url' | 'setViewport'>>;
|
53
|
+
let mockBrowserObject: Partial<Browser>;
|
54
|
+
let mockPageObject: Partial<Page>;
|
73
55
|
let loggerInstance: Logger;
|
74
56
|
|
75
|
-
// --- Constants
|
57
|
+
// --- Constants ---
|
76
58
|
const startUrl = 'https://test-crawl.site/';
|
77
59
|
const page2Url = `${startUrl}page2`;
|
78
60
|
const page3Url = `${startUrl}page3`;
|
79
|
-
const relativeUrl = `${startUrl}relative.html`;
|
61
|
+
const relativeUrl = `${startUrl}relative.html`; // Absolute for mock key
|
80
62
|
const subDomainUrl = 'https://sub.test-crawl.site/other';
|
81
63
|
const httpDomainUrl = 'http://test-crawl.site/other';
|
82
64
|
const externalUrl = 'https://othersite.com';
|
@@ -102,273 +84,336 @@ describe('🕸️ web-fetcher', () => {
|
|
102
84
|
|
103
85
|
beforeEach(() => {
|
104
86
|
jest.clearAllMocks();
|
105
|
-
|
106
|
-
// Logger setup
|
107
|
-
loggerInstance = new Logger(); // Use default level
|
87
|
+
loggerInstance = new Logger(); // Set to DEBUG for verbose mock logs if needed
|
108
88
|
jest.spyOn(loggerInstance, 'debug');
|
109
89
|
jest.spyOn(loggerInstance, 'warn');
|
110
90
|
jest.spyOn(loggerInstance, 'error');
|
111
91
|
jest.spyOn(loggerInstance, 'info');
|
112
92
|
|
113
|
-
//
|
93
|
+
// Assemble mock objects
|
94
|
+
mockPageObject = {
|
95
|
+
goto: mockPageGoto, content: mockPageContent, evaluate: mockPageEvaluate as any,
|
96
|
+
close: mockPageClose, setViewport: mockPageSetViewport, url: mockPageUrl,
|
97
|
+
$: mockPage$ as any, $$: mockPage$$ as any, isClosed: mockPageIsClosed,
|
98
|
+
setUserAgent: mockPageSetUserAgent
|
99
|
+
};
|
100
|
+
mockBrowserObject = { newPage: mockNewPage, close: mockBrowserClose, process: mockBrowserProcess };
|
101
|
+
|
102
|
+
// Default Mock Configurations
|
114
103
|
mockPageGoto.mockResolvedValue(null);
|
115
104
|
mockPageContent.mockResolvedValue('<html><body>Default Mock Page Content</body></html>');
|
116
|
-
mockPageEvaluate.mockResolvedValue([]);
|
105
|
+
mockPageEvaluate.mockResolvedValue([]); // Default to no links
|
117
106
|
mockPageClose.mockResolvedValue(undefined);
|
118
107
|
mockPageSetViewport.mockResolvedValue(undefined);
|
119
|
-
mockPageUrl.mockReturnValue(startUrl);
|
108
|
+
mockPageUrl.mockReturnValue(startUrl); // Default URL initially
|
120
109
|
mockPage$.mockResolvedValue(null);
|
121
110
|
mockPage$$.mockResolvedValue([]);
|
122
|
-
|
111
|
+
mockPageIsClosed.mockReturnValue(false);
|
112
|
+
mockPageSetUserAgent.mockResolvedValue(undefined);
|
113
|
+
mockNewPage.mockResolvedValue(mockPageObject as Page); // Ensure newPage returns the configured mock object
|
123
114
|
mockBrowserClose.mockResolvedValue(undefined);
|
124
115
|
mockLaunch.mockResolvedValue(mockBrowserObject as Browser);
|
125
116
|
mockWriteFile.mockResolvedValue(undefined);
|
126
117
|
mockBundleMultiPageHTMLFn.mockReturnValue(bundledHtmlResult);
|
127
|
-
|
128
|
-
// Assemble mock objects
|
129
|
-
mockPageObject = {
|
130
|
-
goto: mockPageGoto, content: mockPageContent, evaluate: mockPageEvaluate,
|
131
|
-
close: mockPageClose, setViewport: mockPageSetViewport, url: mockPageUrl,
|
132
|
-
$: mockPage$, $$: mockPage$$,
|
133
|
-
};
|
134
|
-
mockBrowserObject = { newPage: mockNewPage, close: mockBrowserClose };
|
135
|
-
|
136
|
-
// Re-configure mockNewPage implementation AFTER objects are defined
|
137
|
-
mockNewPage.mockImplementation(async () => mockPageObject as Page);
|
138
118
|
});
|
139
119
|
|
140
120
|
// --- Test Suites ---
|
141
121
|
|
142
|
-
describe('fetchAndPackWebPage()', () => {
|
143
|
-
// Test cases from previous version should now work with correct mocking
|
144
|
-
// ... (Keep all 5 fetchAndPackWebPage tests: ✅, 🚨, ❌, 💥content, 💥newpage) ...
|
145
|
-
const testUrl = 'https://example-fetch.com'; // URL just used as input
|
146
|
-
|
147
|
-
// it('✅ fetches rendered HTML using mocked Puppeteer', async () => {
|
148
|
-
// const expectedHtml = '<html><body>Specific Mock Content</body></html>';
|
149
|
-
// mockPageContent.mockResolvedValueOnce(expectedHtml); // Override mock for this test
|
150
|
-
|
151
|
-
// const result = await fetchAndPackWebPage(testUrl, loggerInstance);
|
152
|
-
|
153
|
-
// expect(mockLaunch).toHaveBeenCalledTimes(1);
|
154
|
-
// expect(mockNewPage).toHaveBeenCalledTimes(1);
|
155
|
-
// expect(mockPageGoto).toHaveBeenCalledWith(testUrl, expect.objectContaining({ waitUntil: 'networkidle2' }));
|
156
|
-
// expect(mockPageContent).toHaveBeenCalledTimes(1);
|
157
|
-
// expect(mockPageClose).toHaveBeenCalledTimes(1);
|
158
|
-
// expect(mockBrowserClose).toHaveBeenCalledTimes(1);
|
159
|
-
// expect(result.html).toBe(expectedHtml);
|
160
|
-
// });
|
161
|
-
|
162
|
-
// it('🚨 handles navigation timeout or failure gracefully (mocked)', async () => {
|
163
|
-
// const testFailUrl = 'https://fail.test';
|
164
|
-
// const navigationError = new Error('Navigation Timeout Exceeded: 30000ms exceeded');
|
165
|
-
// mockPageGoto.mockRejectedValueOnce(navigationError); // Make the mocked goto fail
|
166
|
-
|
167
|
-
// await expect(fetchAndPackWebPage(testFailUrl, loggerInstance))
|
168
|
-
// .rejects.toThrow(navigationError);
|
169
|
-
|
170
|
-
// expect(mockPageGoto).toHaveBeenCalledWith(testFailUrl, expect.anything());
|
171
|
-
// expect(mockPageContent).not.toHaveBeenCalled();
|
172
|
-
// expect(mockPageClose).toHaveBeenCalledTimes(1);
|
173
|
-
// expect(mockBrowserClose).toHaveBeenCalledTimes(1);
|
174
|
-
// });
|
175
122
|
|
123
|
+
describe('fetchAndPackWebPage()', () => {
|
124
|
+
const testUrl = 'https://example-fetch.com';
|
125
|
+
// --- fetchAndPackWebPage tests ---
|
126
|
+
it('✅ fetches rendered HTML using mocked Puppeteer', async () => {
|
127
|
+
const expectedHtml = '<html><body>Specific Mock Content</body></html>';
|
128
|
+
mockPageContent.mockResolvedValueOnce(expectedHtml);
|
129
|
+
const result = await fetchAndPackWebPage(testUrl, loggerInstance);
|
130
|
+
expect(mockLaunch).toHaveBeenCalledTimes(1);
|
131
|
+
expect(mockNewPage).toHaveBeenCalledTimes(1);
|
132
|
+
expect(mockPageGoto).toHaveBeenCalledWith(testUrl, expect.objectContaining({ waitUntil: 'networkidle2', timeout: 30000 }));
|
133
|
+
expect(mockPageContent).toHaveBeenCalledTimes(1);
|
134
|
+
expect(mockPageClose).toHaveBeenCalledTimes(1);
|
135
|
+
expect(mockBrowserClose).toHaveBeenCalledTimes(1);
|
136
|
+
expect(result.html).toBe(expectedHtml);
|
137
|
+
});
|
138
|
+
it('✅ handles custom timeout and userAgent options', async () => {
|
139
|
+
const customTimeout = 15000;
|
140
|
+
const customUA = "TestAgent/1.0";
|
141
|
+
mockPageContent.mockResolvedValueOnce("Custom UA Page");
|
142
|
+
await fetchAndPackWebPage(testUrl, loggerInstance, customTimeout, customUA);
|
143
|
+
expect(mockLaunch).toHaveBeenCalledTimes(1);
|
144
|
+
expect(mockNewPage).toHaveBeenCalledTimes(1);
|
145
|
+
expect(mockPageSetUserAgent).toHaveBeenCalledWith(customUA);
|
146
|
+
expect(mockPageGoto).toHaveBeenCalledWith(testUrl, expect.objectContaining({ timeout: customTimeout }));
|
147
|
+
expect(mockPageClose).toHaveBeenCalledTimes(1);
|
148
|
+
expect(mockBrowserClose).toHaveBeenCalledTimes(1);
|
149
|
+
});
|
150
|
+
it('🚨 handles navigation timeout or failure gracefully (mocked)', async () => {
|
151
|
+
const testFailUrl = 'https://fail.test';
|
152
|
+
const navigationError = new Error('Navigation Timeout Exceeded: 30000ms exceeded');
|
153
|
+
mockPageGoto.mockImplementationOnce(async (url) => { if (url === testFailUrl) throw navigationError; return null; });
|
154
|
+
await expect(fetchAndPackWebPage(testFailUrl, loggerInstance)).rejects.toThrow(navigationError);
|
155
|
+
expect(mockPageGoto).toHaveBeenCalledWith(testFailUrl, expect.anything());
|
156
|
+
expect(mockPageContent).not.toHaveBeenCalled();
|
157
|
+
expect(mockPageClose).toHaveBeenCalledTimes(1);
|
158
|
+
expect(mockBrowserClose).toHaveBeenCalledTimes(1);
|
159
|
+
});
|
176
160
|
it('❌ handles browser launch errors gracefully (mocked)', async () => {
|
177
|
-
|
178
|
-
|
161
|
+
const launchError = new Error('Failed to launch browser');
|
162
|
+
mockLaunch.mockRejectedValueOnce(launchError);
|
163
|
+
await expect(fetchAndPackWebPage(testUrl, loggerInstance)).rejects.toThrow(launchError);
|
164
|
+
expect(mockLaunch).toHaveBeenCalledTimes(1);
|
165
|
+
expect(mockNewPage).not.toHaveBeenCalled();
|
166
|
+
expect(mockBrowserClose).not.toHaveBeenCalled();
|
167
|
+
});
|
168
|
+
it('💥 handles errors during page content retrieval (mocked)', async () => {
|
169
|
+
const contentError = new Error('Failed to get page content');
|
170
|
+
mockPageGoto.mockResolvedValue(null);
|
171
|
+
mockPageContent.mockRejectedValueOnce(contentError);
|
172
|
+
await expect(fetchAndPackWebPage(testUrl, loggerInstance)).rejects.toThrow(contentError);
|
173
|
+
expect(mockPageGoto).toHaveBeenCalledTimes(1);
|
174
|
+
expect(mockPageContent).toHaveBeenCalledTimes(1);
|
175
|
+
expect(mockPageClose).toHaveBeenCalledTimes(1);
|
176
|
+
expect(mockBrowserClose).toHaveBeenCalledTimes(1);
|
177
|
+
});
|
178
|
+
it('💥 handles errors during new page creation (mocked)', async () => {
|
179
|
+
const newPageError = new Error('Failed to create new page');
|
180
|
+
mockLaunch.mockResolvedValue(mockBrowserObject as Browser);
|
181
|
+
mockNewPage.mockRejectedValueOnce(newPageError);
|
182
|
+
await expect(fetchAndPackWebPage(testUrl, loggerInstance)).rejects.toThrow(newPageError);
|
183
|
+
expect(mockLaunch).toHaveBeenCalledTimes(1);
|
184
|
+
expect(mockNewPage).toHaveBeenCalledTimes(1);
|
185
|
+
expect(mockPageGoto).not.toHaveBeenCalled();
|
186
|
+
expect(mockBrowserClose).toHaveBeenCalledTimes(1);
|
187
|
+
});
|
188
|
+
});
|
189
|
+
|
190
|
+
|
191
|
+
describe('recursivelyBundleSite()', () => {
|
192
|
+
// Helper function using the mocks - STATEFUL EVALUATE (Revised)
|
193
|
+
const setupCrawlSimulation = (pages: Record<string, { html: string; links?: string[] }>) => {
|
194
|
+
// State variable *within* the helper scope
|
195
|
+
let currentSimulatedUrl = '';
|
196
|
+
|
197
|
+
// Reset mocks each time setup is called
|
198
|
+
mockPageUrl.mockReset(); mockPageContent.mockReset();
|
199
|
+
mockPageEvaluate.mockReset(); mockPageGoto.mockReset();
|
200
|
+
mockNewPage.mockReset();
|
201
|
+
|
202
|
+
// newPage returns the shared page object
|
203
|
+
mockNewPage.mockImplementation(async () => mockPageObject as Page);
|
204
|
+
|
205
|
+
// goto updates the state variable *within this scope*
|
206
|
+
mockPageGoto.mockImplementation(async (url: string): Promise<HTTPResponse | null> => {
|
207
|
+
console.log(`DEBUG MOCK [Helper]: page.goto setting current URL to: ${url}`);
|
208
|
+
currentSimulatedUrl = url; // Update the variable in *this* closure
|
209
|
+
return null;
|
210
|
+
});
|
211
|
+
|
212
|
+
// url reads the state variable *from this scope*
|
213
|
+
mockPageUrl.mockImplementation((): string => {
|
214
|
+
return currentSimulatedUrl || startUrl;
|
215
|
+
});
|
216
|
+
|
217
|
+
// content reads the state variable *from this scope*
|
218
|
+
mockPageContent.mockImplementation(async (): Promise<string> => {
|
219
|
+
const urlNow = currentSimulatedUrl || startUrl;
|
220
|
+
return pages[urlNow]?.html ?? `<html><body>Fallback for ${urlNow}</body></html>`;
|
221
|
+
});
|
222
|
+
|
223
|
+
// evaluate reads state *from this scope* and returns links
|
224
|
+
// Needs 'as any' cast on the implementation due to complex signature
|
225
|
+
(mockPageEvaluate as any).mockImplementation(async () => {
|
226
|
+
const urlNow = currentSimulatedUrl || startUrl; // Read state from this closure
|
227
|
+
const links = pages[urlNow]?.links ?? []; // Get links based on current state
|
228
|
+
console.log(`DEBUG MOCK [Helper-Stateful]: page.evaluate for ${urlNow}. Returning links: ${JSON.stringify(links)}`);
|
229
|
+
return links; // Return only links
|
230
|
+
});
|
231
|
+
};
|
179
232
|
|
180
|
-
|
181
|
-
|
233
|
+
|
234
|
+
// --- recursivelyBundleSite tests ---
|
235
|
+
it('📄 crawls site recursively (BFS), bundles output, respects depth', async () => {
|
236
|
+
const maxDepth = 2;
|
237
|
+
setupCrawlSimulation({
|
238
|
+
[startUrl]: { html: page1HtmlWithLinks, links: ['/page2', page3Url] }, // Links for startUrl
|
239
|
+
[page2Url]: { html: page2HtmlNoLinks, links: [] }, // No links for page2
|
240
|
+
[page3Url]: { html: page3HtmlWithCycleLink, links: ['/'] } // Link back for page3
|
241
|
+
});
|
242
|
+
|
243
|
+
const result = await recursivelyBundleSite(startUrl, outputPath, maxDepth, loggerInstance);
|
182
244
|
|
183
245
|
expect(mockLaunch).toHaveBeenCalledTimes(1);
|
184
|
-
|
185
|
-
expect(
|
246
|
+
// Check calls - SHOULD WORK NOW
|
247
|
+
expect(mockNewPage).toHaveBeenCalledTimes(3); // start, page2, page3
|
248
|
+
expect(mockPageGoto).toHaveBeenCalledTimes(3); // start, page2, page3
|
249
|
+
expect(mockPageEvaluate).toHaveBeenCalledTimes(1); // Only called for startUrl (depth 1 < maxDepth 2)
|
250
|
+
expect(mockPageClose).toHaveBeenCalledTimes(3);
|
251
|
+
expect(mockBrowserClose).toHaveBeenCalledTimes(1);
|
252
|
+
expect(mockBundleMultiPageHTMLFn).toHaveBeenCalledTimes(1);
|
253
|
+
const bundleArgs = mockBundleMultiPageHTMLFn.mock.calls[0][0] as PageEntry[];
|
254
|
+
expect(bundleArgs).toHaveLength(3); // Should collect all 3 pages
|
255
|
+
expect(result.pages).toBe(3);
|
186
256
|
});
|
187
257
|
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
// expect(mockPageGoto).toHaveBeenCalledTimes(1);
|
197
|
-
// expect(mockPageContent).toHaveBeenCalledTimes(1); // Attempted
|
198
|
-
// expect(mockPageClose).toHaveBeenCalledTimes(1);
|
199
|
-
// expect(mockBrowserClose).toHaveBeenCalledTimes(1);
|
200
|
-
// });
|
201
|
-
// it('💥 handles errors during new page creation (mocked)', async () => {
|
202
|
-
// const newPageError = new Error('Failed to create new page');
|
203
|
-
// mockLaunch.mockResolvedValue(mockBrowserObject as Browser); // Launch succeeds
|
204
|
-
// mockNewPage.mockRejectedValueOnce(newPageError); // newPage fails
|
205
|
-
|
206
|
-
// // Act: Call the function and expect it to throw the error
|
207
|
-
// await expect(fetchAndPackWebPage(testUrl, loggerInstance))
|
208
|
-
// .rejects.toThrow(newPageError);
|
209
|
-
|
210
|
-
// // Assert: Check the state *after* the error occurred
|
211
|
-
// expect(mockLaunch).toHaveBeenCalledTimes(1);
|
212
|
-
// // REMOVED: mockNewPage.mockResolvedValueOnce(mockPage); // This line was incorrect and unnecessary
|
213
|
-
// expect(mockNewPage).toHaveBeenCalledTimes(1); // Verify newPage was attempted
|
214
|
-
// expect(mockPageGoto).not.toHaveBeenCalled(); // Navigation should not happen if newPage fails
|
215
|
-
// expect(mockBrowserClose).toHaveBeenCalledTimes(1); // Cleanup should still run
|
216
|
-
// });
|
217
|
-
});
|
258
|
+
it('🔁 obeys crawl depth limit (maxDepth = 1)', async () => {
|
259
|
+
setupCrawlSimulation({ [startUrl]: { html: page1HtmlWithLinks, links: ['/page2'] } });
|
260
|
+
const result = await recursivelyBundleSite(startUrl, outputPath, 1, loggerInstance);
|
261
|
+
expect(mockNewPage).toHaveBeenCalledTimes(1); // Only startUrl
|
262
|
+
expect(mockPageEvaluate).not.toHaveBeenCalled(); // Depth 1 not < maxDepth 1
|
263
|
+
expect(result.pages).toBe(1);
|
264
|
+
});
|
218
265
|
|
219
|
-
|
220
|
-
|
266
|
+
it('S crawls using default maxDepth = 1 if not provided', async () => {
|
267
|
+
setupCrawlSimulation({ [startUrl]: { html: page1HtmlWithLinks, links: ['/page2'] } });
|
268
|
+
await recursivelyBundleSite(startUrl, outputPath, undefined, loggerInstance);
|
269
|
+
expect(mockNewPage).toHaveBeenCalledTimes(1);
|
270
|
+
expect(mockPageEvaluate).not.toHaveBeenCalled();
|
271
|
+
});
|
221
272
|
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
});
|
227
|
-
mockPageContent.mockImplementation(async () => {
|
228
|
-
const currentUrl = mockPageUrl();
|
229
|
-
return pages[currentUrl]?.html ?? `<html><body>Fallback for ${currentUrl}</body></html>`;
|
230
|
-
});
|
231
|
-
mockPageEvaluate.mockImplementation(async (evalFn: any) => {
|
232
|
-
if (typeof evalFn === 'function' && evalFn.toString().includes('querySelectorAll')) {
|
233
|
-
const currentUrl = mockPageUrl();
|
234
|
-
return pages[currentUrl]?.links ?? [];
|
235
|
-
}
|
236
|
-
return [];
|
237
|
-
});
|
238
|
-
mockNewPage.mockImplementation(async () => mockPageObject as Page);
|
239
|
-
};
|
240
|
-
|
241
|
-
// Test cases from previous version should now work with correct mocking
|
242
|
-
// ... (Keep all 9 recursivelyBundleSite tests: 📄, 🔁, S, 🚫, 🔗, 🔄, 🤕, 📁, 💾) ...
|
243
|
-
// it('📄 crawls site recursively (BFS), bundles output, respects depth', async () => {
|
244
|
-
// const maxDepth = 2;
|
245
|
-
// setupCrawlSimulation({
|
246
|
-
// [startUrl]: { html: page1HtmlWithLinks, links: ['/page2', page3Url] },
|
247
|
-
// [page2Url]: { html: page2HtmlNoLinks, links: [] },
|
248
|
-
// [page3Url]: { html: page3HtmlWithCycleLink, links: ['/'] }
|
249
|
-
// });
|
250
|
-
|
251
|
-
// const result = await recursivelyBundleSite(startUrl, outputPath, maxDepth);
|
252
|
-
|
253
|
-
// expect(mockLaunch).toHaveBeenCalledTimes(1);
|
254
|
-
// expect(mockNewPage).toHaveBeenCalledTimes(3);
|
255
|
-
// expect(mockPageGoto).toHaveBeenCalledTimes(3);
|
256
|
-
// expect(mockPageEvaluate).toHaveBeenCalledTimes(1); // d1 only
|
257
|
-
// expect(mockPageClose).toHaveBeenCalledTimes(3);
|
258
|
-
// expect(mockBrowserClose).toHaveBeenCalledTimes(1);
|
259
|
-
|
260
|
-
// const bundleArgs = mockBundleMultiPageHTMLFn.mock.calls[0][0] as PageEntry[];
|
261
|
-
// expect(bundleArgs).toHaveLength(3);
|
262
|
-
// expect(mockWriteFile).toHaveBeenCalledTimes(1);
|
263
|
-
// expect(result.pages).toBe(3);
|
264
|
-
// });
|
265
|
-
|
266
|
-
// it('🔁 obeys crawl depth limit (maxDepth = 1)', async () => {
|
267
|
-
// setupCrawlSimulation({ [startUrl]: { html: page1HtmlWithLinks, links: ['/page2'] } });
|
268
|
-
// const result = await recursivelyBundleSite(startUrl, outputPath, 1);
|
269
|
-
// expect(mockLaunch).toHaveBeenCalledTimes(1);
|
270
|
-
// expect(mockNewPage).toHaveBeenCalledTimes(1);
|
271
|
-
// expect(mockPageEvaluate).not.toHaveBeenCalled();
|
272
|
-
// expect(mockBundleMultiPageHTMLFn.mock.calls[0][0]).toHaveLength(1);
|
273
|
-
// expect(result.pages).toBe(1);
|
274
|
-
// });
|
275
|
-
|
276
|
-
it('S crawls using default maxDepth = 1 if not provided', async () => {
|
277
|
-
setupCrawlSimulation({ [startUrl]: { html: page1HtmlWithLinks, links: ['/page2'] } });
|
278
|
-
await recursivelyBundleSite(startUrl, outputPath); // No maxDepth
|
279
|
-
expect(mockLaunch).toHaveBeenCalledTimes(1);
|
280
|
-
expect(mockNewPage).toHaveBeenCalledTimes(1);
|
281
|
-
expect(mockPageEvaluate).not.toHaveBeenCalled();
|
282
|
-
expect(mockBundleMultiPageHTMLFn.mock.calls[0][0]).toHaveLength(1);
|
273
|
+
it('🚫 handles maxDepth = 0 correctly (fetches nothing, bundles nothing)', async () => {
|
274
|
+
const result = await recursivelyBundleSite(startUrl, outputPath, 0, loggerInstance);
|
275
|
+
expect(mockLaunch).not.toHaveBeenCalled();
|
276
|
+
expect(result.pages).toBe(0);
|
283
277
|
});
|
284
278
|
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
// expect(mockLaunch).toHaveBeenCalledTimes(1);
|
289
|
-
// expect(mockNewPage).not.toHaveBeenCalled();
|
290
|
-
// expect(mockBrowserClose).toHaveBeenCalledTimes(1);
|
291
|
-
// expect(mockBundleMultiPageHTMLFn).toHaveBeenCalledWith([]);
|
292
|
-
// expect(result.pages).toBe(0);
|
293
|
-
// });
|
294
|
-
|
295
|
-
// it('🔗 filters links correctly (internal, visited, origin, fragments, relative)', async () => {
|
296
|
-
// const maxDepth = 3;
|
297
|
-
// setupCrawlSimulation({
|
298
|
-
// [startUrl]: { html: pageHtmlWithVariousLinks, links: [ '/page2', 'relative.html', '/page3?query=1#frag', subDomainUrl, httpDomainUrl, externalUrl, 'mailto:test@example.com', 'javascript:void(0)', ':/invalid-href', '/page2#section' ] },
|
299
|
-
// [page2Url]: { html: page2HtmlNoLinks, links: ['page3'] },
|
300
|
-
// [page3Url]: { html: page3HtmlWithCycleLink, links: ['/', '/page2#a'] },
|
301
|
-
// [relativeUrl]: { html: 'Relative Page', links: [] }
|
302
|
-
// });
|
303
|
-
// await recursivelyBundleSite(startUrl, outputPath, maxDepth);
|
304
|
-
// expect(mockLaunch).toHaveBeenCalledTimes(1);
|
305
|
-
// expect(mockNewPage).toHaveBeenCalledTimes(4); // start, page2, page3, relative
|
306
|
-
// expect(mockPageGoto).toHaveBeenCalledTimes(4);
|
307
|
-
// expect(mockPageGoto).toHaveBeenCalledWith(startUrl, expect.anything());
|
308
|
-
// expect(mockPageGoto).toHaveBeenCalledWith(page2Url, expect.anything());
|
309
|
-
// expect(mockPageGoto).toHaveBeenCalledWith(page3Url, expect.anything());
|
310
|
-
// expect(mockPageGoto).toHaveBeenCalledWith(relativeUrl, expect.anything());
|
311
|
-
// expect(mockPageEvaluate).toHaveBeenCalledTimes(4); // d1, d2, d2, d2
|
312
|
-
// expect(mockBundleMultiPageHTMLFn.mock.calls[0][0]).toHaveLength(4);
|
313
|
-
// });
|
314
|
-
|
315
|
-
it('🔄 handles crawl cycles gracefully (visited set)', async () => {
|
279
|
+
it('🔗 filters links correctly (internal, visited, origin, fragments, relative)', async () => {
|
280
|
+
const maxDepth = 3;
|
281
|
+
// Setup simulation with a mix of links
|
316
282
|
setupCrawlSimulation({
|
317
|
-
[startUrl]: { html:
|
318
|
-
[page2Url]: { html:
|
319
|
-
[page3Url]: { html:
|
283
|
+
[startUrl]: { html: pageHtmlWithVariousLinks, links: [ '/page2', 'relative.html', '/page3?query=1#frag', subDomainUrl, httpDomainUrl, externalUrl, 'mailto:t@e.com', 'javascript:void(0)', ':/bad', '/page2#section'] },
|
284
|
+
[page2Url]: { html: page2HtmlNoLinks, links: ['/page3'] }, // Needs absolute path for key
|
285
|
+
[page3Url]: { html: page3HtmlWithCycleLink, links: ['/', '/page2#a'] },
|
286
|
+
[relativeUrl]: { html: 'Relative Page', links: [] } // Needs absolute path for key
|
320
287
|
});
|
321
|
-
await recursivelyBundleSite(startUrl, outputPath,
|
322
|
-
|
323
|
-
expect(
|
324
|
-
expect(
|
288
|
+
await recursivelyBundleSite(startUrl, outputPath, maxDepth, loggerInstance);
|
289
|
+
|
290
|
+
expect(mockNewPage).toHaveBeenCalledTimes(4); // startUrl, page2Url, relativeUrl, page3Url
|
291
|
+
expect(mockPageGoto).toHaveBeenCalledTimes(4);
|
292
|
+
// Evaluate called if depth < maxDepth
|
293
|
+
// startUrl (d1<3), page2Url (d2<3), relativeUrl (d2<3), page3Url (d3==3, NO)
|
294
|
+
expect(mockPageEvaluate).toHaveBeenCalledTimes(3);
|
295
|
+
expect(mockBundleMultiPageHTMLFn.mock.calls[0][0]).toHaveLength(4); // All 4 valid internal pages collected
|
325
296
|
});
|
326
297
|
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
298
|
+
|
299
|
+
it('🔄 handles crawl cycles gracefully (visited set)', async () => {
|
300
|
+
setupCrawlSimulation({
|
301
|
+
[startUrl]: { html: `<a>1</a>`, links: [page2Url] },
|
302
|
+
[page2Url]: { html: `<a>2</a>`, links: [page3Url] },
|
303
|
+
[page3Url]: { html: `<a>3</a>`, links: [startUrl, page2Url] } // Links back
|
304
|
+
});
|
305
|
+
await recursivelyBundleSite(startUrl, outputPath, 5, loggerInstance);
|
306
|
+
expect(mockNewPage).toHaveBeenCalledTimes(3); // Each visited only once
|
307
|
+
expect(mockPageGoto).toHaveBeenCalledTimes(3);
|
308
|
+
// Evaluate called if depth < maxDepth
|
309
|
+
// start (d1<5), page2 (d2<5), page3 (d3<5) -> YES for all 3
|
310
|
+
expect(mockPageEvaluate).toHaveBeenCalledTimes(3);
|
311
|
+
expect(mockBundleMultiPageHTMLFn.mock.calls[0][0]).toHaveLength(3);
|
312
|
+
});
|
313
|
+
|
314
|
+
it('🤕 handles fetch errors during crawl and continues (mocked)', async () => {
|
315
|
+
const errorUrl = page2Url;
|
316
|
+
const successUrl = page3Url;
|
317
|
+
const fetchError = new Error("Mock navigation failed!");
|
318
|
+
|
319
|
+
// Define the structure of the page data value
|
320
|
+
interface MockPageData {
|
321
|
+
html: string;
|
322
|
+
links?: string[];
|
323
|
+
}
|
324
|
+
|
325
|
+
// Explicitly type pagesData using Record<string, MockPageData>
|
326
|
+
const pagesData: Record<string, MockPageData> = {
|
327
|
+
[startUrl]: { html: `<html><body>Page 1 <a href="${errorUrl}">L2</a> <a href="${successUrl}">L3</a></body></html>`, links: [errorUrl, successUrl] },
|
328
|
+
// No entry for errorUrl
|
329
|
+
[successUrl]: { html: page2HtmlNoLinks, links: [] } // Page 3 successfully fetched
|
330
|
+
};
|
331
|
+
let currentUrlForTest = ''; // Local state for this test's mock
|
332
|
+
|
333
|
+
// Configure mocks directly for this test scenario
|
334
|
+
mockNewPage.mockImplementation(async () => mockPageObject as Page);
|
335
|
+
mockPageGoto.mockImplementation(async (url: string) => {
|
336
|
+
console.log(`[DEBUG MOCK - Error Test]: page.goto attempting: ${url}`);
|
337
|
+
currentUrlForTest = url;
|
338
|
+
if (url === errorUrl) {
|
339
|
+
console.log(`[DEBUG MOCK - Error Test]: Throwing for ${url}`);
|
340
|
+
throw fetchError;
|
341
|
+
}
|
342
|
+
console.log(`[DEBUG MOCK - Error Test]: Goto success for ${url}`);
|
343
|
+
return null;
|
344
|
+
});
|
345
|
+
mockPageUrl.mockImplementation(() => currentUrlForTest);
|
346
|
+
|
347
|
+
// These lines should now be type-safe because pagesData is a Record<string, ...>
|
348
|
+
mockPageContent.mockImplementation(async () => pagesData[currentUrlForTest]?.html ?? `<html><body>Mock Fallback for ${currentUrlForTest}</body></html>`);
|
349
|
+
const mockPageEvaluate = jest.fn<any>(); // Use any to simplify mock typing
|
350
|
+
// Run the function
|
351
|
+
const result = await recursivelyBundleSite(startUrl, outputPath, 2, loggerInstance);
|
352
|
+
|
353
|
+
// Assertions (remain the same)
|
354
|
+
expect(mockNewPage).toHaveBeenCalledTimes(3);
|
355
|
+
expect(mockPageGoto).toHaveBeenCalledTimes(3);
|
356
|
+
expect(mockPageClose).toHaveBeenCalledTimes(3);
|
357
|
+
expect(mockBrowserClose).toHaveBeenCalledTimes(1);
|
358
|
+
expect(loggerInstance.warn).toHaveBeenCalledTimes(1);
|
359
|
+
expect(loggerInstance.warn).toHaveBeenCalledWith(expect.stringContaining(`❌ Failed to process ${errorUrl}: ${fetchError.message}`));
|
360
|
+
expect(mockBundleMultiPageHTMLFn).toHaveBeenCalledTimes(1);
|
361
|
+
const bundledPages = mockBundleMultiPageHTMLFn.mock.calls[0][0];
|
362
|
+
expect(bundledPages).toHaveLength(2);
|
363
|
+
expect(bundledPages.find(p => p.url === startUrl)).toBeDefined();
|
364
|
+
expect(bundledPages.find(p => p.url === successUrl)).toBeDefined();
|
365
|
+
expect(result.pages).toBe(2);
|
366
|
+
});
|
367
|
+
|
368
|
+
it('📁 handles empty crawl result (e.g., initial fetch fails) (mocked)', async () => {
|
369
|
+
const initialFetchError = new Error("Initial goto failed");
|
370
|
+
|
371
|
+
// Specific mock setup for this test
|
372
|
+
// No need for pagesData as the first fetch fails
|
373
|
+
mockNewPage.mockImplementation(async () => mockPageObject as Page);
|
374
|
+
mockPageGoto.mockImplementation(async (url: string) => {
|
375
|
+
console.log(`[DEBUG MOCK - Initial Fail Test]: page.goto attempting: ${url}`);
|
376
|
+
if (url === startUrl) {
|
377
|
+
console.log(`[DEBUG MOCK - Initial Fail Test]: Throwing for ${url}`);
|
378
|
+
throw initialFetchError;
|
379
|
+
}
|
380
|
+
// Should not be called for other URLs in this test scenario
|
381
|
+
console.error(`[DEBUG MOCK - Initial Fail Test]: ERROR - goto called unexpectedly for ${url}`);
|
382
|
+
return null;
|
383
|
+
});
|
384
|
+
// Other mocks (content, evaluate) shouldn't be called if goto fails first
|
385
|
+
|
386
|
+
// Run the function
|
387
|
+
const result = await recursivelyBundleSite(startUrl, outputPath, 1, loggerInstance);
|
388
|
+
|
389
|
+
// Assertions
|
390
|
+
expect(mockLaunch).toHaveBeenCalledTimes(1);
|
391
|
+
expect(mockNewPage).toHaveBeenCalledTimes(1); // Attempted to open one page
|
392
|
+
expect(mockPageGoto).toHaveBeenCalledTimes(1); // Attempted to navigate once
|
393
|
+
expect(mockPageGoto).toHaveBeenCalledWith(startUrl, expect.anything());
|
394
|
+
expect(mockPageClose).toHaveBeenCalledTimes(1); // The single page attempt should be closed
|
395
|
+
expect(mockBrowserClose).toHaveBeenCalledTimes(1);
|
396
|
+
|
397
|
+
expect(loggerInstance.warn).toHaveBeenCalledTimes(1); // Expect exactly one warning
|
398
|
+
expect(loggerInstance.warn).toHaveBeenCalledWith(expect.stringContaining(`❌ Failed to process ${startUrl}: ${initialFetchError.message}`)); // Check message
|
399
|
+
|
400
|
+
expect(mockBundleMultiPageHTMLFn).toHaveBeenCalledTimes(1);
|
401
|
+
expect(mockBundleMultiPageHTMLFn).toHaveBeenCalledWith([], loggerInstance); // Ensure it bundles an empty array
|
402
|
+
|
403
|
+
expect(mockWriteFile).toHaveBeenCalledTimes(1); // Should still write the (empty) bundle
|
404
|
+
expect(result.pages).toBe(0); // Verify returned page count
|
405
|
+
});
|
406
|
+
|
407
|
+
it('💾 handles file write errors gracefully (mocked)', async () => {
|
408
|
+
const writeError = new Error("Disk full");
|
409
|
+
mockWriteFile.mockRejectedValueOnce(writeError);
|
410
|
+
setupCrawlSimulation({ [startUrl]: { html: page2HtmlNoLinks, links: [] } });
|
411
|
+
|
412
|
+
await expect(recursivelyBundleSite(startUrl, outputPath, 1, loggerInstance))
|
413
|
+
.rejects.toThrow(writeError);
|
414
|
+
|
415
|
+
expect(mockWriteFile).toHaveBeenCalledTimes(1);
|
416
|
+
expect(loggerInstance.error).toHaveBeenCalledWith(expect.stringContaining(`Error during recursive site bundle: ${writeError.message}`));
|
417
|
+
});
|
373
418
|
});
|
374
419
|
});
|