portapack 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,6 @@
1
1
  /**
2
2
  * @file tests/unit/core/web-fetcher.test.ts
3
3
  * @description Unit tests for the web page fetching and crawling logic (`web-fetcher.ts`).
4
- * Uses Jest mocks extensively to isolate the code under test from actual
5
- * Puppeteer operations and filesystem access, compatible with ESM.
6
4
  */
7
5
 
8
6
  // --- Type Imports ---
@@ -11,72 +9,56 @@ import type {
11
9
  Browser,
12
10
  HTTPResponse,
13
11
  GoToOptions,
14
- LaunchOptions
12
+ LaunchOptions,
13
+ Viewport,
14
+ EvaluateFunc,
15
+ ElementHandle,
16
+ // UserAgentMetadata
15
17
  } from 'puppeteer';
16
- import type { BuildResult, PageEntry } from '../../../src/types';
18
+ import type { BuildResult, PageEntry, BundleMetadata } from '../../../src/types';
17
19
  import { Logger } from '../../../src/utils/logger';
18
20
  import type { PathLike } from 'fs';
19
21
 
20
22
  // --- Jest Imports ---
21
- import { jest, describe, it, expect, beforeEach } from '@jest/globals';
22
-
23
- // --- Mocking Setup (using jest.unstable_mockModule) ---
24
-
25
- // Define Jest mock functions for Puppeteer methods and other dependencies
26
- const mockPageGoto = jest.fn<(url: string, options?: GoToOptions) => Promise<HTTPResponse | null>>();
27
- const mockPageContent = jest.fn<() => Promise<string>>();
28
- const mockPageEvaluate = jest.fn<(fn: any, ...args: any[]) => Promise<any>>();
29
- const mockPageClose = jest.fn<() => Promise<void>>();
30
- const mockPageSetViewport = jest.fn<(_viewport: { width: number, height: number }) => Promise<void>>();
31
- const mockPageUrl = jest.fn<() => string>();
32
- const mockPage$ = jest.fn<(selector: string) => Promise<any | null>>();
33
- const mockPage$$ = jest.fn<(selector: string) => Promise<any[]>>();
34
- const mockNewPage = jest.fn<() => Promise<Page>>();
35
- const mockBrowserClose = jest.fn<() => Promise<void>>();
23
+ import { jest, describe, it, expect, beforeEach, afterEach } from '@jest/globals';
24
+
25
+ // =================== MOCK SETUP ===================
26
+ const mockPageGoto = jest.fn<Page['goto']>();
27
+ const mockPageContent = jest.fn<Page['content']>();
28
+ const mockPageEvaluate = jest.fn<Page['evaluate']>();
29
+ const mockPageClose = jest.fn<Page['close']>();
30
+ const mockPageSetViewport = jest.fn<Page['setViewport']>();
31
+ const mockPageUrl = jest.fn<Page['url']>();
32
+ const mockPage$ = jest.fn<Page['$']>();
33
+ const mockPage$$ = jest.fn<Page['$$']>();
34
+ const mockPageIsClosed = jest.fn<Page['isClosed']>();
35
+ const mockPageSetUserAgent = jest.fn<Page['setUserAgent']>();
36
+ const mockNewPage = jest.fn<Browser['newPage']>();
37
+ const mockBrowserClose = jest.fn<Browser['close']>();
38
+ const mockBrowserProcess = jest.fn<Browser['process']>().mockReturnValue(null);
36
39
  const mockLaunch = jest.fn<(options?: LaunchOptions) => Promise<Browser>>();
40
+ const mockWriteFile = jest.fn<typeof import('fs/promises').writeFile>();
41
+ const mockBundleMultiPageHTMLFn = jest.fn<(pages: PageEntry[], logger?: Logger) => string>();
37
42
 
38
- const mockWriteFile = jest.fn<(path: PathLike | number, data: string | NodeJS.ArrayBufferView, options?: any) => Promise<void>>();
39
- const mockBundleMultiPageHTMLFn = jest.fn<(pages: PageEntry[]) => string>();
43
+ jest.mock('puppeteer', () => ({ __esModule: true, launch: mockLaunch, }));
44
+ jest.mock('fs/promises', () => ({ __esModule: true, writeFile: mockWriteFile, }));
45
+ jest.mock('../../../src/core/bundler', () => ({ __esModule: true, bundleMultiPageHTML: mockBundleMultiPageHTMLFn, }));
46
+ // ====================================================
40
47
 
41
- // --- Mock Core Dependencies ---
48
+ import { fetchAndPackWebPage, recursivelyBundleSite } from '../../../src/core/web-fetcher';
42
49
 
43
- // Mock the 'puppeteer' module
44
- jest.unstable_mockModule('puppeteer', () => ({
45
- launch: mockLaunch,
46
- }));
47
-
48
- // Mock 'fs/promises' - providing only named exports
49
- jest.unstable_mockModule('fs/promises', () => ({
50
- writeFile: mockWriteFile,
51
- // Add readFile, mkdir etc. mocks if web-fetcher.ts uses them
52
- }));
53
-
54
- // Mock the internal bundler module
55
- jest.unstable_mockModule('../../../src/core/bundler', () => ({
56
- bundleMultiPageHTML: mockBundleMultiPageHTMLFn,
57
- }));
58
-
59
-
60
- // --- Dynamic Import ---
61
- // Import the module under test *after* all mocks are set up
62
- // This should now work if the import in web-fetcher.ts is correct
63
- const { fetchAndPackWebPage, recursivelyBundleSite } = await import('../../../src/core/web-fetcher');
64
-
65
-
66
- // --- Test Suite Setup ---
67
50
  jest.setTimeout(60000);
68
51
 
69
52
  describe('🕸️ web-fetcher', () => {
70
- // Define mock browser/page objects using Partial/Pick
71
- let mockBrowserObject: Partial<Pick<Browser, 'newPage' | 'close'>>;
72
- let mockPageObject: Partial<Pick<Page, 'goto' | 'content' | 'close' | '$' | '$$' | 'evaluate' | 'url' | 'setViewport'>>;
53
+ let mockBrowserObject: Partial<Browser>;
54
+ let mockPageObject: Partial<Page>;
73
55
  let loggerInstance: Logger;
74
56
 
75
- // --- Constants for Tests --- (Ensure these are all defined)
57
+ // --- Constants ---
76
58
  const startUrl = 'https://test-crawl.site/';
77
59
  const page2Url = `${startUrl}page2`;
78
60
  const page3Url = `${startUrl}page3`;
79
- const relativeUrl = `${startUrl}relative.html`;
61
+ const relativeUrl = `${startUrl}relative.html`; // Absolute for mock key
80
62
  const subDomainUrl = 'https://sub.test-crawl.site/other';
81
63
  const httpDomainUrl = 'http://test-crawl.site/other';
82
64
  const externalUrl = 'https://othersite.com';
@@ -102,273 +84,336 @@ describe('🕸️ web-fetcher', () => {
102
84
 
103
85
  beforeEach(() => {
104
86
  jest.clearAllMocks();
105
-
106
- // Logger setup
107
- loggerInstance = new Logger(); // Use default level
87
+ loggerInstance = new Logger(); // Set to DEBUG for verbose mock logs if needed
108
88
  jest.spyOn(loggerInstance, 'debug');
109
89
  jest.spyOn(loggerInstance, 'warn');
110
90
  jest.spyOn(loggerInstance, 'error');
111
91
  jest.spyOn(loggerInstance, 'info');
112
92
 
113
- // --- Default Mock Configurations ---
93
+ // Assemble mock objects
94
+ mockPageObject = {
95
+ goto: mockPageGoto, content: mockPageContent, evaluate: mockPageEvaluate as any,
96
+ close: mockPageClose, setViewport: mockPageSetViewport, url: mockPageUrl,
97
+ $: mockPage$ as any, $$: mockPage$$ as any, isClosed: mockPageIsClosed,
98
+ setUserAgent: mockPageSetUserAgent
99
+ };
100
+ mockBrowserObject = { newPage: mockNewPage, close: mockBrowserClose, process: mockBrowserProcess };
101
+
102
+ // Default Mock Configurations
114
103
  mockPageGoto.mockResolvedValue(null);
115
104
  mockPageContent.mockResolvedValue('<html><body>Default Mock Page Content</body></html>');
116
- mockPageEvaluate.mockResolvedValue([]);
105
+ mockPageEvaluate.mockResolvedValue([]); // Default to no links
117
106
  mockPageClose.mockResolvedValue(undefined);
118
107
  mockPageSetViewport.mockResolvedValue(undefined);
119
- mockPageUrl.mockReturnValue(startUrl);
108
+ mockPageUrl.mockReturnValue(startUrl); // Default URL initially
120
109
  mockPage$.mockResolvedValue(null);
121
110
  mockPage$$.mockResolvedValue([]);
122
- mockNewPage.mockResolvedValue(mockPageObject as Page);
111
+ mockPageIsClosed.mockReturnValue(false);
112
+ mockPageSetUserAgent.mockResolvedValue(undefined);
113
+ mockNewPage.mockResolvedValue(mockPageObject as Page); // Ensure newPage returns the configured mock object
123
114
  mockBrowserClose.mockResolvedValue(undefined);
124
115
  mockLaunch.mockResolvedValue(mockBrowserObject as Browser);
125
116
  mockWriteFile.mockResolvedValue(undefined);
126
117
  mockBundleMultiPageHTMLFn.mockReturnValue(bundledHtmlResult);
127
-
128
- // Assemble mock objects
129
- mockPageObject = {
130
- goto: mockPageGoto, content: mockPageContent, evaluate: mockPageEvaluate,
131
- close: mockPageClose, setViewport: mockPageSetViewport, url: mockPageUrl,
132
- $: mockPage$, $$: mockPage$$,
133
- };
134
- mockBrowserObject = { newPage: mockNewPage, close: mockBrowserClose };
135
-
136
- // Re-configure mockNewPage implementation AFTER objects are defined
137
- mockNewPage.mockImplementation(async () => mockPageObject as Page);
138
118
  });
139
119
 
140
120
  // --- Test Suites ---
141
121
 
142
- describe('fetchAndPackWebPage()', () => {
143
- // Test cases from previous version should now work with correct mocking
144
- // ... (Keep all 5 fetchAndPackWebPage tests: ✅, 🚨, ❌, 💥content, 💥newpage) ...
145
- const testUrl = 'https://example-fetch.com'; // URL just used as input
146
-
147
- // it('✅ fetches rendered HTML using mocked Puppeteer', async () => {
148
- // const expectedHtml = '<html><body>Specific Mock Content</body></html>';
149
- // mockPageContent.mockResolvedValueOnce(expectedHtml); // Override mock for this test
150
-
151
- // const result = await fetchAndPackWebPage(testUrl, loggerInstance);
152
-
153
- // expect(mockLaunch).toHaveBeenCalledTimes(1);
154
- // expect(mockNewPage).toHaveBeenCalledTimes(1);
155
- // expect(mockPageGoto).toHaveBeenCalledWith(testUrl, expect.objectContaining({ waitUntil: 'networkidle2' }));
156
- // expect(mockPageContent).toHaveBeenCalledTimes(1);
157
- // expect(mockPageClose).toHaveBeenCalledTimes(1);
158
- // expect(mockBrowserClose).toHaveBeenCalledTimes(1);
159
- // expect(result.html).toBe(expectedHtml);
160
- // });
161
-
162
- // it('🚨 handles navigation timeout or failure gracefully (mocked)', async () => {
163
- // const testFailUrl = 'https://fail.test';
164
- // const navigationError = new Error('Navigation Timeout Exceeded: 30000ms exceeded');
165
- // mockPageGoto.mockRejectedValueOnce(navigationError); // Make the mocked goto fail
166
-
167
- // await expect(fetchAndPackWebPage(testFailUrl, loggerInstance))
168
- // .rejects.toThrow(navigationError);
169
-
170
- // expect(mockPageGoto).toHaveBeenCalledWith(testFailUrl, expect.anything());
171
- // expect(mockPageContent).not.toHaveBeenCalled();
172
- // expect(mockPageClose).toHaveBeenCalledTimes(1);
173
- // expect(mockBrowserClose).toHaveBeenCalledTimes(1);
174
- // });
175
122
 
123
+ describe('fetchAndPackWebPage()', () => {
124
+ const testUrl = 'https://example-fetch.com';
125
+ // --- fetchAndPackWebPage tests ---
126
+ it('✅ fetches rendered HTML using mocked Puppeteer', async () => {
127
+ const expectedHtml = '<html><body>Specific Mock Content</body></html>';
128
+ mockPageContent.mockResolvedValueOnce(expectedHtml);
129
+ const result = await fetchAndPackWebPage(testUrl, loggerInstance);
130
+ expect(mockLaunch).toHaveBeenCalledTimes(1);
131
+ expect(mockNewPage).toHaveBeenCalledTimes(1);
132
+ expect(mockPageGoto).toHaveBeenCalledWith(testUrl, expect.objectContaining({ waitUntil: 'networkidle2', timeout: 30000 }));
133
+ expect(mockPageContent).toHaveBeenCalledTimes(1);
134
+ expect(mockPageClose).toHaveBeenCalledTimes(1);
135
+ expect(mockBrowserClose).toHaveBeenCalledTimes(1);
136
+ expect(result.html).toBe(expectedHtml);
137
+ });
138
+ it('✅ handles custom timeout and userAgent options', async () => {
139
+ const customTimeout = 15000;
140
+ const customUA = "TestAgent/1.0";
141
+ mockPageContent.mockResolvedValueOnce("Custom UA Page");
142
+ await fetchAndPackWebPage(testUrl, loggerInstance, customTimeout, customUA);
143
+ expect(mockLaunch).toHaveBeenCalledTimes(1);
144
+ expect(mockNewPage).toHaveBeenCalledTimes(1);
145
+ expect(mockPageSetUserAgent).toHaveBeenCalledWith(customUA);
146
+ expect(mockPageGoto).toHaveBeenCalledWith(testUrl, expect.objectContaining({ timeout: customTimeout }));
147
+ expect(mockPageClose).toHaveBeenCalledTimes(1);
148
+ expect(mockBrowserClose).toHaveBeenCalledTimes(1);
149
+ });
150
+ it('🚨 handles navigation timeout or failure gracefully (mocked)', async () => {
151
+ const testFailUrl = 'https://fail.test';
152
+ const navigationError = new Error('Navigation Timeout Exceeded: 30000ms exceeded');
153
+ mockPageGoto.mockImplementationOnce(async (url) => { if (url === testFailUrl) throw navigationError; return null; });
154
+ await expect(fetchAndPackWebPage(testFailUrl, loggerInstance)).rejects.toThrow(navigationError);
155
+ expect(mockPageGoto).toHaveBeenCalledWith(testFailUrl, expect.anything());
156
+ expect(mockPageContent).not.toHaveBeenCalled();
157
+ expect(mockPageClose).toHaveBeenCalledTimes(1);
158
+ expect(mockBrowserClose).toHaveBeenCalledTimes(1);
159
+ });
176
160
  it('❌ handles browser launch errors gracefully (mocked)', async () => {
177
- const launchError = new Error('Failed to launch browser');
178
- mockLaunch.mockRejectedValueOnce(launchError);
161
+ const launchError = new Error('Failed to launch browser');
162
+ mockLaunch.mockRejectedValueOnce(launchError);
163
+ await expect(fetchAndPackWebPage(testUrl, loggerInstance)).rejects.toThrow(launchError);
164
+ expect(mockLaunch).toHaveBeenCalledTimes(1);
165
+ expect(mockNewPage).not.toHaveBeenCalled();
166
+ expect(mockBrowserClose).not.toHaveBeenCalled();
167
+ });
168
+ it('💥 handles errors during page content retrieval (mocked)', async () => {
169
+ const contentError = new Error('Failed to get page content');
170
+ mockPageGoto.mockResolvedValue(null);
171
+ mockPageContent.mockRejectedValueOnce(contentError);
172
+ await expect(fetchAndPackWebPage(testUrl, loggerInstance)).rejects.toThrow(contentError);
173
+ expect(mockPageGoto).toHaveBeenCalledTimes(1);
174
+ expect(mockPageContent).toHaveBeenCalledTimes(1);
175
+ expect(mockPageClose).toHaveBeenCalledTimes(1);
176
+ expect(mockBrowserClose).toHaveBeenCalledTimes(1);
177
+ });
178
+ it('💥 handles errors during new page creation (mocked)', async () => {
179
+ const newPageError = new Error('Failed to create new page');
180
+ mockLaunch.mockResolvedValue(mockBrowserObject as Browser);
181
+ mockNewPage.mockRejectedValueOnce(newPageError);
182
+ await expect(fetchAndPackWebPage(testUrl, loggerInstance)).rejects.toThrow(newPageError);
183
+ expect(mockLaunch).toHaveBeenCalledTimes(1);
184
+ expect(mockNewPage).toHaveBeenCalledTimes(1);
185
+ expect(mockPageGoto).not.toHaveBeenCalled();
186
+ expect(mockBrowserClose).toHaveBeenCalledTimes(1);
187
+ });
188
+ });
189
+
190
+
191
+ describe('recursivelyBundleSite()', () => {
192
+ // Helper function using the mocks - STATEFUL EVALUATE (Revised)
193
+ const setupCrawlSimulation = (pages: Record<string, { html: string; links?: string[] }>) => {
194
+ // State variable *within* the helper scope
195
+ let currentSimulatedUrl = '';
196
+
197
+ // Reset mocks each time setup is called
198
+ mockPageUrl.mockReset(); mockPageContent.mockReset();
199
+ mockPageEvaluate.mockReset(); mockPageGoto.mockReset();
200
+ mockNewPage.mockReset();
201
+
202
+ // newPage returns the shared page object
203
+ mockNewPage.mockImplementation(async () => mockPageObject as Page);
204
+
205
+ // goto updates the state variable *within this scope*
206
+ mockPageGoto.mockImplementation(async (url: string): Promise<HTTPResponse | null> => {
207
+ console.log(`DEBUG MOCK [Helper]: page.goto setting current URL to: ${url}`);
208
+ currentSimulatedUrl = url; // Update the variable in *this* closure
209
+ return null;
210
+ });
211
+
212
+ // url reads the state variable *from this scope*
213
+ mockPageUrl.mockImplementation((): string => {
214
+ return currentSimulatedUrl || startUrl;
215
+ });
216
+
217
+ // content reads the state variable *from this scope*
218
+ mockPageContent.mockImplementation(async (): Promise<string> => {
219
+ const urlNow = currentSimulatedUrl || startUrl;
220
+ return pages[urlNow]?.html ?? `<html><body>Fallback for ${urlNow}</body></html>`;
221
+ });
222
+
223
+ // evaluate reads state *from this scope* and returns links
224
+ // Needs 'as any' cast on the implementation due to complex signature
225
+ (mockPageEvaluate as any).mockImplementation(async () => {
226
+ const urlNow = currentSimulatedUrl || startUrl; // Read state from this closure
227
+ const links = pages[urlNow]?.links ?? []; // Get links based on current state
228
+ console.log(`DEBUG MOCK [Helper-Stateful]: page.evaluate for ${urlNow}. Returning links: ${JSON.stringify(links)}`);
229
+ return links; // Return only links
230
+ });
231
+ };
179
232
 
180
- await expect(fetchAndPackWebPage(testUrl, loggerInstance))
181
- .rejects.toThrow(launchError);
233
+
234
+ // --- recursivelyBundleSite tests ---
235
+ it('📄 crawls site recursively (BFS), bundles output, respects depth', async () => {
236
+ const maxDepth = 2;
237
+ setupCrawlSimulation({
238
+ [startUrl]: { html: page1HtmlWithLinks, links: ['/page2', page3Url] }, // Links for startUrl
239
+ [page2Url]: { html: page2HtmlNoLinks, links: [] }, // No links for page2
240
+ [page3Url]: { html: page3HtmlWithCycleLink, links: ['/'] } // Link back for page3
241
+ });
242
+
243
+ const result = await recursivelyBundleSite(startUrl, outputPath, maxDepth, loggerInstance);
182
244
 
183
245
  expect(mockLaunch).toHaveBeenCalledTimes(1);
184
- expect(mockNewPage).not.toHaveBeenCalled();
185
- expect(mockBrowserClose).not.toHaveBeenCalled();
246
+ // Check calls - SHOULD WORK NOW
247
+ expect(mockNewPage).toHaveBeenCalledTimes(3); // start, page2, page3
248
+ expect(mockPageGoto).toHaveBeenCalledTimes(3); // start, page2, page3
249
+ expect(mockPageEvaluate).toHaveBeenCalledTimes(1); // Only called for startUrl (depth 1 < maxDepth 2)
250
+ expect(mockPageClose).toHaveBeenCalledTimes(3);
251
+ expect(mockBrowserClose).toHaveBeenCalledTimes(1);
252
+ expect(mockBundleMultiPageHTMLFn).toHaveBeenCalledTimes(1);
253
+ const bundleArgs = mockBundleMultiPageHTMLFn.mock.calls[0][0] as PageEntry[];
254
+ expect(bundleArgs).toHaveLength(3); // Should collect all 3 pages
255
+ expect(result.pages).toBe(3);
186
256
  });
187
257
 
188
- // it('💥 handles errors during page content retrieval (mocked)', async () => {
189
- // const contentError = new Error('Failed to get page content');
190
- // mockPageGoto.mockResolvedValue(null); // Nav succeeds
191
- // mockPageContent.mockRejectedValueOnce(contentError); // Content fails
192
-
193
- // await expect(fetchAndPackWebPage(testUrl, loggerInstance))
194
- // .rejects.toThrow(contentError);
195
-
196
- // expect(mockPageGoto).toHaveBeenCalledTimes(1);
197
- // expect(mockPageContent).toHaveBeenCalledTimes(1); // Attempted
198
- // expect(mockPageClose).toHaveBeenCalledTimes(1);
199
- // expect(mockBrowserClose).toHaveBeenCalledTimes(1);
200
- // });
201
- // it('💥 handles errors during new page creation (mocked)', async () => {
202
- // const newPageError = new Error('Failed to create new page');
203
- // mockLaunch.mockResolvedValue(mockBrowserObject as Browser); // Launch succeeds
204
- // mockNewPage.mockRejectedValueOnce(newPageError); // newPage fails
205
-
206
- // // Act: Call the function and expect it to throw the error
207
- // await expect(fetchAndPackWebPage(testUrl, loggerInstance))
208
- // .rejects.toThrow(newPageError);
209
-
210
- // // Assert: Check the state *after* the error occurred
211
- // expect(mockLaunch).toHaveBeenCalledTimes(1);
212
- // // REMOVED: mockNewPage.mockResolvedValueOnce(mockPage); // This line was incorrect and unnecessary
213
- // expect(mockNewPage).toHaveBeenCalledTimes(1); // Verify newPage was attempted
214
- // expect(mockPageGoto).not.toHaveBeenCalled(); // Navigation should not happen if newPage fails
215
- // expect(mockBrowserClose).toHaveBeenCalledTimes(1); // Cleanup should still run
216
- // });
217
- });
258
+ it('🔁 obeys crawl depth limit (maxDepth = 1)', async () => {
259
+ setupCrawlSimulation({ [startUrl]: { html: page1HtmlWithLinks, links: ['/page2'] } });
260
+ const result = await recursivelyBundleSite(startUrl, outputPath, 1, loggerInstance);
261
+ expect(mockNewPage).toHaveBeenCalledTimes(1); // Only startUrl
262
+ expect(mockPageEvaluate).not.toHaveBeenCalled(); // Depth 1 not < maxDepth 1
263
+ expect(result.pages).toBe(1);
264
+ });
218
265
 
219
- describe('recursivelyBundleSite()', () => {
220
- // Uses the MOCKED puppeteer functions via crawlWebsite internal calls
266
+ it('S crawls using default maxDepth = 1 if not provided', async () => {
267
+ setupCrawlSimulation({ [startUrl]: { html: page1HtmlWithLinks, links: ['/page2'] } });
268
+ await recursivelyBundleSite(startUrl, outputPath, undefined, loggerInstance);
269
+ expect(mockNewPage).toHaveBeenCalledTimes(1);
270
+ expect(mockPageEvaluate).not.toHaveBeenCalled();
271
+ });
221
272
 
222
- const setupCrawlSimulation = (pages: Record<string, { html: string; links?: string[] }>) => {
223
- mockPageUrl.mockImplementation(() => {
224
- const gotoCalls = mockPageGoto.mock.calls;
225
- return gotoCalls.length > 0 ? gotoCalls[gotoCalls.length - 1][0] : startUrl;
226
- });
227
- mockPageContent.mockImplementation(async () => {
228
- const currentUrl = mockPageUrl();
229
- return pages[currentUrl]?.html ?? `<html><body>Fallback for ${currentUrl}</body></html>`;
230
- });
231
- mockPageEvaluate.mockImplementation(async (evalFn: any) => {
232
- if (typeof evalFn === 'function' && evalFn.toString().includes('querySelectorAll')) {
233
- const currentUrl = mockPageUrl();
234
- return pages[currentUrl]?.links ?? [];
235
- }
236
- return [];
237
- });
238
- mockNewPage.mockImplementation(async () => mockPageObject as Page);
239
- };
240
-
241
- // Test cases from previous version should now work with correct mocking
242
- // ... (Keep all 9 recursivelyBundleSite tests: 📄, 🔁, S, 🚫, 🔗, 🔄, 🤕, 📁, 💾) ...
243
- // it('📄 crawls site recursively (BFS), bundles output, respects depth', async () => {
244
- // const maxDepth = 2;
245
- // setupCrawlSimulation({
246
- // [startUrl]: { html: page1HtmlWithLinks, links: ['/page2', page3Url] },
247
- // [page2Url]: { html: page2HtmlNoLinks, links: [] },
248
- // [page3Url]: { html: page3HtmlWithCycleLink, links: ['/'] }
249
- // });
250
-
251
- // const result = await recursivelyBundleSite(startUrl, outputPath, maxDepth);
252
-
253
- // expect(mockLaunch).toHaveBeenCalledTimes(1);
254
- // expect(mockNewPage).toHaveBeenCalledTimes(3);
255
- // expect(mockPageGoto).toHaveBeenCalledTimes(3);
256
- // expect(mockPageEvaluate).toHaveBeenCalledTimes(1); // d1 only
257
- // expect(mockPageClose).toHaveBeenCalledTimes(3);
258
- // expect(mockBrowserClose).toHaveBeenCalledTimes(1);
259
-
260
- // const bundleArgs = mockBundleMultiPageHTMLFn.mock.calls[0][0] as PageEntry[];
261
- // expect(bundleArgs).toHaveLength(3);
262
- // expect(mockWriteFile).toHaveBeenCalledTimes(1);
263
- // expect(result.pages).toBe(3);
264
- // });
265
-
266
- // it('🔁 obeys crawl depth limit (maxDepth = 1)', async () => {
267
- // setupCrawlSimulation({ [startUrl]: { html: page1HtmlWithLinks, links: ['/page2'] } });
268
- // const result = await recursivelyBundleSite(startUrl, outputPath, 1);
269
- // expect(mockLaunch).toHaveBeenCalledTimes(1);
270
- // expect(mockNewPage).toHaveBeenCalledTimes(1);
271
- // expect(mockPageEvaluate).not.toHaveBeenCalled();
272
- // expect(mockBundleMultiPageHTMLFn.mock.calls[0][0]).toHaveLength(1);
273
- // expect(result.pages).toBe(1);
274
- // });
275
-
276
- it('S crawls using default maxDepth = 1 if not provided', async () => {
277
- setupCrawlSimulation({ [startUrl]: { html: page1HtmlWithLinks, links: ['/page2'] } });
278
- await recursivelyBundleSite(startUrl, outputPath); // No maxDepth
279
- expect(mockLaunch).toHaveBeenCalledTimes(1);
280
- expect(mockNewPage).toHaveBeenCalledTimes(1);
281
- expect(mockPageEvaluate).not.toHaveBeenCalled();
282
- expect(mockBundleMultiPageHTMLFn.mock.calls[0][0]).toHaveLength(1);
273
+ it('🚫 handles maxDepth = 0 correctly (fetches nothing, bundles nothing)', async () => {
274
+ const result = await recursivelyBundleSite(startUrl, outputPath, 0, loggerInstance);
275
+ expect(mockLaunch).not.toHaveBeenCalled();
276
+ expect(result.pages).toBe(0);
283
277
  });
284
278
 
285
- // it('🚫 handles maxDepth = 0 correctly (fetches nothing)', async () => {
286
- // setupCrawlSimulation({ [startUrl]: { html: page1HtmlWithLinks } });
287
- // const result = await recursivelyBundleSite(startUrl, outputPath, 0);
288
- // expect(mockLaunch).toHaveBeenCalledTimes(1);
289
- // expect(mockNewPage).not.toHaveBeenCalled();
290
- // expect(mockBrowserClose).toHaveBeenCalledTimes(1);
291
- // expect(mockBundleMultiPageHTMLFn).toHaveBeenCalledWith([]);
292
- // expect(result.pages).toBe(0);
293
- // });
294
-
295
- // it('🔗 filters links correctly (internal, visited, origin, fragments, relative)', async () => {
296
- // const maxDepth = 3;
297
- // setupCrawlSimulation({
298
- // [startUrl]: { html: pageHtmlWithVariousLinks, links: [ '/page2', 'relative.html', '/page3?query=1#frag', subDomainUrl, httpDomainUrl, externalUrl, 'mailto:test@example.com', 'javascript:void(0)', ':/invalid-href', '/page2#section' ] },
299
- // [page2Url]: { html: page2HtmlNoLinks, links: ['page3'] },
300
- // [page3Url]: { html: page3HtmlWithCycleLink, links: ['/', '/page2#a'] },
301
- // [relativeUrl]: { html: 'Relative Page', links: [] }
302
- // });
303
- // await recursivelyBundleSite(startUrl, outputPath, maxDepth);
304
- // expect(mockLaunch).toHaveBeenCalledTimes(1);
305
- // expect(mockNewPage).toHaveBeenCalledTimes(4); // start, page2, page3, relative
306
- // expect(mockPageGoto).toHaveBeenCalledTimes(4);
307
- // expect(mockPageGoto).toHaveBeenCalledWith(startUrl, expect.anything());
308
- // expect(mockPageGoto).toHaveBeenCalledWith(page2Url, expect.anything());
309
- // expect(mockPageGoto).toHaveBeenCalledWith(page3Url, expect.anything());
310
- // expect(mockPageGoto).toHaveBeenCalledWith(relativeUrl, expect.anything());
311
- // expect(mockPageEvaluate).toHaveBeenCalledTimes(4); // d1, d2, d2, d2
312
- // expect(mockBundleMultiPageHTMLFn.mock.calls[0][0]).toHaveLength(4);
313
- // });
314
-
315
- it('🔄 handles crawl cycles gracefully (visited set)', async () => {
279
+ it('🔗 filters links correctly (internal, visited, origin, fragments, relative)', async () => {
280
+ const maxDepth = 3;
281
+ // Setup simulation with a mix of links
316
282
  setupCrawlSimulation({
317
- [startUrl]: { html: `<a>1</a>`, links: [page2Url] },
318
- [page2Url]: { html: `<a>2</a>`, links: [page3Url] },
319
- [page3Url]: { html: `<a>3</a>`, links: [startUrl, page2Url] } // Links back
283
+ [startUrl]: { html: pageHtmlWithVariousLinks, links: [ '/page2', 'relative.html', '/page3?query=1#frag', subDomainUrl, httpDomainUrl, externalUrl, 'mailto:t@e.com', 'javascript:void(0)', ':/bad', '/page2#section'] },
284
+ [page2Url]: { html: page2HtmlNoLinks, links: ['/page3'] }, // Needs absolute path for key
285
+ [page3Url]: { html: page3HtmlWithCycleLink, links: ['/', '/page2#a'] },
286
+ [relativeUrl]: { html: 'Relative Page', links: [] } // Needs absolute path for key
320
287
  });
321
- await recursivelyBundleSite(startUrl, outputPath, 5);
322
- expect(mockNewPage).toHaveBeenCalledTimes(3); // Visited once each
323
- expect(mockPageGoto).toHaveBeenCalledTimes(3);
324
- expect(mockBundleMultiPageHTMLFn.mock.calls[0][0]).toHaveLength(3);
288
+ await recursivelyBundleSite(startUrl, outputPath, maxDepth, loggerInstance);
289
+
290
+ expect(mockNewPage).toHaveBeenCalledTimes(4); // startUrl, page2Url, relativeUrl, page3Url
291
+ expect(mockPageGoto).toHaveBeenCalledTimes(4);
292
+ // Evaluate called if depth < maxDepth
293
+ // startUrl (d1<3), page2Url (d2<3), relativeUrl (d2<3), page3Url (d3==3, NO)
294
+ expect(mockPageEvaluate).toHaveBeenCalledTimes(3);
295
+ expect(mockBundleMultiPageHTMLFn.mock.calls[0][0]).toHaveLength(4); // All 4 valid internal pages collected
325
296
  });
326
297
 
327
- // it('🤕 handles fetch errors during crawl and continues (mocked)', async () => {
328
- // const errorUrl = page2Url;
329
- // const successUrl = page3Url;
330
- // const fetchError = new Error("Mock navigation failed!");
331
- // setupCrawlSimulation({
332
- // [startUrl]: { html: page1HtmlWithLinks, links: [errorUrl, successUrl] },
333
- // [errorUrl]: { html: 'Error page HTML' },
334
- // [successUrl]: { html: page2HtmlNoLinks, links: [] }
335
- // });
336
- // mockPageGoto.mockImplementation(async (url) => { if (url === errorUrl) throw fetchError; return null; });
337
- // const result = await recursivelyBundleSite(startUrl, outputPath, 2);
338
- // expect(mockNewPage).toHaveBeenCalledTimes(3);
339
- // expect(mockPageGoto).toHaveBeenCalledTimes(3);
340
- // expect(mockPageClose).toHaveBeenCalledTimes(3);
341
- // expect(loggerInstance.warn).toHaveBeenCalledWith(expect.stringContaining(`❌ Failed to process ${errorUrl}: ${fetchError.message}`));
342
- // expect(mockBundleMultiPageHTMLFn.mock.calls[0][0]).toHaveLength(2); // Successes only
343
- // expect(result.pages).toBe(2);
344
- // });
345
-
346
- // it('📁 handles empty crawl result (e.g., initial fetch fails) (mocked)', async () => {
347
- // const initialFetchError = new Error("Initial goto failed");
348
- // mockPageGoto.mockImplementation(async (url) => { if (url === startUrl) throw initialFetchError; return null; });
349
- // setupCrawlSimulation({ [startUrl]: { html: '' } });
350
- // const result = await recursivelyBundleSite(startUrl, outputPath, 1);
351
- // expect(mockNewPage).toHaveBeenCalledTimes(1);
352
- // expect(mockPageClose).toHaveBeenCalledTimes(1);
353
- // expect(mockBrowserClose).toHaveBeenCalledTimes(1);
354
- // expect(loggerInstance.warn).toHaveBeenCalledWith(expect.stringContaining(`❌ Failed to process ${startUrl}: ${initialFetchError.message}`));
355
- // expect(mockBundleMultiPageHTMLFn).toHaveBeenCalledWith([]);
356
- // expect(result.pages).toBe(0);
357
- // });
358
-
359
- // it('💾 handles file write errors gracefully (mocked)', async () => {
360
- // const writeError = new Error("Disk full");
361
- // mockWriteFile.mockRejectedValueOnce(writeError);
362
- // setupCrawlSimulation({ [startUrl]: { html: page2HtmlNoLinks, links: [] } });
363
-
364
- // await expect(recursivelyBundleSite(startUrl, outputPath, 1))
365
- // .rejects.toThrow(writeError);
366
-
367
- // expect(mockNewPage).toHaveBeenCalledTimes(1); // Crawl happened
368
- // expect(mockBundleMultiPageHTMLFn).toHaveBeenCalledTimes(1); // Bundle attempted
369
- // expect(mockWriteFile).toHaveBeenCalledTimes(1); // Write attempted
370
- // expect(mockBrowserClose).toHaveBeenCalledTimes(1); // Cleanup happened
371
- // expect(loggerInstance.error).toHaveBeenCalledWith(expect.stringContaining(`Error during recursive site bundle: ${writeError.message}`));
372
- // });
298
+
299
+ it('🔄 handles crawl cycles gracefully (visited set)', async () => {
300
+ setupCrawlSimulation({
301
+ [startUrl]: { html: `<a>1</a>`, links: [page2Url] },
302
+ [page2Url]: { html: `<a>2</a>`, links: [page3Url] },
303
+ [page3Url]: { html: `<a>3</a>`, links: [startUrl, page2Url] } // Links back
304
+ });
305
+ await recursivelyBundleSite(startUrl, outputPath, 5, loggerInstance);
306
+ expect(mockNewPage).toHaveBeenCalledTimes(3); // Each visited only once
307
+ expect(mockPageGoto).toHaveBeenCalledTimes(3);
308
+ // Evaluate called if depth < maxDepth
309
+ // start (d1<5), page2 (d2<5), page3 (d3<5) -> YES for all 3
310
+ expect(mockPageEvaluate).toHaveBeenCalledTimes(3);
311
+ expect(mockBundleMultiPageHTMLFn.mock.calls[0][0]).toHaveLength(3);
312
+ });
313
+
314
// Verifies that a navigation failure on one linked page is reported as a single
// warning and skipped, while the other pages are still crawled and bundled.
it('🤕 handles fetch errors during crawl and continues (mocked)', async () => {
  const errorUrl = page2Url;
  const successUrl = page3Url;
  const fetchError = new Error("Mock navigation failed!");

  // Shape of one simulated page: its HTML and the links "found" on it.
  interface MockPageData {
    html: string;
    links?: string[];
  }

  // Record<string, ...> so the map can be indexed by an arbitrary URL string.
  const pagesData: Record<string, MockPageData> = {
    [startUrl]: { html: `<html><body>Page 1 <a href="${errorUrl}">L2</a> <a href="${successUrl}">L3</a></body></html>`, links: [errorUrl, successUrl] },
    // Deliberately no entry for errorUrl: the goto mock below throws for it.
    [successUrl]: { html: page2HtmlNoLinks, links: [] } // Page 3 successfully fetched
  };

  // Last URL handed to page.goto; the url/content/evaluate mocks answer for it.
  let currentUrlForTest = '';

  // Configure mocks directly for this test scenario
  mockNewPage.mockImplementation(async () => mockPageObject as Page);
  mockPageGoto.mockImplementation(async (url: string) => {
    console.log(`[DEBUG MOCK - Error Test]: page.goto attempting: ${url}`);
    currentUrlForTest = url;
    if (url === errorUrl) {
      console.log(`[DEBUG MOCK - Error Test]: Throwing for ${url}`);
      throw fetchError;
    }
    console.log(`[DEBUG MOCK - Error Test]: Goto success for ${url}`);
    return null;
  });
  mockPageUrl.mockImplementation(() => currentUrlForTest);
  mockPageContent.mockImplementation(async () => pagesData[currentUrlForTest]?.html ?? `<html><body>Mock Fallback for ${currentUrlForTest}</body></html>`);

  // Configure the SHARED evaluate mock. The previous code declared a new local
  // `mockPageEvaluate` here, which shadowed the shared mock and was never used,
  // so the links in pagesData never reached the crawler.
  mockPageEvaluate.mockImplementation(async () => pagesData[currentUrlForTest]?.links ?? []);

  // Run the function
  const result = await recursivelyBundleSite(startUrl, outputPath, 2, loggerInstance);

  // All three URLs are attempted and every opened page is closed again,
  // including the one whose navigation failed.
  expect(mockNewPage).toHaveBeenCalledTimes(3);
  expect(mockPageGoto).toHaveBeenCalledTimes(3);
  expect(mockPageClose).toHaveBeenCalledTimes(3);
  expect(mockBrowserClose).toHaveBeenCalledTimes(1);

  // Exactly one warning, naming the failed URL and the underlying error.
  expect(loggerInstance.warn).toHaveBeenCalledTimes(1);
  expect(loggerInstance.warn).toHaveBeenCalledWith(expect.stringContaining(`❌ Failed to process ${errorUrl}: ${fetchError.message}`));

  // Only the two successfully fetched pages are bundled and counted.
  expect(mockBundleMultiPageHTMLFn).toHaveBeenCalledTimes(1);
  const bundledPages = mockBundleMultiPageHTMLFn.mock.calls[0][0];
  expect(bundledPages).toHaveLength(2);
  expect(bundledPages.find(p => p.url === startUrl)).toBeDefined();
  expect(bundledPages.find(p => p.url === successUrl)).toBeDefined();
  expect(result.pages).toBe(2);
});
367
+
368
// Scenario: the very first navigation fails, so the crawl yields no pages.
// The test expects an empty bundle to still be produced and written.
it('📁 handles empty crawl result (e.g., initial fetch fails) (mocked)', async () => {
  const initialFetchError = new Error("Initial goto failed");

  // No page data is needed: navigation to the start URL throws straight away.
  mockNewPage.mockImplementation(async () => mockPageObject as Page);
  mockPageGoto.mockImplementation(async (url: string) => {
    console.log(`[DEBUG MOCK - Initial Fail Test]: page.goto attempting: ${url}`);
    if (url !== startUrl) {
      // Any other URL is unexpected in this scenario; flag it but do not throw.
      console.error(`[DEBUG MOCK - Initial Fail Test]: ERROR - goto called unexpectedly for ${url}`);
      return null;
    }
    console.log(`[DEBUG MOCK - Initial Fail Test]: Throwing for ${url}`);
    throw initialFetchError;
  });
  // The content and evaluate mocks are left alone; this test asserts nothing about them.

  const result = await recursivelyBundleSite(startUrl, outputPath, 1, loggerInstance);

  // One browser, one page attempt, one navigation, then both closed.
  expect(mockLaunch).toHaveBeenCalledTimes(1);
  expect(mockNewPage).toHaveBeenCalledTimes(1);
  expect(mockPageGoto).toHaveBeenCalledTimes(1);
  expect(mockPageGoto).toHaveBeenCalledWith(startUrl, expect.anything());
  expect(mockPageClose).toHaveBeenCalledTimes(1);
  expect(mockBrowserClose).toHaveBeenCalledTimes(1);

  // The failure is reported as exactly one warning with the expected text.
  const expectedWarning = `❌ Failed to process ${startUrl}: ${initialFetchError.message}`;
  expect(loggerInstance.warn).toHaveBeenCalledTimes(1);
  expect(loggerInstance.warn).toHaveBeenCalledWith(expect.stringContaining(expectedWarning));

  // The bundler is still invoked, with an empty page list.
  expect(mockBundleMultiPageHTMLFn).toHaveBeenCalledTimes(1);
  expect(mockBundleMultiPageHTMLFn).toHaveBeenCalledWith([], loggerInstance);

  // The (empty) bundle is written and the reported page count is zero.
  expect(mockWriteFile).toHaveBeenCalledTimes(1);
  expect(result.pages).toBe(0);
});
406
+
407
// Scenario: the crawl succeeds but writing the bundle fails. The write error
// must reach the caller as a rejection and be logged.
it('💾 handles file write errors gracefully (mocked)', async () => {
  const diskFullError = new Error("Disk full");

  // Single-page site with no links; only the first write attempt rejects.
  mockWriteFile.mockRejectedValueOnce(diskFullError);
  setupCrawlSimulation({ [startUrl]: { html: page2HtmlNoLinks, links: [] } });

  const bundling = recursivelyBundleSite(startUrl, outputPath, 1, loggerInstance);
  await expect(bundling).rejects.toThrow(diskFullError);

  // Exactly one write was attempted, and the failure was logged as an error.
  expect(mockWriteFile).toHaveBeenCalledTimes(1);
  const expectedLogFragment = `Error during recursive site bundle: ${diskFullError.message}`;
  expect(loggerInstance.error).toHaveBeenCalledWith(expect.stringContaining(expectedLogFragment));
});
373
418
  });
374
419
  });