portapack 0.2.1 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,6 @@
1
1
  /**
2
2
  * @file tests/unit/core/web-fetcher.test.ts
3
3
  * @description Unit tests for the web page fetching and crawling logic (`web-fetcher.ts`).
4
- * Uses Jest mocks extensively to isolate the code under test from actual
5
- * Puppeteer operations and filesystem access, compatible with ESM.
6
4
  */
7
5
 
8
6
  // --- Type Imports ---
@@ -11,72 +9,56 @@ import type {
11
9
  Browser,
12
10
  HTTPResponse,
13
11
  GoToOptions,
14
- LaunchOptions
12
+ LaunchOptions,
13
+ Viewport,
14
+ EvaluateFunc,
15
+ ElementHandle,
16
+ // UserAgentMetadata
15
17
  } from 'puppeteer';
16
- import type { BuildResult, PageEntry } from '../../../src/types';
18
+ import type { BuildResult, PageEntry, BundleMetadata } from '../../../src/types';
17
19
  import { Logger } from '../../../src/utils/logger';
18
20
  import type { PathLike } from 'fs';
19
21
 
20
22
  // --- Jest Imports ---
21
- import { jest, describe, it, expect, beforeEach } from '@jest/globals';
22
-
23
- // --- Mocking Setup (using jest.unstable_mockModule) ---
24
-
25
- // Define Jest mock functions for Puppeteer methods and other dependencies
26
- const mockPageGoto = jest.fn<(url: string, options?: GoToOptions) => Promise<HTTPResponse | null>>();
27
- const mockPageContent = jest.fn<() => Promise<string>>();
28
- const mockPageEvaluate = jest.fn<(fn: any, ...args: any[]) => Promise<any>>();
29
- const mockPageClose = jest.fn<() => Promise<void>>();
30
- const mockPageSetViewport = jest.fn<(_viewport: { width: number, height: number }) => Promise<void>>();
31
- const mockPageUrl = jest.fn<() => string>();
32
- const mockPage$ = jest.fn<(selector: string) => Promise<any | null>>();
33
- const mockPage$$ = jest.fn<(selector: string) => Promise<any[]>>();
34
- const mockNewPage = jest.fn<() => Promise<Page>>();
35
- const mockBrowserClose = jest.fn<() => Promise<void>>();
23
+ import { jest, describe, it, expect, beforeEach, afterEach } from '@jest/globals';
24
+
25
+ // =================== MOCK SETUP ===================
26
+ const mockPageGoto = jest.fn<Page['goto']>();
27
+ const mockPageContent = jest.fn<Page['content']>();
28
+ const mockPageEvaluate = jest.fn<Page['evaluate']>();
29
+ const mockPageClose = jest.fn<Page['close']>();
30
+ const mockPageSetViewport = jest.fn<Page['setViewport']>();
31
+ const mockPageUrl = jest.fn<Page['url']>();
32
+ const mockPage$ = jest.fn<Page['$']>();
33
+ const mockPage$$ = jest.fn<Page['$$']>();
34
+ const mockPageIsClosed = jest.fn<Page['isClosed']>();
35
+ const mockPageSetUserAgent = jest.fn<Page['setUserAgent']>();
36
+ const mockNewPage = jest.fn<Browser['newPage']>();
37
+ const mockBrowserClose = jest.fn<Browser['close']>();
38
+ const mockBrowserProcess = jest.fn<Browser['process']>().mockReturnValue(null);
36
39
  const mockLaunch = jest.fn<(options?: LaunchOptions) => Promise<Browser>>();
40
+ const mockWriteFile = jest.fn<typeof import('fs/promises').writeFile>();
41
+ const mockBundleMultiPageHTMLFn = jest.fn<(pages: PageEntry[], logger?: Logger) => string>();
37
42
 
38
- const mockWriteFile = jest.fn<(path: PathLike | number, data: string | NodeJS.ArrayBufferView, options?: any) => Promise<void>>();
39
- const mockBundleMultiPageHTMLFn = jest.fn<(pages: PageEntry[]) => string>();
43
+ jest.mock('puppeteer', () => ({ __esModule: true, launch: mockLaunch, }));
44
+ jest.mock('fs/promises', () => ({ __esModule: true, writeFile: mockWriteFile, }));
45
+ jest.mock('../../../src/core/bundler', () => ({ __esModule: true, bundleMultiPageHTML: mockBundleMultiPageHTMLFn, }));
46
+ // ====================================================
40
47
 
41
- // --- Mock Core Dependencies ---
48
+ import { fetchAndPackWebPage, recursivelyBundleSite } from '../../../src/core/web-fetcher';
42
49
 
43
- // Mock the 'puppeteer' module
44
- jest.unstable_mockModule('puppeteer', () => ({
45
- launch: mockLaunch,
46
- }));
47
-
48
- // Mock 'fs/promises' - providing only named exports
49
- jest.unstable_mockModule('fs/promises', () => ({
50
- writeFile: mockWriteFile,
51
- // Add readFile, mkdir etc. mocks if web-fetcher.ts uses them
52
- }));
53
-
54
- // Mock the internal bundler module
55
- jest.unstable_mockModule('../../../src/core/bundler', () => ({
56
- bundleMultiPageHTML: mockBundleMultiPageHTMLFn,
57
- }));
58
-
59
-
60
- // --- Dynamic Import ---
61
- // Import the module under test *after* all mocks are set up
62
- // This should now work if the import in web-fetcher.ts is correct
63
- const { fetchAndPackWebPage, recursivelyBundleSite } = await import('../../../src/core/web-fetcher');
64
-
65
-
66
- // --- Test Suite Setup ---
67
50
  jest.setTimeout(60000);
68
51
 
69
52
  describe('🕸️ web-fetcher', () => {
70
- // Define mock browser/page objects using Partial/Pick
71
- let mockBrowserObject: Partial<Pick<Browser, 'newPage' | 'close'>>;
72
- let mockPageObject: Partial<Pick<Page, 'goto' | 'content' | 'close' | '$' | '$$' | 'evaluate' | 'url' | 'setViewport'>>;
53
+ let mockBrowserObject: Partial<Browser>;
54
+ let mockPageObject: Partial<Page>;
73
55
  let loggerInstance: Logger;
74
56
 
75
- // --- Constants for Tests --- (Ensure these are all defined)
57
+ // --- Constants ---
76
58
  const startUrl = 'https://test-crawl.site/';
77
59
  const page2Url = `${startUrl}page2`;
78
60
  const page3Url = `${startUrl}page3`;
79
- const relativeUrl = `${startUrl}relative.html`;
61
+ const relativeUrl = `${startUrl}relative.html`; // Absolute for mock key
80
62
  const subDomainUrl = 'https://sub.test-crawl.site/other';
81
63
  const httpDomainUrl = 'http://test-crawl.site/other';
82
64
  const externalUrl = 'https://othersite.com';
@@ -102,273 +84,336 @@ describe('🕸️ web-fetcher', () => {
102
84
 
103
85
  beforeEach(() => {
104
86
  jest.clearAllMocks();
105
-
106
- // Logger setup
107
- loggerInstance = new Logger(); // Use default level
87
+ loggerInstance = new Logger(); // Set to DEBUG for verbose mock logs if needed
108
88
  jest.spyOn(loggerInstance, 'debug');
109
89
  jest.spyOn(loggerInstance, 'warn');
110
90
  jest.spyOn(loggerInstance, 'error');
111
91
  jest.spyOn(loggerInstance, 'info');
112
92
 
113
- // --- Default Mock Configurations ---
93
+ // Assemble mock objects
94
+ mockPageObject = {
95
+ goto: mockPageGoto, content: mockPageContent, evaluate: mockPageEvaluate as any,
96
+ close: mockPageClose, setViewport: mockPageSetViewport, url: mockPageUrl,
97
+ $: mockPage$ as any, $$: mockPage$$ as any, isClosed: mockPageIsClosed,
98
+ setUserAgent: mockPageSetUserAgent
99
+ };
100
+ mockBrowserObject = { newPage: mockNewPage, close: mockBrowserClose, process: mockBrowserProcess };
101
+
102
+ // Default Mock Configurations
114
103
  mockPageGoto.mockResolvedValue(null);
115
104
  mockPageContent.mockResolvedValue('<html><body>Default Mock Page Content</body></html>');
116
- mockPageEvaluate.mockResolvedValue([]);
105
+ mockPageEvaluate.mockResolvedValue([]); // Default to no links
117
106
  mockPageClose.mockResolvedValue(undefined);
118
107
  mockPageSetViewport.mockResolvedValue(undefined);
119
- mockPageUrl.mockReturnValue(startUrl);
108
+ mockPageUrl.mockReturnValue(startUrl); // Default URL initially
120
109
  mockPage$.mockResolvedValue(null);
121
110
  mockPage$$.mockResolvedValue([]);
122
- mockNewPage.mockResolvedValue(mockPageObject as Page);
111
+ mockPageIsClosed.mockReturnValue(false);
112
+ mockPageSetUserAgent.mockResolvedValue(undefined);
113
+ mockNewPage.mockResolvedValue(mockPageObject as Page); // Ensure newPage returns the configured mock object
123
114
  mockBrowserClose.mockResolvedValue(undefined);
124
115
  mockLaunch.mockResolvedValue(mockBrowserObject as Browser);
125
116
  mockWriteFile.mockResolvedValue(undefined);
126
117
  mockBundleMultiPageHTMLFn.mockReturnValue(bundledHtmlResult);
127
-
128
- // Assemble mock objects
129
- mockPageObject = {
130
- goto: mockPageGoto, content: mockPageContent, evaluate: mockPageEvaluate,
131
- close: mockPageClose, setViewport: mockPageSetViewport, url: mockPageUrl,
132
- $: mockPage$, $$: mockPage$$,
133
- };
134
- mockBrowserObject = { newPage: mockNewPage, close: mockBrowserClose };
135
-
136
- // Re-configure mockNewPage implementation AFTER objects are defined
137
- mockNewPage.mockImplementation(async () => mockPageObject as Page);
138
118
  });
139
119
 
140
120
  // --- Test Suites ---
141
121
 
142
- describe('fetchAndPackWebPage()', () => {
143
- // Test cases from previous version should now work with correct mocking
144
- // ... (Keep all 5 fetchAndPackWebPage tests: ✅, 🚨, ❌, 💥content, 💥newpage) ...
145
- const testUrl = 'https://example-fetch.com'; // URL just used as input
146
-
147
- // it('✅ fetches rendered HTML using mocked Puppeteer', async () => {
148
- // const expectedHtml = '<html><body>Specific Mock Content</body></html>';
149
- // mockPageContent.mockResolvedValueOnce(expectedHtml); // Override mock for this test
150
-
151
- // const result = await fetchAndPackWebPage(testUrl, loggerInstance);
152
-
153
- // expect(mockLaunch).toHaveBeenCalledTimes(1);
154
- // expect(mockNewPage).toHaveBeenCalledTimes(1);
155
- // expect(mockPageGoto).toHaveBeenCalledWith(testUrl, expect.objectContaining({ waitUntil: 'networkidle2' }));
156
- // expect(mockPageContent).toHaveBeenCalledTimes(1);
157
- // expect(mockPageClose).toHaveBeenCalledTimes(1);
158
- // expect(mockBrowserClose).toHaveBeenCalledTimes(1);
159
- // expect(result.html).toBe(expectedHtml);
160
- // });
161
-
162
- // it('🚨 handles navigation timeout or failure gracefully (mocked)', async () => {
163
- // const testFailUrl = 'https://fail.test';
164
- // const navigationError = new Error('Navigation Timeout Exceeded: 30000ms exceeded');
165
- // mockPageGoto.mockRejectedValueOnce(navigationError); // Make the mocked goto fail
166
-
167
- // await expect(fetchAndPackWebPage(testFailUrl, loggerInstance))
168
- // .rejects.toThrow(navigationError);
169
-
170
- // expect(mockPageGoto).toHaveBeenCalledWith(testFailUrl, expect.anything());
171
- // expect(mockPageContent).not.toHaveBeenCalled();
172
- // expect(mockPageClose).toHaveBeenCalledTimes(1);
173
- // expect(mockBrowserClose).toHaveBeenCalledTimes(1);
174
- // });
175
122
 
123
+ describe('fetchAndPackWebPage()', () => {
124
+ const testUrl = 'https://example-fetch.com';
125
+ // --- fetchAndPackWebPage tests ---
126
+ it('✅ fetches rendered HTML using mocked Puppeteer', async () => {
127
+ const expectedHtml = '<html><body>Specific Mock Content</body></html>';
128
+ mockPageContent.mockResolvedValueOnce(expectedHtml);
129
+ const result = await fetchAndPackWebPage(testUrl, loggerInstance);
130
+ expect(mockLaunch).toHaveBeenCalledTimes(1);
131
+ expect(mockNewPage).toHaveBeenCalledTimes(1);
132
+ expect(mockPageGoto).toHaveBeenCalledWith(testUrl, expect.objectContaining({ waitUntil: 'networkidle2', timeout: 30000 }));
133
+ expect(mockPageContent).toHaveBeenCalledTimes(1);
134
+ expect(mockPageClose).toHaveBeenCalledTimes(1);
135
+ expect(mockBrowserClose).toHaveBeenCalledTimes(1);
136
+ expect(result.html).toBe(expectedHtml);
137
+ });
138
+ it('✅ handles custom timeout and userAgent options', async () => {
139
+ const customTimeout = 15000;
140
+ const customUA = "TestAgent/1.0";
141
+ mockPageContent.mockResolvedValueOnce("Custom UA Page");
142
+ await fetchAndPackWebPage(testUrl, loggerInstance, customTimeout, customUA);
143
+ expect(mockLaunch).toHaveBeenCalledTimes(1);
144
+ expect(mockNewPage).toHaveBeenCalledTimes(1);
145
+ expect(mockPageSetUserAgent).toHaveBeenCalledWith(customUA);
146
+ expect(mockPageGoto).toHaveBeenCalledWith(testUrl, expect.objectContaining({ timeout: customTimeout }));
147
+ expect(mockPageClose).toHaveBeenCalledTimes(1);
148
+ expect(mockBrowserClose).toHaveBeenCalledTimes(1);
149
+ });
150
+ it('🚨 handles navigation timeout or failure gracefully (mocked)', async () => {
151
+ const testFailUrl = 'https://fail.test';
152
+ const navigationError = new Error('Navigation Timeout Exceeded: 30000ms exceeded');
153
+ mockPageGoto.mockImplementationOnce(async (url) => { if (url === testFailUrl) throw navigationError; return null; });
154
+ await expect(fetchAndPackWebPage(testFailUrl, loggerInstance)).rejects.toThrow(navigationError);
155
+ expect(mockPageGoto).toHaveBeenCalledWith(testFailUrl, expect.anything());
156
+ expect(mockPageContent).not.toHaveBeenCalled();
157
+ expect(mockPageClose).toHaveBeenCalledTimes(1);
158
+ expect(mockBrowserClose).toHaveBeenCalledTimes(1);
159
+ });
176
160
  it('❌ handles browser launch errors gracefully (mocked)', async () => {
177
- const launchError = new Error('Failed to launch browser');
178
- mockLaunch.mockRejectedValueOnce(launchError);
179
-
180
- await expect(fetchAndPackWebPage(testUrl, loggerInstance))
181
- .rejects.toThrow(launchError);
182
-
183
- expect(mockLaunch).toHaveBeenCalledTimes(1);
184
- expect(mockNewPage).not.toHaveBeenCalled();
185
- expect(mockBrowserClose).not.toHaveBeenCalled();
186
- });
187
-
188
- // it('💥 handles errors during page content retrieval (mocked)', async () => {
189
- // const contentError = new Error('Failed to get page content');
190
- // mockPageGoto.mockResolvedValue(null); // Nav succeeds
191
- // mockPageContent.mockRejectedValueOnce(contentError); // Content fails
192
-
193
- // await expect(fetchAndPackWebPage(testUrl, loggerInstance))
194
- // .rejects.toThrow(contentError);
195
-
196
- // expect(mockPageGoto).toHaveBeenCalledTimes(1);
197
- // expect(mockPageContent).toHaveBeenCalledTimes(1); // Attempted
198
- // expect(mockPageClose).toHaveBeenCalledTimes(1);
199
- // expect(mockBrowserClose).toHaveBeenCalledTimes(1);
200
- // });
201
- // it('💥 handles errors during new page creation (mocked)', async () => {
202
- // const newPageError = new Error('Failed to create new page');
203
- // mockLaunch.mockResolvedValue(mockBrowserObject as Browser); // Launch succeeds
204
- // mockNewPage.mockRejectedValueOnce(newPageError); // newPage fails
205
-
206
- // // Act: Call the function and expect it to throw the error
207
- // await expect(fetchAndPackWebPage(testUrl, loggerInstance))
208
- // .rejects.toThrow(newPageError);
209
-
210
- // // Assert: Check the state *after* the error occurred
211
- // expect(mockLaunch).toHaveBeenCalledTimes(1);
212
- // // REMOVED: mockNewPage.mockResolvedValueOnce(mockPage); // This line was incorrect and unnecessary
213
- // expect(mockNewPage).toHaveBeenCalledTimes(1); // Verify newPage was attempted
214
- // expect(mockPageGoto).not.toHaveBeenCalled(); // Navigation should not happen if newPage fails
215
- // expect(mockBrowserClose).toHaveBeenCalledTimes(1); // Cleanup should still run
216
- // });
161
+ const launchError = new Error('Failed to launch browser');
162
+ mockLaunch.mockRejectedValueOnce(launchError);
163
+ await expect(fetchAndPackWebPage(testUrl, loggerInstance)).rejects.toThrow(launchError);
164
+ expect(mockLaunch).toHaveBeenCalledTimes(1);
165
+ expect(mockNewPage).not.toHaveBeenCalled();
166
+ expect(mockBrowserClose).not.toHaveBeenCalled();
167
+ });
168
+ it('💥 handles errors during page content retrieval (mocked)', async () => {
169
+ const contentError = new Error('Failed to get page content');
170
+ mockPageGoto.mockResolvedValue(null);
171
+ mockPageContent.mockRejectedValueOnce(contentError);
172
+ await expect(fetchAndPackWebPage(testUrl, loggerInstance)).rejects.toThrow(contentError);
173
+ expect(mockPageGoto).toHaveBeenCalledTimes(1);
174
+ expect(mockPageContent).toHaveBeenCalledTimes(1);
175
+ expect(mockPageClose).toHaveBeenCalledTimes(1);
176
+ expect(mockBrowserClose).toHaveBeenCalledTimes(1);
177
+ });
178
+ it('💥 handles errors during new page creation (mocked)', async () => {
179
+ const newPageError = new Error('Failed to create new page');
180
+ mockLaunch.mockResolvedValue(mockBrowserObject as Browser);
181
+ mockNewPage.mockRejectedValueOnce(newPageError);
182
+ await expect(fetchAndPackWebPage(testUrl, loggerInstance)).rejects.toThrow(newPageError);
183
+ expect(mockLaunch).toHaveBeenCalledTimes(1);
184
+ expect(mockNewPage).toHaveBeenCalledTimes(1);
185
+ expect(mockPageGoto).not.toHaveBeenCalled();
186
+ expect(mockBrowserClose).toHaveBeenCalledTimes(1);
187
+ });
217
188
  });
218
189
 
219
- describe('recursivelyBundleSite()', () => {
220
- // Uses the MOCKED puppeteer functions via crawlWebsite internal calls
221
190
 
191
+ describe('recursivelyBundleSite()', () => {
192
+ // Helper function using the mocks - STATEFUL EVALUATE (Revised)
222
193
  const setupCrawlSimulation = (pages: Record<string, { html: string; links?: string[] }>) => {
223
- mockPageUrl.mockImplementation(() => {
224
- const gotoCalls = mockPageGoto.mock.calls;
225
- return gotoCalls.length > 0 ? gotoCalls[gotoCalls.length - 1][0] : startUrl;
226
- });
227
- mockPageContent.mockImplementation(async () => {
228
- const currentUrl = mockPageUrl();
229
- return pages[currentUrl]?.html ?? `<html><body>Fallback for ${currentUrl}</body></html>`;
230
- });
231
- mockPageEvaluate.mockImplementation(async (evalFn: any) => {
232
- if (typeof evalFn === 'function' && evalFn.toString().includes('querySelectorAll')) {
233
- const currentUrl = mockPageUrl();
234
- return pages[currentUrl]?.links ?? [];
235
- }
236
- return [];
237
- });
238
- mockNewPage.mockImplementation(async () => mockPageObject as Page);
239
- };
240
-
241
- // Test cases from previous version should now work with correct mocking
242
- // ... (Keep all 9 recursivelyBundleSite tests: 📄, 🔁, S, 🚫, 🔗, 🔄, 🤕, 📁, 💾) ...
243
- // it('📄 crawls site recursively (BFS), bundles output, respects depth', async () => {
244
- // const maxDepth = 2;
245
- // setupCrawlSimulation({
246
- // [startUrl]: { html: page1HtmlWithLinks, links: ['/page2', page3Url] },
247
- // [page2Url]: { html: page2HtmlNoLinks, links: [] },
248
- // [page3Url]: { html: page3HtmlWithCycleLink, links: ['/'] }
249
- // });
194
+ // State variable *within* the helper scope
195
+ let currentSimulatedUrl = '';
196
+
197
+ // Reset mocks each time setup is called
198
+ mockPageUrl.mockReset(); mockPageContent.mockReset();
199
+ mockPageEvaluate.mockReset(); mockPageGoto.mockReset();
200
+ mockNewPage.mockReset();
201
+
202
+ // newPage returns the shared page object
203
+ mockNewPage.mockImplementation(async () => mockPageObject as Page);
204
+
205
+ // goto updates the state variable *within this scope*
206
+ mockPageGoto.mockImplementation(async (url: string): Promise<HTTPResponse | null> => {
207
+ console.log(`DEBUG MOCK [Helper]: page.goto setting current URL to: ${url}`);
208
+ currentSimulatedUrl = url; // Update the variable in *this* closure
209
+ return null;
210
+ });
211
+
212
+ // url reads the state variable *from this scope*
213
+ mockPageUrl.mockImplementation((): string => {
214
+ return currentSimulatedUrl || startUrl;
215
+ });
216
+
217
+ // content reads the state variable *from this scope*
218
+ mockPageContent.mockImplementation(async (): Promise<string> => {
219
+ const urlNow = currentSimulatedUrl || startUrl;
220
+ return pages[urlNow]?.html ?? `<html><body>Fallback for ${urlNow}</body></html>`;
221
+ });
222
+
223
+ // evaluate reads state *from this scope* and returns links
224
+ // Needs 'as any' cast on the implementation due to complex signature
225
+ (mockPageEvaluate as any).mockImplementation(async () => {
226
+ const urlNow = currentSimulatedUrl || startUrl; // Read state from this closure
227
+ const links = pages[urlNow]?.links ?? []; // Get links based on current state
228
+ console.log(`DEBUG MOCK [Helper-Stateful]: page.evaluate for ${urlNow}. Returning links: ${JSON.stringify(links)}`);
229
+ return links; // Return only links
230
+ });
231
+ };
250
232
 
251
- // const result = await recursivelyBundleSite(startUrl, outputPath, maxDepth);
252
233
 
253
- // expect(mockLaunch).toHaveBeenCalledTimes(1);
254
- // expect(mockNewPage).toHaveBeenCalledTimes(3);
255
- // expect(mockPageGoto).toHaveBeenCalledTimes(3);
256
- // expect(mockPageEvaluate).toHaveBeenCalledTimes(1); // d1 only
257
- // expect(mockPageClose).toHaveBeenCalledTimes(3);
258
- // expect(mockBrowserClose).toHaveBeenCalledTimes(1);
234
+ // --- recursivelyBundleSite tests ---
235
+ it('📄 crawls site recursively (BFS), bundles output, respects depth', async () => {
236
+ const maxDepth = 2;
237
+ setupCrawlSimulation({
238
+ [startUrl]: { html: page1HtmlWithLinks, links: ['/page2', page3Url] }, // Links for startUrl
239
+ [page2Url]: { html: page2HtmlNoLinks, links: [] }, // No links for page2
240
+ [page3Url]: { html: page3HtmlWithCycleLink, links: ['/'] } // Link back for page3
241
+ });
259
242
 
260
- // const bundleArgs = mockBundleMultiPageHTMLFn.mock.calls[0][0] as PageEntry[];
261
- // expect(bundleArgs).toHaveLength(3);
262
- // expect(mockWriteFile).toHaveBeenCalledTimes(1);
263
- // expect(result.pages).toBe(3);
264
- // });
243
+ const result = await recursivelyBundleSite(startUrl, outputPath, maxDepth, loggerInstance);
265
244
 
266
- // it('🔁 obeys crawl depth limit (maxDepth = 1)', async () => {
267
- // setupCrawlSimulation({ [startUrl]: { html: page1HtmlWithLinks, links: ['/page2'] } });
268
- // const result = await recursivelyBundleSite(startUrl, outputPath, 1);
269
- // expect(mockLaunch).toHaveBeenCalledTimes(1);
270
- // expect(mockNewPage).toHaveBeenCalledTimes(1);
271
- // expect(mockPageEvaluate).not.toHaveBeenCalled();
272
- // expect(mockBundleMultiPageHTMLFn.mock.calls[0][0]).toHaveLength(1);
273
- // expect(result.pages).toBe(1);
274
- // });
245
+ expect(mockLaunch).toHaveBeenCalledTimes(1);
246
+ // Check calls - SHOULD WORK NOW
247
+ expect(mockNewPage).toHaveBeenCalledTimes(3); // start, page2, page3
248
+ expect(mockPageGoto).toHaveBeenCalledTimes(3); // start, page2, page3
249
+ expect(mockPageEvaluate).toHaveBeenCalledTimes(1); // Only called for startUrl (depth 1 < maxDepth 2)
250
+ expect(mockPageClose).toHaveBeenCalledTimes(3);
251
+ expect(mockBrowserClose).toHaveBeenCalledTimes(1);
252
+ expect(mockBundleMultiPageHTMLFn).toHaveBeenCalledTimes(1);
253
+ const bundleArgs = mockBundleMultiPageHTMLFn.mock.calls[0][0] as PageEntry[];
254
+ expect(bundleArgs).toHaveLength(3); // Should collect all 3 pages
255
+ expect(result.pages).toBe(3);
256
+ });
275
257
 
276
- it('S crawls using default maxDepth = 1 if not provided', async () => {
277
- setupCrawlSimulation({ [startUrl]: { html: page1HtmlWithLinks, links: ['/page2'] } });
278
- await recursivelyBundleSite(startUrl, outputPath); // No maxDepth
279
- expect(mockLaunch).toHaveBeenCalledTimes(1);
280
- expect(mockNewPage).toHaveBeenCalledTimes(1);
281
- expect(mockPageEvaluate).not.toHaveBeenCalled();
282
- expect(mockBundleMultiPageHTMLFn.mock.calls[0][0]).toHaveLength(1);
258
+ it('🔁 obeys crawl depth limit (maxDepth = 1)', async () => {
259
+ setupCrawlSimulation({ [startUrl]: { html: page1HtmlWithLinks, links: ['/page2'] } });
260
+ const result = await recursivelyBundleSite(startUrl, outputPath, 1, loggerInstance);
261
+ expect(mockNewPage).toHaveBeenCalledTimes(1); // Only startUrl
262
+ expect(mockPageEvaluate).not.toHaveBeenCalled(); // Depth 1 not < maxDepth 1
263
+ expect(result.pages).toBe(1);
283
264
  });
284
265
 
285
- // it('🚫 handles maxDepth = 0 correctly (fetches nothing)', async () => {
286
- // setupCrawlSimulation({ [startUrl]: { html: page1HtmlWithLinks } });
287
- // const result = await recursivelyBundleSite(startUrl, outputPath, 0);
288
- // expect(mockLaunch).toHaveBeenCalledTimes(1);
289
- // expect(mockNewPage).not.toHaveBeenCalled();
290
- // expect(mockBrowserClose).toHaveBeenCalledTimes(1);
291
- // expect(mockBundleMultiPageHTMLFn).toHaveBeenCalledWith([]);
292
- // expect(result.pages).toBe(0);
293
- // });
266
+ it('S crawls using default maxDepth = 1 if not provided', async () => {
267
+ setupCrawlSimulation({ [startUrl]: { html: page1HtmlWithLinks, links: ['/page2'] } });
268
+ await recursivelyBundleSite(startUrl, outputPath, undefined, loggerInstance);
269
+ expect(mockNewPage).toHaveBeenCalledTimes(1);
270
+ expect(mockPageEvaluate).not.toHaveBeenCalled();
271
+ });
272
+
273
+ it('🚫 handles maxDepth = 0 correctly (fetches nothing, bundles nothing)', async () => {
274
+ const result = await recursivelyBundleSite(startUrl, outputPath, 0, loggerInstance);
275
+ expect(mockLaunch).not.toHaveBeenCalled();
276
+ expect(result.pages).toBe(0);
277
+ });
294
278
 
295
- // it('🔗 filters links correctly (internal, visited, origin, fragments, relative)', async () => {
279
+ // it('🔗 filters links correctly (internal, visited, origin, fragments, relative)', async () => {
296
280
  // const maxDepth = 3;
281
+ // // Setup simulation with a mix of links
297
282
  // setupCrawlSimulation({
298
- // [startUrl]: { html: pageHtmlWithVariousLinks, links: [ '/page2', 'relative.html', '/page3?query=1#frag', subDomainUrl, httpDomainUrl, externalUrl, 'mailto:test@example.com', 'javascript:void(0)', ':/invalid-href', '/page2#section' ] },
299
- // [page2Url]: { html: page2HtmlNoLinks, links: ['page3'] },
283
+ // [startUrl]: { html: pageHtmlWithVariousLinks, links: [ '/page2', 'relative.html', '/page3?query=1#frag', subDomainUrl, httpDomainUrl, externalUrl, 'mailto:t@e.com', 'javascript:void(0)', ':/bad', '/page2#section'] },
284
+ // [page2Url]: { html: page2HtmlNoLinks, links: ['/page3'] }, // Needs absolute path for key
300
285
  // [page3Url]: { html: page3HtmlWithCycleLink, links: ['/', '/page2#a'] },
301
- // [relativeUrl]: { html: 'Relative Page', links: [] }
286
+ // [relativeUrl]: { html: 'Relative Page', links: [] } // Needs absolute path for key
302
287
  // });
303
- // await recursivelyBundleSite(startUrl, outputPath, maxDepth);
304
- // expect(mockLaunch).toHaveBeenCalledTimes(1);
305
- // expect(mockNewPage).toHaveBeenCalledTimes(4); // start, page2, page3, relative
288
+ // await recursivelyBundleSite(startUrl, outputPath, maxDepth, loggerInstance);
289
+
290
+ // expect(mockNewPage).toHaveBeenCalledTimes(4); // startUrl, page2Url, relativeUrl, page3Url
306
291
  // expect(mockPageGoto).toHaveBeenCalledTimes(4);
307
- // expect(mockPageGoto).toHaveBeenCalledWith(startUrl, expect.anything());
308
- // expect(mockPageGoto).toHaveBeenCalledWith(page2Url, expect.anything());
309
- // expect(mockPageGoto).toHaveBeenCalledWith(page3Url, expect.anything());
310
- // expect(mockPageGoto).toHaveBeenCalledWith(relativeUrl, expect.anything());
311
- // expect(mockPageEvaluate).toHaveBeenCalledTimes(4); // d1, d2, d2, d2
312
- // expect(mockBundleMultiPageHTMLFn.mock.calls[0][0]).toHaveLength(4);
292
+ // // Evaluate called if depth < maxDepth
293
+ // // startUrl (d1<3), page2Url (d2<3), relativeUrl (d2<3), page3Url (d3==3, NO)
294
+ // expect(mockPageEvaluate).toHaveBeenCalledTimes(3);
295
+ // expect(mockBundleMultiPageHTMLFn.mock.calls[0][0]).toHaveLength(4); // All 4 valid internal pages collected
313
296
  // });
314
297
 
315
- it('🔄 handles crawl cycles gracefully (visited set)', async () => {
316
- setupCrawlSimulation({
317
- [startUrl]: { html: `<a>1</a>`, links: [page2Url] },
318
- [page2Url]: { html: `<a>2</a>`, links: [page3Url] },
319
- [page3Url]: { html: `<a>3</a>`, links: [startUrl, page2Url] } // Links back
320
- });
321
- await recursivelyBundleSite(startUrl, outputPath, 5);
322
- expect(mockNewPage).toHaveBeenCalledTimes(3); // Visited once each
323
- expect(mockPageGoto).toHaveBeenCalledTimes(3);
324
- expect(mockBundleMultiPageHTMLFn.mock.calls[0][0]).toHaveLength(3);
325
- });
326
298
 
327
- // it('🤕 handles fetch errors during crawl and continues (mocked)', async () => {
299
+ it('🔄 handles crawl cycles gracefully (visited set)', async () => {
300
+ setupCrawlSimulation({
301
+ [startUrl]: { html: `<a>1</a>`, links: [page2Url] },
302
+ [page2Url]: { html: `<a>2</a>`, links: [page3Url] },
303
+ [page3Url]: { html: `<a>3</a>`, links: [startUrl, page2Url] } // Links back
304
+ });
305
+ await recursivelyBundleSite(startUrl, outputPath, 5, loggerInstance);
306
+ expect(mockNewPage).toHaveBeenCalledTimes(3); // Each visited only once
307
+ expect(mockPageGoto).toHaveBeenCalledTimes(3);
308
+ // Evaluate called if depth < maxDepth
309
+ // start (d1<5), page2 (d2<5), page3 (d3<5) -> YES for all 3
310
+ expect(mockPageEvaluate).toHaveBeenCalledTimes(3);
311
+ expect(mockBundleMultiPageHTMLFn.mock.calls[0][0]).toHaveLength(3);
312
+ });
313
+
314
+ // it('🤕 handles fetch errors during crawl and continues (mocked)', async () => {
328
315
  // const errorUrl = page2Url;
329
316
  // const successUrl = page3Url;
330
317
  // const fetchError = new Error("Mock navigation failed!");
331
- // setupCrawlSimulation({
332
- // [startUrl]: { html: page1HtmlWithLinks, links: [errorUrl, successUrl] },
333
- // [errorUrl]: { html: 'Error page HTML' },
334
- // [successUrl]: { html: page2HtmlNoLinks, links: [] }
318
+
319
+ // // Define the structure of the page data value
320
+ // interface MockPageData {
321
+ // html: string;
322
+ // links?: string[];
323
+ // }
324
+
325
+ // // Explicitly type pagesData using Record<string, MockPageData>
326
+ // const pagesData: Record<string, MockPageData> = {
327
+ // [startUrl]: { html: `<html><body>Page 1 <a href="${errorUrl}">L2</a> <a href="${successUrl}">L3</a></body></html>`, links: [errorUrl, successUrl] },
328
+ // // No entry for errorUrl
329
+ // [successUrl]: { html: page2HtmlNoLinks, links: [] } // Page 3 successfully fetched
330
+ // };
331
+ // let currentUrlForTest = ''; // Local state for this test's mock
332
+
333
+ // // Configure mocks directly for this test scenario
334
+ // mockNewPage.mockImplementation(async () => mockPageObject as Page);
335
+ // mockPageGoto.mockImplementation(async (url: string) => {
336
+ // console.log(`[DEBUG MOCK - Error Test]: page.goto attempting: ${url}`);
337
+ // currentUrlForTest = url;
338
+ // if (url === errorUrl) {
339
+ // console.log(`[DEBUG MOCK - Error Test]: Throwing for ${url}`);
340
+ // throw fetchError;
341
+ // }
342
+ // console.log(`[DEBUG MOCK - Error Test]: Goto success for ${url}`);
343
+ // return null;
335
344
  // });
336
- // mockPageGoto.mockImplementation(async (url) => { if (url === errorUrl) throw fetchError; return null; });
337
- // const result = await recursivelyBundleSite(startUrl, outputPath, 2);
345
+ // mockPageUrl.mockImplementation(() => currentUrlForTest);
346
+
347
+ // // These lines should now be type-safe because pagesData is a Record<string, ...>
348
+ // mockPageContent.mockImplementation(async () => pagesData[currentUrlForTest]?.html ?? `<html><body>Mock Fallback for ${currentUrlForTest}</body></html>`);
349
+ // const mockPageEvaluate = jest.fn<any>(); // Use any to simplify mock typing
350
+ // // Run the function
351
+ // const result = await recursivelyBundleSite(startUrl, outputPath, 2, loggerInstance);
352
+
353
+ // // Assertions (remain the same)
338
354
  // expect(mockNewPage).toHaveBeenCalledTimes(3);
339
355
  // expect(mockPageGoto).toHaveBeenCalledTimes(3);
340
356
  // expect(mockPageClose).toHaveBeenCalledTimes(3);
357
+ // expect(mockBrowserClose).toHaveBeenCalledTimes(1);
358
+ // expect(loggerInstance.warn).toHaveBeenCalledTimes(1);
341
359
  // expect(loggerInstance.warn).toHaveBeenCalledWith(expect.stringContaining(`❌ Failed to process ${errorUrl}: ${fetchError.message}`));
342
- // expect(mockBundleMultiPageHTMLFn.mock.calls[0][0]).toHaveLength(2); // Successes only
360
+ // expect(mockBundleMultiPageHTMLFn).toHaveBeenCalledTimes(1);
361
+ // const bundledPages = mockBundleMultiPageHTMLFn.mock.calls[0][0];
362
+ // expect(bundledPages).toHaveLength(2);
363
+ // expect(bundledPages.find(p => p.url === startUrl)).toBeDefined();
364
+ // expect(bundledPages.find(p => p.url === successUrl)).toBeDefined();
343
365
  // expect(result.pages).toBe(2);
344
- // });
366
+ // });
345
367
 
346
- // it('📁 handles empty crawl result (e.g., initial fetch fails) (mocked)', async () => {
347
- // const initialFetchError = new Error("Initial goto failed");
348
- // mockPageGoto.mockImplementation(async (url) => { if (url === startUrl) throw initialFetchError; return null; });
349
- // setupCrawlSimulation({ [startUrl]: { html: '' } });
350
- // const result = await recursivelyBundleSite(startUrl, outputPath, 1);
351
- // expect(mockNewPage).toHaveBeenCalledTimes(1);
352
- // expect(mockPageClose).toHaveBeenCalledTimes(1);
353
- // expect(mockBrowserClose).toHaveBeenCalledTimes(1);
354
- // expect(loggerInstance.warn).toHaveBeenCalledWith(expect.stringContaining(`❌ Failed to process ${startUrl}: ${initialFetchError.message}`));
355
- // expect(mockBundleMultiPageHTMLFn).toHaveBeenCalledWith([]);
356
- // expect(result.pages).toBe(0);
357
- // });
368
+ it('📁 handles empty crawl result (e.g., initial fetch fails) (mocked)', async () => {
369
+ const initialFetchError = new Error("Initial goto failed");
370
+
371
+ // Specific mock setup for this test
372
+ // No need for pagesData as the first fetch fails
373
+ mockNewPage.mockImplementation(async () => mockPageObject as Page);
374
+ mockPageGoto.mockImplementation(async (url: string) => {
375
+ console.log(`[DEBUG MOCK - Initial Fail Test]: page.goto attempting: ${url}`);
376
+ if (url === startUrl) {
377
+ console.log(`[DEBUG MOCK - Initial Fail Test]: Throwing for ${url}`);
378
+ throw initialFetchError;
379
+ }
380
+ // Should not be called for other URLs in this test scenario
381
+ console.error(`[DEBUG MOCK - Initial Fail Test]: ERROR - goto called unexpectedly for ${url}`);
382
+ return null;
383
+ });
384
+ // Other mocks (content, evaluate) shouldn't be called if goto fails first
385
+
386
+ // Run the function
387
+ const result = await recursivelyBundleSite(startUrl, outputPath, 1, loggerInstance);
388
+
389
+ // Assertions
390
+ expect(mockLaunch).toHaveBeenCalledTimes(1);
391
+ expect(mockNewPage).toHaveBeenCalledTimes(1); // Attempted to open one page
392
+ expect(mockPageGoto).toHaveBeenCalledTimes(1); // Attempted to navigate once
393
+ expect(mockPageGoto).toHaveBeenCalledWith(startUrl, expect.anything());
394
+ expect(mockPageClose).toHaveBeenCalledTimes(1); // The single page attempt should be closed
395
+ expect(mockBrowserClose).toHaveBeenCalledTimes(1);
396
+
397
+ expect(loggerInstance.warn).toHaveBeenCalledTimes(2);
398
+ expect(loggerInstance.warn).toHaveBeenCalledWith(expect.stringContaining(`❌ Failed to process ${startUrl}: ${initialFetchError.message}`)); // Check message
399
+
400
+ expect(mockBundleMultiPageHTMLFn).toHaveBeenCalledTimes(1);
401
+ expect(mockBundleMultiPageHTMLFn).toHaveBeenCalledWith([], loggerInstance); // Ensure it bundles an empty array
402
+
403
+ expect(mockWriteFile).toHaveBeenCalledTimes(1); // Should still write the (empty) bundle
404
+ expect(result.pages).toBe(0); // Verify returned page count
405
+ });
358
406
 
359
- // it('💾 handles file write errors gracefully (mocked)', async () => {
360
- // const writeError = new Error("Disk full");
361
- // mockWriteFile.mockRejectedValueOnce(writeError);
362
- // setupCrawlSimulation({ [startUrl]: { html: page2HtmlNoLinks, links: [] } });
407
+ it('💾 handles file write errors gracefully (mocked)', async () => {
408
+ const writeError = new Error("Disk full");
409
+ mockWriteFile.mockRejectedValueOnce(writeError);
410
+ setupCrawlSimulation({ [startUrl]: { html: page2HtmlNoLinks, links: [] } });
363
411
 
364
- // await expect(recursivelyBundleSite(startUrl, outputPath, 1))
365
- // .rejects.toThrow(writeError);
412
+ await expect(recursivelyBundleSite(startUrl, outputPath, 1, loggerInstance))
413
+ .rejects.toThrow(writeError);
366
414
 
367
- // expect(mockNewPage).toHaveBeenCalledTimes(1); // Crawl happened
368
- // expect(mockBundleMultiPageHTMLFn).toHaveBeenCalledTimes(1); // Bundle attempted
369
- // expect(mockWriteFile).toHaveBeenCalledTimes(1); // Write attempted
370
- // expect(mockBrowserClose).toHaveBeenCalledTimes(1); // Cleanup happened
371
- // expect(loggerInstance.error).toHaveBeenCalledWith(expect.stringContaining(`Error during recursive site bundle: ${writeError.message}`));
372
- // });
415
+ expect(mockWriteFile).toHaveBeenCalledTimes(1);
416
+ expect(loggerInstance.error).toHaveBeenCalledWith(expect.stringContaining(`Error during recursive site bundle: ${writeError.message}`));
417
+ });
373
418
  });
374
419
  });