@fanboynz/network-scanner 1.0.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,384 @@
1
+ // redirect.js - Enhanced redirect handling module for nwss.js
2
+ // Handles HTTP redirects, JavaScript redirects, meta refresh, and delayed redirects
3
+
/**
 * Canonicalize a URL string so that equivalent spellings compare equal
 * (e.g. "http://example.com" and "http://example.com/").
 * Strings that cannot be parsed as absolute URLs are returned unchanged.
 * @param {string} url - URL to normalize
 * @returns {string} Normalized href, or the input when it is not parseable
 */
function normalizeUrlForComparison(url) {
  try {
    return new URL(url).href;
  } catch {
    return url;
  }
}

/**
 * Collect the hostnames visited before the final URL of a redirect chain.
 * The hostname of the final URL is never included, even when an earlier hop
 * (for example an http -> https upgrade) shared it. Duplicates are removed.
 * @param {string[]} redirectChain - Visited URLs, original first, final last
 * @returns {string[]} Unique intermediate hostnames in first-seen order
 */
function extractRedirectDomains(redirectChain) {
  const hostnameOf = (url) => {
    try {
      return new URL(url).hostname;
    } catch {
      return null;
    }
  };

  const finalHostname = hostnameOf(redirectChain[redirectChain.length - 1]);
  const intermediateDomains = new Set();
  for (const url of redirectChain.slice(0, -1)) {
    const hostname = hostnameOf(url);
    if (hostname && hostname !== finalHostname) {
      intermediateDomains.add(hostname);
    }
  }
  return [...intermediateDomains];
}

/**
 * Register a script that runs in every new document of the page and records
 * JavaScript / meta-refresh redirect attempts in window._jsRedirectDetected,
 * window._jsRedirectUrl and window._jsRedirectType.
 * Injection failures are reported through `debug` and otherwise ignored.
 * @param {Page} page - Puppeteer page instance
 * @param {Function} debug - Debug logger taking a single message string
 * @returns {Promise<void>}
 */
async function installJsRedirectDetector(page, debug) {
  try {
    // NOTE: this callback is serialized and executed inside the browser, so it
    // must be self-contained and cannot reference anything from this module.
    await page.evaluateOnNewDocument(() => {
      // Flags read back later through page.evaluate()
      window._jsRedirectDetected = false;
      window._jsRedirectUrl = null;
      window._jsRedirectType = null;

      const recordRedirect = (type, url) => {
        window._jsRedirectDetected = true;
        window._jsRedirectUrl = url;
        window._jsRedirectType = type;
      };

      // Location members are specified as unforgeable (non-configurable), so
      // patching them can throw. Every hook is isolated so that a failure in
      // one cannot prevent the remaining hooks or the meta-refresh observer
      // from being installed.
      try {
        const originalReplace = window.location.replace;
        window.location.replace = function(url) {
          recordRedirect('location.replace', url);
          console.log('[jsRedirect] location.replace:', url);
          return originalReplace.call(this, url);
        };
      } catch {
        // Best effort: location.replace cannot be intercepted in this browser.
      }

      try {
        const originalAssign = window.location.assign;
        window.location.assign = function(url) {
          recordRedirect('location.assign', url);
          console.log('[jsRedirect] location.assign:', url);
          return originalAssign.call(this, url);
        };
      } catch {
        // Best effort: location.assign cannot be intercepted in this browser.
      }

      try {
        const originalHrefSetter = Object.getOwnPropertyDescriptor(window.location, 'href')?.set;
        if (originalHrefSetter) {
          Object.defineProperty(window.location, 'href', {
            set: function(url) {
              recordRedirect('location.href', url);
              console.log('[jsRedirect] location.href set:', url);
              return originalHrefSetter.call(this, url);
            },
            get: function() {
              return window.location.toString();
            }
          });
        }
      } catch {
        // Best effort: location.href cannot be redefined in this browser.
      }

      // Monitor <meta http-equiv="refresh"> tags added to <head>
      const observer = new MutationObserver((mutations) => {
        mutations.forEach((mutation) => {
          mutation.addedNodes.forEach((node) => {
            if (node.nodeName !== 'META' || !node.getAttribute) {
              return;
            }
            // http-equiv values are case-insensitive ("Refresh" is common)
            const httpEquiv = node.getAttribute('http-equiv');
            if (!httpEquiv || httpEquiv.toLowerCase() !== 'refresh') {
              return;
            }
            const content = node.getAttribute('content');
            if (content) {
              recordRedirect('meta.refresh', content);
              console.log('[jsRedirect] meta refresh:', content);
            }
          });
        });
      });

      const observeHead = () => {
        if (document.head) {
          observer.observe(document.head, { childList: true, subtree: true });
        }
      };

      // Start observing immediately when <head> exists, otherwise once the DOM is ready
      if (document.head) {
        observeHead();
      } else {
        document.addEventListener('DOMContentLoaded', observeHead);
      }
    });
  } catch (jsErr) {
    debug(`Failed to inject JS redirect detector: ${jsErr.message}`);
  }
}

/**
 * Enhanced navigation with comprehensive redirect detection including JavaScript redirects.
 *
 * Navigates the page to `currentUrl`, then polls for a short period to catch
 * redirects triggered after load (location changes, meta refresh). URLs are
 * compared in normalized form, so a purely cosmetic difference such as a
 * missing trailing slash on the origin is not reported as a redirect.
 *
 * Recognized `siteConfig` keys:
 *   - js_redirect_timeout: total ms to wait for JS redirects (default 5000)
 *   - max_redirects: maximum number of entries in the redirect chain,
 *     including the original URL (default 10)
 *   - detect_js_patterns: set to false to skip the page-source pattern scan
 *
 * @param {Page} page - Puppeteer page instance
 * @param {string} currentUrl - Original URL to navigate to
 * @param {object} siteConfig - Site configuration (nullish is treated as empty)
 * @param {object} gotoOptions - Computed goto options from existing logic
 * @param {boolean} forceDebug - Debug logging flag
 * @param {Function} formatLogMessage - Log formatting function from main script;
 *   a plain "[level] message" formatter is used when omitted
 * @returns {Promise<{finalUrl: string, redirected: boolean, redirectChain: string[], originalUrl: string, redirectDomains: string[]}>}
 *   `redirectDomains` lists the hostnames visited before the final URL,
 *   excluding the final hostname itself.
 * @throws Rethrows any error raised by `page.goto`; the navigation listener is
 *   removed in that case as well.
 */
async function navigateWithRedirectHandling(page, currentUrl, siteConfig, gotoOptions = {}, forceDebug = false, formatLogMessage) {
  const config = siteConfig ?? {};
  const navigationOptions = gotoOptions ?? {};
  const formatMessage = typeof formatLogMessage === 'function'
    ? formatLogMessage
    : (level, message) => `[${level}] ${message}`;
  const debug = (message) => {
    if (forceDebug) {
      console.log(formatMessage('debug', message));
    }
  };

  const redirectChain = [currentUrl];
  // Normalized form of every URL in redirectChain, for O(1) duplicate checks
  const seenUrls = new Set([normalizeUrlForComparison(currentUrl)]);
  let finalUrl = currentUrl;
  let redirected = false;

  const jsRedirectTimeout = config.js_redirect_timeout || 5000; // Wait 5s for JS redirects
  const maxRedirects = config.max_redirects || 10;
  const detectJSPatterns = config.detect_js_patterns !== false; // Default to true
  const maxJsRedirectAttempts = 3;

  const limitReached = () => redirectChain.length >= maxRedirects;
  const rememberUrl = (url) => {
    const normalized = normalizeUrlForComparison(url);
    if (!seenUrls.has(normalized)) {
      seenUrls.add(normalized);
      redirectChain.push(url);
    }
  };
  const recordRedirect = (url) => {
    rememberUrl(url);
    finalUrl = url;
    redirected = true;
  };

  // Monitor main-frame navigations to detect redirects while the page loads
  const navigationHandler = (frame) => {
    if (frame !== page.mainFrame()) {
      return;
    }
    const frameUrl = frame.url();
    if (!frameUrl || frameUrl === 'about:blank' || seenUrls.has(normalizeUrlForComparison(frameUrl))) {
      return;
    }
    if (limitReached()) {
      debug(`Maximum redirects (${maxRedirects}) reached, stopping redirect chain`);
      return;
    }
    recordRedirect(frameUrl);
    debug(`Frame navigation detected: ${frameUrl}`);
  };

  try {
    page.on('framenavigated', navigationHandler);

    await installJsRedirectDetector(page, debug);

    if (forceDebug && Object.keys(navigationOptions).length > 0) {
      debug(`Using goto options: ${JSON.stringify(navigationOptions)}`);
    }

    // Initial navigation
    const response = await page.goto(currentUrl, navigationOptions);

    if (response && normalizeUrlForComparison(response.url()) !== normalizeUrlForComparison(currentUrl)) {
      if (limitReached()) {
        debug(`Maximum redirects (${maxRedirects}) reached during HTTP redirect`);
        finalUrl = currentUrl; // Keep original URL
      } else {
        finalUrl = response.url();
        redirected = true;
        rememberUrl(finalUrl);
      }
      debug(`HTTP redirect detected: ${currentUrl} ? ${finalUrl}`);
    }

    // Wait for potential JavaScript redirects, polling in equal slices of the timeout
    debug(`Waiting ${jsRedirectTimeout}ms for potential JavaScript redirects...`);

    for (let attempt = 0; attempt < maxJsRedirectAttempts; attempt++) {
      await new Promise((resolve) => setTimeout(resolve, jsRedirectTimeout / maxJsRedirectAttempts));

      try {
        // Read back the flags set by the injected detector
        const jsRedirectResult = await page.evaluate(() => {
          return {
            detected: window._jsRedirectDetected || false,
            url: window._jsRedirectUrl || null,
            type: window._jsRedirectType || null,
            currentUrl: window.location.href
          };
        });

        // Check if URL changed (either through JS redirect or automatic redirect)
        const currentPageUrl = page.url();
        if (currentPageUrl && !seenUrls.has(normalizeUrlForComparison(currentPageUrl))) {
          if (limitReached()) {
            debug(`Maximum redirects (${maxRedirects}) reached during JS redirect detection`);
            break;
          }
          recordRedirect(currentPageUrl);

          if (jsRedirectResult.detected) {
            debug(`JavaScript redirect detected (${jsRedirectResult.type}): ${jsRedirectResult.url || currentPageUrl}`);
          } else {
            debug(`URL change detected: ${currentPageUrl}`);
          }
        }

        // Nothing pending in the page: no reason to keep polling
        if (!jsRedirectResult.detected) {
          break;
        }

        // A redirect was requested but no URL change has been observed yet; poll again
        if (!redirected) {
          debug(`JS redirect detected (${jsRedirectResult.type}) but not yet executed, waiting...`);
        }
      } catch (evalErr) {
        // Typically the execution context was destroyed by an in-flight navigation
        debug(`Error checking JS redirects: ${evalErr.message}`);
        break;
      }
    }

    // Optional: Detect common JavaScript redirect patterns in page source
    if (detectJSPatterns) {
      await detectCommonJSRedirects(page, forceDebug, formatMessage);
    }

    // Final URL check
    const finalPageUrl = page.url();
    if (finalPageUrl && normalizeUrlForComparison(finalPageUrl) !== normalizeUrlForComparison(finalUrl)) {
      if (limitReached()) {
        debug(`Maximum redirects (${maxRedirects}) reached, keeping current finalUrl`);
      } else {
        finalUrl = finalPageUrl;
        redirected = true;
        rememberUrl(finalUrl);
      }
    }
  } finally {
    // Always detach the listener, including when page.goto() rejects
    page.off('framenavigated', navigationHandler);
  }

  // Log redirect summary
  if (redirected) {
    debug(`Redirect chain: ${redirectChain.join(' ? ')}`);
  }

  // Extract redirect domains to exclude from matching
  const redirectDomains = redirected && redirectChain.length > 1
    ? extractRedirectDomains(redirectChain)
    : [];

  return { finalUrl, redirected, redirectChain, originalUrl: currentUrl, redirectDomains };
}
277
+
278
+ /**
279
+ * Detect common JavaScript redirect patterns in page source
280
+ * @param {Page} page - Puppeteer page instance
281
+ * @param {boolean} forceDebug - Debug logging flag
282
+ * @param {Function} formatLogMessage - Log formatting function
283
+ * @returns {Promise<Array>} Array of detected patterns
284
+ */
285
+ async function detectCommonJSRedirects(page, forceDebug = false, formatLogMessage) {
286
+ try {
287
+ const redirectPatterns = await page.evaluate(() => {
288
+ const patterns = [];
289
+
290
+ // Check for common redirect patterns in page source
291
+ const pageSource = document.documentElement.outerHTML;
292
+
293
+ // Pattern 1: window.location = "url"
294
+ const locationAssign = pageSource.match(/window\.location\s*=\s*["']([^"']+)["']/g);
295
+ if (locationAssign) {
296
+ patterns.push({ type: 'window.location assignment', matches: locationAssign });
297
+ }
298
+
299
+ // Pattern 2: location.href = "url"
300
+ const hrefAssign = pageSource.match(/location\.href\s*=\s*["']([^"']+)["']/g);
301
+ if (hrefAssign) {
302
+ patterns.push({ type: 'location.href assignment', matches: hrefAssign });
303
+ }
304
+
305
+ // Pattern 3: setTimeout redirects
306
+ const timeoutRedirect = pageSource.match(/setTimeout\s*\([^)]*location[^)]*\)/g);
307
+ if (timeoutRedirect) {
308
+ patterns.push({ type: 'setTimeout redirect', matches: timeoutRedirect });
309
+ }
310
+
311
+ // Pattern 4: Meta refresh
312
+ const metaRefresh = document.querySelector('meta[http-equiv="refresh"]');
313
+ if (metaRefresh) {
314
+ patterns.push({ type: 'meta refresh', content: metaRefresh.getAttribute('content') });
315
+ }
316
+
317
+ // Pattern 5: document.location redirects
318
+ const docLocationAssign = pageSource.match(/document\.location\s*=\s*["']([^"']+)["']/g);
319
+ if (docLocationAssign) {
320
+ patterns.push({ type: 'document.location assignment', matches: docLocationAssign });
321
+ }
322
+
323
+ return patterns;
324
+ });
325
+
326
+ if (redirectPatterns.length > 0 && forceDebug) {
327
+ console.log(formatLogMessage('debug', `Found ${redirectPatterns.length} potential JS redirect pattern(s):`));
328
+ redirectPatterns.forEach((pattern, idx) => {
329
+ console.log(formatLogMessage('debug', ` [${idx + 1}] ${pattern.type}: ${JSON.stringify(pattern.matches || pattern.content)}`));
330
+ });
331
+ }
332
+
333
+ return redirectPatterns;
334
+
335
+ } catch (detectErr) {
336
+ if (forceDebug) {
337
+ console.log(formatLogMessage('debug', `Error detecting JS redirect patterns: ${detectErr.message}`));
338
+ }
339
+ return [];
340
+ }
341
+ }
342
+
/**
 * Enhanced timeout error handling for partial redirects.
 *
 * When navigation failed with a "Navigation timeout" error but the page has
 * already moved to a URL on a different domain (as reported by
 * `safeGetDomain`), the navigation is treated as a recovered redirect.
 * Any other error, a nullish error, an unchanged or same-domain URL, or a
 * failure while inspecting the page yields an unsuccessful result.
 * Never rejects.
 *
 * @param {Page} page - Puppeteer page instance
 * @param {string} originalUrl - Original URL that was requested
 * @param {Error} error - Navigation timeout error
 * @param {Function} safeGetDomain - Domain extraction function
 * @param {boolean} forceDebug - Debug logging flag
 * @param {Function} formatLogMessage - Log formatting function; a plain
 *   "[level] message" formatter is used when omitted
 * @returns {Promise<{success: boolean, finalUrl: string, redirected: boolean}>}
 */
async function handleRedirectTimeout(page, originalUrl, error, safeGetDomain, forceDebug = false, formatLogMessage) {
  const notRecovered = () => ({ success: false, finalUrl: originalUrl, redirected: false });

  // Fall back to a basic formatter so that debug logging can never throw,
  // in particular not from inside the catch handler below.
  const formatMessage = typeof formatLogMessage === 'function'
    ? formatLogMessage
    : (level, message) => `[${level}] ${message}`;
  const debug = (message) => {
    if (forceDebug) {
      console.log(formatMessage('debug', message));
    }
  };

  // Tolerate nullish errors, thrown strings and errors without a message
  const errorMessage = typeof error?.message === 'string' ? error.message : String(error ?? '');
  if (!errorMessage.includes('Navigation timeout')) {
    return notRecovered();
  }

  try {
    const currentPageUrl = page.url();
    const pageMoved = Boolean(currentPageUrl) &&
      currentPageUrl !== 'about:blank' &&
      currentPageUrl !== originalUrl;
    if (!pageMoved) {
      return notRecovered();
    }

    const originalDomain = safeGetDomain(originalUrl);
    const currentDomain = safeGetDomain(currentPageUrl);
    // Only a change of domain counts as a recovered redirect
    if (originalDomain === currentDomain) {
      return notRecovered();
    }

    debug(`Partial redirect timeout recovered: ${originalDomain} ? ${currentDomain}`);
    return { success: true, finalUrl: currentPageUrl, redirected: true };
  } catch (urlError) {
    debug(`Error during timeout recovery: ${urlError.message}`);
    return notRecovered();
  }
}
379
+
380
+ module.exports = {
381
+ navigateWithRedirectHandling,
382
+ detectCommonJSRedirects,
383
+ handleRedirectTimeout
384
+ };