clean-web-scraper 3.7.5 → 3.7.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/main.js +8 -3
- package/package.json +1 -1
package/main.js
CHANGED
@@ -208,8 +208,8 @@ class WebScraper
|
|
208
208
|
try
|
209
209
|
{
|
210
210
|
const response = await this.retryAxiosRequest( url )
|
211
|
-
const contentType = response
|
212
|
-
if ( !contentType
|
211
|
+
const contentType = response?.headers["content-type"] || "";
|
212
|
+
if ( !contentType?.startsWith( "text" ) )
|
213
213
|
{
|
214
214
|
console.log( `Skipping non-HTML content for ${url}: Content-Type is ${contentType}` );
|
215
215
|
response.data.destroy();
|
@@ -572,15 +572,20 @@ class WebScraper
|
|
572
572
|
{
|
573
573
|
try
|
574
574
|
{
|
575
|
+
if ( this.hasReachedMax( depth ) )
|
576
|
+
{
|
577
|
+
throw new Error( "Max reached" );
|
578
|
+
}
|
575
579
|
return await axios.get( url, options );
|
576
580
|
}
|
577
581
|
catch ( error )
|
578
582
|
{
|
579
|
-
if ( attempt
|
583
|
+
if ( attempt >= this.maxRetries ) throw error;
|
580
584
|
await WebScraper.sleep( 40000 * attempt );
|
581
585
|
console.error( `Retrying request to ${url} (Attempt ${attempt + 1}/${this.maxRetries})`, error.message, error.code );
|
582
586
|
}
|
583
587
|
}
|
588
|
+
throw new Error( "Max retries reached" );
|
584
589
|
}
|
585
590
|
|
586
591
|
configurePuppeteer ( )
|