clean-web-scraper 3.8.5 → 3.8.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/example-usage.js CHANGED
@@ -129,11 +129,17 @@ async function electronicintifada ( enable )
129
129
  csvOutputPath: "./dataset/electronicintifada/train.csv",
130
130
  includeMetadata: true,
131
131
  metadataFields: ["author", "title", "description", "dateScrapedDate"],
132
- maxDepth: 12,
132
+ maxDepth: 13,
133
133
  maxArticles: 2000,
134
134
  concurrencyLimit: 3,
135
135
  axiosHeaders: headers,
136
- retryDelay: 10000
136
+ retryDelay: 10000,
137
+ axiosProxy: {
138
+ host: "localhost",
139
+ port: 2080,
140
+ protocol: "http"
141
+ },
142
+ useProxyAsFallback: true
137
143
  });
138
144
  if ( enable )
139
145
  {
package/main.js CHANGED
@@ -38,6 +38,7 @@ class WebScraper
38
38
  // Network options
39
39
  axiosHeaders,
40
40
  axiosProxy,
41
+ useProxyAsFallback,
41
42
 
42
43
  // Puppeteer options
43
44
  usePuppeteer,
@@ -80,6 +81,7 @@ class WebScraper
80
81
  // Network configuration
81
82
  this.axiosHeaders = axiosHeaders;
82
83
  this.axiosProxy = axiosProxy;
84
+ this.useProxyAsFallback = useProxyAsFallback || false;
83
85
  this.axiosOptions = {};
84
86
  if ( this.axiosHeaders )
85
87
  {
@@ -572,10 +574,10 @@ class WebScraper
572
574
 
573
575
  async retryAxiosRequest ( url )
574
576
  {
575
- const options = {
577
+ let options = {
576
578
  responseType: "stream",
577
579
  maxRedirects: 5,
578
- timeout: 20000,
580
+ timeout: 30000,
579
581
  ...this.axiosOptions,
580
582
  };
581
583
 
@@ -587,6 +589,14 @@ class WebScraper
587
589
  {
588
590
  break;
589
591
  }
592
+ if ( attempt === this.maxRetries && this.useProxyAsFallback && this.axiosProxy )
593
+ {
594
+ options = {
595
+ ...options,
596
+ proxy: this.axiosProxy
597
+ };
598
+ }
599
+
590
600
  return await axios.get( url, options );
591
601
  }
592
602
  catch ( error )
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clean-web-scraper",
3
- "version": "3.8.5",
3
+ "version": "3.8.7",
4
4
  "main": "main.js",
5
5
  "scripts": {
6
6
  "start": "node main.js",