clean-web-scraper 3.8.5 → 3.8.6
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/example-usage.js +8 -2
- package/main.js +11 -1
- package/package.json +1 -1
package/example-usage.js
CHANGED
@@ -129,11 +129,17 @@ async function electronicintifada ( enable )
|
|
129
129
|
csvOutputPath: "./dataset/electronicintifada/train.csv",
|
130
130
|
includeMetadata: true,
|
131
131
|
metadataFields: ["author", "title", "description", "dateScrapedDate"],
|
132
|
-
maxDepth:
|
132
|
+
maxDepth: 13,
|
133
133
|
maxArticles: 2000,
|
134
134
|
concurrencyLimit: 3,
|
135
135
|
axiosHeaders: headers,
|
136
|
-
retryDelay: 10000
|
136
|
+
retryDelay: 10000,
|
137
|
+
axiosProxy: {
|
138
|
+
host: "localhost",
|
139
|
+
port: 2080,
|
140
|
+
protocol: "http"
|
141
|
+
},
|
142
|
+
useProxyAsFallback: true
|
137
143
|
});
|
138
144
|
if ( enable )
|
139
145
|
{
|
package/main.js
CHANGED
@@ -38,6 +38,7 @@ class WebScraper
|
|
38
38
|
// Network options
|
39
39
|
axiosHeaders,
|
40
40
|
axiosProxy,
|
41
|
+
useProxyAsFallback,
|
41
42
|
|
42
43
|
// Puppeteer options
|
43
44
|
usePuppeteer,
|
@@ -80,6 +81,7 @@ class WebScraper
|
|
80
81
|
// Network configuration
|
81
82
|
this.axiosHeaders = axiosHeaders;
|
82
83
|
this.axiosProxy = axiosProxy;
|
84
|
+
this.useProxyAsFallback = useProxyAsFallback || false;
|
83
85
|
this.axiosOptions = {};
|
84
86
|
if ( this.axiosHeaders )
|
85
87
|
{
|
@@ -572,7 +574,7 @@ class WebScraper
|
|
572
574
|
|
573
575
|
async retryAxiosRequest ( url )
|
574
576
|
{
|
575
|
-
|
577
|
+
let options = {
|
576
578
|
responseType: "stream",
|
577
579
|
maxRedirects: 5,
|
578
580
|
timeout: 20000,
|
@@ -587,6 +589,14 @@ class WebScraper
|
|
587
589
|
{
|
588
590
|
break;
|
589
591
|
}
|
592
|
+
if ( attempt === this.maxRetries && this.useProxyAsFallback && this.axiosProxy )
|
593
|
+
{
|
594
|
+
options = {
|
595
|
+
...options,
|
596
|
+
proxy: this.axiosProxy
|
597
|
+
};
|
598
|
+
}
|
599
|
+
|
590
600
|
return await axios.get( url, options );
|
591
601
|
}
|
592
602
|
catch ( error )
|