clean-web-scraper 3.5.4 → 3.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/example-usage.js +2 -1
- package/package.json +2 -2
- package/src/WebScraper.js +1 -5
package/example-usage.js
CHANGED
|
@@ -211,8 +211,8 @@ void async function main ()
|
|
|
211
211
|
const palianswersScraper = await palianswers( true );
|
|
212
212
|
const decolonizepalestineScraper = await decolonizepalestine( true );
|
|
213
213
|
const khameneiIrFreePalestineTagScraper = await khameneiIrFreePalestineTag( true );
|
|
214
|
-
const bdsmovementScraper = await bdsmovement( false );
|
|
215
214
|
const electronicintifadaScraper = await electronicintifada( true );
|
|
215
|
+
const bdsmovementScraper = await bdsmovement( false );
|
|
216
216
|
const palestinerememberedScraper = await palestineremembered( false );
|
|
217
217
|
|
|
218
218
|
await WebScraper.combineResults( "./dataset/combined", [
|
|
@@ -225,4 +225,5 @@ void async function main ()
|
|
|
225
225
|
] );
|
|
226
226
|
|
|
227
227
|
// 7 https://stand-with-palestine.org/blogs
|
|
228
|
+
// https://mondoweiss.net
|
|
228
229
|
}()
|
package/package.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "clean-web-scraper",
|
|
3
|
-
"version": "3.5.4",
|
|
3
|
+
"version": "3.5.6",
|
|
4
4
|
"main": "main.js",
|
|
5
5
|
"scripts": {
|
|
6
6
|
"start": "node main.js",
|
|
7
|
-
"test": "
|
|
7
|
+
"test": "node --max-old-space-size=8192 example-usage.js"
|
|
8
8
|
},
|
|
9
9
|
"keywords": [
|
|
10
10
|
"clean-web-scraper",
|
package/src/WebScraper.js
CHANGED
|
@@ -137,11 +137,7 @@ class WebScraper
|
|
|
137
137
|
return;
|
|
138
138
|
}
|
|
139
139
|
this.visited.add( url );
|
|
140
|
-
if ( !this.isValidFileType( url ) )
|
|
141
|
-
{
|
|
142
|
-
return;
|
|
143
|
-
}
|
|
144
|
-
if ( !this.isValidDomain( url ) )
|
|
140
|
+
if ( !this.isValidFileType( url ) || !this.isValidDomain( url ) )
|
|
145
141
|
{
|
|
146
142
|
return;
|
|
147
143
|
}
|