clean-web-scraper 3.8.3 → 3.8.4

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/example-usage.js CHANGED
@@ -30,7 +30,8 @@ async function palianswers ( enable )
30
30
  csvOutputPath: "./dataset/palianswers/train.csv",
31
31
  includeMetadata: true,
32
32
  metadataFields: ["author", "title", "description", "dateScrapedDate"],
33
- retryDelay: 10000
33
+ retryDelay: 10000,
34
+ concurrencyLimit: 4,
34
35
  });
35
36
  if ( enable )
36
37
  {
@@ -56,7 +57,8 @@ async function khameneiIrFreePalestineTag ( enable )
56
57
  csvOutputPath: "./dataset/khamenei-ir-free-palestine-tag/train.csv",
57
58
  includeMetadata: true,
58
59
  metadataFields: ["author", "title", "description", "dateScrapedDate"],
59
- retryDelay: 10000
60
+ retryDelay: 10000,
61
+ concurrencyLimit: 4,
60
62
  });
61
63
  if ( enable )
62
64
  {
@@ -87,7 +89,8 @@ async function decolonizepalestine ( enable )
87
89
  csvOutputPath: "./dataset/decolonizepalestine/train.csv",
88
90
  includeMetadata: true,
89
91
  metadataFields: ["author", "title", "description", "dateScrapedDate"],
90
- retryDelay: 10000
92
+ retryDelay: 10000,
93
+ concurrencyLimit: 4,
91
94
  });
92
95
  if ( enable )
93
96
  {
package/main.js CHANGED
@@ -154,6 +154,7 @@ class WebScraper
154
154
  }
155
155
  try
156
156
  {
157
+ await WebScraper.sleep( 5000 );
157
158
  const data = await this.fetchContent( url );
158
159
  if ( !data ) return;
159
160
  const dom = new JSDOM( data, { url });
@@ -191,7 +192,6 @@ class WebScraper
191
192
  {
192
193
  return;
193
194
  }
194
- await WebScraper.sleep( 5000 );
195
195
  const batch = unvisitedLinks.slice( i, i + this.concurrencyLimit );
196
196
  const results = await Promise.allSettled( batch.map( link => { return this.fetchPage( link, depth + 1 ) }) );
197
197
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clean-web-scraper",
3
- "version": "3.8.3",
3
+ "version": "3.8.4",
4
4
  "main": "main.js",
5
5
  "scripts": {
6
6
  "start": "node main.js",