clean-web-scraper 3.2.0 → 3.2.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -57,10 +57,10 @@ const scraper = new WebScraper({
57
57
  includeMetadata: false, // Optional: Include metadata in output files
58
58
  metadataFields: ['title', 'description'] // Optional: Specify metadata fields to include
59
59
  });
60
- scraper.start();
60
+ await scraper.start();
61
61
 
62
62
  // Combine results from multiple scrapers
63
- WebScraper.combineResults('./combined-dataset', [scraper1, scraper2]);
63
+ await WebScraper.combineResults('./combined-dataset', [scraper1, scraper2]);
64
64
  ```
65
65
 
66
66
  ```bash
package/example-usage.js CHANGED
@@ -22,7 +22,7 @@ async function khameneiIrFreePalestineTag ()
22
22
  includeMetadata: true,
23
23
  metadataFields: ["title", "description", "author", "lastModified", "language"]
24
24
  });
25
- // await scraper.start();
25
+ await scraper.start();
26
26
  return scraper;
27
27
  }
28
28
 
@@ -50,7 +50,7 @@ async function decolonizepalestine ()
50
50
  includeMetadata: true,
51
51
  metadataFields: ["title", "description", "author", "lastModified", "language"]
52
52
  });
53
- // await scraper.start();
53
+ await scraper.start();
54
54
  return scraper;
55
55
  }
56
56
 
@@ -58,8 +58,7 @@ void async function main ()
58
58
  {
59
59
  const khameneiIrFreePalestineTagScraper = await khameneiIrFreePalestineTag();
60
60
  const decolonizepalestineScraper = await decolonizepalestine();
61
- await WebScraper.sleep( 1000 ); // Sleeps for 1 second
62
- WebScraper.combineResults( "./dataset/combined", [
61
+ await WebScraper.combineResults( "./dataset/combined", [
63
62
  khameneiIrFreePalestineTagScraper,
64
63
  decolonizepalestineScraper
65
64
  ] );
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clean-web-scraper",
3
- "version": "3.2.0",
3
+ "version": "3.2.1",
4
4
  "main": "main.js",
5
5
  "scripts": {
6
6
  "start": "node main.js",
package/src/WebScraper.js CHANGED
@@ -378,8 +378,9 @@ class WebScraper
378
378
  return new Promise( resolve => { return setTimeout( resolve, ms ) });
379
379
  }
380
380
 
381
- static combineResults ( outputPath, websites )
381
+ static async combineResults ( outputPath, websites )
382
382
  {
383
+ await WebScraper.sleep( 1000 );
383
384
  const fullOutputPath = path.join( __dirname, outputPath );
384
385
 
385
386
  // Create output directories