clean-web-scraper 3.2.0 → 3.2.1
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/example-usage.js +3 -4
- package/package.json +1 -1
- package/src/WebScraper.js +2 -1
package/README.md
CHANGED
|
@@ -57,10 +57,10 @@ const scraper = new WebScraper({
|
|
|
57
57
|
includeMetadata: false, // Optional: Include metadata in output files
|
|
58
58
|
metadataFields: ['title', 'description'] // Optional: Specify metadata fields to include
|
|
59
59
|
});
|
|
60
|
-
scraper.start();
|
|
60
|
+
await scraper.start();
|
|
61
61
|
|
|
62
62
|
// Combine results from multiple scrapers
|
|
63
|
-
WebScraper.combineResults('./combined-dataset', [scraper1, scraper2]);
|
|
63
|
+
await WebScraper.combineResults('./combined-dataset', [scraper1, scraper2]);
|
|
64
64
|
```
|
|
65
65
|
|
|
66
66
|
```bash
|
package/example-usage.js
CHANGED
|
@@ -22,7 +22,7 @@ async function khameneiIrFreePalestineTag ()
|
|
|
22
22
|
includeMetadata: true,
|
|
23
23
|
metadataFields: ["title", "description", "author", "lastModified", "language"]
|
|
24
24
|
});
|
|
25
|
-
|
|
25
|
+
await scraper.start();
|
|
26
26
|
return scraper;
|
|
27
27
|
}
|
|
28
28
|
|
|
@@ -50,7 +50,7 @@ async function decolonizepalestine ()
|
|
|
50
50
|
includeMetadata: true,
|
|
51
51
|
metadataFields: ["title", "description", "author", "lastModified", "language"]
|
|
52
52
|
});
|
|
53
|
-
|
|
53
|
+
await scraper.start();
|
|
54
54
|
return scraper;
|
|
55
55
|
}
|
|
56
56
|
|
|
@@ -58,8 +58,7 @@ void async function main ()
|
|
|
58
58
|
{
|
|
59
59
|
const khameneiIrFreePalestineTagScraper = await khameneiIrFreePalestineTag();
|
|
60
60
|
const decolonizepalestineScraper = await decolonizepalestine();
|
|
61
|
-
await WebScraper.
|
|
62
|
-
WebScraper.combineResults( "./dataset/combined", [
|
|
61
|
+
await WebScraper.combineResults( "./dataset/combined", [
|
|
63
62
|
khameneiIrFreePalestineTagScraper,
|
|
64
63
|
decolonizepalestineScraper
|
|
65
64
|
] );
|
package/package.json
CHANGED
package/src/WebScraper.js
CHANGED
|
@@ -378,8 +378,9 @@ class WebScraper
|
|
|
378
378
|
return new Promise( resolve => { return setTimeout( resolve, ms ) });
|
|
379
379
|
}
|
|
380
380
|
|
|
381
|
-
static combineResults ( outputPath, websites )
|
|
381
|
+
static async combineResults ( outputPath, websites )
|
|
382
382
|
{
|
|
383
|
+
await WebScraper.sleep( 1000 );
|
|
383
384
|
const fullOutputPath = path.join( __dirname, outputPath );
|
|
384
385
|
|
|
385
386
|
// Create output directories
|