clean-web-scraper 3.5.6 → 3.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/example-usage.js +7 -7
- package/main.js +832 -1
- package/package.json +1 -1
- package/src/WebScraper.js +0 -822
package/example-usage.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
const WebScraper = require( "./src/WebScraper" );
|
|
1
|
+
const WebScraper = require( "./main" );
|
|
2
2
|
|
|
3
3
|
// const cookies = "cf_clearance=ENHJkpw.ycd1tZ_A.d0O27QdslTN0EHaNurhCznfimg-1738241402-1.2.1.1-BlO.WitkGwE3U3vSamX35xP.AgN1HyvHWL03Jhe.twbn4QWojiw1T4.0M4lE_TcIeZrQ6ErwV9kQBMBKmfU0S6lQth1BJx7UpWn4T6wtFm83LmF.cB13PQYSQgGFGsH7qOkGIjbBhMbceQNp.y2XZgLq_hdntGKSBMe0iCUotx_xsqlzkolQIqnUYID3BLEQXZqNvqJOwkzLZ7.kzrwP42VdEuWEvT4jt7F3TkTaU9rumAp8FSNO1.hnr76Tv23OITm17rPD3__Ghdu1D0E.4v693nEiVYO_KQYNf_8gk0vXP.KAvUKA2zQyBmDXkfW3M1MkoLjFNZCanx9FPRVO7g";
|
|
4
4
|
// const headers = {
|
|
@@ -28,7 +28,7 @@ async function palianswers ( enable )
|
|
|
28
28
|
textOutputPath: "./dataset/palianswers/texts",
|
|
29
29
|
csvOutputPath: "./dataset/palianswers/train.csv",
|
|
30
30
|
includeMetadata: true,
|
|
31
|
-
metadataFields: ["
|
|
31
|
+
metadataFields: ["author", "title", "description"]
|
|
32
32
|
});
|
|
33
33
|
if ( enable )
|
|
34
34
|
{
|
|
@@ -53,7 +53,7 @@ async function khameneiIrFreePalestineTag ( enable )
|
|
|
53
53
|
textOutputPath: "./dataset/khamenei-ir-free-palestine-tag/texts",
|
|
54
54
|
csvOutputPath: "./dataset/khamenei-ir-free-palestine-tag/train.csv",
|
|
55
55
|
includeMetadata: true,
|
|
56
|
-
metadataFields: ["
|
|
56
|
+
metadataFields: ["author", "title", "description"]
|
|
57
57
|
});
|
|
58
58
|
if ( enable )
|
|
59
59
|
{
|
|
@@ -82,7 +82,7 @@ async function decolonizepalestine ( enable )
|
|
|
82
82
|
textOutputPath: "./dataset/decolonizepalestine/texts",
|
|
83
83
|
csvOutputPath: "./dataset/decolonizepalestine/train.csv",
|
|
84
84
|
includeMetadata: true,
|
|
85
|
-
metadataFields: ["
|
|
85
|
+
metadataFields: ["author", "title", "description"]
|
|
86
86
|
});
|
|
87
87
|
if ( enable )
|
|
88
88
|
{
|
|
@@ -108,7 +108,7 @@ async function bdsmovement ( enable )
|
|
|
108
108
|
textOutputPath: "./dataset/bdsmovement/texts",
|
|
109
109
|
csvOutputPath: "./dataset/bdsmovement/train.csv",
|
|
110
110
|
includeMetadata: true,
|
|
111
|
-
metadataFields: ["
|
|
111
|
+
metadataFields: ["author", "title", "description"],
|
|
112
112
|
puppeteerProxy: "socks5://127.0.0.1:2080",
|
|
113
113
|
puppeteerExecutablePath: "/usr/bin/chromium",
|
|
114
114
|
puppeteerRealProxy: {
|
|
@@ -152,7 +152,7 @@ async function electronicintifada ( enable )
|
|
|
152
152
|
csvOutputPath: "./dataset/electronicintifada/train.csv",
|
|
153
153
|
includeMetadata: true,
|
|
154
154
|
maxArticles: 2000,
|
|
155
|
-
metadataFields: ["
|
|
155
|
+
metadataFields: ["author", "title", "description"]
|
|
156
156
|
});
|
|
157
157
|
if ( enable )
|
|
158
158
|
{
|
|
@@ -191,7 +191,7 @@ async function palestineremembered ( enable )
|
|
|
191
191
|
textOutputPath: "./dataset/palestineremembered/texts",
|
|
192
192
|
csvOutputPath: "./dataset/palestineremembered/train.csv",
|
|
193
193
|
includeMetadata: true,
|
|
194
|
-
metadataFields: ["
|
|
194
|
+
metadataFields: ["author", "title", "description"],
|
|
195
195
|
axiosProxy: {
|
|
196
196
|
host: "localhost",
|
|
197
197
|
port: 2080,
|