clean-web-scraper 4.0.4 → 4.0.5

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (2):
  1. package/example-usage.js +36 -2
  2. package/package.json +1 -1
package/example-usage.js CHANGED
@@ -167,7 +167,15 @@ async function mondoweiss ( enable )
167
167
  "https://mondoweiss.net/recent-comments/",
168
168
  "https://mondoweiss.net/email-newsletters",
169
169
  "https://mondoweiss.net/author",
170
- "https://mondoweiss.net/tag/"
170
+ "https://mondoweiss.net/tag/",
171
+ "https://mondoweiss.net/wp-login.php",
172
+ "https://mondoweiss.net/news/page/",
173
+ "https://mondoweiss.net/news-letters/page/",
174
+ "https://mondoweiss.net/opinion/page/",
175
+ "https://mondoweiss.net/podcasts/page/",
176
+ "https://mondoweiss.net/media-analysis/page/",
177
+ "https://mondoweiss.net/culture/page/",
178
+ "https://mondoweiss.net/activism/page/"
171
179
  ],
172
180
  exactExcludeList: [
173
181
  "https://mondoweiss.net",
@@ -177,7 +185,33 @@ async function mondoweiss ( enable )
177
185
  "https://mondoweiss.net/media-analysis/",
178
186
  "https://mondoweiss.net/culture/",
179
187
  "https://mondoweiss.net/activism/",
180
- "https://mondoweiss.net/news-letters/"
188
+ "https://mondoweiss.net/news-letters/",
189
+ "https://mondoweiss.net/newsletters",
190
+ "https://mondoweiss.net/2006/",
191
+ "https://mondoweiss.net/2007/",
192
+ "https://mondoweiss.net/2008/",
193
+ "https://mondoweiss.net/2009/",
194
+ "https://mondoweiss.net/2010/",
195
+ "https://mondoweiss.net/2011/",
196
+ "https://mondoweiss.net/2012/",
197
+ "https://mondoweiss.net/2013/",
198
+ "https://mondoweiss.net/2014/",
199
+ "https://mondoweiss.net/2015/",
200
+ "https://mondoweiss.net/2016/",
201
+ "https://mondoweiss.net/2017/",
202
+ "https://mondoweiss.net/2018/",
203
+ "https://mondoweiss.net/2019/",
204
+ "https://mondoweiss.net/2020/",
205
+ "https://mondoweiss.net/2021/",
206
+ "https://mondoweiss.net/2022/",
207
+ "https://mondoweiss.net/2023/",
208
+ "https://mondoweiss.net/2024/",
209
+ "https://mondoweiss.net/2025/",
210
+ "https://mondoweiss.net/daily-headlines",
211
+ "https://mondoweiss.net/palestineletter",
212
+ "https://mondoweiss.net/podcasts/",
213
+ "https://mondoweiss.net/the-shift",
214
+ "https://mondoweiss.net/weekly-briefing"
181
215
  ],
182
216
  scrapResultPath: "./dataset/mondoweiss/website",
183
217
  jsonlOutputPath: "./dataset/mondoweiss/train.jsonl",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clean-web-scraper",
3
- "version": "4.0.4",
3
+ "version": "4.0.5",
4
4
  "main": "main.js",
5
5
  "scripts": {
6
6
  "start": "node main.js",