spamscanner 3.0.6 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,9 +2,7 @@
2
2
  <a href="https://spamscanner.net"><img src="https://d1i8ikybhfrv4r.cloudfront.net/spamscanner.png" alt="spamscanner" /></a>
3
3
  </h1>
4
4
  <div align="center">
5
- <a href="https://join.slack.com/t/ladjs/shared_invite/zt-fqei6z11-Bq2trhwHQxVc5x~ifiZG0g"><img src="https://img.shields.io/badge/chat-join%20slack-brightgreen" alt="chat" /></a>
6
- <a href="https://travis-ci.com/spamscanner/spamscanner"><img src="https://travis-ci.com/spamscanner/spamscanner.svg?branch=master" alt="build status" /></a>
7
- <a href="https://codecov.io/github/spamscanner/spamscanner"><img src="https://img.shields.io/codecov/c/github/spamscanner/spamscanner/master.svg" alt="code coverage" /></a>
5
+ <a href="https://github.com/spamscanner/spamscanner/actions/workflows/ci.yml"><img src="https://github.com/spamscanner/spamscanner/actions/workflows/ci.yml/badge.svg" alt="build status" /></a>
8
6
  <a href="https://github.com/sindresorhus/xo"><img src="https://img.shields.io/badge/code_style-XO-5ed9c7.svg" alt="code style" /></a>
9
7
  <a href="https://github.com/prettier/prettier"><img src="https://img.shields.io/badge/styled_with-prettier-ff69b4.svg" alt="styled with prettier" /></a>
10
8
  <a href="https://lass.js.org"><img src="https://img.shields.io/badge/made_with-lass-95CC28.svg" alt="made with lass" /></a>
@@ -48,6 +46,7 @@
48
46
  * [`scanner.getVirusResults(mail)`](#scannergetvirusresultsmail)
49
47
  * [`scanner.parseLocale(locale)`](#scannerparselocalelocale)
50
48
  * [Caching](#caching)
49
+ * [Debugging](#debugging)
51
50
  * [Contributors](#contributors)
52
51
  * [References](#references)
53
52
  * [License](#license)
@@ -188,11 +187,48 @@ Note that you can simply use the Spam Scanner API for free at <https://spamscann
188
187
  2. Configure ClamAV:
189
188
 
190
189
  ```sh
190
+ # if you are on Intel macOS
191
+ sudo mv /usr/local/etc/clamav/clamd.conf.sample /usr/local/etc/clamav/clamd.conf
192
+
193
+ # if you are on M1 macOS (or newer brew which installs to `/opt/homebrew`)
194
+ sudo mv /opt/homebrew/etc/clamav/clamd.conf.sample /opt/homebrew/etc/clamav/clamd.conf
195
+ ```
196
+
197
+ ```sh
198
+ # if you are on Intel macOS
199
+ sudo vim /usr/local/etc/clamav/clamd.conf
200
+
201
+ # if you are on M1 macOS (or newer brew which installs to `/opt/homebrew`)
202
+ sudo vim /opt/homebrew/etc/clamav/clamd.conf
203
+ ```
204
+
205
+ ```diff
206
+ -Example
207
+ +#Example
208
+
209
+ -#StreamMaxLength 10M
210
+ +StreamMaxLength 50M
211
+
212
+ +# this file path may be different on your OS (that's OK)
213
+
214
+ -#LocalSocket /tmp/clamd.socket
215
+ +LocalSocket /tmp/clamd.socket
216
+ ```
217
+
218
+ ```sh
219
+ # if you are on Intel macOS
191
220
  sudo mv /usr/local/etc/clamav/freshclam.conf.sample /usr/local/etc/clamav/freshclam.conf
221
+
222
+ # if you are on M1 macOS (or newer brew which installs to `/opt/homebrew`)
223
+ sudo mv /opt/homebrew/etc/clamav/freshclam.conf.sample /opt/homebrew/etc/clamav/freshclam.conf
192
224
  ```
193
225
 
194
226
  ```sh
227
+ # if you are on Intel macOS
195
228
  sudo vim /usr/local/etc/clamav/freshclam.conf
229
+
230
+ # if you are on M1 macOS (or newer brew which installs to `/opt/homebrew`)
231
+ sudo vim /opt/homebrew/etc/clamav/freshclam.conf
196
232
  ```
197
233
 
198
234
  ```diff
@@ -210,6 +246,8 @@ Note that you can simply use the Spam Scanner API for free at <https://spamscann
210
246
  sudo vim /Library/LaunchDaemons/org.clamav.clamd.plist
211
247
  ```
212
248
 
249
+ > If you are on Intel macOS:
250
+
213
251
  ```plist
214
252
  <?xml version="1.0" encoding="UTF-8"?>
215
253
  <!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
@@ -231,12 +269,37 @@ Note that you can simply use the Spam Scanner API for free at <https://spamscann
231
269
  </plist>
232
270
  ```
233
271
 
272
+ > If you are on M1 macOS (or newer brew which installs to `/opt/homebrew`)
273
+
274
+ ```plist
275
+ <?xml version="1.0" encoding="UTF-8"?>
276
+ <!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
277
+ <plist version="1.0">
278
+ <dict>
279
+ <key>Label</key>
280
+ <string>org.clamav.clamd</string>
281
+ <key>KeepAlive</key>
282
+ <true/>
283
+ <key>Program</key>
284
+ <string>/opt/homebrew/sbin/clamd</string>
285
+ <key>ProgramArguments</key>
286
+ <array>
287
+ <string>clamd</string>
288
+ </array>
289
+ <key>RunAtLoad</key>
290
+ <true/>
291
+ </dict>
292
+ </plist>
293
+ ```
294
+
295
+ 4. Enable it and start it on boot:
296
+
234
297
  ```sh
235
298
  sudo launchctl load /Library/LaunchDaemons/org.clamav.clamd.plist
236
299
  sudo launchctl start /Library/LaunchDaemons/org.clamav.clamd.plist
237
300
  ```
238
301
 
239
- 4. You may want to periodically run `freshclam` to update the config, or configure a similar `plist` configuration for `launchctl`.
302
+ 5. You may want to periodically run `freshclam` to update the virus definitions, or configure a similar `plist` configuration for `launchctl`.
240
303
 
241
304
 
242
305
  ## Install
@@ -244,7 +307,7 @@ Note that you can simply use the Spam Scanner API for free at <https://spamscann
244
307
  [npm][]:
245
308
 
246
309
  ```sh
247
- npm install spamscanner node-snowball
310
+ npm install spamscanner
248
311
  ```
249
312
 
250
313
 
@@ -359,7 +422,7 @@ Currently Spam Scanner supports the following locales for tokenization, stemming
359
422
  | Finnish | `fi` |
360
423
  | Farsi | `fa` |
361
424
  | French | `fr` |
362
- | German | `gr` |
425
+ | German | `de` |
363
426
  | Hungarian | `hu` |
364
427
  | Indonesian | `in` |
365
428
  | Italian | `it` |
@@ -406,7 +469,7 @@ A common example of this is a link of `рaypal.com` which when converted to ASCI
406
469
 
407
470
  This method checks against [Cloudflare for Families](https://developers.cloudflare.com/1.1.1.1/1.1.1.1-for-families) servers for adult-related content, malware, and phishing. This means we do two separate DNS over HTTPS requests to `1.1.1.2` for malware and `1.1.1.3` for adult-related content. You can inspect the `messages` results Array for messages that contain "adult-related content" if you need to decide whether or not to flag adult-related content in your application.
408
471
 
409
- If you are using Cloudflare for Families DNS servers as mentioned in [Requirements](#requirements)), then if there are any HTTPS over DNS request errors, it will fallback to use the DNS servers set on the system for lookups, which would in turn use Cloudflare for Family DNS. (using DNS over HTTPS with a fallback of [dns.resolve4](https://nodejs.org/api/dns.html#dns_dns_resolve4\_hostname_options_callback)) – and if it returns `0.0.0.0` then it is considered to be phishing.
472
+ If you are using Cloudflare for Families DNS servers (as mentioned in [Requirements](#requirements)), then if there are any DNS over HTTPS request errors, it will fall back to the DNS servers set on the system for lookups, which would in turn use Cloudflare for Families DNS (that is, DNS over HTTPS with a fallback of [dns.resolve4](https://nodejs.org/api/dns.html#dns_dns_resolve4_hostname_options_callback)). If a lookup returns `0.0.0.0`, then it is considered to be phishing.
410
473
 
411
474
  We actually helped Cloudflare in August 2020 to update their documentation to note that this result of `0.0.0.0` is returned for maliciously found content on FQDN and IP lookups.
412
475
 
@@ -501,6 +564,13 @@ const scanner = new SpamScanner({
501
564
  Note that in [Forward Email][forward-email] we use the `client` approach as we have multiple threads across multiple servers running, and in-memory caching would not be efficient.
502
565
 
503
566
 
567
+ ## Debugging
568
+
569
+ Spam Scanner has built-in debug output via `util.debuglog('spamscanner')`.
570
+
571
+ This means you can run your app with `NODE_DEBUG=spamscanner node app.js` to get useful debug output to your console.
572
+
573
+
504
574
  ## Contributors
505
575
 
506
576
  | Name | Website |
package/index.js CHANGED
@@ -1,8 +1,9 @@
1
+ const process = require('process');
1
2
  const dns = require('dns');
2
3
  const fs = require('fs');
3
- const { promisify } = require('util');
4
+ const { debuglog } = require('util');
4
5
 
5
- // eslint-disable-next-line node/no-deprecated-api
6
+ // eslint-disable-next-line n/no-deprecated-api
6
7
  const punycode = require('punycode');
7
8
 
8
9
  const ClamScan = require('clamscan');
@@ -12,7 +13,6 @@ const RE2 = require('re2');
12
13
  const bitcoinRegex = require('bitcoin-regex');
13
14
  const contractions = require('expand-contractions');
14
15
  const creditCardRegex = require('credit-card-regex');
15
- const debug = require('debug')('spamscanner');
16
16
  const emailRegexSafe = require('email-regex-safe');
17
17
  const emojiPatterns = require('emoji-patterns');
18
18
  const escapeStringRegexp = require('escape-string-regexp');
@@ -46,12 +46,15 @@ const toEmoji = require('gemoji/name-to-emoji');
46
46
  const universalify = require('universalify');
47
47
  const urlRegexSafe = require('url-regex-safe');
48
48
  const validator = require('validator');
49
+ const which = require('which');
49
50
  const { Iconv } = require('iconv');
50
51
  const { codes } = require('currency-codes');
51
52
  const { fromUrl, NO_HOSTNAME } = require('parse-domain');
52
53
  const { parse } = require('node-html-parser');
53
54
  const { simpleParser } = require('mailparser');
54
55
 
56
+ const debug = debuglog('spamscanner');
57
+
55
58
  const aggressiveTokenizer = new natural.AggressiveTokenizer();
56
59
  const orthographyTokenizer = new natural.OrthographyTokenizer({
57
60
  language: 'fi'
@@ -69,20 +72,115 @@ const aggressiveTokenizerSv = new natural.AggressiveTokenizerSv();
69
72
  const aggressiveTokenizerRu = new natural.AggressiveTokenizerRu();
70
73
  const aggressiveTokenizerVi = new natural.AggressiveTokenizerVi();
71
74
 
72
- const stopwordsEn = require('natural/lib/natural/util/stopwords').words;
73
- const stopwordsEs = require('natural/lib/natural/util/stopwords_es').words;
74
- const stopwordsFa = require('natural/lib/natural/util/stopwords_fa').words;
75
- const stopwordsFr = require('natural/lib/natural/util/stopwords_fr').words;
76
- const stopwordsId = require('natural/lib/natural/util/stopwords_id').words;
77
- const stopwordsJa = require('natural/lib/natural/util/stopwords_ja').words;
78
- const stopwordsIt = require('natural/lib/natural/util/stopwords_it').words;
79
- const stopwordsNl = require('natural/lib/natural/util/stopwords_nl').words;
80
- const stopwordsNo = require('natural/lib/natural/util/stopwords_no').words;
81
- const stopwordsPl = require('natural/lib/natural/util/stopwords_pl').words;
82
- const stopwordsPt = require('natural/lib/natural/util/stopwords_pt').words;
83
- const stopwordsRu = require('natural/lib/natural/util/stopwords_ru').words;
84
- const stopwordsSv = require('natural/lib/natural/util/stopwords_sv').words;
85
- const stopwordsZh = require('natural/lib/natural/util/stopwords_zh').words;
75
+ const stopwordsEn = new Set([
76
+ ...require('natural/lib/natural/util/stopwords').words,
77
+ ...sw.eng
78
+ ]);
79
+ const stopwordsEs = new Set([
80
+ ...require('natural/lib/natural/util/stopwords_es').words,
81
+ ...sw.spa
82
+ ]);
83
+ const stopwordsFa = new Set([
84
+ ...require('natural/lib/natural/util/stopwords_fa').words,
85
+ ...sw.fas
86
+ ]);
87
+ const stopwordsFr = new Set([
88
+ ...require('natural/lib/natural/util/stopwords_fr').words,
89
+ ...sw.fra
90
+ ]);
91
+ const stopwordsId = new Set([
92
+ ...require('natural/lib/natural/util/stopwords_id').words,
93
+ ...sw.ind
94
+ ]);
95
+ const stopwordsJa = new Set([
96
+ ...require('natural/lib/natural/util/stopwords_ja').words,
97
+ ...sw.jpn
98
+ ]);
99
+ const stopwordsIt = new Set([
100
+ ...require('natural/lib/natural/util/stopwords_it').words,
101
+ ...sw.ita
102
+ ]);
103
+ const stopwordsNl = new Set([
104
+ ...require('natural/lib/natural/util/stopwords_nl').words,
105
+ ...sw.nld
106
+ ]);
107
+ const stopwordsNo = new Set([
108
+ ...require('natural/lib/natural/util/stopwords_no').words,
109
+ ...sw.nob
110
+ ]);
111
+ const stopwordsPl = new Set([
112
+ ...require('natural/lib/natural/util/stopwords_pl').words,
113
+ ...sw.pol
114
+ ]);
115
+ const stopwordsPt = new Set([
116
+ ...require('natural/lib/natural/util/stopwords_pt').words,
117
+ ...sw.por,
118
+ ...sw.porBr
119
+ ]);
120
+ const stopwordsRu = new Set([
121
+ ...require('natural/lib/natural/util/stopwords_ru').words,
122
+ ...sw.rus
123
+ ]);
124
+ const stopwordsSv = new Set([
125
+ ...require('natural/lib/natural/util/stopwords_sv').words,
126
+ ...sw.swe
127
+ ]);
128
+ const stopwordsZh = new Set([
129
+ ...require('natural/lib/natural/util/stopwords_zh').words,
130
+ ...sw.zho
131
+ ]);
132
+
133
+ const stopwordsRon = new Set(sw.ron);
134
+ const stopwordsTur = new Set(sw.tur);
135
+ const stopwordsVie = new Set(sw.vie);
136
+ const stopwordsDeu = new Set(sw.deu);
137
+ const stopwordsHun = new Set(sw.hun);
138
+ const stopwordsAra = new Set(sw.ara);
139
+ const stopwordsDan = new Set(sw.dan);
140
+ const stopwordsFin = new Set(sw.fin);
141
+
142
+ // TODO: add stopword pairing for these langs:
143
+ // afr
144
+ // ben
145
+ // bre
146
+ // bul
147
+ // cat
148
+ // ces
149
+ // ell
150
+ // epo
151
+ // est
152
+ // eus
153
+ // fra
154
+ // gle
155
+ // glg
156
+ // guj
157
+ // hau
158
+ // heb
159
+ // hin
160
+ // hrv
161
+ // hye
162
+ // kor
163
+ // kur
164
+ // lat
165
+ // lav
166
+ // lgg
167
+ // lggNd
168
+ // lit
169
+ // mar
170
+ // msa
171
+ // mya
172
+ // panGu
173
+ // slk
174
+ // slv
175
+ // som
176
+ // sot
177
+ // swa
178
+ // tgl
179
+ // tha
180
+ // ukr
181
+ // urd
182
+ // yor
183
+ // zul
86
184
 
87
185
  // <https://stackoverflow.com/a/41353282>
88
186
  // <https://www.ietf.org/rfc/rfc3986.txt>
@@ -92,19 +190,18 @@ const ENDING_RESERVED_REGEX = new RE2(
92
190
 
93
191
  const PKG = require('./package.json');
94
192
 
95
- const VOCABULARY_LIMIT = require('./vocabulary-limit');
193
+ const VOCABULARY_LIMIT = require('./vocabulary-limit.js');
96
194
 
97
- const ISO_CODE_MAPPING = require('./iso-code-mapping');
195
+ // TODO: convert this into a Map
196
+ const ISO_CODE_MAPPING = require('./iso-code-mapping.json');
98
197
 
99
198
  // <https://kb.smarshmail.com/Article/23567>
100
- const EXECUTABLES = require('./executables');
199
+ const EXECUTABLES = new Set(require('./executables.json'));
101
200
 
102
- const REPLACEMENT_WORDS = require('./replacement-words');
201
+ const REPLACEMENT_WORDS = require('./replacement-words.json');
103
202
 
104
203
  const locales = new Set(i18nLocales.map((l) => l.toLowerCase()));
105
204
 
106
- const readFile = promisify(fs.readFile);
107
-
108
205
  const normalizeUrlOptions = {
109
206
  stripProtocol: true,
110
207
  stripWWW: false,
@@ -154,7 +251,8 @@ for (const code of codes()) {
154
251
  const symbol = getSymbolFromCurrency(code);
155
252
  if (
156
253
  typeof symbol === 'string' &&
157
- !currencySymbols.includes(symbol) &&
254
+ // eslint-disable-next-line unicorn/prefer-includes
255
+ currencySymbols.indexOf(symbol) === -1 &&
158
256
  !new RE2(/^[a-z]+$/i).test(symbol)
159
257
  )
160
258
  currencySymbols.push(escapeStringRegexp(symbol));
@@ -187,11 +285,13 @@ const isURLOptions = {
187
285
  class SpamScanner {
188
286
  constructor(config = {}) {
189
287
  this.config = {
190
- debug: process.env.NODE_ENV === 'test',
288
+ debug:
289
+ process.env.NODE_ENV === 'test' ||
290
+ process.env.NODE_ENV === 'development',
191
291
  checkIDNHomographAttack: false,
192
292
  // note that if you attempt to train an existing `scanner.classifier`
193
293
  // then you will need to re-use these, so we suggest you store them
194
- replacements: config.replacements || require('./replacements'),
294
+ replacements: config.replacements || require('./replacements.js'),
195
295
  // <https://nodemailer.com/extras/mailparser/>
196
296
  // NOTE: `iconv` package's Iconv cannot be used in worker threads
197
297
  // AND it can not also be shared in worker threads either (e.g. cloned)
@@ -203,7 +303,7 @@ class SpamScanner {
203
303
  // `wget --mirror --passive-ftp ftp://ftp.ietf.org/ietf-mail-archive`
204
304
  // `wget --mirror --passive-ftp ftp://ftp.ietf.org/concluded-wg-ietf-mail-archive`
205
305
  // (spam dataset is private at the moment)
206
- classifier: config.classifier || require('./get-classifier'),
306
+ classifier: config.classifier || require('./get-classifier.js'),
207
307
  // default locale validated against i18n-locales
208
308
  locale: 'en',
209
309
  // we recommend to use axe/cabin, see https://cabinjs.com
@@ -310,10 +410,17 @@ class SpamScanner {
310
410
  allowedAttributes: false
311
411
  },
312
412
  userAgent: `${PKG.name}/${PKG.version}`,
313
- timeout: ms('5s'),
413
+ timeout: ms('10s'),
314
414
  clamscan: {
415
+ debugMode:
416
+ process.env.NODE_ENV === 'test' ||
417
+ process.env.NODE_ENV === 'development',
418
+ clamscan: {
419
+ path: which.sync('clamscan', { nothrow: true })
420
+ },
315
421
  clamdscan: {
316
422
  timeout: ms('10s'),
423
+ path: which.sync('clamdscan', { nothrow: true }),
317
424
  socket: macosVersion.isMacOS
318
425
  ? '/tmp/clamd.socket'
319
426
  : '/var/run/clamav/clamd.ctl'
@@ -416,7 +523,6 @@ class SpamScanner {
416
523
  // cache in the background
417
524
  this.config.client
418
525
  .set(key, `${isAdult}:${isMalware}`, 'PX', this.config.ttlMs)
419
- // eslint-disable-next-line promise/prefer-await-to-then
420
526
  .then(this.config.logger.info)
421
527
  .catch(this.config.logger.error);
422
528
  return { isAdult, isMalware };
@@ -431,6 +537,27 @@ class SpamScanner {
431
537
  throw new Error(
432
538
  `Locale of ${this.config.locale} was not valid according to locales list.`
433
539
  );
540
+
541
+ //
542
+ // set up regex helpers
543
+ //
544
+ this.EMAIL_REPLACEMENT_REGEX = new RE2(this.config.replacements.email, 'g');
545
+ const replacementRegexes = [];
546
+ for (const key of Object.keys(this.config.replacements)) {
547
+ replacementRegexes.push(
548
+ escapeStringRegexp(this.config.replacements[key])
549
+ );
550
+ }
551
+
552
+ this.REPLACEMENTS_REGEX = new RE2(
553
+ new RegExp(replacementRegexes.join('|'), 'g')
554
+ );
555
+
556
+ //
557
+ // set up helper Map and Sets for fast lookup
558
+ // (Set.has is 2x faster than includes, and 50% faster than indexOf)
559
+ //
560
+ this.WHITELISTED_WORDS = new Set(Object.values(this.config.replacements));
434
561
  }
435
562
 
436
563
  getHostname(link) {
@@ -520,17 +647,12 @@ class SpamScanner {
520
647
  const stream = isStream(attachment.content)
521
648
  ? attachment.content
522
649
  : intoStream(attachment.content);
523
- const {
524
- is_infected: isInfected,
525
- viruses
526
- } = await clamscan.scan_stream(stream);
650
+ const { isInfected, viruses } = await clamscan.scanStream(stream);
527
651
  const name = isSANB(attachment.filename)
528
652
  ? `"${attachment.filename}"`
529
653
  : `#${i + 1}`;
530
654
  if (isInfected)
531
- messages.push(
532
- `Attachment ${name} was infected with "${viruses}".`
533
- );
655
+ messages.push(`Attachment ${name} was infected with ${viruses}.`);
534
656
  } catch (err) {
535
657
  this.config.logger.error(err);
536
658
  }
@@ -548,13 +670,16 @@ class SpamScanner {
548
670
 
549
671
  let gtube = false;
550
672
 
551
- if (isSANB(mail.html) && mail.html.includes(GTUBE)) gtube = true;
673
+ // eslint-disable-next-line unicorn/prefer-includes
674
+ if (isSANB(mail.html) && mail.html.indexOf(GTUBE) !== -1) gtube = true;
552
675
 
553
- if (isSANB(mail.text) && !gtube && mail.text.includes(GTUBE)) gtube = true;
676
+ // eslint-disable-next-line unicorn/prefer-includes
677
+ if (isSANB(mail.text) && !gtube && mail.text.indexOf(GTUBE) !== -1)
678
+ gtube = true;
554
679
 
555
680
  if (gtube)
556
681
  messages.push(
557
- 'Message detected to contain the GTUBE test from <https://spamassassin.apache.org/gtube/>.'
682
+ 'Message detected to contain the GTUBE test from https://spamassassin.apache.org/gtube/.'
558
683
  );
559
684
 
560
685
  return messages;
@@ -597,9 +722,8 @@ class SpamScanner {
597
722
  records[0] === '0.0.0.0'
598
723
  );
599
724
  } catch (err) {
600
- this.config.logger.error(err);
601
- // return true if there is an error with DNS lookups
602
- return true;
725
+ this.config.logger.warn(err);
726
+ return false;
603
727
  }
604
728
  }
605
729
  }
@@ -621,8 +745,6 @@ class SpamScanner {
621
745
  //
622
746
  // However we don't recommend this and therefore have our servers set to standard Cloudflare DNS
623
747
  //
624
- // TODO: we need to do two lookups in parallel, one against adult and one against malware
625
- // and also make sure the messages aren't duplicated when we concatenate final array of messages
626
748
  const [isAdult, isMalware] = await Promise.all([
627
749
  this.malwareLookup('https://family.cloudflare-dns.com/dns-query', name),
628
750
  this.malwareLookup('https://security.cloudflare-dns.com/dns-query', name)
@@ -744,14 +866,14 @@ class SpamScanner {
744
866
  })
745
867
  .match(URL_REGEX) || [];
746
868
 
747
- const array = [];
869
+ const array = new Set();
748
870
  for (const url of urls) {
749
871
  const normalized = this.getNormalizedUrl(url);
750
872
 
751
- if (normalized && !array.includes(normalized)) array.push(normalized);
873
+ if (normalized) array.add(normalized);
752
874
  }
753
875
 
754
- return array;
876
+ return [...array];
755
877
  }
756
878
 
757
879
  parseLocale(locale) {
@@ -765,12 +887,6 @@ class SpamScanner {
765
887
  // <https://github.com/NaturalNode/natural#stemmers>
766
888
  // eslint-disable-next-line complexity
767
889
  async getTokens(string, locale, isHTML = false) {
768
- // get the current email replacement regex
769
- const EMAIL_REPLACEMENT_REGEX = new RE2(
770
- this.config.replacements.email,
771
- 'g'
772
- );
773
-
774
890
  //
775
891
  // parse HTML for <html> tag with lang attr
776
892
  // otherwise if that wasn't found then look for this
@@ -818,17 +934,6 @@ class SpamScanner {
818
934
 
819
935
  if (isHTML) string = sanitizeHtml(string, this.config.sanitizeHtml);
820
936
 
821
- const replacementRegexes = [];
822
- for (const key of Object.keys(this.config.replacements)) {
823
- replacementRegexes.push(
824
- escapeStringRegexp(this.config.replacements[key])
825
- );
826
- }
827
-
828
- const REPLACEMENTS_REGEX = new RE2(
829
- new RegExp(replacementRegexes.join('|'), 'g')
830
- );
831
-
832
937
  string = striptags(string, [], ' ')
833
938
  .trim()
834
939
  // replace newlines
@@ -837,7 +942,7 @@ class SpamScanner {
837
942
  // attackers may try to inject our replacements into the message
838
943
  // therefore we should strip all of them before doing any replacements
839
944
  //
840
- .replace(REPLACEMENTS_REGEX, ' ');
945
+ .replace(this.REPLACEMENTS_REGEX, ' ');
841
946
 
842
947
  //
843
948
  // we should instead use language detection to determine
@@ -855,7 +960,8 @@ class SpamScanner {
855
960
 
856
961
  locale = this.parseLocale(isSANB(locale) ? locale : this.config.locale);
857
962
 
858
- if (!locales.has(locale)) {
963
+ // NOTE: "in" and "po" are valid locales but not from i18n
964
+ if (!locales.has(locale) && locale !== 'in' && locale !== 'po') {
859
965
  debug(`Locale ${locale} was not valid and will use default`);
860
966
  locale = this.parseLocale(this.config.locale);
861
967
  }
@@ -867,103 +973,145 @@ class SpamScanner {
867
973
  let stopwords = stopwordsEn;
868
974
  let language = 'english';
869
975
  let stemword = 'default';
976
+
870
977
  switch (locale) {
871
978
  case 'ar':
979
+ // arb
980
+ // ISO 639-3 = ara
981
+ stopwords = stopwordsAra;
872
982
  language = 'arabic';
873
983
  break;
874
984
  case 'da':
985
+ // dan
875
986
  language = 'danish';
987
+ stopwords = stopwordsDan;
876
988
  break;
877
989
  case 'nl':
990
+ // nld
878
991
  stopwords = stopwordsNl;
879
992
  language = 'dutch';
880
993
  break;
881
994
  case 'en':
995
+ // eng
882
996
  language = 'english';
883
997
  break;
884
998
  case 'fi':
999
+ // fin
885
1000
  language = 'finnish';
886
1001
  tokenizer = orthographyTokenizer;
1002
+ stopwords = stopwordsFin;
887
1003
  break;
888
1004
  case 'fa':
1005
+ // fas (Persian/Farsi)
889
1006
  language = 'farsi';
890
1007
  tokenizer = aggressiveTokenizerFa;
891
1008
  stopwords = stopwordsFa;
892
1009
  stemword = natural.PorterStemmerFa.stem.bind(natural.PorterStemmerFa);
893
1010
  break;
894
1011
  case 'fr':
1012
+ // fra
895
1013
  language = 'french';
896
1014
  tokenizer = aggressiveTokenizerFr;
897
1015
  stopwords = stopwordsFr;
898
1016
  break;
899
1017
  case 'de':
1018
+ // deu
900
1019
  language = 'german';
1020
+ stopwords = stopwordsDeu;
901
1021
  break;
902
1022
  case 'hu':
1023
+ // hun
903
1024
  language = 'hungarian';
1025
+ stopwords = stopwordsHun;
904
1026
  break;
905
1027
  case 'in':
1028
+ // ind
906
1029
  language = 'indonesian';
907
1030
  tokenizer = aggressiveTokenizerId;
908
1031
  stopwords = stopwordsId;
909
1032
  break;
910
1033
  case 'it':
1034
+ // ita
911
1035
  language = 'italian';
912
1036
  tokenizer = aggressiveTokenizerIt;
913
1037
  stopwords = stopwordsIt;
914
1038
  break;
915
1039
  case 'ja':
1040
+ // jpn
916
1041
  tokenizer = tokenizerJa;
917
1042
  stopwords = stopwordsJa;
918
1043
  stemword = natural.StemmerJa.stem.bind(natural.StemmerJa);
919
1044
  break;
920
1045
  case 'nb':
1046
+ // nob
1047
+ language = 'norwegian';
1048
+ tokenizer = aggressiveTokenizerNo;
1049
+ stopwords = stopwordsNo;
1050
+ break;
921
1051
  case 'nn':
1052
+ // nno
1053
+ // ISO 639-3 = nob
922
1054
  language = 'norwegian';
923
1055
  tokenizer = aggressiveTokenizerNo;
924
1056
  stopwords = stopwordsNo;
925
1057
  break;
926
1058
  case 'po':
1059
+ // pol
927
1060
  language = 'polish';
928
1061
  tokenizer = aggressiveTokenizerPl;
929
1062
  stopwords = stopwordsPl;
930
1063
  stemword = false;
931
1064
  break;
932
1065
  case 'pt':
1066
+ // por
933
1067
  language = 'portuguese';
934
1068
  tokenizer = aggressiveTokenizerPt;
935
1069
  stopwords = stopwordsPt;
936
1070
  break;
937
1071
  case 'es':
1072
+ // spa
938
1073
  language = 'spanish';
939
1074
  tokenizer = aggressiveTokenizerEs;
940
1075
  stopwords = stopwordsEs;
941
1076
  break;
942
1077
  case 'sv':
1078
+ // swe
943
1079
  language = 'swedish';
944
1080
  tokenizer = aggressiveTokenizerSv;
945
1081
  stopwords = stopwordsSv;
946
1082
  break;
947
1083
  case 'ro':
1084
+ // ron
948
1085
  language = 'romanian';
1086
+ stopwords = stopwordsRon;
949
1087
  break;
950
1088
  case 'ru':
1089
+ // rus
951
1090
  language = 'russian';
952
1091
  tokenizer = aggressiveTokenizerRu;
953
1092
  stopwords = stopwordsRu;
954
1093
  break;
955
1094
  case 'ta':
1095
+ // tam
1096
+ // NOTE: no stopwords available
956
1097
  language = 'tamil';
957
1098
  break;
958
1099
  case 'tr':
1100
+ // tur
959
1101
  language = 'turkish';
1102
+ stopwords = stopwordsTur;
960
1103
  break;
961
1104
  case 'vi':
1105
+ // vie
962
1106
  language = 'vietnamese';
963
1107
  tokenizer = aggressiveTokenizerVi;
1108
+ stopwords = stopwordsVie;
964
1109
  stemword = false;
965
1110
  break;
966
1111
  case 'zh':
1112
+ // cmn
1113
+ // TODO: use this instead https://github.com/yishn/chinese-tokenizer
1114
+ // ISO 639-3 = zho (Chinese, Macrolanguage)
967
1115
  language = 'chinese';
968
1116
  stopwords = stopwordsZh;
969
1117
  stemword = false;
@@ -981,7 +1129,7 @@ class SpamScanner {
981
1129
  string
982
1130
  .split(' ')
983
1131
  .map((_string) =>
984
- _string.startsWith(':') &&
1132
+ _string.indexOf(':') === 0 &&
985
1133
  _string.endsWith(':') &&
986
1134
  typeof toEmoji[_string.slice(1, -1)] === 'string'
987
1135
  ? toEmoji[_string.slice(1, -1)]
@@ -1029,7 +1177,10 @@ class SpamScanner {
1029
1177
 
1030
1178
  // now we ensure that URL's and EMAIL's are properly spaced out
1031
1179
  // (e.g. in case ?email=some@email.com was in a URL)
1032
- .replace(EMAIL_REPLACEMENT_REGEX, ` ${this.config.replacements.email} `)
1180
+ .replace(
1181
+ this.EMAIL_REPLACEMENT_REGEX,
1182
+ ` ${this.config.replacements.email} `
1183
+ )
1033
1184
 
1034
1185
  // TODO: replace file paths, file dirs, dotfiles, and dotdirs
1035
1186
 
@@ -1044,12 +1195,14 @@ class SpamScanner {
1044
1195
  // replace currency
1045
1196
  .replace(CURRENCY_REGEX, ` ${this.config.replacements.currency} `);
1046
1197
 
1198
+ //
1047
1199
  // expand contractions so "they're" -> [ they, are ] vs. [ they, re ]
1048
1200
  // <https://github.com/NaturalNode/natural/issues/533>
1049
- if (locale === 'en') string = contractions.expand(string);
1050
-
1051
- // whitelist exclusions
1052
- const whitelistedWords = Object.values(this.config.replacements);
1201
+ //
1202
+ // NOTE: we're doing this for all languages now, not just en
1203
+ // if (locale === 'en')
1204
+ //
1205
+ string = contractions.expand(string);
1053
1206
 
1054
1207
  //
1055
1208
  // Future research:
@@ -1063,43 +1216,32 @@ class SpamScanner {
1063
1216
  for (const token of tokenizer.tokenize(string.toLowerCase())) {
1064
1217
  // whitelist words from being stemmed (safeguard)
1065
1218
  if (
1066
- whitelistedWords.includes(token) ||
1067
- token.startsWith(this.config.replacements.initialism) ||
1068
- token.startsWith(this.config.replacements.abbrevation)
1219
+ this.WHITELISTED_WORDS.has(token) ||
1220
+ token.indexOf(this.config.replacements.initialism) === 0 ||
1221
+ token.indexOf(this.config.replacements.abbrevation) === 0
1069
1222
  ) {
1070
1223
  tokens.push(token);
1071
1224
  continue;
1072
1225
  }
1073
1226
 
1074
- if (
1075
- stopwords.includes(token) ||
1076
- (sw[locale] && sw[locale].includes(token)) ||
1077
- (locale !== 'en' &&
1078
- (stopwordsEn.includes(token) || sw.en.includes(token)))
1079
- )
1227
+ if (stopwords.has(token) || (locale !== 'en' && stopwordsEn.has(token))) {
1080
1228
  continue;
1229
+ }
1081
1230
 
1082
1231
  // locale specific stopwords to ignore
1083
1232
  let localeStem;
1084
1233
  if (typeof stemword === 'function') {
1085
1234
  localeStem = stemword(token);
1086
- if (
1087
- localeStem &&
1088
- (stopwords.includes(localeStem) ||
1089
- (sw[locale] && sw[locale].includes(localeStem)))
1090
- )
1235
+ if (localeStem && stopwords.has(localeStem)) {
1091
1236
  continue;
1237
+ }
1092
1238
  }
1093
1239
 
1094
1240
  // always check against English stemwords
1095
1241
  let englishStem;
1096
1242
  if (locale !== 'en') {
1097
1243
  englishStem = snowball.stemword(token, 'english');
1098
- if (
1099
- englishStem &&
1100
- (stopwordsEn.includes(englishStem) || sw.en.includes(englishStem))
1101
- )
1102
- continue;
1244
+ if (englishStem && stopwordsEn.has(englishStem)) continue;
1103
1245
  }
1104
1246
 
1105
1247
  tokens.push(
@@ -1107,6 +1249,8 @@ class SpamScanner {
1107
1249
  );
1108
1250
  }
1109
1251
 
1252
+ debug('locale', locale, 'tokens', tokens);
1253
+
1110
1254
  if (this.config.debug) return tokens;
1111
1255
 
1112
1256
  // we should sha256 all tokens with hasha if not in debug mode
@@ -1119,7 +1263,7 @@ class SpamScanner {
1119
1263
  let source = string;
1120
1264
  if (isBuffer(string)) source = string.toString();
1121
1265
  else if (typeof string === 'string' && isValidPath(string))
1122
- source = await readFile(string);
1266
+ source = await fs.promises.readFile(string);
1123
1267
 
1124
1268
  const tokens = [];
1125
1269
  const mail = await simpleParser(source, this.config.simpleParser);
@@ -1157,12 +1301,11 @@ class SpamScanner {
1157
1301
 
1158
1302
  // eslint-disable-next-line complexity
1159
1303
  async getPhishingResults(mail) {
1160
- const messages = [];
1161
-
1304
+ const messages = new Set();
1162
1305
  //
1163
1306
  // NOTE: all links pushed are lowercased
1164
1307
  //
1165
- const links = [];
1308
+ const links = new Set();
1166
1309
 
1167
1310
  // parse <a> tags with different org domain in text vs the link
1168
1311
  if (isSANB(mail.html)) {
@@ -1172,7 +1315,7 @@ class SpamScanner {
1172
1315
  // elements concatenate to form a URL which is malicious or phishing
1173
1316
  //
1174
1317
  for (const link of this.getUrls(striptags(mail.html, [], ' ').trim())) {
1175
- if (!links.includes(link)) links.push(link);
1318
+ links.add(link);
1176
1319
  }
1177
1320
 
1178
1321
  //
@@ -1214,7 +1357,7 @@ class SpamScanner {
1214
1357
  // (this is needed because some have "Web:%20http://google.com" for example in href tags)
1215
1358
  [href] = this.getUrls(href);
1216
1359
  // eslint-disable-next-line max-depth
1217
- if (href && !links.includes(href)) links.push(href);
1360
+ if (href) links.add(href);
1218
1361
  }
1219
1362
 
1220
1363
  // the text content could contain multiple URL's
@@ -1224,18 +1367,17 @@ class SpamScanner {
1224
1367
  isSANB(href) &&
1225
1368
  validator.isURL(href, isURLOptions)
1226
1369
  ) {
1227
- const string = `Anchor link with href of "${href}" and inner text value of "${textContent}"`;
1370
+ const string = `Anchor link with href of ${href} and inner text value of "${textContent}"`;
1228
1371
  // eslint-disable-next-line max-depth
1229
1372
  if (this.config.checkIDNHomographAttack) {
1230
1373
  const anchorUrlHostname = this.getHostname(href);
1231
1374
  // eslint-disable-next-line max-depth
1232
1375
  if (anchorUrlHostname) {
1233
- const anchorUrlHostnameToASCII = punycode.toASCII(
1234
- anchorUrlHostname
1235
- );
1376
+ const anchorUrlHostnameToASCII =
1377
+ punycode.toASCII(anchorUrlHostname);
1236
1378
  // eslint-disable-next-line max-depth
1237
- if (anchorUrlHostnameToASCII.startsWith('xn--'))
1238
- messages.push(
1379
+ if (anchorUrlHostnameToASCII.indexOf('xn--') === 0)
1380
+ messages.add(
1239
1381
  `${string} has possible IDN homograph attack from anchor hostname.`
1240
1382
  );
1241
1383
  }
@@ -1244,20 +1386,19 @@ class SpamScanner {
1244
1386
  // eslint-disable-next-line max-depth
1245
1387
  for (const link of this.getUrls(textContent)) {
1246
1388
  // this link should have already been included but just in case
1247
- // eslint-disable-next-line max-depth
1248
- if (!links.includes(link)) links.push(link);
1389
+
1390
+ links.add(link);
1249
1391
 
1250
1392
  // eslint-disable-next-line max-depth
1251
1393
  if (this.config.checkIDNHomographAttack) {
1252
1394
  const innerTextUrlHostname = this.getHostname(link);
1253
1395
  // eslint-disable-next-line max-depth
1254
1396
  if (innerTextUrlHostname) {
1255
- const innerTextUrlHostnameToASCII = punycode.toASCII(
1256
- innerTextUrlHostname
1257
- );
1397
+ const innerTextUrlHostnameToASCII =
1398
+ punycode.toASCII(innerTextUrlHostname);
1258
1399
  // eslint-disable-next-line max-depth
1259
- if (innerTextUrlHostnameToASCII.startsWith('xn--'))
1260
- messages.push(
1400
+ if (innerTextUrlHostnameToASCII.indexOf('xn--') === 0)
1401
+ messages.add(
1261
1402
  `${string} has possible IDN homograph attack from inner text hostname.`
1262
1403
  );
1263
1404
  }
@@ -1273,49 +1414,46 @@ class SpamScanner {
1273
1414
  for (const prop of MAIL_PHISHING_PROPS) {
1274
1415
  if (isSANB(mail[prop])) {
1275
1416
  for (const link of this.getUrls(mail[prop])) {
1276
- if (!links.includes(link)) links.push(link);
1417
+ links.add(link);
1277
1418
  }
1278
1419
  }
1279
1420
  }
1280
1421
 
1281
- for (const link of links) {
1282
- const urlHostname = this.getHostname(link);
1283
- if (urlHostname) {
1284
- const toASCII = punycode.toASCII(urlHostname);
1285
- if (toASCII.startsWith('xn--'))
1286
- messages.push(
1287
- `Possible IDN homograph attack from link of "${link}" with punycode converted hostname of "${toASCII}".`
1288
- );
1422
+ if (this.config.checkIDNHomographAttack) {
1423
+ for (const link of links) {
1424
+ const urlHostname = this.getHostname(link);
1425
+ if (urlHostname) {
1426
+ const toASCII = punycode.toASCII(urlHostname);
1427
+ if (toASCII.indexOf('xn--') === 0)
1428
+ messages.add(
1429
+ `Possible IDN homograph attack from link of ${link} with punycode converted hostname of ${toASCII}.`
1430
+ );
1431
+ }
1289
1432
  }
1290
1433
  }
1291
1434
 
1292
1435
  // check against Cloudflare malware/phishing/adult DNS lookup
1293
1436
  // if it returns `0.0.0.0` it means it was flagged
1294
1437
  await Promise.all(
1295
- links.map(async (link) => {
1438
+ [...links].map(async (link) => {
1296
1439
  try {
1297
1440
  const urlHostname = this.getHostname(link);
1298
1441
  if (urlHostname) {
1299
1442
  const toASCII = punycode.toASCII(urlHostname);
1300
- const adultMessage = `Link hostname of "${toASCII}" was detected by Cloudflare's Family DNS to contain adult-related content, phishing, and/or malware.`;
1301
- const malwareMessage = `Link hostname of ${toASCII}" was detected by Cloudflare's Security DNS to contain phishing and/or malware.`;
1443
+ const adultMessage = `Link hostname of ${toASCII} was detected by Cloudflare's Family DNS to contain adult-related content, phishing, and/or malware.`;
1444
+ const malwareMessage = `Link hostname of ${toASCII} was detected by Cloudflare's Security DNS to contain phishing and/or malware.`;
1302
1445
 
1303
1446
  // if it already included both messages then return early
1304
- if (
1305
- messages.includes(adultMessage) &&
1306
- messages.includes(malwareMessage)
1307
- )
1447
+ if (messages.has(adultMessage) && messages.has(malwareMessage))
1308
1448
  return;
1309
1449
 
1310
- const {
1311
- isAdult,
1312
- isMalware
1313
- } = await this.memoizedIsCloudflareBlocked(toASCII);
1450
+ const { isAdult, isMalware } =
1451
+ await this.memoizedIsCloudflareBlocked(toASCII);
1314
1452
 
1315
- if (isAdult && !messages.includes(adultMessage))
1316
- messages.push(adultMessage);
1317
- if (isMalware && !messages.includes(malwareMessage))
1318
- messages.push(malwareMessage);
1453
+ if (isAdult && !messages.has(adultMessage))
1454
+ messages.add(adultMessage);
1455
+ if (isMalware && !messages.has(malwareMessage))
1456
+ messages.add(malwareMessage);
1319
1457
  }
1320
1458
  } catch (err) {
1321
1459
  this.config.logger.error(err);
@@ -1323,7 +1461,7 @@ class SpamScanner {
1323
1461
  })
1324
1462
  );
1325
1463
 
1326
- return { messages, links };
1464
+ return { messages: [...messages], links: [...links] };
1327
1465
  }
1328
1466
 
1329
1467
  // getNSFWResults() {
@@ -1344,7 +1482,7 @@ class SpamScanner {
1344
1482
  try {
1345
1483
  const fileType = await FileType.fromBuffer(attachment.content);
1346
1484
 
1347
- if (fileType && fileType.ext && EXECUTABLES.includes(fileType.ext))
1485
+ if (fileType && fileType.ext && EXECUTABLES.has(fileType.ext))
1348
1486
  messages.push(
1349
1487
  `Attachment's "magic number" indicated it was a dangerous executable with a ".${fileType.ext}" extension.`
1350
1488
  );
@@ -1359,7 +1497,7 @@ class SpamScanner {
1359
1497
  punycode.toUnicode(attachment.filename.split('?')[0])
1360
1498
  );
1361
1499
  const ext = fileExtension(filename);
1362
- if (ext && EXECUTABLES.includes(ext))
1500
+ if (ext && EXECUTABLES.has(ext))
1363
1501
  messages.push(
1364
1502
  `Attachment's file name indicated it was a dangerous executable with a ".${ext}" extension.`
1365
1503
  );
@@ -1367,7 +1505,7 @@ class SpamScanner {
1367
1505
 
1368
1506
  if (isSANB(attachment.contentType)) {
1369
1507
  const ext = mime.extension(attachment.contentType);
1370
- if (isSANB(ext) && EXECUTABLES.includes(ext))
1508
+ if (isSANB(ext) && EXECUTABLES.has(ext))
1371
1509
  messages.push(
1372
1510
  `Attachment's Content-Type was a dangerous executable with a ".${ext}" extension.`
1373
1511
  );
package/package.json CHANGED
@@ -1,22 +1,12 @@
1
1
  {
2
2
  "name": "spamscanner",
3
3
  "description": "Spam Scanner - The Best Anti-Spam Scanning Service and Anti-Spam API",
4
- "version": "3.0.6",
4
+ "version": "5.0.0",
5
5
  "author": "Niftylettuce, LLC. <niftylettuce@gmail.com> (https://niftylettuce.com/)",
6
- "ava": {
7
- "timeout": "30s",
8
- "verbose": true,
9
- "serial": true
10
- },
11
6
  "bugs": {
12
7
  "url": "https://github.com/spamscanner/spamscanner/issues",
13
8
  "email": "niftylettuce@gmail.com"
14
9
  },
15
- "commitlint": {
16
- "extends": [
17
- "@commitlint/config-conventional"
18
- ]
19
- },
20
10
  "contributors": [
21
11
  "Nick Baugh <niftylettuce@gmail.com> (http://niftylettuce.com/)",
22
12
  "Shaun Warman <shaunwarman1@gmail.com> (http://shaunwarman.com/)"
@@ -24,82 +14,81 @@
24
14
  "dependencies": {
25
15
  "@ladjs/naivebayes": "^0.1.0",
26
16
  "bitcoin-regex": "^2.0.0",
27
- "clamscan": "^1.3.3",
17
+ "clamscan": "^2.1.2",
28
18
  "credit-card-regex": "^3.0.0",
29
- "crypto-random-string": "^3.3.1",
19
+ "crypto-random-string": "3",
30
20
  "currency-codes": "^2.1.0",
31
- "currency-symbol-map": "^5.0.1",
32
- "debug": "^4.3.1",
21
+ "currency-symbol-map": "^5.1.0",
33
22
  "email-regex-safe": "^1.0.2",
34
- "emoji-patterns": "^13.1.0",
35
- "escape-string-regexp": "^4.0.0",
23
+ "emoji-patterns": "^14.0.1",
24
+ "escape-string-regexp": "4",
36
25
  "expand-contractions": "^1.0.1",
37
26
  "file-extension": "^4.0.5",
38
- "file-type": "^16.2.0",
27
+ "file-type": "16",
39
28
  "floating-point-regex": "^0.1.0",
40
- "franc": "^5.0.0",
41
- "gemoji": "^6.1.0",
29
+ "franc": "5",
30
+ "gemoji": "6",
42
31
  "hasha": "^5.2.2",
43
32
  "hexa-color-regex": "^1.0.0",
44
- "i18n-locales": "^0.0.4",
45
- "iconv": "^3.0.0",
46
- "into-stream": "^6.0.0",
47
- "ip-regex": "^4.3.0",
33
+ "i18n-locales": "^0.0.5",
34
+ "iconv": "^3.0.1",
35
+ "into-stream": "6",
36
+ "ip-regex": "4",
48
37
  "is-buffer": "^2.0.5",
49
- "is-stream": "^2.0.0",
38
+ "is-stream": "2",
50
39
  "is-string-and-not-blank": "^0.0.2",
51
40
  "is-valid-path": "^0.1.1",
52
41
  "mac-regex": "^1.0.0",
53
- "macos-version": "^5.2.1",
54
- "mailparser": "^3.0.1",
42
+ "macos-version": "5",
43
+ "mailparser": "^3.5.0",
55
44
  "memoizee": "^0.4.15",
56
- "mime-types": "^2.1.28",
45
+ "mime-types": "^2.1.35",
57
46
  "ms": "^2.1.3",
58
- "natural": "^4.0.0",
47
+ "natural": "^5.2.2",
59
48
  "newline-remove": "^1.0.2",
60
- "node-html-parser": "^2.1.0",
49
+ "node-html-parser": "4",
61
50
  "node-snowball": "^0.6.0",
62
- "normalize-url": "^5.3.0",
63
- "parse-domain": "^3.0.3",
51
+ "normalize-url": "5",
52
+ "parse-domain": "5",
64
53
  "phone-regex": "^2.1.0",
65
54
  "punycode": "^2.1.1",
66
- "re2": "^1.15.9",
67
- "sanitize-html": "^2.3.2",
68
- "stopword": "^1.0.6",
69
- "striptags": "^3.1.1",
70
- "superagent": "^6.1.0",
55
+ "re2": "^1.17.6",
56
+ "sanitize-html": "^2.7.0",
57
+ "stopword": "^2.0.2",
58
+ "striptags": "^3.2.0",
59
+ "superagent": "^7.1.6",
71
60
  "trim-leading-whitespace": "^0.1.1",
72
61
  "universalify": "^2.0.0",
73
- "url-regex-safe": "^2.0.2",
74
- "validator": "^13.5.2"
62
+ "url-regex-safe": "^3.0.0",
63
+ "validator": "^13.7.0",
64
+ "which": "^2.0.2"
75
65
  },
76
66
  "devDependencies": {
77
- "@commitlint/cli": "^11.0.0",
78
- "@commitlint/config-conventional": "^11.0.0",
67
+ "@commitlint/cli": "^17.0.2",
68
+ "@commitlint/config-conventional": "^17.0.2",
79
69
  "@ladjs/redis": "^1.0.7",
80
- "ava": "^3.15.0",
81
- "codecov": "^3.8.1",
70
+ "ava": "^4.3.0",
82
71
  "cross-env": "^7.0.3",
83
72
  "delay": "^5.0.0",
84
- "eslint": "^7.20.0",
85
- "eslint-config-xo-lass": "^1.0.5",
73
+ "eslint": "^8.17.0",
74
+ "eslint-config-xo-lass": "^2.0.1",
86
75
  "fixpack": "^4.0.0",
87
- "husky": "^5.0.9",
88
- "is-ci": "^2.0.0",
89
- "lint-staged": "^10.5.4",
90
- "lookpath": "^1.1.0",
76
+ "husky": "^8.0.1",
77
+ "is-ci": "^3.0.1",
78
+ "lint-staged": "^13.0.1",
79
+ "lookpath": "^1.2.2",
91
80
  "make-dir": "^3.1.0",
92
81
  "node-mbox": "^1.0.0",
93
82
  "numeral": "^2.0.6",
94
83
  "nyc": "^15.1.0",
95
- "p-map": "^4.0.0",
84
+ "p-map": "4",
96
85
  "read-dir-deep": "^7.0.1",
97
- "remark-cli": "^9.0.0",
98
- "remark-preset-github": "^4.0.1",
99
- "xo": "^0.37.1"
86
+ "remark-cli": "^10.0.1",
87
+ "remark-preset-github": "^4.0.4",
88
+ "xo": "^0.50.0"
100
89
  },
101
90
  "engines": {
102
- "node": ">=12.11.0"
91
+ "node": ">=14"
103
92
  },
104
93
  "files": [
105
94
  "package.json",
@@ -114,12 +103,6 @@
114
103
  "classifier.json"
115
104
  ],
116
105
  "homepage": "https://github.com/spamscanner/spamscanner",
117
- "husky": {
118
- "hooks": {
119
- "pre-commit": "lint-staged",
120
- "commit-msg": "commitlint -E HUSKY_GIT_PARAMS"
121
- }
122
- },
123
106
  "keywords": [
124
107
  "adult",
125
108
  "api",
@@ -172,38 +155,17 @@
172
155
  ],
173
156
  "license": "Business Source License 1.1",
174
157
  "main": "index.js",
175
- "prettier": {
176
- "singleQuote": true,
177
- "bracketSpacing": true,
178
- "trailingComma": "none"
179
- },
180
- "remarkConfig": {
181
- "plugins": [
182
- "preset-github"
183
- ]
184
- },
185
158
  "repository": {
186
159
  "type": "git",
187
160
  "url": "https://github.com/spamscanner/spamscanner"
188
161
  },
189
162
  "scripts": {
190
163
  "ava": "cross-env NODE_ENV=test ava",
191
- "coverage": "nyc report --reporter=text-lcov > coverage.lcov && codecov",
192
- "lint": "xo && remark . -qfo",
164
+ "lint": "xo --fix && remark . -qfo && fixpack",
193
165
  "nyc": "cross-env NODE_ENV=test nyc ava",
194
- "test": "npm run lint && npm run ava",
166
+ "prepare": "husky install",
167
+ "pretest": "npm run lint",
168
+ "test": "npm run test-coverage",
195
169
  "test-coverage": "npm run lint && npm run nyc"
196
- },
197
- "xo": {
198
- "prettier": true,
199
- "space": true,
200
- "extends": [
201
- "xo-lass"
202
- ],
203
- "ignores": [
204
- "data",
205
- "classifier.json",
206
- "bag-of-words.json"
207
- ]
208
170
  }
209
171
  }
@@ -1,5 +1,7 @@
1
+ const process = require('process');
2
+
1
3
  module.exports =
2
4
  typeof process.env.VOCABULARY_LIMIT !== 'undefined' &&
3
5
  Number.isFinite(Number.parseInt(process.env.VOCABULARY_LIMIT, 10))
4
6
  ? Number.parseInt(process.env.VOCABULARY_LIMIT, 10)
5
- : 20000;
7
+ : 20_000;