spamscanner 4.0.0 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,9 +2,7 @@
2
2
  <a href="https://spamscanner.net"><img src="https://d1i8ikybhfrv4r.cloudfront.net/spamscanner.png" alt="spamscanner" /></a>
3
3
  </h1>
4
4
  <div align="center">
5
- <a href="https://join.slack.com/t/ladjs/shared_invite/zt-fqei6z11-Bq2trhwHQxVc5x~ifiZG0g"><img src="https://img.shields.io/badge/chat-join%20slack-brightgreen" alt="chat" /></a>
6
- <a href="https://travis-ci.com/spamscanner/spamscanner"><img src="https://travis-ci.com/spamscanner/spamscanner.svg?branch=master" alt="build status" /></a>
7
- <a href="https://codecov.io/github/spamscanner/spamscanner"><img src="https://img.shields.io/codecov/c/github/spamscanner/spamscanner/master.svg" alt="code coverage" /></a>
5
+ <a href="https://github.com/spamscanner/spamscanner/actions/workflows/ci.yml"><img src="https://github.com/spamscanner/spamscanner/actions/workflows/ci.yml/badge.svg" alt="build status" /></a>
8
6
  <a href="https://github.com/sindresorhus/xo"><img src="https://img.shields.io/badge/code_style-XO-5ed9c7.svg" alt="code style" /></a>
9
7
  <a href="https://github.com/prettier/prettier"><img src="https://img.shields.io/badge/styled_with-prettier-ff69b4.svg" alt="styled with prettier" /></a>
10
8
  <a href="https://lass.js.org"><img src="https://img.shields.io/badge/made_with-lass-95CC28.svg" alt="made with lass" /></a>
@@ -48,6 +46,7 @@
48
46
  * [`scanner.getVirusResults(mail)`](#scannergetvirusresultsmail)
49
47
  * [`scanner.parseLocale(locale)`](#scannerparselocalelocale)
50
48
  * [Caching](#caching)
49
+ * [Debugging](#debugging)
51
50
  * [Contributors](#contributors)
52
51
  * [References](#references)
53
52
  * [License](#license)
@@ -188,11 +187,48 @@ Note that you can simply use the Spam Scanner API for free at <https://spamscann
188
187
  2. Configure ClamAV:
189
188
 
190
189
  ```sh
190
+ # if you are on Intel macOS
191
+ sudo mv /usr/local/etc/clamav/clamd.conf.sample /usr/local/etc/clamav/clamd.conf
192
+
193
+ # if you are on M1 macOS (or newer brew which installs to `/opt/homebrew`)
194
+ sudo mv /opt/homebrew/etc/clamav/clamd.conf.sample /opt/homebrew/etc/clamav/clamd.conf
195
+ ```
196
+
197
+ ```sh
198
+ # if you are on Intel macOS
199
+ sudo vim /usr/local/etc/clamav/clamd.conf
200
+
201
+ # if you are on M1 macOS (or newer brew which installs to `/opt/homebrew`)
202
+ sudo vim /opt/homebrew/etc/clamav/clamd.conf
203
+ ```
204
+
205
+ ```diff
206
+ -Example
207
+ +#Example
208
+
209
+ -#StreamMaxLength 10M
210
+ +StreamMaxLength 50M
211
+
212
+ +# this file path may be different on your OS (that's OK)
213
+
214
+ -#LocalSocket /tmp/clamd.socket
215
+ +LocalSocket /tmp/clamd.socket
216
+ ```
217
+
218
+ ```sh
219
+ # if you are on Intel macOS
191
220
  sudo mv /usr/local/etc/clamav/freshclam.conf.sample /usr/local/etc/clamav/freshclam.conf
221
+
222
+ # if you are on M1 macOS (or newer brew which installs to `/opt/homebrew`)
223
+ sudo mv /opt/homebrew/etc/clamav/freshclam.conf.sample /opt/homebrew/etc/clamav/freshclam.conf
192
224
  ```
193
225
 
194
226
  ```sh
227
+ # if you are on Intel macOS
195
228
  sudo vim /usr/local/etc/clamav/freshclam.conf
229
+
230
+ # if you are on M1 macOS (or newer brew which installs to `/opt/homebrew`)
231
+ sudo vim /opt/homebrew/etc/clamav/freshclam.conf
196
232
  ```
197
233
 
198
234
  ```diff
@@ -210,6 +246,8 @@ Note that you can simply use the Spam Scanner API for free at <https://spamscann
210
246
  sudo vim /Library/LaunchDaemons/org.clamav.clamd.plist
211
247
  ```
212
248
 
249
+ > If you are on Intel macOS:
250
+
213
251
  ```plist
214
252
  <?xml version="1.0" encoding="UTF-8"?>
215
253
  <!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
@@ -231,12 +269,37 @@ Note that you can simply use the Spam Scanner API for free at <https://spamscann
231
269
  </plist>
232
270
  ```
233
271
 
272
+ > If you are on M1 macOS (or newer brew which installs to `/opt/homebrew`):
273
+
274
+ ```plist
275
+ <?xml version="1.0" encoding="UTF-8"?>
276
+ <!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
277
+ <plist version="1.0">
278
+ <dict>
279
+ <key>Label</key>
280
+ <string>org.clamav.clamd</string>
281
+ <key>KeepAlive</key>
282
+ <true/>
283
+ <key>Program</key>
284
+ <string>/opt/homebrew/sbin/clamd</string>
285
+ <key>ProgramArguments</key>
286
+ <array>
287
+ <string>clamd</string>
288
+ </array>
289
+ <key>RunAtLoad</key>
290
+ <true/>
291
+ </dict>
292
+ </plist>
293
+ ```
294
+
295
+ 4. Enable it and start it on boot:
296
+
234
297
  ```sh
235
298
  sudo launchctl load /Library/LaunchDaemons/org.clamav.clamd.plist
236
299
  sudo launchctl start /Library/LaunchDaemons/org.clamav.clamd.plist
237
300
  ```
238
301
 
239
- 4. You may want to periodically run `freshclam` to update the config, or configure a similar `plist` configuration for `launchctl`.
302
+ 5. You may want to periodically run `freshclam` to update the config, or configure a similar `plist` configuration for `launchctl`.
240
303
 
241
304
 
242
305
  ## Install
@@ -244,7 +307,7 @@ Note that you can simply use the Spam Scanner API for free at <https://spamscann
244
307
  [npm][]:
245
308
 
246
309
  ```sh
247
- npm install spamscanner node-snowball
310
+ npm install spamscanner
248
311
  ```
249
312
 
250
313
 
@@ -359,7 +422,7 @@ Currently Spam Scanner supports the following locales for tokenization, stemming
359
422
  | Finnish | `fi` |
360
423
  | Farsi | `fa` |
361
424
  | French | `fr` |
362
- | German | `gr` |
425
+ | German | `de` |
363
426
  | Hungarian | `hu` |
364
427
  | Indonesian | `in` |
365
428
  | Italian | `it` |
@@ -406,7 +469,7 @@ A common example of this is a link of `рaypal.com` which when converted to ASCI
406
469
 
407
470
  This method checks against [Cloudflare for Families](https://developers.cloudflare.com/1.1.1.1/1.1.1.1-for-families) servers for adult-related content, malware, and phishing. This means we do two separate DNS over HTTPS requests to `1.1.1.2` for malware and `1.1.1.3` for adult-related content. You can parse the messages results Array for messages that contain "adult-related content" if you need to decide whether or not to flag adult-related content in your application.
408
471
 
409
- If you are using Cloudflare for Families DNS servers as mentioned in [Requirements](#requirements)), then if there are any HTTPS over DNS request errors, it will fallback to use the DNS servers set on the system for lookups, which would in turn use Cloudflare for Family DNS. (using DNS over HTTPS with a fallback of [dns.resolve4](https://nodejs.org/api/dns.html#dns_dns_resolve4\_hostname_options_callback)) – and if it returns `0.0.0.0` then it is considered to be phishing.
472
+ If you are using Cloudflare for Families DNS servers as mentioned in [Requirements](#requirements), then if there are any DNS over HTTPS request errors, it will fall back to using the DNS servers set on the system for lookups, which would in turn use Cloudflare for Families DNS (using DNS over HTTPS with a fallback of [dns.resolve4](https://nodejs.org/api/dns.html#dns_dns_resolve4_hostname_options_callback)) – and if it returns `0.0.0.0` then it is considered to be phishing.
410
473
 
411
474
  We actually helped Cloudflare in August 2020 to update their documentation to note that this result of `0.0.0.0` is returned for maliciously found content on FQDN and IP lookups.
412
475
 
@@ -501,6 +564,13 @@ const scanner = new SpamScanner({
501
564
  Note that in [Forward Email][forward-email] we use the `client` approach as we have multiple threads across multiple servers running, and in-memory caching would not be efficient.
502
565
 
503
566
 
567
+ ## Debugging
568
+
569
+ Spam Scanner has built-in debug output via `util.debuglog('spamscanner')`.
570
+
571
+ This means you can run your app with `NODE_DEBUG=spamscanner node app.js` to get useful debug output to your console.
572
+
573
+
504
574
  ## Contributors
505
575
 
506
576
  | Name | Website |
package/index.js CHANGED
@@ -1,8 +1,9 @@
1
+ const process = require('process');
1
2
  const dns = require('dns');
2
3
  const fs = require('fs');
3
- const { promisify } = require('util');
4
+ const { debuglog } = require('util');
4
5
 
5
- // eslint-disable-next-line node/no-deprecated-api
6
+ // eslint-disable-next-line n/no-deprecated-api
6
7
  const punycode = require('punycode');
7
8
 
8
9
  const ClamScan = require('clamscan');
@@ -12,7 +13,6 @@ const RE2 = require('re2');
12
13
  const bitcoinRegex = require('bitcoin-regex');
13
14
  const contractions = require('expand-contractions');
14
15
  const creditCardRegex = require('credit-card-regex');
15
- const debug = require('debug')('spamscanner');
16
16
  const emailRegexSafe = require('email-regex-safe');
17
17
  const emojiPatterns = require('emoji-patterns');
18
18
  const escapeStringRegexp = require('escape-string-regexp');
@@ -46,12 +46,15 @@ const toEmoji = require('gemoji/name-to-emoji');
46
46
  const universalify = require('universalify');
47
47
  const urlRegexSafe = require('url-regex-safe');
48
48
  const validator = require('validator');
49
+ const which = require('which');
49
50
  const { Iconv } = require('iconv');
50
51
  const { codes } = require('currency-codes');
51
52
  const { fromUrl, NO_HOSTNAME } = require('parse-domain');
52
53
  const { parse } = require('node-html-parser');
53
54
  const { simpleParser } = require('mailparser');
54
55
 
56
+ const debug = debuglog('spamscanner');
57
+
55
58
  const aggressiveTokenizer = new natural.AggressiveTokenizer();
56
59
  const orthographyTokenizer = new natural.OrthographyTokenizer({
57
60
  language: 'fi'
@@ -69,20 +72,115 @@ const aggressiveTokenizerSv = new natural.AggressiveTokenizerSv();
69
72
  const aggressiveTokenizerRu = new natural.AggressiveTokenizerRu();
70
73
  const aggressiveTokenizerVi = new natural.AggressiveTokenizerVi();
71
74
 
72
- const stopwordsEn = require('natural/lib/natural/util/stopwords').words;
73
- const stopwordsEs = require('natural/lib/natural/util/stopwords_es').words;
74
- const stopwordsFa = require('natural/lib/natural/util/stopwords_fa').words;
75
- const stopwordsFr = require('natural/lib/natural/util/stopwords_fr').words;
76
- const stopwordsId = require('natural/lib/natural/util/stopwords_id').words;
77
- const stopwordsJa = require('natural/lib/natural/util/stopwords_ja').words;
78
- const stopwordsIt = require('natural/lib/natural/util/stopwords_it').words;
79
- const stopwordsNl = require('natural/lib/natural/util/stopwords_nl').words;
80
- const stopwordsNo = require('natural/lib/natural/util/stopwords_no').words;
81
- const stopwordsPl = require('natural/lib/natural/util/stopwords_pl').words;
82
- const stopwordsPt = require('natural/lib/natural/util/stopwords_pt').words;
83
- const stopwordsRu = require('natural/lib/natural/util/stopwords_ru').words;
84
- const stopwordsSv = require('natural/lib/natural/util/stopwords_sv').words;
85
- const stopwordsZh = require('natural/lib/natural/util/stopwords_zh').words;
75
+ const stopwordsEn = new Set([
76
+ ...require('natural/lib/natural/util/stopwords').words,
77
+ ...sw.eng
78
+ ]);
79
+ const stopwordsEs = new Set([
80
+ ...require('natural/lib/natural/util/stopwords_es').words,
81
+ ...sw.spa
82
+ ]);
83
+ const stopwordsFa = new Set([
84
+ ...require('natural/lib/natural/util/stopwords_fa').words,
85
+ ...sw.fas
86
+ ]);
87
+ const stopwordsFr = new Set([
88
+ ...require('natural/lib/natural/util/stopwords_fr').words,
89
+ ...sw.fra
90
+ ]);
91
+ const stopwordsId = new Set([
92
+ ...require('natural/lib/natural/util/stopwords_id').words,
93
+ ...sw.ind
94
+ ]);
95
+ const stopwordsJa = new Set([
96
+ ...require('natural/lib/natural/util/stopwords_ja').words,
97
+ ...sw.jpn
98
+ ]);
99
+ const stopwordsIt = new Set([
100
+ ...require('natural/lib/natural/util/stopwords_it').words,
101
+ ...sw.ita
102
+ ]);
103
+ const stopwordsNl = new Set([
104
+ ...require('natural/lib/natural/util/stopwords_nl').words,
105
+ ...sw.nld
106
+ ]);
107
+ const stopwordsNo = new Set([
108
+ ...require('natural/lib/natural/util/stopwords_no').words,
109
+ ...sw.nob
110
+ ]);
111
+ const stopwordsPl = new Set([
112
+ ...require('natural/lib/natural/util/stopwords_pl').words,
113
+ ...sw.pol
114
+ ]);
115
+ const stopwordsPt = new Set([
116
+ ...require('natural/lib/natural/util/stopwords_pt').words,
117
+ ...sw.por,
118
+ ...sw.porBr
119
+ ]);
120
+ const stopwordsRu = new Set([
121
+ ...require('natural/lib/natural/util/stopwords_ru').words,
122
+ ...sw.rus
123
+ ]);
124
+ const stopwordsSv = new Set([
125
+ ...require('natural/lib/natural/util/stopwords_sv').words,
126
+ ...sw.swe
127
+ ]);
128
+ const stopwordsZh = new Set([
129
+ ...require('natural/lib/natural/util/stopwords_zh').words,
130
+ ...sw.zho
131
+ ]);
132
+
133
+ const stopwordsRon = new Set(sw.ron);
134
+ const stopwordsTur = new Set(sw.tur);
135
+ const stopwordsVie = new Set(sw.vie);
136
+ const stopwordsDeu = new Set(sw.deu);
137
+ const stopwordsHun = new Set(sw.hun);
138
+ const stopwordsAra = new Set(sw.ara);
139
+ const stopwordsDan = new Set(sw.dan);
140
+ const stopwordsFin = new Set(sw.fin);
141
+
142
+ // TODO: add stopword pairing for these langs:
143
+ // afr
144
+ // ben
145
+ // bre
146
+ // bul
147
+ // cat
148
+ // ces
149
+ // ell
150
+ // epo
151
+ // est
152
+ // eus
153
+ // fra
154
+ // gle
155
+ // glg
156
+ // guj
157
+ // hau
158
+ // heb
159
+ // hin
160
+ // hrv
161
+ // hye
162
+ // kor
163
+ // kur
164
+ // lat
165
+ // lav
166
+ // lgg
167
+ // lggNd
168
+ // lit
169
+ // mar
170
+ // msa
171
+ // mya
172
+ // panGu
173
+ // slk
174
+ // slv
175
+ // som
176
+ // sot
177
+ // swa
178
+ // tgl
179
+ // tha
180
+ // ukr
181
+ // urd
182
+ // yor
183
+ // zul
86
184
 
87
185
  // <https://stackoverflow.com/a/41353282>
88
186
  // <https://www.ietf.org/rfc/rfc3986.txt>
@@ -94,17 +192,16 @@ const PKG = require('./package.json');
94
192
 
95
193
  const VOCABULARY_LIMIT = require('./vocabulary-limit.js');
96
194
 
195
+ // TODO: convert this into a Map
97
196
  const ISO_CODE_MAPPING = require('./iso-code-mapping.json');
98
197
 
99
198
  // <https://kb.smarshmail.com/Article/23567>
100
- const EXECUTABLES = require('./executables.json');
199
+ const EXECUTABLES = new Set(require('./executables.json'));
101
200
 
102
201
  const REPLACEMENT_WORDS = require('./replacement-words.json');
103
202
 
104
203
  const locales = new Set(i18nLocales.map((l) => l.toLowerCase()));
105
204
 
106
- const readFile = promisify(fs.readFile);
107
-
108
205
  const normalizeUrlOptions = {
109
206
  stripProtocol: true,
110
207
  stripWWW: false,
@@ -154,7 +251,8 @@ for (const code of codes()) {
154
251
  const symbol = getSymbolFromCurrency(code);
155
252
  if (
156
253
  typeof symbol === 'string' &&
157
- !currencySymbols.includes(symbol) &&
254
+ // eslint-disable-next-line unicorn/prefer-includes
255
+ currencySymbols.indexOf(symbol) === -1 &&
158
256
  !new RE2(/^[a-z]+$/i).test(symbol)
159
257
  )
160
258
  currencySymbols.push(escapeStringRegexp(symbol));
@@ -187,7 +285,9 @@ const isURLOptions = {
187
285
  class SpamScanner {
188
286
  constructor(config = {}) {
189
287
  this.config = {
190
- debug: process.env.NODE_ENV === 'test',
288
+ debug:
289
+ process.env.NODE_ENV === 'test' ||
290
+ process.env.NODE_ENV === 'development',
191
291
  checkIDNHomographAttack: false,
192
292
  // note that if you attempt to train an existing `scanner.classifier`
193
293
  // then you will need to re-use these, so we suggest you store them
@@ -312,8 +412,15 @@ class SpamScanner {
312
412
  userAgent: `${PKG.name}/${PKG.version}`,
313
413
  timeout: ms('10s'),
314
414
  clamscan: {
415
+ debugMode:
416
+ process.env.NODE_ENV === 'test' ||
417
+ process.env.NODE_ENV === 'development',
418
+ clamscan: {
419
+ path: which.sync('clamscan', { nothrow: true })
420
+ },
315
421
  clamdscan: {
316
422
  timeout: ms('10s'),
423
+ path: which.sync('clamdscan', { nothrow: true }),
317
424
  socket: macosVersion.isMacOS
318
425
  ? '/tmp/clamd.socket'
319
426
  : '/var/run/clamav/clamd.ctl'
@@ -416,9 +523,7 @@ class SpamScanner {
416
523
  // cache in the background
417
524
  this.config.client
418
525
  .set(key, `${isAdult}:${isMalware}`, 'PX', this.config.ttlMs)
419
- // eslint-disable-next-line promise/prefer-await-to-then
420
526
  .then(this.config.logger.info)
421
- // eslint-disable-next-line promise/prefer-await-to-then
422
527
  .catch(this.config.logger.error);
423
528
  return { isAdult, isMalware };
424
529
  };
@@ -432,6 +537,27 @@ class SpamScanner {
432
537
  throw new Error(
433
538
  `Locale of ${this.config.locale} was not valid according to locales list.`
434
539
  );
540
+
541
+ //
542
+ // set up regex helpers
543
+ //
544
+ this.EMAIL_REPLACEMENT_REGEX = new RE2(this.config.replacements.email, 'g');
545
+ const replacementRegexes = [];
546
+ for (const key of Object.keys(this.config.replacements)) {
547
+ replacementRegexes.push(
548
+ escapeStringRegexp(this.config.replacements[key])
549
+ );
550
+ }
551
+
552
+ this.REPLACEMENTS_REGEX = new RE2(
553
+ new RegExp(replacementRegexes.join('|'), 'g')
554
+ );
555
+
556
+ //
557
+ // set up helper Map and Sets for fast lookup
558
+ // (Set.has is 2x faster than includes, and 50% faster than indexOf)
559
+ //
560
+ this.WHITELISTED_WORDS = new Set(Object.values(this.config.replacements));
435
561
  }
436
562
 
437
563
  getHostname(link) {
@@ -521,15 +647,12 @@ class SpamScanner {
521
647
  const stream = isStream(attachment.content)
522
648
  ? attachment.content
523
649
  : intoStream(attachment.content);
524
- const { is_infected: isInfected, viruses } =
525
- await clamscan.scan_stream(stream);
650
+ const { isInfected, viruses } = await clamscan.scanStream(stream);
526
651
  const name = isSANB(attachment.filename)
527
652
  ? `"${attachment.filename}"`
528
653
  : `#${i + 1}`;
529
654
  if (isInfected)
530
- messages.push(
531
- `Attachment ${name} was infected with "${viruses}".`
532
- );
655
+ messages.push(`Attachment ${name} was infected with ${viruses}.`);
533
656
  } catch (err) {
534
657
  this.config.logger.error(err);
535
658
  }
@@ -547,13 +670,16 @@ class SpamScanner {
547
670
 
548
671
  let gtube = false;
549
672
 
550
- if (isSANB(mail.html) && mail.html.includes(GTUBE)) gtube = true;
673
+ // eslint-disable-next-line unicorn/prefer-includes
674
+ if (isSANB(mail.html) && mail.html.indexOf(GTUBE) !== -1) gtube = true;
551
675
 
552
- if (isSANB(mail.text) && !gtube && mail.text.includes(GTUBE)) gtube = true;
676
+ // eslint-disable-next-line unicorn/prefer-includes
677
+ if (isSANB(mail.text) && !gtube && mail.text.indexOf(GTUBE) !== -1)
678
+ gtube = true;
553
679
 
554
680
  if (gtube)
555
681
  messages.push(
556
- 'Message detected to contain the GTUBE test from <https://spamassassin.apache.org/gtube/>.'
682
+ 'Message detected to contain the GTUBE test from https://spamassassin.apache.org/gtube/.'
557
683
  );
558
684
 
559
685
  return messages;
@@ -619,8 +745,6 @@ class SpamScanner {
619
745
  //
620
746
  // However we don't recommend this and therefore have our servers set to standard Cloudflare DNS
621
747
  //
622
- // TODO: we need to do two lookups in parallel, one against adult and one against malware
623
- // and also make sure the messages aren't duplicated when we concatenate final array of messages
624
748
  const [isAdult, isMalware] = await Promise.all([
625
749
  this.malwareLookup('https://family.cloudflare-dns.com/dns-query', name),
626
750
  this.malwareLookup('https://security.cloudflare-dns.com/dns-query', name)
@@ -742,14 +866,14 @@ class SpamScanner {
742
866
  })
743
867
  .match(URL_REGEX) || [];
744
868
 
745
- const array = [];
869
+ const array = new Set();
746
870
  for (const url of urls) {
747
871
  const normalized = this.getNormalizedUrl(url);
748
872
 
749
- if (normalized && !array.includes(normalized)) array.push(normalized);
873
+ if (normalized) array.add(normalized);
750
874
  }
751
875
 
752
- return array;
876
+ return [...array];
753
877
  }
754
878
 
755
879
  parseLocale(locale) {
@@ -763,12 +887,6 @@ class SpamScanner {
763
887
  // <https://github.com/NaturalNode/natural#stemmers>
764
888
  // eslint-disable-next-line complexity
765
889
  async getTokens(string, locale, isHTML = false) {
766
- // get the current email replacement regex
767
- const EMAIL_REPLACEMENT_REGEX = new RE2(
768
- this.config.replacements.email,
769
- 'g'
770
- );
771
-
772
890
  //
773
891
  // parse HTML for <html> tag with lang attr
774
892
  // otherwise if that wasn't found then look for this
@@ -816,17 +934,6 @@ class SpamScanner {
816
934
 
817
935
  if (isHTML) string = sanitizeHtml(string, this.config.sanitizeHtml);
818
936
 
819
- const replacementRegexes = [];
820
- for (const key of Object.keys(this.config.replacements)) {
821
- replacementRegexes.push(
822
- escapeStringRegexp(this.config.replacements[key])
823
- );
824
- }
825
-
826
- const REPLACEMENTS_REGEX = new RE2(
827
- new RegExp(replacementRegexes.join('|'), 'g')
828
- );
829
-
830
937
  string = striptags(string, [], ' ')
831
938
  .trim()
832
939
  // replace newlines
@@ -835,7 +942,7 @@ class SpamScanner {
835
942
  // attackers may try to inject our replacements into the message
836
943
  // therefore we should strip all of them before doing any replacements
837
944
  //
838
- .replace(REPLACEMENTS_REGEX, ' ');
945
+ .replace(this.REPLACEMENTS_REGEX, ' ');
839
946
 
840
947
  //
841
948
  // we should instead use language detection to determine
@@ -853,7 +960,8 @@ class SpamScanner {
853
960
 
854
961
  locale = this.parseLocale(isSANB(locale) ? locale : this.config.locale);
855
962
 
856
- if (!locales.has(locale)) {
963
+ // NOTE: "in" and "po" are valid locales but not from i18n
964
+ if (!locales.has(locale) && locale !== 'in' && locale !== 'po') {
857
965
  debug(`Locale ${locale} was not valid and will use default`);
858
966
  locale = this.parseLocale(this.config.locale);
859
967
  }
@@ -865,103 +973,145 @@ class SpamScanner {
865
973
  let stopwords = stopwordsEn;
866
974
  let language = 'english';
867
975
  let stemword = 'default';
976
+
868
977
  switch (locale) {
869
978
  case 'ar':
979
+ // arb
980
+ // ISO 639-3 = ara
981
+ stopwords = stopwordsAra;
870
982
  language = 'arabic';
871
983
  break;
872
984
  case 'da':
985
+ // dan
873
986
  language = 'danish';
987
+ stopwords = stopwordsDan;
874
988
  break;
875
989
  case 'nl':
990
+ // nld
876
991
  stopwords = stopwordsNl;
877
992
  language = 'dutch';
878
993
  break;
879
994
  case 'en':
995
+ // eng
880
996
  language = 'english';
881
997
  break;
882
998
  case 'fi':
999
+ // fin
883
1000
  language = 'finnish';
884
1001
  tokenizer = orthographyTokenizer;
1002
+ stopwords = stopwordsFin;
885
1003
  break;
886
1004
  case 'fa':
1005
+ // fas (Persian/Farsi)
887
1006
  language = 'farsi';
888
1007
  tokenizer = aggressiveTokenizerFa;
889
1008
  stopwords = stopwordsFa;
890
1009
  stemword = natural.PorterStemmerFa.stem.bind(natural.PorterStemmerFa);
891
1010
  break;
892
1011
  case 'fr':
1012
+ // fra
893
1013
  language = 'french';
894
1014
  tokenizer = aggressiveTokenizerFr;
895
1015
  stopwords = stopwordsFr;
896
1016
  break;
897
1017
  case 'de':
1018
+ // deu
898
1019
  language = 'german';
1020
+ stopwords = stopwordsDeu;
899
1021
  break;
900
1022
  case 'hu':
1023
+ // hun
901
1024
  language = 'hungarian';
1025
+ stopwords = stopwordsHun;
902
1026
  break;
903
1027
  case 'in':
1028
+ // ind
904
1029
  language = 'indonesian';
905
1030
  tokenizer = aggressiveTokenizerId;
906
1031
  stopwords = stopwordsId;
907
1032
  break;
908
1033
  case 'it':
1034
+ // ita
909
1035
  language = 'italian';
910
1036
  tokenizer = aggressiveTokenizerIt;
911
1037
  stopwords = stopwordsIt;
912
1038
  break;
913
1039
  case 'ja':
1040
+ // jpn
914
1041
  tokenizer = tokenizerJa;
915
1042
  stopwords = stopwordsJa;
916
1043
  stemword = natural.StemmerJa.stem.bind(natural.StemmerJa);
917
1044
  break;
918
1045
  case 'nb':
1046
+ // nob
1047
+ language = 'norwegian';
1048
+ tokenizer = aggressiveTokenizerNo;
1049
+ stopwords = stopwordsNo;
1050
+ break;
919
1051
  case 'nn':
1052
+ // nno
1053
+ // ISO 639-3 = nob
920
1054
  language = 'norwegian';
921
1055
  tokenizer = aggressiveTokenizerNo;
922
1056
  stopwords = stopwordsNo;
923
1057
  break;
924
1058
  case 'po':
1059
+ // pol
925
1060
  language = 'polish';
926
1061
  tokenizer = aggressiveTokenizerPl;
927
1062
  stopwords = stopwordsPl;
928
1063
  stemword = false;
929
1064
  break;
930
1065
  case 'pt':
1066
+ // por
931
1067
  language = 'portuguese';
932
1068
  tokenizer = aggressiveTokenizerPt;
933
1069
  stopwords = stopwordsPt;
934
1070
  break;
935
1071
  case 'es':
1072
+ // spa
936
1073
  language = 'spanish';
937
1074
  tokenizer = aggressiveTokenizerEs;
938
1075
  stopwords = stopwordsEs;
939
1076
  break;
940
1077
  case 'sv':
1078
+ // swe
941
1079
  language = 'swedish';
942
1080
  tokenizer = aggressiveTokenizerSv;
943
1081
  stopwords = stopwordsSv;
944
1082
  break;
945
1083
  case 'ro':
1084
+ // ron
946
1085
  language = 'romanian';
1086
+ stopwords = stopwordsRon;
947
1087
  break;
948
1088
  case 'ru':
1089
+ // rus
949
1090
  language = 'russian';
950
1091
  tokenizer = aggressiveTokenizerRu;
951
1092
  stopwords = stopwordsRu;
952
1093
  break;
953
1094
  case 'ta':
1095
+ // tam
1096
+ // NOTE: no stopwords available
954
1097
  language = 'tamil';
955
1098
  break;
956
1099
  case 'tr':
1100
+ // tur
957
1101
  language = 'turkish';
1102
+ stopwords = stopwordsTur;
958
1103
  break;
959
1104
  case 'vi':
1105
+ // vie
960
1106
  language = 'vietnamese';
961
1107
  tokenizer = aggressiveTokenizerVi;
1108
+ stopwords = stopwordsVie;
962
1109
  stemword = false;
963
1110
  break;
964
1111
  case 'zh':
1112
+ // cmn
1113
+ // TODO: use this instead https://github.com/yishn/chinese-tokenizer
1114
+ // ISO 639-3 = zho (Chinese, Macrolanguage)
965
1115
  language = 'chinese';
966
1116
  stopwords = stopwordsZh;
967
1117
  stemword = false;
@@ -979,7 +1129,7 @@ class SpamScanner {
979
1129
  string
980
1130
  .split(' ')
981
1131
  .map((_string) =>
982
- _string.startsWith(':') &&
1132
+ _string.indexOf(':') === 0 &&
983
1133
  _string.endsWith(':') &&
984
1134
  typeof toEmoji[_string.slice(1, -1)] === 'string'
985
1135
  ? toEmoji[_string.slice(1, -1)]
@@ -1027,7 +1177,10 @@ class SpamScanner {
1027
1177
 
1028
1178
  // now we ensure that URL's and EMAIL's are properly spaced out
1029
1179
  // (e.g. in case ?email=some@email.com was in a URL)
1030
- .replace(EMAIL_REPLACEMENT_REGEX, ` ${this.config.replacements.email} `)
1180
+ .replace(
1181
+ this.EMAIL_REPLACEMENT_REGEX,
1182
+ ` ${this.config.replacements.email} `
1183
+ )
1031
1184
 
1032
1185
  // TODO: replace file paths, file dirs, dotfiles, and dotdirs
1033
1186
 
@@ -1042,12 +1195,14 @@ class SpamScanner {
1042
1195
  // replace currency
1043
1196
  .replace(CURRENCY_REGEX, ` ${this.config.replacements.currency} `);
1044
1197
 
1198
+ //
1045
1199
  // expand contractions so "they're" -> [ they, are ] vs. [ they, re ]
1046
1200
  // <https://github.com/NaturalNode/natural/issues/533>
1047
- if (locale === 'en') string = contractions.expand(string);
1048
-
1049
- // whitelist exclusions
1050
- const whitelistedWords = Object.values(this.config.replacements);
1201
+ //
1202
+ // NOTE: we're doing this for all languages now, not just en
1203
+ // if (locale === 'en')
1204
+ //
1205
+ string = contractions.expand(string);
1051
1206
 
1052
1207
  //
1053
1208
  // Future research:
@@ -1061,43 +1216,32 @@ class SpamScanner {
1061
1216
  for (const token of tokenizer.tokenize(string.toLowerCase())) {
1062
1217
  // whitelist words from being stemmed (safeguard)
1063
1218
  if (
1064
- whitelistedWords.includes(token) ||
1065
- token.startsWith(this.config.replacements.initialism) ||
1066
- token.startsWith(this.config.replacements.abbrevation)
1219
+ this.WHITELISTED_WORDS.has(token) ||
1220
+ token.indexOf(this.config.replacements.initialism) === 0 ||
1221
+ token.indexOf(this.config.replacements.abbrevation) === 0
1067
1222
  ) {
1068
1223
  tokens.push(token);
1069
1224
  continue;
1070
1225
  }
1071
1226
 
1072
- if (
1073
- stopwords.includes(token) ||
1074
- (sw[locale] && sw[locale].includes(token)) ||
1075
- (locale !== 'en' &&
1076
- (stopwordsEn.includes(token) || sw.en.includes(token)))
1077
- )
1227
+ if (stopwords.has(token) || (locale !== 'en' && stopwordsEn.has(token))) {
1078
1228
  continue;
1229
+ }
1079
1230
 
1080
1231
  // locale specific stopwords to ignore
1081
1232
  let localeStem;
1082
1233
  if (typeof stemword === 'function') {
1083
1234
  localeStem = stemword(token);
1084
- if (
1085
- localeStem &&
1086
- (stopwords.includes(localeStem) ||
1087
- (sw[locale] && sw[locale].includes(localeStem)))
1088
- )
1235
+ if (localeStem && stopwords.has(localeStem)) {
1089
1236
  continue;
1237
+ }
1090
1238
  }
1091
1239
 
1092
1240
  // always check against English stemwords
1093
1241
  let englishStem;
1094
1242
  if (locale !== 'en') {
1095
1243
  englishStem = snowball.stemword(token, 'english');
1096
- if (
1097
- englishStem &&
1098
- (stopwordsEn.includes(englishStem) || sw.en.includes(englishStem))
1099
- )
1100
- continue;
1244
+ if (englishStem && stopwordsEn.has(englishStem)) continue;
1101
1245
  }
1102
1246
 
1103
1247
  tokens.push(
@@ -1105,6 +1249,8 @@ class SpamScanner {
1105
1249
  );
1106
1250
  }
1107
1251
 
1252
+ debug('locale', locale, 'tokens', tokens);
1253
+
1108
1254
  if (this.config.debug) return tokens;
1109
1255
 
1110
1256
  // we should sha256 all tokens with hasha if not in debug mode
@@ -1117,7 +1263,7 @@ class SpamScanner {
1117
1263
  let source = string;
1118
1264
  if (isBuffer(string)) source = string.toString();
1119
1265
  else if (typeof string === 'string' && isValidPath(string))
1120
- source = await readFile(string);
1266
+ source = await fs.promises.readFile(string);
1121
1267
 
1122
1268
  const tokens = [];
1123
1269
  const mail = await simpleParser(source, this.config.simpleParser);
@@ -1155,12 +1301,11 @@ class SpamScanner {
1155
1301
 
1156
1302
  // eslint-disable-next-line complexity
1157
1303
  async getPhishingResults(mail) {
1158
- const messages = [];
1159
-
1304
+ const messages = new Set();
1160
1305
  //
1161
1306
  // NOTE: all links pushed are lowercased
1162
1307
  //
1163
- const links = [];
1308
+ const links = new Set();
1164
1309
 
1165
1310
  // parse <a> tags with different org domain in text vs the link
1166
1311
  if (isSANB(mail.html)) {
@@ -1170,7 +1315,7 @@ class SpamScanner {
1170
1315
  // elements concatenate to form a URL which is malicious or phishing
1171
1316
  //
1172
1317
  for (const link of this.getUrls(striptags(mail.html, [], ' ').trim())) {
1173
- if (!links.includes(link)) links.push(link);
1318
+ links.add(link);
1174
1319
  }
1175
1320
 
1176
1321
  //
@@ -1212,7 +1357,7 @@ class SpamScanner {
1212
1357
  // (this is needed because some have "Web:%20http://google.com" for example in href tags)
1213
1358
  [href] = this.getUrls(href);
1214
1359
  // eslint-disable-next-line max-depth
1215
- if (href && !links.includes(href)) links.push(href);
1360
+ if (href) links.add(href);
1216
1361
  }
1217
1362
 
1218
1363
  // the text content could contain multiple URL's
@@ -1222,7 +1367,7 @@ class SpamScanner {
1222
1367
  isSANB(href) &&
1223
1368
  validator.isURL(href, isURLOptions)
1224
1369
  ) {
1225
- const string = `Anchor link with href of "${href}" and inner text value of "${textContent}"`;
1370
+ const string = `Anchor link with href of ${href} and inner text value of "${textContent}"`;
1226
1371
  // eslint-disable-next-line max-depth
1227
1372
  if (this.config.checkIDNHomographAttack) {
1228
1373
  const anchorUrlHostname = this.getHostname(href);
@@ -1231,8 +1376,8 @@ class SpamScanner {
1231
1376
  const anchorUrlHostnameToASCII =
1232
1377
  punycode.toASCII(anchorUrlHostname);
1233
1378
  // eslint-disable-next-line max-depth
1234
- if (anchorUrlHostnameToASCII.startsWith('xn--'))
1235
- messages.push(
1379
+ if (anchorUrlHostnameToASCII.indexOf('xn--') === 0)
1380
+ messages.add(
1236
1381
  `${string} has possible IDN homograph attack from anchor hostname.`
1237
1382
  );
1238
1383
  }
@@ -1241,8 +1386,8 @@ class SpamScanner {
1241
1386
  // eslint-disable-next-line max-depth
1242
1387
  for (const link of this.getUrls(textContent)) {
1243
1388
  // this link should have already been included but just in case
1244
- // eslint-disable-next-line max-depth
1245
- if (!links.includes(link)) links.push(link);
1389
+
1390
+ links.add(link);
1246
1391
 
1247
1392
  // eslint-disable-next-line max-depth
1248
1393
  if (this.config.checkIDNHomographAttack) {
@@ -1252,8 +1397,8 @@ class SpamScanner {
1252
1397
  const innerTextUrlHostnameToASCII =
1253
1398
  punycode.toASCII(innerTextUrlHostname);
1254
1399
  // eslint-disable-next-line max-depth
1255
- if (innerTextUrlHostnameToASCII.startsWith('xn--'))
1256
- messages.push(
1400
+ if (innerTextUrlHostnameToASCII.indexOf('xn--') === 0)
1401
+ messages.add(
1257
1402
  `${string} has possible IDN homograph attack from inner text hostname.`
1258
1403
  );
1259
1404
  }
@@ -1269,7 +1414,7 @@ class SpamScanner {
1269
1414
  for (const prop of MAIL_PHISHING_PROPS) {
1270
1415
  if (isSANB(mail[prop])) {
1271
1416
  for (const link of this.getUrls(mail[prop])) {
1272
- if (!links.includes(link)) links.push(link);
1417
+ links.add(link);
1273
1418
  }
1274
1419
  }
1275
1420
  }
@@ -1279,9 +1424,9 @@ class SpamScanner {
1279
1424
  const urlHostname = this.getHostname(link);
1280
1425
  if (urlHostname) {
1281
1426
  const toASCII = punycode.toASCII(urlHostname);
1282
- if (toASCII.startsWith('xn--'))
1283
- messages.push(
1284
- `Possible IDN homograph attack from link of "${link}" with punycode converted hostname of "${toASCII}".`
1427
+ if (toASCII.indexOf('xn--') === 0)
1428
+ messages.add(
1429
+ `Possible IDN homograph attack from link of ${link} with punycode converted hostname of ${toASCII}.`
1285
1430
  );
1286
1431
  }
1287
1432
  }
@@ -1290,28 +1435,25 @@ class SpamScanner {
1290
1435
  // check against Cloudflare malware/phishing/adult DNS lookup
1291
1436
  // if it returns `0.0.0.0` it means it was flagged
1292
1437
  await Promise.all(
1293
- links.map(async (link) => {
1438
+ [...links].map(async (link) => {
1294
1439
  try {
1295
1440
  const urlHostname = this.getHostname(link);
1296
1441
  if (urlHostname) {
1297
1442
  const toASCII = punycode.toASCII(urlHostname);
1298
- const adultMessage = `Link hostname of "${toASCII}" was detected by Cloudflare's Family DNS to contain adult-related content, phishing, and/or malware.`;
1299
- const malwareMessage = `Link hostname of ${toASCII}" was detected by Cloudflare's Security DNS to contain phishing and/or malware.`;
1443
+ const adultMessage = `Link hostname of ${toASCII} was detected by Cloudflare's Family DNS to contain adult-related content, phishing, and/or malware.`;
1444
+ const malwareMessage = `Link hostname of ${toASCII} was detected by Cloudflare's Security DNS to contain phishing and/or malware.`;
1300
1445
 
1301
1446
  // if it already included both messages then return early
1302
- if (
1303
- messages.includes(adultMessage) &&
1304
- messages.includes(malwareMessage)
1305
- )
1447
+ if (messages.has(adultMessage) && messages.has(malwareMessage))
1306
1448
  return;
1307
1449
 
1308
1450
  const { isAdult, isMalware } =
1309
1451
  await this.memoizedIsCloudflareBlocked(toASCII);
1310
1452
 
1311
- if (isAdult && !messages.includes(adultMessage))
1312
- messages.push(adultMessage);
1313
- if (isMalware && !messages.includes(malwareMessage))
1314
- messages.push(malwareMessage);
1453
+ if (isAdult && !messages.has(adultMessage))
1454
+ messages.add(adultMessage);
1455
+ if (isMalware && !messages.has(malwareMessage))
1456
+ messages.add(malwareMessage);
1315
1457
  }
1316
1458
  } catch (err) {
1317
1459
  this.config.logger.error(err);
@@ -1319,7 +1461,7 @@ class SpamScanner {
1319
1461
  })
1320
1462
  );
1321
1463
 
1322
- return { messages, links };
1464
+ return { messages: [...messages], links: [...links] };
1323
1465
  }
1324
1466
 
1325
1467
  // getNSFWResults() {
@@ -1340,7 +1482,7 @@ class SpamScanner {
1340
1482
  try {
1341
1483
  const fileType = await FileType.fromBuffer(attachment.content);
1342
1484
 
1343
- if (fileType && fileType.ext && EXECUTABLES.includes(fileType.ext))
1485
+ if (fileType && fileType.ext && EXECUTABLES.has(fileType.ext))
1344
1486
  messages.push(
1345
1487
  `Attachment's "magic number" indicated it was a dangerous executable with a ".${fileType.ext}" extension.`
1346
1488
  );
@@ -1355,7 +1497,7 @@ class SpamScanner {
1355
1497
  punycode.toUnicode(attachment.filename.split('?')[0])
1356
1498
  );
1357
1499
  const ext = fileExtension(filename);
1358
- if (ext && EXECUTABLES.includes(ext))
1500
+ if (ext && EXECUTABLES.has(ext))
1359
1501
  messages.push(
1360
1502
  `Attachment's file name indicated it was a dangerous executable with a ".${ext}" extension.`
1361
1503
  );
@@ -1363,7 +1505,7 @@ class SpamScanner {
1363
1505
 
1364
1506
  if (isSANB(attachment.contentType)) {
1365
1507
  const ext = mime.extension(attachment.contentType);
1366
- if (isSANB(ext) && EXECUTABLES.includes(ext))
1508
+ if (isSANB(ext) && EXECUTABLES.has(ext))
1367
1509
  messages.push(
1368
1510
  `Attachment's Content-Type was a dangerous executable with a ".${ext}" extension.`
1369
1511
  );
package/package.json CHANGED
@@ -1,22 +1,12 @@
1
1
  {
2
2
  "name": "spamscanner",
3
3
  "description": "Spam Scanner - The Best Anti-Spam Scanning Service and Anti-Spam API",
4
- "version": "4.0.0",
4
+ "version": "5.0.0",
5
5
  "author": "Niftylettuce, LLC. <niftylettuce@gmail.com> (https://niftylettuce.com/)",
6
- "ava": {
7
- "timeout": "30s",
8
- "verbose": true,
9
- "serial": true
10
- },
11
6
  "bugs": {
12
7
  "url": "https://github.com/spamscanner/spamscanner/issues",
13
8
  "email": "niftylettuce@gmail.com"
14
9
  },
15
- "commitlint": {
16
- "extends": [
17
- "@commitlint/config-conventional"
18
- ]
19
- },
20
10
  "contributors": [
21
11
  "Nick Baugh <niftylettuce@gmail.com> (http://niftylettuce.com/)",
22
12
  "Shaun Warman <shaunwarman1@gmail.com> (http://shaunwarman.com/)"
@@ -24,69 +14,68 @@
24
14
  "dependencies": {
25
15
  "@ladjs/naivebayes": "^0.1.0",
26
16
  "bitcoin-regex": "^2.0.0",
27
- "clamscan": "^1.4.2",
17
+ "clamscan": "^2.1.2",
28
18
  "credit-card-regex": "^3.0.0",
29
19
  "crypto-random-string": "3",
30
20
  "currency-codes": "^2.1.0",
31
- "currency-symbol-map": "^5.0.1",
32
- "debug": "^4.3.2",
21
+ "currency-symbol-map": "^5.1.0",
33
22
  "email-regex-safe": "^1.0.2",
34
- "emoji-patterns": "^13.1.0",
23
+ "emoji-patterns": "^14.0.1",
35
24
  "escape-string-regexp": "4",
36
25
  "expand-contractions": "^1.0.1",
37
26
  "file-extension": "^4.0.5",
38
- "file-type": "^16.5.3",
27
+ "file-type": "16",
39
28
  "floating-point-regex": "^0.1.0",
40
29
  "franc": "5",
41
30
  "gemoji": "6",
42
31
  "hasha": "^5.2.2",
43
32
  "hexa-color-regex": "^1.0.0",
44
33
  "i18n-locales": "^0.0.5",
45
- "iconv": "^3.0.0",
34
+ "iconv": "^3.0.1",
46
35
  "into-stream": "6",
47
- "ip-regex": "^4.3.0",
36
+ "ip-regex": "4",
48
37
  "is-buffer": "^2.0.5",
49
38
  "is-stream": "2",
50
39
  "is-string-and-not-blank": "^0.0.2",
51
40
  "is-valid-path": "^0.1.1",
52
41
  "mac-regex": "^1.0.0",
53
42
  "macos-version": "5",
54
- "mailparser": "^3.3.0",
43
+ "mailparser": "^3.5.0",
55
44
  "memoizee": "^0.4.15",
56
- "mime-types": "^2.1.32",
45
+ "mime-types": "^2.1.35",
57
46
  "ms": "^2.1.3",
58
- "natural": "^5.0.4",
47
+ "natural": "^5.2.2",
59
48
  "newline-remove": "^1.0.2",
60
- "node-html-parser": "^4.1.3",
49
+ "node-html-parser": "4",
61
50
  "node-snowball": "^0.6.0",
62
51
  "normalize-url": "5",
63
- "parse-domain": "^3.0.3",
52
+ "parse-domain": "5",
64
53
  "phone-regex": "^2.1.0",
65
54
  "punycode": "^2.1.1",
66
- "re2": "^1.16.0",
67
- "sanitize-html": "^2.4.0",
68
- "stopword": "^1.0.11",
55
+ "re2": "^1.17.6",
56
+ "sanitize-html": "^2.7.0",
57
+ "stopword": "^2.0.2",
69
58
  "striptags": "^3.2.0",
70
- "superagent": "^6.1.0",
59
+ "superagent": "^7.1.6",
71
60
  "trim-leading-whitespace": "^0.1.1",
72
61
  "universalify": "^2.0.0",
73
- "url-regex-safe": "^2.0.2",
74
- "validator": "^13.6.0"
62
+ "url-regex-safe": "^3.0.0",
63
+ "validator": "^13.7.0",
64
+ "which": "^2.0.2"
75
65
  },
76
66
  "devDependencies": {
77
- "@commitlint/cli": "^13.1.0",
78
- "@commitlint/config-conventional": "^13.1.0",
67
+ "@commitlint/cli": "^17.0.2",
68
+ "@commitlint/config-conventional": "^17.0.2",
79
69
  "@ladjs/redis": "^1.0.7",
80
- "ava": "^3.15.0",
81
- "codecov": "^3.8.3",
70
+ "ava": "^4.3.0",
82
71
  "cross-env": "^7.0.3",
83
72
  "delay": "^5.0.0",
84
- "eslint": "^7.32.0",
85
- "eslint-config-xo-lass": "^1.0.5",
73
+ "eslint": "^8.17.0",
74
+ "eslint-config-xo-lass": "^2.0.1",
86
75
  "fixpack": "^4.0.0",
87
- "husky": "^7.0.1",
88
- "is-ci": "^3.0.0",
89
- "lint-staged": "^11.1.2",
76
+ "husky": "^8.0.1",
77
+ "is-ci": "^3.0.1",
78
+ "lint-staged": "^13.0.1",
90
79
  "lookpath": "^1.2.2",
91
80
  "make-dir": "^3.1.0",
92
81
  "node-mbox": "^1.0.0",
@@ -94,12 +83,12 @@
94
83
  "nyc": "^15.1.0",
95
84
  "p-map": "4",
96
85
  "read-dir-deep": "^7.0.1",
97
- "remark-cli": "^10.0.0",
98
- "remark-preset-github": "^4.0.1",
99
- "xo": "0.39"
86
+ "remark-cli": "^10.0.1",
87
+ "remark-preset-github": "^4.0.4",
88
+ "xo": "^0.50.0"
100
89
  },
101
90
  "engines": {
102
- "node": ">=12.11.0"
91
+ "node": ">=14"
103
92
  },
104
93
  "files": [
105
94
  "package.json",
@@ -114,12 +103,6 @@
114
103
  "classifier.json"
115
104
  ],
116
105
  "homepage": "https://github.com/spamscanner/spamscanner",
117
- "husky": {
118
- "hooks": {
119
- "pre-commit": "lint-staged",
120
- "commit-msg": "commitlint -E HUSKY_GIT_PARAMS"
121
- }
122
- },
123
106
  "keywords": [
124
107
  "adult",
125
108
  "api",
@@ -172,38 +155,17 @@
172
155
  ],
173
156
  "license": "Business Source License 1.1",
174
157
  "main": "index.js",
175
- "prettier": {
176
- "singleQuote": true,
177
- "bracketSpacing": true,
178
- "trailingComma": "none"
179
- },
180
- "remarkConfig": {
181
- "plugins": [
182
- "preset-github"
183
- ]
184
- },
185
158
  "repository": {
186
159
  "type": "git",
187
160
  "url": "https://github.com/spamscanner/spamscanner"
188
161
  },
189
162
  "scripts": {
190
163
  "ava": "cross-env NODE_ENV=test ava",
191
- "coverage": "nyc report --reporter=text-lcov > coverage.lcov && codecov",
192
- "lint": "xo && remark . -qfo",
164
+ "lint": "xo --fix && remark . -qfo && fixpack",
193
165
  "nyc": "cross-env NODE_ENV=test nyc ava",
194
- "test": "npm run lint && npm run ava",
166
+ "prepare": "husky install",
167
+ "pretest": "npm run lint",
168
+ "test": "npm run test-coverage",
195
169
  "test-coverage": "npm run lint && npm run nyc"
196
- },
197
- "xo": {
198
- "prettier": true,
199
- "space": true,
200
- "extends": [
201
- "xo-lass"
202
- ],
203
- "ignores": [
204
- "data",
205
- "classifier.json",
206
- "bag-of-words.json"
207
- ]
208
170
  }
209
171
  }
@@ -1,5 +1,7 @@
1
+ const process = require('process');
2
+
1
3
  module.exports =
2
4
  typeof process.env.VOCABULARY_LIMIT !== 'undefined' &&
3
5
  Number.isFinite(Number.parseInt(process.env.VOCABULARY_LIMIT, 10))
4
6
  ? Number.parseInt(process.env.VOCABULARY_LIMIT, 10)
5
- : 20000;
7
+ : 20_000;