spamscanner 3.0.6 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +77 -7
- package/index.js +276 -138
- package/package.json +47 -85
- package/vocabulary-limit.js +3 -1
package/README.md
CHANGED
|
@@ -2,9 +2,7 @@
|
|
|
2
2
|
<a href="https://spamscanner.net"><img src="https://d1i8ikybhfrv4r.cloudfront.net/spamscanner.png" alt="spamscanner" /></a>
|
|
3
3
|
</h1>
|
|
4
4
|
<div align="center">
|
|
5
|
-
<a href="https://
|
|
6
|
-
<a href="https://travis-ci.com/spamscanner/spamscanner"><img src="https://travis-ci.com/spamscanner/spamscanner.svg?branch=master" alt="build status" /></a>
|
|
7
|
-
<a href="https://codecov.io/github/spamscanner/spamscanner"><img src="https://img.shields.io/codecov/c/github/spamscanner/spamscanner/master.svg" alt="code coverage" /></a>
|
|
5
|
+
<a href="https://github.com/spamscanner/spamscanner/actions/workflows/ci.yml"><img src="https://github.com/spamscanner/spamscanner/actions/workflows/ci.yml/badge.svg" alt="build status" /></a>
|
|
8
6
|
<a href="https://github.com/sindresorhus/xo"><img src="https://img.shields.io/badge/code_style-XO-5ed9c7.svg" alt="code style" /></a>
|
|
9
7
|
<a href="https://github.com/prettier/prettier"><img src="https://img.shields.io/badge/styled_with-prettier-ff69b4.svg" alt="styled with prettier" /></a>
|
|
10
8
|
<a href="https://lass.js.org"><img src="https://img.shields.io/badge/made_with-lass-95CC28.svg" alt="made with lass" /></a>
|
|
@@ -48,6 +46,7 @@
|
|
|
48
46
|
* [`scanner.getVirusResults(mail)`](#scannergetvirusresultsmail)
|
|
49
47
|
* [`scanner.parseLocale(locale)`](#scannerparselocalelocale)
|
|
50
48
|
* [Caching](#caching)
|
|
49
|
+
* [Debugging](#debugging)
|
|
51
50
|
* [Contributors](#contributors)
|
|
52
51
|
* [References](#references)
|
|
53
52
|
* [License](#license)
|
|
@@ -188,11 +187,48 @@ Note that you can simply use the Spam Scanner API for free at <https://spamscann
|
|
|
188
187
|
2. Configure ClamAV:
|
|
189
188
|
|
|
190
189
|
```sh
|
|
190
|
+
# if you are on Intel macOS
|
|
191
|
+
sudo mv /usr/local/etc/clamav/clamd.conf.sample /usr/local/etc/clamav/clamd.conf
|
|
192
|
+
|
|
193
|
+
# if you are on M1 macOS (or newer brew which installs to `/opt/homebrew`)
|
|
194
|
+
sudo mv /opt/homebrew/etc/clamav/clamd.conf.sample /opt/homebrew/etc/clamav/clamd.conf
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
```sh
|
|
198
|
+
# if you are on Intel macOS
|
|
199
|
+
sudo vim /usr/local/etc/clamav/clamd.conf
|
|
200
|
+
|
|
201
|
+
# if you are on M1 macOS (or newer brew which installs to `/opt/homebrew`)
|
|
202
|
+
sudo vim /opt/homebrew/etc/clamav/clamd.conf
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
```diff
|
|
206
|
+
-Example
|
|
207
|
+
+#Example
|
|
208
|
+
|
|
209
|
+
-#StreamMaxLength 10M
|
|
210
|
+
+StreamMaxLength 50M
|
|
211
|
+
|
|
212
|
+
+# this file path may be different on your OS (that's OK)
|
|
213
|
+
|
|
214
|
+
\-#LocalSocket /tmp/clamd.socket
|
|
215
|
+
\+LocalSocket /tmp/clamd.socket
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
```sh
|
|
219
|
+
# if you are on Intel macOS
|
|
191
220
|
sudo mv /usr/local/etc/clamav/freshclam.conf.sample /usr/local/etc/clamav/freshclam.conf
|
|
221
|
+
|
|
222
|
+
# if you are on M1 macOS (or newer brew which installs to `/opt/homebrew`)
|
|
223
|
+
sudo mv /opt/homebrew/etc/clamav/freshclam.conf.sample /opt/homebrew/etc/clamav/freshclam.conf
|
|
192
224
|
```
|
|
193
225
|
|
|
194
226
|
```sh
|
|
227
|
+
# if you are on Intel macOS
|
|
195
228
|
sudo vim /usr/local/etc/clamav/freshclam.conf
|
|
229
|
+
|
|
230
|
+
# if you are on M1 macOS (or newer brew which installs to `/opt/homebrew`)
|
|
231
|
+
sudo vim /opt/homebrew/etc/clamav/freshclam.conf
|
|
196
232
|
```
|
|
197
233
|
|
|
198
234
|
```diff
|
|
@@ -210,6 +246,8 @@ Note that you can simply use the Spam Scanner API for free at <https://spamscann
|
|
|
210
246
|
sudo vim /Library/LaunchDaemons/org.clamav.clamd.plist
|
|
211
247
|
```
|
|
212
248
|
|
|
249
|
+
> If you are on Intel macOS:
|
|
250
|
+
|
|
213
251
|
```plist
|
|
214
252
|
<?xml version="1.0" encoding="UTF-8"?>
|
|
215
253
|
<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
|
@@ -231,12 +269,37 @@ Note that you can simply use the Spam Scanner API for free at <https://spamscann
|
|
|
231
269
|
</plist>
|
|
232
270
|
```
|
|
233
271
|
|
|
272
|
+
> If you are on M1 macOS (or newer brew which installs to `/opt/homebrew`)
|
|
273
|
+
|
|
274
|
+
```plist
|
|
275
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
276
|
+
<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
|
277
|
+
<plist version="1.0">
|
|
278
|
+
<dict>
|
|
279
|
+
<key>Label</key>
|
|
280
|
+
<string>org.clamav.clamd</string>
|
|
281
|
+
<key>KeepAlive</key>
|
|
282
|
+
<true/>
|
|
283
|
+
<key>Program</key>
|
|
284
|
+
<string>/opt/homebrew/sbin/clamd</string>
|
|
285
|
+
<key>ProgramArguments</key>
|
|
286
|
+
<array>
|
|
287
|
+
<string>clamd</string>
|
|
288
|
+
</array>
|
|
289
|
+
<key>RunAtLoad</key>
|
|
290
|
+
<true/>
|
|
291
|
+
</dict>
|
|
292
|
+
</plist>
|
|
293
|
+
```
|
|
294
|
+
|
|
295
|
+
4. Enable it and start it on boot:
|
|
296
|
+
|
|
234
297
|
```sh
|
|
235
298
|
sudo launchctl load /Library/LaunchDaemons/org.clamav.clamd.plist
|
|
236
299
|
sudo launchctl start /Library/LaunchDaemons/org.clamav.clamd.plist
|
|
237
300
|
```
|
|
238
301
|
|
|
239
|
-
|
|
302
|
+
5. You may want to periodically run `freshclam` to update the virus definitions database, or configure a similar `plist` configuration for `launchctl` to run it automatically.
|
|
240
303
|
|
|
241
304
|
|
|
242
305
|
## Install
|
|
@@ -244,7 +307,7 @@ Note that you can simply use the Spam Scanner API for free at <https://spamscann
|
|
|
244
307
|
[npm][]:
|
|
245
308
|
|
|
246
309
|
```sh
|
|
247
|
-
npm install spamscanner
|
|
310
|
+
npm install spamscanner
|
|
248
311
|
```
|
|
249
312
|
|
|
250
313
|
|
|
@@ -359,7 +422,7 @@ Currently Spam Scanner supports the following locales for tokenization, stemming
|
|
|
359
422
|
| Finnish | `fi` |
|
|
360
423
|
| Farsi | `fa` |
|
|
361
424
|
| French | `fr` |
|
|
362
|
-
| German | `
|
|
425
|
+
| German | `de` |
|
|
363
426
|
| Hungarian | `hu` |
|
|
364
427
|
| Indonesian | `in` |
|
|
365
428
|
| Italian | `it` |
|
|
@@ -406,7 +469,7 @@ A common example of this is a link of `рaypal.com` which when converted to ASCI
|
|
|
406
469
|
|
|
407
470
|
This method checks against [Cloudflare for Families](https://developers.cloudflare.com/1.1.1.1/1.1.1.1-for-families) servers for adult-related content, malware, and phishing. This means we do two separate DNS over HTTPS requests to `1.1.1.2` for malware and `1.1.1.3` for adult-related content. You can inspect the messages results Array for messages that contain "adult-related content" if your application needs to decide whether or not to flag adult-related content.
|
|
408
471
|
|
|
409
|
-
If you are using Cloudflare for Families DNS servers as mentioned in [Requirements](#requirements)), then if there are any HTTPS over DNS request errors, it will fallback to use the DNS servers set on the system for lookups, which would in turn use Cloudflare for Family DNS. (using DNS over HTTPS with a fallback of [dns.resolve4](https://nodejs.org/api/dns.html#
|
|
472
|
+
If you are using Cloudflare for Families DNS servers (as mentioned in [Requirements](#requirements)), then if there are any DNS over HTTPS request errors, it will fall back to the DNS servers set on the system for lookups, which would in turn use Cloudflare for Families DNS (that is, DNS over HTTPS with a fallback of [dns.resolve4](https://nodejs.org/api/dns.html#dns_dns_resolve4_hostname_options_callback)) – and if it returns `0.0.0.0` then it is considered to be phishing.
|
|
410
473
|
|
|
411
474
|
We actually helped Cloudflare in August 2020 to update their documentation to note that this result of `0.0.0.0` is returned for maliciously found content on FQDN and IP lookups.
|
|
412
475
|
|
|
@@ -501,6 +564,13 @@ const scanner = new SpamScanner({
|
|
|
501
564
|
Note that in [Forward Email][forward-email] we use the `client` approach as we have multiple threads across multiple servers running, and in-memory caching would not be efficient.
|
|
502
565
|
|
|
503
566
|
|
|
567
|
+
## Debugging
|
|
568
|
+
|
|
569
|
+
Spam Scanner has built-in debug output via `util.debuglog('spamscanner')`.
|
|
570
|
+
|
|
571
|
+
This means you can run your app with `NODE_DEBUG=spamscanner node app.js` to get useful debug output to your console.
|
|
572
|
+
|
|
573
|
+
|
|
504
574
|
## Contributors
|
|
505
575
|
|
|
506
576
|
| Name | Website |
|
package/index.js
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
|
+
const process = require('process');
|
|
1
2
|
const dns = require('dns');
|
|
2
3
|
const fs = require('fs');
|
|
3
|
-
const {
|
|
4
|
+
const { debuglog } = require('util');
|
|
4
5
|
|
|
5
|
-
// eslint-disable-next-line
|
|
6
|
+
// eslint-disable-next-line n/no-deprecated-api
|
|
6
7
|
const punycode = require('punycode');
|
|
7
8
|
|
|
8
9
|
const ClamScan = require('clamscan');
|
|
@@ -12,7 +13,6 @@ const RE2 = require('re2');
|
|
|
12
13
|
const bitcoinRegex = require('bitcoin-regex');
|
|
13
14
|
const contractions = require('expand-contractions');
|
|
14
15
|
const creditCardRegex = require('credit-card-regex');
|
|
15
|
-
const debug = require('debug')('spamscanner');
|
|
16
16
|
const emailRegexSafe = require('email-regex-safe');
|
|
17
17
|
const emojiPatterns = require('emoji-patterns');
|
|
18
18
|
const escapeStringRegexp = require('escape-string-regexp');
|
|
@@ -46,12 +46,15 @@ const toEmoji = require('gemoji/name-to-emoji');
|
|
|
46
46
|
const universalify = require('universalify');
|
|
47
47
|
const urlRegexSafe = require('url-regex-safe');
|
|
48
48
|
const validator = require('validator');
|
|
49
|
+
const which = require('which');
|
|
49
50
|
const { Iconv } = require('iconv');
|
|
50
51
|
const { codes } = require('currency-codes');
|
|
51
52
|
const { fromUrl, NO_HOSTNAME } = require('parse-domain');
|
|
52
53
|
const { parse } = require('node-html-parser');
|
|
53
54
|
const { simpleParser } = require('mailparser');
|
|
54
55
|
|
|
56
|
+
const debug = debuglog('spamscanner');
|
|
57
|
+
|
|
55
58
|
const aggressiveTokenizer = new natural.AggressiveTokenizer();
|
|
56
59
|
const orthographyTokenizer = new natural.OrthographyTokenizer({
|
|
57
60
|
language: 'fi'
|
|
@@ -69,20 +72,115 @@ const aggressiveTokenizerSv = new natural.AggressiveTokenizerSv();
|
|
|
69
72
|
const aggressiveTokenizerRu = new natural.AggressiveTokenizerRu();
|
|
70
73
|
const aggressiveTokenizerVi = new natural.AggressiveTokenizerVi();
|
|
71
74
|
|
|
72
|
-
const stopwordsEn =
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
const
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
const
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
const
|
|
85
|
-
|
|
75
|
+
const stopwordsEn = new Set([
|
|
76
|
+
...require('natural/lib/natural/util/stopwords').words,
|
|
77
|
+
...sw.eng
|
|
78
|
+
]);
|
|
79
|
+
const stopwordsEs = new Set([
|
|
80
|
+
...require('natural/lib/natural/util/stopwords_es').words,
|
|
81
|
+
...sw.spa
|
|
82
|
+
]);
|
|
83
|
+
const stopwordsFa = new Set([
|
|
84
|
+
...require('natural/lib/natural/util/stopwords_fa').words,
|
|
85
|
+
...sw.fas
|
|
86
|
+
]);
|
|
87
|
+
const stopwordsFr = new Set([
|
|
88
|
+
...require('natural/lib/natural/util/stopwords_fr').words,
|
|
89
|
+
...sw.fra
|
|
90
|
+
]);
|
|
91
|
+
const stopwordsId = new Set([
|
|
92
|
+
...require('natural/lib/natural/util/stopwords_id').words,
|
|
93
|
+
...sw.ind
|
|
94
|
+
]);
|
|
95
|
+
const stopwordsJa = new Set([
|
|
96
|
+
...require('natural/lib/natural/util/stopwords_ja').words,
|
|
97
|
+
...sw.jpn
|
|
98
|
+
]);
|
|
99
|
+
const stopwordsIt = new Set([
|
|
100
|
+
...require('natural/lib/natural/util/stopwords_it').words,
|
|
101
|
+
...sw.ita
|
|
102
|
+
]);
|
|
103
|
+
const stopwordsNl = new Set([
|
|
104
|
+
...require('natural/lib/natural/util/stopwords_nl').words,
|
|
105
|
+
...sw.nld
|
|
106
|
+
]);
|
|
107
|
+
const stopwordsNo = new Set([
|
|
108
|
+
...require('natural/lib/natural/util/stopwords_no').words,
|
|
109
|
+
...sw.nob
|
|
110
|
+
]);
|
|
111
|
+
const stopwordsPl = new Set([
|
|
112
|
+
...require('natural/lib/natural/util/stopwords_pl').words,
|
|
113
|
+
...sw.pol
|
|
114
|
+
]);
|
|
115
|
+
const stopwordsPt = new Set([
|
|
116
|
+
...require('natural/lib/natural/util/stopwords_pt').words,
|
|
117
|
+
...sw.por,
|
|
118
|
+
...sw.porBr
|
|
119
|
+
]);
|
|
120
|
+
const stopwordsRu = new Set([
|
|
121
|
+
...require('natural/lib/natural/util/stopwords_ru').words,
|
|
122
|
+
...sw.rus
|
|
123
|
+
]);
|
|
124
|
+
const stopwordsSv = new Set([
|
|
125
|
+
...require('natural/lib/natural/util/stopwords_sv').words,
|
|
126
|
+
...sw.swe
|
|
127
|
+
]);
|
|
128
|
+
const stopwordsZh = new Set([
|
|
129
|
+
...require('natural/lib/natural/util/stopwords_zh').words,
|
|
130
|
+
...sw.zho
|
|
131
|
+
]);
|
|
132
|
+
|
|
133
|
+
const stopwordsRon = new Set(sw.ron);
|
|
134
|
+
const stopwordsTur = new Set(sw.tur);
|
|
135
|
+
const stopwordsVie = new Set(sw.vie);
|
|
136
|
+
const stopwordsDeu = new Set(sw.deu);
|
|
137
|
+
const stopwordsHun = new Set(sw.hun);
|
|
138
|
+
const stopwordsAra = new Set(sw.ara);
|
|
139
|
+
const stopwordsDan = new Set(sw.dan);
|
|
140
|
+
const stopwordsFin = new Set(sw.fin);
|
|
141
|
+
|
|
142
|
+
// TODO: add stopword pairing for these langs:
|
|
143
|
+
// afr
|
|
144
|
+
// ben
|
|
145
|
+
// bre
|
|
146
|
+
// bul
|
|
147
|
+
// cat
|
|
148
|
+
// ces
|
|
149
|
+
// ell
|
|
150
|
+
// epo
|
|
151
|
+
// est
|
|
152
|
+
// eus
|
|
153
|
+
// fra
|
|
154
|
+
// gle
|
|
155
|
+
// glg
|
|
156
|
+
// guj
|
|
157
|
+
// hau
|
|
158
|
+
// heb
|
|
159
|
+
// hin
|
|
160
|
+
// hrv
|
|
161
|
+
// hye
|
|
162
|
+
// kor
|
|
163
|
+
// kur
|
|
164
|
+
// lat
|
|
165
|
+
// lav
|
|
166
|
+
// lgg
|
|
167
|
+
// lggNd
|
|
168
|
+
// lit
|
|
169
|
+
// mar
|
|
170
|
+
// msa
|
|
171
|
+
// mya
|
|
172
|
+
// panGu
|
|
173
|
+
// slk
|
|
174
|
+
// slv
|
|
175
|
+
// som
|
|
176
|
+
// sot
|
|
177
|
+
// swa
|
|
178
|
+
// tgl
|
|
179
|
+
// tha
|
|
180
|
+
// ukr
|
|
181
|
+
// urd
|
|
182
|
+
// yor
|
|
183
|
+
// zul
|
|
86
184
|
|
|
87
185
|
// <https://stackoverflow.com/a/41353282>
|
|
88
186
|
// <https://www.ietf.org/rfc/rfc3986.txt>
|
|
@@ -92,19 +190,18 @@ const ENDING_RESERVED_REGEX = new RE2(
|
|
|
92
190
|
|
|
93
191
|
const PKG = require('./package.json');
|
|
94
192
|
|
|
95
|
-
const VOCABULARY_LIMIT = require('./vocabulary-limit');
|
|
193
|
+
const VOCABULARY_LIMIT = require('./vocabulary-limit.js');
|
|
96
194
|
|
|
97
|
-
|
|
195
|
+
// TODO: convert this into a Map
|
|
196
|
+
const ISO_CODE_MAPPING = require('./iso-code-mapping.json');
|
|
98
197
|
|
|
99
198
|
// <https://kb.smarshmail.com/Article/23567>
|
|
100
|
-
const EXECUTABLES = require('./executables');
|
|
199
|
+
const EXECUTABLES = new Set(require('./executables.json'));
|
|
101
200
|
|
|
102
|
-
const REPLACEMENT_WORDS = require('./replacement-words');
|
|
201
|
+
const REPLACEMENT_WORDS = require('./replacement-words.json');
|
|
103
202
|
|
|
104
203
|
const locales = new Set(i18nLocales.map((l) => l.toLowerCase()));
|
|
105
204
|
|
|
106
|
-
const readFile = promisify(fs.readFile);
|
|
107
|
-
|
|
108
205
|
const normalizeUrlOptions = {
|
|
109
206
|
stripProtocol: true,
|
|
110
207
|
stripWWW: false,
|
|
@@ -154,7 +251,8 @@ for (const code of codes()) {
|
|
|
154
251
|
const symbol = getSymbolFromCurrency(code);
|
|
155
252
|
if (
|
|
156
253
|
typeof symbol === 'string' &&
|
|
157
|
-
|
|
254
|
+
// eslint-disable-next-line unicorn/prefer-includes
|
|
255
|
+
currencySymbols.indexOf(symbol) === -1 &&
|
|
158
256
|
!new RE2(/^[a-z]+$/i).test(symbol)
|
|
159
257
|
)
|
|
160
258
|
currencySymbols.push(escapeStringRegexp(symbol));
|
|
@@ -187,11 +285,13 @@ const isURLOptions = {
|
|
|
187
285
|
class SpamScanner {
|
|
188
286
|
constructor(config = {}) {
|
|
189
287
|
this.config = {
|
|
190
|
-
debug:
|
|
288
|
+
debug:
|
|
289
|
+
process.env.NODE_ENV === 'test' ||
|
|
290
|
+
process.env.NODE_ENV === 'development',
|
|
191
291
|
checkIDNHomographAttack: false,
|
|
192
292
|
// note that if you attempt to train an existing `scanner.classifier`
|
|
193
293
|
// then you will need to re-use these, so we suggest you store them
|
|
194
|
-
replacements: config.replacements || require('./replacements'),
|
|
294
|
+
replacements: config.replacements || require('./replacements.js'),
|
|
195
295
|
// <https://nodemailer.com/extras/mailparser/>
|
|
196
296
|
// NOTE: `iconv` package's Iconv cannot be used in worker threads
|
|
197
297
|
// AND it can not also be shared in worker threads either (e.g. cloned)
|
|
@@ -203,7 +303,7 @@ class SpamScanner {
|
|
|
203
303
|
// `wget --mirror --passive-ftp ftp://ftp.ietf.org/ietf-mail-archive`
|
|
204
304
|
// `wget --mirror --passive-ftp ftp://ftp.ietf.org/concluded-wg-ietf-mail-archive`
|
|
205
305
|
// (spam dataset is private at the moment)
|
|
206
|
-
classifier: config.classifier || require('./get-classifier'),
|
|
306
|
+
classifier: config.classifier || require('./get-classifier.js'),
|
|
207
307
|
// default locale validated against i18n-locales
|
|
208
308
|
locale: 'en',
|
|
209
309
|
// we recommend to use axe/cabin, see https://cabinjs.com
|
|
@@ -310,10 +410,17 @@ class SpamScanner {
|
|
|
310
410
|
allowedAttributes: false
|
|
311
411
|
},
|
|
312
412
|
userAgent: `${PKG.name}/${PKG.version}`,
|
|
313
|
-
timeout: ms('
|
|
413
|
+
timeout: ms('10s'),
|
|
314
414
|
clamscan: {
|
|
415
|
+
debugMode:
|
|
416
|
+
process.env.NODE_ENV === 'test' ||
|
|
417
|
+
process.env.NODE_ENV === 'development',
|
|
418
|
+
clamscan: {
|
|
419
|
+
path: which.sync('clamscan', { nothrow: true })
|
|
420
|
+
},
|
|
315
421
|
clamdscan: {
|
|
316
422
|
timeout: ms('10s'),
|
|
423
|
+
path: which.sync('clamdscan', { nothrow: true }),
|
|
317
424
|
socket: macosVersion.isMacOS
|
|
318
425
|
? '/tmp/clamd.socket'
|
|
319
426
|
: '/var/run/clamav/clamd.ctl'
|
|
@@ -416,7 +523,6 @@ class SpamScanner {
|
|
|
416
523
|
// cache in the background
|
|
417
524
|
this.config.client
|
|
418
525
|
.set(key, `${isAdult}:${isMalware}`, 'PX', this.config.ttlMs)
|
|
419
|
-
// eslint-disable-next-line promise/prefer-await-to-then
|
|
420
526
|
.then(this.config.logger.info)
|
|
421
527
|
.catch(this.config.logger.error);
|
|
422
528
|
return { isAdult, isMalware };
|
|
@@ -431,6 +537,27 @@ class SpamScanner {
|
|
|
431
537
|
throw new Error(
|
|
432
538
|
`Locale of ${this.config.locale} was not valid according to locales list.`
|
|
433
539
|
);
|
|
540
|
+
|
|
541
|
+
//
|
|
542
|
+
// set up regex helpers
|
|
543
|
+
//
|
|
544
|
+
this.EMAIL_REPLACEMENT_REGEX = new RE2(this.config.replacements.email, 'g');
|
|
545
|
+
const replacementRegexes = [];
|
|
546
|
+
for (const key of Object.keys(this.config.replacements)) {
|
|
547
|
+
replacementRegexes.push(
|
|
548
|
+
escapeStringRegexp(this.config.replacements[key])
|
|
549
|
+
);
|
|
550
|
+
}
|
|
551
|
+
|
|
552
|
+
this.REPLACEMENTS_REGEX = new RE2(
|
|
553
|
+
new RegExp(replacementRegexes.join('|'), 'g')
|
|
554
|
+
);
|
|
555
|
+
|
|
556
|
+
//
|
|
557
|
+
// set up helper Map and Sets for fast lookup
|
|
558
|
+
// (Set.has is 2x faster than includes, and 50% faster than indexOf)
|
|
559
|
+
//
|
|
560
|
+
this.WHITELISTED_WORDS = new Set(Object.values(this.config.replacements));
|
|
434
561
|
}
|
|
435
562
|
|
|
436
563
|
getHostname(link) {
|
|
@@ -520,17 +647,12 @@ class SpamScanner {
|
|
|
520
647
|
const stream = isStream(attachment.content)
|
|
521
648
|
? attachment.content
|
|
522
649
|
: intoStream(attachment.content);
|
|
523
|
-
const {
|
|
524
|
-
is_infected: isInfected,
|
|
525
|
-
viruses
|
|
526
|
-
} = await clamscan.scan_stream(stream);
|
|
650
|
+
const { isInfected, viruses } = await clamscan.scanStream(stream);
|
|
527
651
|
const name = isSANB(attachment.filename)
|
|
528
652
|
? `"${attachment.filename}"`
|
|
529
653
|
: `#${i + 1}`;
|
|
530
654
|
if (isInfected)
|
|
531
|
-
messages.push(
|
|
532
|
-
`Attachment ${name} was infected with "${viruses}".`
|
|
533
|
-
);
|
|
655
|
+
messages.push(`Attachment ${name} was infected with ${viruses}.`);
|
|
534
656
|
} catch (err) {
|
|
535
657
|
this.config.logger.error(err);
|
|
536
658
|
}
|
|
@@ -548,13 +670,16 @@ class SpamScanner {
|
|
|
548
670
|
|
|
549
671
|
let gtube = false;
|
|
550
672
|
|
|
551
|
-
|
|
673
|
+
// eslint-disable-next-line unicorn/prefer-includes
|
|
674
|
+
if (isSANB(mail.html) && mail.html.indexOf(GTUBE) !== -1) gtube = true;
|
|
552
675
|
|
|
553
|
-
|
|
676
|
+
// eslint-disable-next-line unicorn/prefer-includes
|
|
677
|
+
if (isSANB(mail.text) && !gtube && mail.text.indexOf(GTUBE) !== -1)
|
|
678
|
+
gtube = true;
|
|
554
679
|
|
|
555
680
|
if (gtube)
|
|
556
681
|
messages.push(
|
|
557
|
-
'Message detected to contain the GTUBE test from
|
|
682
|
+
'Message detected to contain the GTUBE test from https://spamassassin.apache.org/gtube/.'
|
|
558
683
|
);
|
|
559
684
|
|
|
560
685
|
return messages;
|
|
@@ -597,9 +722,8 @@ class SpamScanner {
|
|
|
597
722
|
records[0] === '0.0.0.0'
|
|
598
723
|
);
|
|
599
724
|
} catch (err) {
|
|
600
|
-
this.config.logger.
|
|
601
|
-
|
|
602
|
-
return true;
|
|
725
|
+
this.config.logger.warn(err);
|
|
726
|
+
return false;
|
|
603
727
|
}
|
|
604
728
|
}
|
|
605
729
|
}
|
|
@@ -621,8 +745,6 @@ class SpamScanner {
|
|
|
621
745
|
//
|
|
622
746
|
// However we don't recommend this and therefore have our servers set to standard Cloudflare DNS
|
|
623
747
|
//
|
|
624
|
-
// TODO: we need to do two lookups in parallel, one against adult and one against malware
|
|
625
|
-
// and also make sure the messages aren't duplicated when we concatenate final array of messages
|
|
626
748
|
const [isAdult, isMalware] = await Promise.all([
|
|
627
749
|
this.malwareLookup('https://family.cloudflare-dns.com/dns-query', name),
|
|
628
750
|
this.malwareLookup('https://security.cloudflare-dns.com/dns-query', name)
|
|
@@ -744,14 +866,14 @@ class SpamScanner {
|
|
|
744
866
|
})
|
|
745
867
|
.match(URL_REGEX) || [];
|
|
746
868
|
|
|
747
|
-
const array =
|
|
869
|
+
const array = new Set();
|
|
748
870
|
for (const url of urls) {
|
|
749
871
|
const normalized = this.getNormalizedUrl(url);
|
|
750
872
|
|
|
751
|
-
if (normalized
|
|
873
|
+
if (normalized) array.add(normalized);
|
|
752
874
|
}
|
|
753
875
|
|
|
754
|
-
return array;
|
|
876
|
+
return [...array];
|
|
755
877
|
}
|
|
756
878
|
|
|
757
879
|
parseLocale(locale) {
|
|
@@ -765,12 +887,6 @@ class SpamScanner {
|
|
|
765
887
|
// <https://github.com/NaturalNode/natural#stemmers>
|
|
766
888
|
// eslint-disable-next-line complexity
|
|
767
889
|
async getTokens(string, locale, isHTML = false) {
|
|
768
|
-
// get the current email replacement regex
|
|
769
|
-
const EMAIL_REPLACEMENT_REGEX = new RE2(
|
|
770
|
-
this.config.replacements.email,
|
|
771
|
-
'g'
|
|
772
|
-
);
|
|
773
|
-
|
|
774
890
|
//
|
|
775
891
|
// parse HTML for <html> tag with lang attr
|
|
776
892
|
// otherwise if that wasn't found then look for this
|
|
@@ -818,17 +934,6 @@ class SpamScanner {
|
|
|
818
934
|
|
|
819
935
|
if (isHTML) string = sanitizeHtml(string, this.config.sanitizeHtml);
|
|
820
936
|
|
|
821
|
-
const replacementRegexes = [];
|
|
822
|
-
for (const key of Object.keys(this.config.replacements)) {
|
|
823
|
-
replacementRegexes.push(
|
|
824
|
-
escapeStringRegexp(this.config.replacements[key])
|
|
825
|
-
);
|
|
826
|
-
}
|
|
827
|
-
|
|
828
|
-
const REPLACEMENTS_REGEX = new RE2(
|
|
829
|
-
new RegExp(replacementRegexes.join('|'), 'g')
|
|
830
|
-
);
|
|
831
|
-
|
|
832
937
|
string = striptags(string, [], ' ')
|
|
833
938
|
.trim()
|
|
834
939
|
// replace newlines
|
|
@@ -837,7 +942,7 @@ class SpamScanner {
|
|
|
837
942
|
// attackers may try to inject our replacements into the message
|
|
838
943
|
// therefore we should strip all of them before doing any replacements
|
|
839
944
|
//
|
|
840
|
-
.replace(REPLACEMENTS_REGEX, ' ');
|
|
945
|
+
.replace(this.REPLACEMENTS_REGEX, ' ');
|
|
841
946
|
|
|
842
947
|
//
|
|
843
948
|
// we should instead use language detection to determine
|
|
@@ -855,7 +960,8 @@ class SpamScanner {
|
|
|
855
960
|
|
|
856
961
|
locale = this.parseLocale(isSANB(locale) ? locale : this.config.locale);
|
|
857
962
|
|
|
858
|
-
|
|
963
|
+
// NOTE: "in" and "po" are valid locales but not from i18n
|
|
964
|
+
if (!locales.has(locale) && locale !== 'in' && locale !== 'po') {
|
|
859
965
|
debug(`Locale ${locale} was not valid and will use default`);
|
|
860
966
|
locale = this.parseLocale(this.config.locale);
|
|
861
967
|
}
|
|
@@ -867,103 +973,145 @@ class SpamScanner {
|
|
|
867
973
|
let stopwords = stopwordsEn;
|
|
868
974
|
let language = 'english';
|
|
869
975
|
let stemword = 'default';
|
|
976
|
+
|
|
870
977
|
switch (locale) {
|
|
871
978
|
case 'ar':
|
|
979
|
+
// arb
|
|
980
|
+
// ISO 639-3 = ara
|
|
981
|
+
stopwords = stopwordsAra;
|
|
872
982
|
language = 'arabic';
|
|
873
983
|
break;
|
|
874
984
|
case 'da':
|
|
985
|
+
// dan
|
|
875
986
|
language = 'danish';
|
|
987
|
+
stopwords = stopwordsDan;
|
|
876
988
|
break;
|
|
877
989
|
case 'nl':
|
|
990
|
+
// nld
|
|
878
991
|
stopwords = stopwordsNl;
|
|
879
992
|
language = 'dutch';
|
|
880
993
|
break;
|
|
881
994
|
case 'en':
|
|
995
|
+
// eng
|
|
882
996
|
language = 'english';
|
|
883
997
|
break;
|
|
884
998
|
case 'fi':
|
|
999
|
+
// fin
|
|
885
1000
|
language = 'finnish';
|
|
886
1001
|
tokenizer = orthographyTokenizer;
|
|
1002
|
+
stopwords = stopwordsFin;
|
|
887
1003
|
break;
|
|
888
1004
|
case 'fa':
|
|
1005
|
+
// fas (Persian/Farsi)
|
|
889
1006
|
language = 'farsi';
|
|
890
1007
|
tokenizer = aggressiveTokenizerFa;
|
|
891
1008
|
stopwords = stopwordsFa;
|
|
892
1009
|
stemword = natural.PorterStemmerFa.stem.bind(natural.PorterStemmerFa);
|
|
893
1010
|
break;
|
|
894
1011
|
case 'fr':
|
|
1012
|
+
// fra
|
|
895
1013
|
language = 'french';
|
|
896
1014
|
tokenizer = aggressiveTokenizerFr;
|
|
897
1015
|
stopwords = stopwordsFr;
|
|
898
1016
|
break;
|
|
899
1017
|
case 'de':
|
|
1018
|
+
// deu
|
|
900
1019
|
language = 'german';
|
|
1020
|
+
stopwords = stopwordsDeu;
|
|
901
1021
|
break;
|
|
902
1022
|
case 'hu':
|
|
1023
|
+
// hun
|
|
903
1024
|
language = 'hungarian';
|
|
1025
|
+
stopwords = stopwordsHun;
|
|
904
1026
|
break;
|
|
905
1027
|
case 'in':
|
|
1028
|
+
// ind
|
|
906
1029
|
language = 'indonesian';
|
|
907
1030
|
tokenizer = aggressiveTokenizerId;
|
|
908
1031
|
stopwords = stopwordsId;
|
|
909
1032
|
break;
|
|
910
1033
|
case 'it':
|
|
1034
|
+
// ita
|
|
911
1035
|
language = 'italian';
|
|
912
1036
|
tokenizer = aggressiveTokenizerIt;
|
|
913
1037
|
stopwords = stopwordsIt;
|
|
914
1038
|
break;
|
|
915
1039
|
case 'ja':
|
|
1040
|
+
// jpn
|
|
916
1041
|
tokenizer = tokenizerJa;
|
|
917
1042
|
stopwords = stopwordsJa;
|
|
918
1043
|
stemword = natural.StemmerJa.stem.bind(natural.StemmerJa);
|
|
919
1044
|
break;
|
|
920
1045
|
case 'nb':
|
|
1046
|
+
// nob
|
|
1047
|
+
language = 'norwegian';
|
|
1048
|
+
tokenizer = aggressiveTokenizerNo;
|
|
1049
|
+
stopwords = stopwordsNo;
|
|
1050
|
+
break;
|
|
921
1051
|
case 'nn':
|
|
1052
|
+
// nno
|
|
1053
|
+
// ISO 639-3 = nob
|
|
922
1054
|
language = 'norwegian';
|
|
923
1055
|
tokenizer = aggressiveTokenizerNo;
|
|
924
1056
|
stopwords = stopwordsNo;
|
|
925
1057
|
break;
|
|
926
1058
|
case 'po':
|
|
1059
|
+
// pol
|
|
927
1060
|
language = 'polish';
|
|
928
1061
|
tokenizer = aggressiveTokenizerPl;
|
|
929
1062
|
stopwords = stopwordsPl;
|
|
930
1063
|
stemword = false;
|
|
931
1064
|
break;
|
|
932
1065
|
case 'pt':
|
|
1066
|
+
// por
|
|
933
1067
|
language = 'portuguese';
|
|
934
1068
|
tokenizer = aggressiveTokenizerPt;
|
|
935
1069
|
stopwords = stopwordsPt;
|
|
936
1070
|
break;
|
|
937
1071
|
case 'es':
|
|
1072
|
+
// spa
|
|
938
1073
|
language = 'spanish';
|
|
939
1074
|
tokenizer = aggressiveTokenizerEs;
|
|
940
1075
|
stopwords = stopwordsEs;
|
|
941
1076
|
break;
|
|
942
1077
|
case 'sv':
|
|
1078
|
+
// swe
|
|
943
1079
|
language = 'swedish';
|
|
944
1080
|
tokenizer = aggressiveTokenizerSv;
|
|
945
1081
|
stopwords = stopwordsSv;
|
|
946
1082
|
break;
|
|
947
1083
|
case 'ro':
|
|
1084
|
+
// ron
|
|
948
1085
|
language = 'romanian';
|
|
1086
|
+
stopwords = stopwordsRon;
|
|
949
1087
|
break;
|
|
950
1088
|
case 'ru':
|
|
1089
|
+
// rus
|
|
951
1090
|
language = 'russian';
|
|
952
1091
|
tokenizer = aggressiveTokenizerRu;
|
|
953
1092
|
stopwords = stopwordsRu;
|
|
954
1093
|
break;
|
|
955
1094
|
case 'ta':
|
|
1095
|
+
// tam
|
|
1096
|
+
// NOTE: no stopwords available
|
|
956
1097
|
language = 'tamil';
|
|
957
1098
|
break;
|
|
958
1099
|
case 'tr':
|
|
1100
|
+
// tur
|
|
959
1101
|
language = 'turkish';
|
|
1102
|
+
stopwords = stopwordsTur;
|
|
960
1103
|
break;
|
|
961
1104
|
case 'vi':
|
|
1105
|
+
// vie
|
|
962
1106
|
language = 'vietnamese';
|
|
963
1107
|
tokenizer = aggressiveTokenizerVi;
|
|
1108
|
+
stopwords = stopwordsVie;
|
|
964
1109
|
stemword = false;
|
|
965
1110
|
break;
|
|
966
1111
|
case 'zh':
|
|
1112
|
+
// cmn
|
|
1113
|
+
// TODO: use this instead https://github.com/yishn/chinese-tokenizer
|
|
1114
|
+
// ISO 639-3 = zho (Chinese, Macrolanguage)
|
|
967
1115
|
language = 'chinese';
|
|
968
1116
|
stopwords = stopwordsZh;
|
|
969
1117
|
stemword = false;
|
|
@@ -981,7 +1129,7 @@ class SpamScanner {
|
|
|
981
1129
|
string
|
|
982
1130
|
.split(' ')
|
|
983
1131
|
.map((_string) =>
|
|
984
|
-
_string.
|
|
1132
|
+
_string.indexOf(':') === 0 &&
|
|
985
1133
|
_string.endsWith(':') &&
|
|
986
1134
|
typeof toEmoji[_string.slice(1, -1)] === 'string'
|
|
987
1135
|
? toEmoji[_string.slice(1, -1)]
|
|
@@ -1029,7 +1177,10 @@ class SpamScanner {
|
|
|
1029
1177
|
|
|
1030
1178
|
// now we ensure that URL's and EMAIL's are properly spaced out
|
|
1031
1179
|
// (e.g. in case ?email=some@email.com was in a URL)
|
|
1032
|
-
.replace(
|
|
1180
|
+
.replace(
|
|
1181
|
+
this.EMAIL_REPLACEMENT_REGEX,
|
|
1182
|
+
` ${this.config.replacements.email} `
|
|
1183
|
+
)
|
|
1033
1184
|
|
|
1034
1185
|
// TODO: replace file paths, file dirs, dotfiles, and dotdirs
|
|
1035
1186
|
|
|
@@ -1044,12 +1195,14 @@ class SpamScanner {
|
|
|
1044
1195
|
// replace currency
|
|
1045
1196
|
.replace(CURRENCY_REGEX, ` ${this.config.replacements.currency} `);
|
|
1046
1197
|
|
|
1198
|
+
//
|
|
1047
1199
|
// expand contractions so "they're" -> [ they, are ] vs. [ they, re ]
|
|
1048
1200
|
// <https://github.com/NaturalNode/natural/issues/533>
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
//
|
|
1052
|
-
|
|
1201
|
+
//
|
|
1202
|
+
// NOTE: we're doing this for all languages now, not just en
|
|
1203
|
+
// if (locale === 'en')
|
|
1204
|
+
//
|
|
1205
|
+
string = contractions.expand(string);
|
|
1053
1206
|
|
|
1054
1207
|
//
|
|
1055
1208
|
// Future research:
|
|
@@ -1063,43 +1216,32 @@ class SpamScanner {
|
|
|
1063
1216
|
for (const token of tokenizer.tokenize(string.toLowerCase())) {
|
|
1064
1217
|
// whitelist words from being stemmed (safeguard)
|
|
1065
1218
|
if (
|
|
1066
|
-
|
|
1067
|
-
token.
|
|
1068
|
-
token.
|
|
1219
|
+
this.WHITELISTED_WORDS.has(token) ||
|
|
1220
|
+
token.indexOf(this.config.replacements.initialism) === 0 ||
|
|
1221
|
+
token.indexOf(this.config.replacements.abbrevation) === 0
|
|
1069
1222
|
) {
|
|
1070
1223
|
tokens.push(token);
|
|
1071
1224
|
continue;
|
|
1072
1225
|
}
|
|
1073
1226
|
|
|
1074
|
-
if (
|
|
1075
|
-
stopwords.includes(token) ||
|
|
1076
|
-
(sw[locale] && sw[locale].includes(token)) ||
|
|
1077
|
-
(locale !== 'en' &&
|
|
1078
|
-
(stopwordsEn.includes(token) || sw.en.includes(token)))
|
|
1079
|
-
)
|
|
1227
|
+
if (stopwords.has(token) || (locale !== 'en' && stopwordsEn.has(token))) {
|
|
1080
1228
|
continue;
|
|
1229
|
+
}
|
|
1081
1230
|
|
|
1082
1231
|
// locale specific stopwords to ignore
|
|
1083
1232
|
let localeStem;
|
|
1084
1233
|
if (typeof stemword === 'function') {
|
|
1085
1234
|
localeStem = stemword(token);
|
|
1086
|
-
if (
|
|
1087
|
-
localeStem &&
|
|
1088
|
-
(stopwords.includes(localeStem) ||
|
|
1089
|
-
(sw[locale] && sw[locale].includes(localeStem)))
|
|
1090
|
-
)
|
|
1235
|
+
if (localeStem && stopwords.has(localeStem)) {
|
|
1091
1236
|
continue;
|
|
1237
|
+
}
|
|
1092
1238
|
}
|
|
1093
1239
|
|
|
1094
1240
|
// always check against English stemwords
|
|
1095
1241
|
let englishStem;
|
|
1096
1242
|
if (locale !== 'en') {
|
|
1097
1243
|
englishStem = snowball.stemword(token, 'english');
|
|
1098
|
-
if (
|
|
1099
|
-
englishStem &&
|
|
1100
|
-
(stopwordsEn.includes(englishStem) || sw.en.includes(englishStem))
|
|
1101
|
-
)
|
|
1102
|
-
continue;
|
|
1244
|
+
if (englishStem && stopwordsEn.has(englishStem)) continue;
|
|
1103
1245
|
}
|
|
1104
1246
|
|
|
1105
1247
|
tokens.push(
|
|
@@ -1107,6 +1249,8 @@ class SpamScanner {
|
|
|
1107
1249
|
);
|
|
1108
1250
|
}
|
|
1109
1251
|
|
|
1252
|
+
debug('locale', locale, 'tokens', tokens);
|
|
1253
|
+
|
|
1110
1254
|
if (this.config.debug) return tokens;
|
|
1111
1255
|
|
|
1112
1256
|
// we should sha256 all tokens with hasha if not in debug mode
|
|
@@ -1119,7 +1263,7 @@ class SpamScanner {
|
|
|
1119
1263
|
let source = string;
|
|
1120
1264
|
if (isBuffer(string)) source = string.toString();
|
|
1121
1265
|
else if (typeof string === 'string' && isValidPath(string))
|
|
1122
|
-
source = await readFile(string);
|
|
1266
|
+
source = await fs.promises.readFile(string);
|
|
1123
1267
|
|
|
1124
1268
|
const tokens = [];
|
|
1125
1269
|
const mail = await simpleParser(source, this.config.simpleParser);
|
|
@@ -1157,12 +1301,11 @@ class SpamScanner {
|
|
|
1157
1301
|
|
|
1158
1302
|
// eslint-disable-next-line complexity
|
|
1159
1303
|
async getPhishingResults(mail) {
|
|
1160
|
-
const messages =
|
|
1161
|
-
|
|
1304
|
+
const messages = new Set();
|
|
1162
1305
|
//
|
|
1163
1306
|
// NOTE: all links pushed are lowercased
|
|
1164
1307
|
//
|
|
1165
|
-
const links =
|
|
1308
|
+
const links = new Set();
|
|
1166
1309
|
|
|
1167
1310
|
// parse <a> tags with different org domain in text vs the link
|
|
1168
1311
|
if (isSANB(mail.html)) {
|
|
@@ -1172,7 +1315,7 @@ class SpamScanner {
|
|
|
1172
1315
|
// elements concatenate to form a URL which is malicious or phishing
|
|
1173
1316
|
//
|
|
1174
1317
|
for (const link of this.getUrls(striptags(mail.html, [], ' ').trim())) {
|
|
1175
|
-
|
|
1318
|
+
links.add(link);
|
|
1176
1319
|
}
|
|
1177
1320
|
|
|
1178
1321
|
//
|
|
@@ -1214,7 +1357,7 @@ class SpamScanner {
|
|
|
1214
1357
|
// (this is needed because some have "Web:%20http://google.com" for example in href tags)
|
|
1215
1358
|
[href] = this.getUrls(href);
|
|
1216
1359
|
// eslint-disable-next-line max-depth
|
|
1217
|
-
if (href
|
|
1360
|
+
if (href) links.add(href);
|
|
1218
1361
|
}
|
|
1219
1362
|
|
|
1220
1363
|
// the text content could contain multiple URL's
|
|
@@ -1224,18 +1367,17 @@ class SpamScanner {
|
|
|
1224
1367
|
isSANB(href) &&
|
|
1225
1368
|
validator.isURL(href, isURLOptions)
|
|
1226
1369
|
) {
|
|
1227
|
-
const string = `Anchor link with href of
|
|
1370
|
+
const string = `Anchor link with href of ${href} and inner text value of "${textContent}"`;
|
|
1228
1371
|
// eslint-disable-next-line max-depth
|
|
1229
1372
|
if (this.config.checkIDNHomographAttack) {
|
|
1230
1373
|
const anchorUrlHostname = this.getHostname(href);
|
|
1231
1374
|
// eslint-disable-next-line max-depth
|
|
1232
1375
|
if (anchorUrlHostname) {
|
|
1233
|
-
const anchorUrlHostnameToASCII =
|
|
1234
|
-
anchorUrlHostname
|
|
1235
|
-
);
|
|
1376
|
+
const anchorUrlHostnameToASCII =
|
|
1377
|
+
punycode.toASCII(anchorUrlHostname);
|
|
1236
1378
|
// eslint-disable-next-line max-depth
|
|
1237
|
-
if (anchorUrlHostnameToASCII.
|
|
1238
|
-
messages.
|
|
1379
|
+
if (anchorUrlHostnameToASCII.indexOf('xn--') === 0)
|
|
1380
|
+
messages.add(
|
|
1239
1381
|
`${string} has possible IDN homograph attack from anchor hostname.`
|
|
1240
1382
|
);
|
|
1241
1383
|
}
|
|
@@ -1244,20 +1386,19 @@ class SpamScanner {
|
|
|
1244
1386
|
// eslint-disable-next-line max-depth
|
|
1245
1387
|
for (const link of this.getUrls(textContent)) {
|
|
1246
1388
|
// this link should have already been included but just in case
|
|
1247
|
-
|
|
1248
|
-
|
|
1389
|
+
|
|
1390
|
+
links.add(link);
|
|
1249
1391
|
|
|
1250
1392
|
// eslint-disable-next-line max-depth
|
|
1251
1393
|
if (this.config.checkIDNHomographAttack) {
|
|
1252
1394
|
const innerTextUrlHostname = this.getHostname(link);
|
|
1253
1395
|
// eslint-disable-next-line max-depth
|
|
1254
1396
|
if (innerTextUrlHostname) {
|
|
1255
|
-
const innerTextUrlHostnameToASCII =
|
|
1256
|
-
innerTextUrlHostname
|
|
1257
|
-
);
|
|
1397
|
+
const innerTextUrlHostnameToASCII =
|
|
1398
|
+
punycode.toASCII(innerTextUrlHostname);
|
|
1258
1399
|
// eslint-disable-next-line max-depth
|
|
1259
|
-
if (innerTextUrlHostnameToASCII.
|
|
1260
|
-
messages.
|
|
1400
|
+
if (innerTextUrlHostnameToASCII.indexOf('xn--') === 0)
|
|
1401
|
+
messages.add(
|
|
1261
1402
|
`${string} has possible IDN homograph attack from inner text hostname.`
|
|
1262
1403
|
);
|
|
1263
1404
|
}
|
|
@@ -1273,49 +1414,46 @@ class SpamScanner {
|
|
|
1273
1414
|
for (const prop of MAIL_PHISHING_PROPS) {
|
|
1274
1415
|
if (isSANB(mail[prop])) {
|
|
1275
1416
|
for (const link of this.getUrls(mail[prop])) {
|
|
1276
|
-
|
|
1417
|
+
links.add(link);
|
|
1277
1418
|
}
|
|
1278
1419
|
}
|
|
1279
1420
|
}
|
|
1280
1421
|
|
|
1281
|
-
|
|
1282
|
-
const
|
|
1283
|
-
|
|
1284
|
-
|
|
1285
|
-
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
|
|
1422
|
+
if (this.config.checkIDNHomographAttack) {
|
|
1423
|
+
for (const link of links) {
|
|
1424
|
+
const urlHostname = this.getHostname(link);
|
|
1425
|
+
if (urlHostname) {
|
|
1426
|
+
const toASCII = punycode.toASCII(urlHostname);
|
|
1427
|
+
if (toASCII.indexOf('xn--') === 0)
|
|
1428
|
+
messages.add(
|
|
1429
|
+
`Possible IDN homograph attack from link of ${link} with punycode converted hostname of ${toASCII}.`
|
|
1430
|
+
);
|
|
1431
|
+
}
|
|
1289
1432
|
}
|
|
1290
1433
|
}
|
|
1291
1434
|
|
|
1292
1435
|
// check against Cloudflare malware/phishing/adult DNS lookup
|
|
1293
1436
|
// if it returns `0.0.0.0` it means it was flagged
|
|
1294
1437
|
await Promise.all(
|
|
1295
|
-
links.map(async (link) => {
|
|
1438
|
+
[...links].map(async (link) => {
|
|
1296
1439
|
try {
|
|
1297
1440
|
const urlHostname = this.getHostname(link);
|
|
1298
1441
|
if (urlHostname) {
|
|
1299
1442
|
const toASCII = punycode.toASCII(urlHostname);
|
|
1300
|
-
const adultMessage = `Link hostname of
|
|
1301
|
-
const malwareMessage = `Link hostname of ${toASCII}
|
|
1443
|
+
const adultMessage = `Link hostname of ${toASCII} was detected by Cloudflare's Family DNS to contain adult-related content, phishing, and/or malware.`;
|
|
1444
|
+
const malwareMessage = `Link hostname of ${toASCII} was detected by Cloudflare's Security DNS to contain phishing and/or malware.`;
|
|
1302
1445
|
|
|
1303
1446
|
// if it already included both messages then return early
|
|
1304
|
-
if (
|
|
1305
|
-
messages.includes(adultMessage) &&
|
|
1306
|
-
messages.includes(malwareMessage)
|
|
1307
|
-
)
|
|
1447
|
+
if (messages.has(adultMessage) && messages.has(malwareMessage))
|
|
1308
1448
|
return;
|
|
1309
1449
|
|
|
1310
|
-
const {
|
|
1311
|
-
|
|
1312
|
-
isMalware
|
|
1313
|
-
} = await this.memoizedIsCloudflareBlocked(toASCII);
|
|
1450
|
+
const { isAdult, isMalware } =
|
|
1451
|
+
await this.memoizedIsCloudflareBlocked(toASCII);
|
|
1314
1452
|
|
|
1315
|
-
if (isAdult && !messages.
|
|
1316
|
-
messages.
|
|
1317
|
-
if (isMalware && !messages.
|
|
1318
|
-
messages.
|
|
1453
|
+
if (isAdult && !messages.has(adultMessage))
|
|
1454
|
+
messages.add(adultMessage);
|
|
1455
|
+
if (isMalware && !messages.has(malwareMessage))
|
|
1456
|
+
messages.add(malwareMessage);
|
|
1319
1457
|
}
|
|
1320
1458
|
} catch (err) {
|
|
1321
1459
|
this.config.logger.error(err);
|
|
@@ -1323,7 +1461,7 @@ class SpamScanner {
|
|
|
1323
1461
|
})
|
|
1324
1462
|
);
|
|
1325
1463
|
|
|
1326
|
-
return { messages, links };
|
|
1464
|
+
return { messages: [...messages], links: [...links] };
|
|
1327
1465
|
}
|
|
1328
1466
|
|
|
1329
1467
|
// getNSFWResults() {
|
|
@@ -1344,7 +1482,7 @@ class SpamScanner {
|
|
|
1344
1482
|
try {
|
|
1345
1483
|
const fileType = await FileType.fromBuffer(attachment.content);
|
|
1346
1484
|
|
|
1347
|
-
if (fileType && fileType.ext && EXECUTABLES.
|
|
1485
|
+
if (fileType && fileType.ext && EXECUTABLES.has(fileType.ext))
|
|
1348
1486
|
messages.push(
|
|
1349
1487
|
`Attachment's "magic number" indicated it was a dangerous executable with a ".${fileType.ext}" extension.`
|
|
1350
1488
|
);
|
|
@@ -1359,7 +1497,7 @@ class SpamScanner {
|
|
|
1359
1497
|
punycode.toUnicode(attachment.filename.split('?')[0])
|
|
1360
1498
|
);
|
|
1361
1499
|
const ext = fileExtension(filename);
|
|
1362
|
-
if (ext && EXECUTABLES.
|
|
1500
|
+
if (ext && EXECUTABLES.has(ext))
|
|
1363
1501
|
messages.push(
|
|
1364
1502
|
`Attachment's file name indicated it was a dangerous executable with a ".${ext}" extension.`
|
|
1365
1503
|
);
|
|
@@ -1367,7 +1505,7 @@ class SpamScanner {
|
|
|
1367
1505
|
|
|
1368
1506
|
if (isSANB(attachment.contentType)) {
|
|
1369
1507
|
const ext = mime.extension(attachment.contentType);
|
|
1370
|
-
if (isSANB(ext) && EXECUTABLES.
|
|
1508
|
+
if (isSANB(ext) && EXECUTABLES.has(ext))
|
|
1371
1509
|
messages.push(
|
|
1372
1510
|
`Attachment's Content-Type was a dangerous executable with a ".${ext}" extension.`
|
|
1373
1511
|
);
|
package/package.json
CHANGED
|
@@ -1,22 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "spamscanner",
|
|
3
3
|
"description": "Spam Scanner - The Best Anti-Spam Scanning Service and Anti-Spam API",
|
|
4
|
-
"version": "
|
|
4
|
+
"version": "5.0.0",
|
|
5
5
|
"author": "Niftylettuce, LLC. <niftylettuce@gmail.com> (https://niftylettuce.com/)",
|
|
6
|
-
"ava": {
|
|
7
|
-
"timeout": "30s",
|
|
8
|
-
"verbose": true,
|
|
9
|
-
"serial": true
|
|
10
|
-
},
|
|
11
6
|
"bugs": {
|
|
12
7
|
"url": "https://github.com/spamscanner/spamscanner/issues",
|
|
13
8
|
"email": "niftylettuce@gmail.com"
|
|
14
9
|
},
|
|
15
|
-
"commitlint": {
|
|
16
|
-
"extends": [
|
|
17
|
-
"@commitlint/config-conventional"
|
|
18
|
-
]
|
|
19
|
-
},
|
|
20
10
|
"contributors": [
|
|
21
11
|
"Nick Baugh <niftylettuce@gmail.com> (http://niftylettuce.com/)",
|
|
22
12
|
"Shaun Warman <shaunwarman1@gmail.com> (http://shaunwarman.com/)"
|
|
@@ -24,82 +14,81 @@
|
|
|
24
14
|
"dependencies": {
|
|
25
15
|
"@ladjs/naivebayes": "^0.1.0",
|
|
26
16
|
"bitcoin-regex": "^2.0.0",
|
|
27
|
-
"clamscan": "^1.
|
|
17
|
+
"clamscan": "^2.1.2",
|
|
28
18
|
"credit-card-regex": "^3.0.0",
|
|
29
|
-
"crypto-random-string": "
|
|
19
|
+
"crypto-random-string": "3",
|
|
30
20
|
"currency-codes": "^2.1.0",
|
|
31
|
-
"currency-symbol-map": "^5.0
|
|
32
|
-
"debug": "^4.3.1",
|
|
21
|
+
"currency-symbol-map": "^5.1.0",
|
|
33
22
|
"email-regex-safe": "^1.0.2",
|
|
34
|
-
"emoji-patterns": "^
|
|
35
|
-
"escape-string-regexp": "
|
|
23
|
+
"emoji-patterns": "^14.0.1",
|
|
24
|
+
"escape-string-regexp": "4",
|
|
36
25
|
"expand-contractions": "^1.0.1",
|
|
37
26
|
"file-extension": "^4.0.5",
|
|
38
|
-
"file-type": "
|
|
27
|
+
"file-type": "16",
|
|
39
28
|
"floating-point-regex": "^0.1.0",
|
|
40
|
-
"franc": "
|
|
41
|
-
"gemoji": "
|
|
29
|
+
"franc": "5",
|
|
30
|
+
"gemoji": "6",
|
|
42
31
|
"hasha": "^5.2.2",
|
|
43
32
|
"hexa-color-regex": "^1.0.0",
|
|
44
|
-
"i18n-locales": "^0.0.
|
|
45
|
-
"iconv": "^3.0.
|
|
46
|
-
"into-stream": "
|
|
47
|
-
"ip-regex": "
|
|
33
|
+
"i18n-locales": "^0.0.5",
|
|
34
|
+
"iconv": "^3.0.1",
|
|
35
|
+
"into-stream": "6",
|
|
36
|
+
"ip-regex": "4",
|
|
48
37
|
"is-buffer": "^2.0.5",
|
|
49
|
-
"is-stream": "
|
|
38
|
+
"is-stream": "2",
|
|
50
39
|
"is-string-and-not-blank": "^0.0.2",
|
|
51
40
|
"is-valid-path": "^0.1.1",
|
|
52
41
|
"mac-regex": "^1.0.0",
|
|
53
|
-
"macos-version": "
|
|
54
|
-
"mailparser": "^3.0
|
|
42
|
+
"macos-version": "5",
|
|
43
|
+
"mailparser": "^3.5.0",
|
|
55
44
|
"memoizee": "^0.4.15",
|
|
56
|
-
"mime-types": "^2.1.
|
|
45
|
+
"mime-types": "^2.1.35",
|
|
57
46
|
"ms": "^2.1.3",
|
|
58
|
-
"natural": "^
|
|
47
|
+
"natural": "^5.2.2",
|
|
59
48
|
"newline-remove": "^1.0.2",
|
|
60
|
-
"node-html-parser": "
|
|
49
|
+
"node-html-parser": "4",
|
|
61
50
|
"node-snowball": "^0.6.0",
|
|
62
|
-
"normalize-url": "
|
|
63
|
-
"parse-domain": "
|
|
51
|
+
"normalize-url": "5",
|
|
52
|
+
"parse-domain": "5",
|
|
64
53
|
"phone-regex": "^2.1.0",
|
|
65
54
|
"punycode": "^2.1.1",
|
|
66
|
-
"re2": "^1.
|
|
67
|
-
"sanitize-html": "^2.
|
|
68
|
-
"stopword": "^
|
|
69
|
-
"striptags": "^3.
|
|
70
|
-
"superagent": "^
|
|
55
|
+
"re2": "^1.17.6",
|
|
56
|
+
"sanitize-html": "^2.7.0",
|
|
57
|
+
"stopword": "^2.0.2",
|
|
58
|
+
"striptags": "^3.2.0",
|
|
59
|
+
"superagent": "^7.1.6",
|
|
71
60
|
"trim-leading-whitespace": "^0.1.1",
|
|
72
61
|
"universalify": "^2.0.0",
|
|
73
|
-
"url-regex-safe": "^
|
|
74
|
-
"validator": "^13.
|
|
62
|
+
"url-regex-safe": "^3.0.0",
|
|
63
|
+
"validator": "^13.7.0",
|
|
64
|
+
"which": "^2.0.2"
|
|
75
65
|
},
|
|
76
66
|
"devDependencies": {
|
|
77
|
-
"@commitlint/cli": "^
|
|
78
|
-
"@commitlint/config-conventional": "^
|
|
67
|
+
"@commitlint/cli": "^17.0.2",
|
|
68
|
+
"@commitlint/config-conventional": "^17.0.2",
|
|
79
69
|
"@ladjs/redis": "^1.0.7",
|
|
80
|
-
"ava": "^3.
|
|
81
|
-
"codecov": "^3.8.1",
|
|
70
|
+
"ava": "^4.3.0",
|
|
82
71
|
"cross-env": "^7.0.3",
|
|
83
72
|
"delay": "^5.0.0",
|
|
84
|
-
"eslint": "^
|
|
85
|
-
"eslint-config-xo-lass": "^
|
|
73
|
+
"eslint": "^8.17.0",
|
|
74
|
+
"eslint-config-xo-lass": "^2.0.1",
|
|
86
75
|
"fixpack": "^4.0.0",
|
|
87
|
-
"husky": "^
|
|
88
|
-
"is-ci": "^
|
|
89
|
-
"lint-staged": "^
|
|
90
|
-
"lookpath": "^1.
|
|
76
|
+
"husky": "^8.0.1",
|
|
77
|
+
"is-ci": "^3.0.1",
|
|
78
|
+
"lint-staged": "^13.0.1",
|
|
79
|
+
"lookpath": "^1.2.2",
|
|
91
80
|
"make-dir": "^3.1.0",
|
|
92
81
|
"node-mbox": "^1.0.0",
|
|
93
82
|
"numeral": "^2.0.6",
|
|
94
83
|
"nyc": "^15.1.0",
|
|
95
|
-
"p-map": "
|
|
84
|
+
"p-map": "4",
|
|
96
85
|
"read-dir-deep": "^7.0.1",
|
|
97
|
-
"remark-cli": "^
|
|
98
|
-
"remark-preset-github": "^4.0.
|
|
99
|
-
"xo": "^0.
|
|
86
|
+
"remark-cli": "^10.0.1",
|
|
87
|
+
"remark-preset-github": "^4.0.4",
|
|
88
|
+
"xo": "^0.50.0"
|
|
100
89
|
},
|
|
101
90
|
"engines": {
|
|
102
|
-
"node": ">=
|
|
91
|
+
"node": ">=14"
|
|
103
92
|
},
|
|
104
93
|
"files": [
|
|
105
94
|
"package.json",
|
|
@@ -114,12 +103,6 @@
|
|
|
114
103
|
"classifier.json"
|
|
115
104
|
],
|
|
116
105
|
"homepage": "https://github.com/spamscanner/spamscanner",
|
|
117
|
-
"husky": {
|
|
118
|
-
"hooks": {
|
|
119
|
-
"pre-commit": "lint-staged",
|
|
120
|
-
"commit-msg": "commitlint -E HUSKY_GIT_PARAMS"
|
|
121
|
-
}
|
|
122
|
-
},
|
|
123
106
|
"keywords": [
|
|
124
107
|
"adult",
|
|
125
108
|
"api",
|
|
@@ -172,38 +155,17 @@
|
|
|
172
155
|
],
|
|
173
156
|
"license": "Business Source License 1.1",
|
|
174
157
|
"main": "index.js",
|
|
175
|
-
"prettier": {
|
|
176
|
-
"singleQuote": true,
|
|
177
|
-
"bracketSpacing": true,
|
|
178
|
-
"trailingComma": "none"
|
|
179
|
-
},
|
|
180
|
-
"remarkConfig": {
|
|
181
|
-
"plugins": [
|
|
182
|
-
"preset-github"
|
|
183
|
-
]
|
|
184
|
-
},
|
|
185
158
|
"repository": {
|
|
186
159
|
"type": "git",
|
|
187
160
|
"url": "https://github.com/spamscanner/spamscanner"
|
|
188
161
|
},
|
|
189
162
|
"scripts": {
|
|
190
163
|
"ava": "cross-env NODE_ENV=test ava",
|
|
191
|
-
"
|
|
192
|
-
"lint": "xo && remark . -qfo",
|
|
164
|
+
"lint": "xo --fix && remark . -qfo && fixpack",
|
|
193
165
|
"nyc": "cross-env NODE_ENV=test nyc ava",
|
|
194
|
-
"
|
|
166
|
+
"prepare": "husky install",
|
|
167
|
+
"pretest": "npm run lint",
|
|
168
|
+
"test": "npm run test-coverage",
|
|
195
169
|
"test-coverage": "npm run lint && npm run nyc"
|
|
196
|
-
},
|
|
197
|
-
"xo": {
|
|
198
|
-
"prettier": true,
|
|
199
|
-
"space": true,
|
|
200
|
-
"extends": [
|
|
201
|
-
"xo-lass"
|
|
202
|
-
],
|
|
203
|
-
"ignores": [
|
|
204
|
-
"data",
|
|
205
|
-
"classifier.json",
|
|
206
|
-
"bag-of-words.json"
|
|
207
|
-
]
|
|
208
170
|
}
|
|
209
171
|
}
|
package/vocabulary-limit.js
CHANGED