spamscanner 4.0.0 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +77 -7
- package/index.js +255 -113
- package/package.json +35 -73
- package/vocabulary-limit.js +3 -1
package/README.md
CHANGED
|
@@ -2,9 +2,7 @@
|
|
|
2
2
|
<a href="https://spamscanner.net"><img src="https://d1i8ikybhfrv4r.cloudfront.net/spamscanner.png" alt="spamscanner" /></a>
|
|
3
3
|
</h1>
|
|
4
4
|
<div align="center">
|
|
5
|
-
<a href="https://
|
|
6
|
-
<a href="https://travis-ci.com/spamscanner/spamscanner"><img src="https://travis-ci.com/spamscanner/spamscanner.svg?branch=master" alt="build status" /></a>
|
|
7
|
-
<a href="https://codecov.io/github/spamscanner/spamscanner"><img src="https://img.shields.io/codecov/c/github/spamscanner/spamscanner/master.svg" alt="code coverage" /></a>
|
|
5
|
+
<a href="https://github.com/spamscanner/spamscanner/actions/workflows/ci.yml"><img src="https://github.com/spamscanner/spamscanner/actions/workflows/ci.yml/badge.svg" alt="build status" /></a>
|
|
8
6
|
<a href="https://github.com/sindresorhus/xo"><img src="https://img.shields.io/badge/code_style-XO-5ed9c7.svg" alt="code style" /></a>
|
|
9
7
|
<a href="https://github.com/prettier/prettier"><img src="https://img.shields.io/badge/styled_with-prettier-ff69b4.svg" alt="styled with prettier" /></a>
|
|
10
8
|
<a href="https://lass.js.org"><img src="https://img.shields.io/badge/made_with-lass-95CC28.svg" alt="made with lass" /></a>
|
|
@@ -48,6 +46,7 @@
|
|
|
48
46
|
* [`scanner.getVirusResults(mail)`](#scannergetvirusresultsmail)
|
|
49
47
|
* [`scanner.parseLocale(locale)`](#scannerparselocalelocale)
|
|
50
48
|
* [Caching](#caching)
|
|
49
|
+
* [Debugging](#debugging)
|
|
51
50
|
* [Contributors](#contributors)
|
|
52
51
|
* [References](#references)
|
|
53
52
|
* [License](#license)
|
|
@@ -188,11 +187,48 @@ Note that you can simply use the Spam Scanner API for free at <https://spamscann
|
|
|
188
187
|
2. Configure ClamAV:
|
|
189
188
|
|
|
190
189
|
```sh
|
|
190
|
+
# if you are on Intel macOS
|
|
191
|
+
sudo mv /usr/local/etc/clamav/clamd.conf.sample /usr/local/etc/clamav/clamd.conf
|
|
192
|
+
|
|
193
|
+
# if you are on M1 macOS (or newer brew which installs to `/opt/homebrew`)
|
|
194
|
+
sudo mv /opt/homebrew/etc/clamav/clamd.conf.sample /opt/homebrew/etc/clamav/clamd.conf
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
```sh
|
|
198
|
+
# if you are on Intel macOS
|
|
199
|
+
sudo vim /usr/local/etc/clamav/clamd.conf
|
|
200
|
+
|
|
201
|
+
# if you are on M1 macOS (or newer brew which installs to `/opt/homebrew`)
|
|
202
|
+
sudo vim /opt/homebrew/etc/clamav/clamd.conf
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
```diff
|
|
206
|
+
-Example
|
|
207
|
+
+#Example
|
|
208
|
+
|
|
209
|
+
-#StreamMaxLength 10M
|
|
210
|
+
+StreamMaxLength 50M
|
|
211
|
+
|
|
212
|
+
+# this file path may be different on your OS (that's OK)
|
|
213
|
+
|
|
214
|
+
\-#LocalSocket /tmp/clamd.socket
|
|
215
|
+
\+LocalSocket /tmp/clamd.socket
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
```sh
|
|
219
|
+
# if you are on Intel macOS
|
|
191
220
|
sudo mv /usr/local/etc/clamav/freshclam.conf.sample /usr/local/etc/clamav/freshclam.conf
|
|
221
|
+
|
|
222
|
+
# if you are on M1 macOS (or newer brew which installs to `/opt/homebrew`)
|
|
223
|
+
sudo mv /opt/homebrew/etc/clamav/freshclam.conf.sample /opt/homebrew/etc/clamav/freshclam.conf
|
|
192
224
|
```
|
|
193
225
|
|
|
194
226
|
```sh
|
|
227
|
+
# if you are on Intel macOS
|
|
195
228
|
sudo vim /usr/local/etc/clamav/freshclam.conf
|
|
229
|
+
|
|
230
|
+
# if you are on M1 macOS (or newer brew which installs to `/opt/homebrew`)
|
|
231
|
+
sudo vim /opt/homebrew/etc/clamav/freshclam.conf
|
|
196
232
|
```
|
|
197
233
|
|
|
198
234
|
```diff
|
|
@@ -210,6 +246,8 @@ Note that you can simply use the Spam Scanner API for free at <https://spamscann
|
|
|
210
246
|
sudo vim /Library/LaunchDaemons/org.clamav.clamd.plist
|
|
211
247
|
```
|
|
212
248
|
|
|
249
|
+
> If you are on Intel macOS:
|
|
250
|
+
|
|
213
251
|
```plist
|
|
214
252
|
<?xml version="1.0" encoding="UTF-8"?>
|
|
215
253
|
<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
|
@@ -231,12 +269,37 @@ Note that you can simply use the Spam Scanner API for free at <https://spamscann
|
|
|
231
269
|
</plist>
|
|
232
270
|
```
|
|
233
271
|
|
|
272
|
+
> If you are on M1 macOS (or newer brew which installs to `/opt/homebrew`)
|
|
273
|
+
|
|
274
|
+
```plist
|
|
275
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
276
|
+
<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
|
277
|
+
<plist version="1.0">
|
|
278
|
+
<dict>
|
|
279
|
+
<key>Label</key>
|
|
280
|
+
<string>org.clamav.clamd</string>
|
|
281
|
+
<key>KeepAlive</key>
|
|
282
|
+
<true/>
|
|
283
|
+
<key>Program</key>
|
|
284
|
+
<string>/opt/homebrew/sbin/clamd</string>
|
|
285
|
+
<key>ProgramArguments</key>
|
|
286
|
+
<array>
|
|
287
|
+
<string>clamd</string>
|
|
288
|
+
</array>
|
|
289
|
+
<key>RunAtLoad</key>
|
|
290
|
+
<true/>
|
|
291
|
+
</dict>
|
|
292
|
+
</plist>
|
|
293
|
+
```
|
|
294
|
+
|
|
295
|
+
4. Enable it and start it on boot:
|
|
296
|
+
|
|
234
297
|
```sh
|
|
235
298
|
sudo launchctl load /Library/LaunchDaemons/org.clamav.clamd.plist
|
|
236
299
|
sudo launchctl start /Library/LaunchDaemons/org.clamav.clamd.plist
|
|
237
300
|
```
|
|
238
301
|
|
|
239
|
-
|
|
302
|
+
5. You may want to periodically run `freshclam` to update the virus definition database, or configure a similar `plist` configuration for `launchctl`.
|
|
240
303
|
|
|
241
304
|
|
|
242
305
|
## Install
|
|
@@ -244,7 +307,7 @@ Note that you can simply use the Spam Scanner API for free at <https://spamscann
|
|
|
244
307
|
[npm][]:
|
|
245
308
|
|
|
246
309
|
```sh
|
|
247
|
-
npm install spamscanner
|
|
310
|
+
npm install spamscanner
|
|
248
311
|
```
|
|
249
312
|
|
|
250
313
|
|
|
@@ -359,7 +422,7 @@ Currently Spam Scanner supports the following locales for tokenization, stemming
|
|
|
359
422
|
| Finnish | `fi` |
|
|
360
423
|
| Farsi | `fa` |
|
|
361
424
|
| French | `fr` |
|
|
362
|
-
| German | `
|
|
425
|
+
| German | `de` |
|
|
363
426
|
| Hungarian | `hu` |
|
|
364
427
|
| Indonesian | `in` |
|
|
365
428
|
| Italian | `it` |
|
|
@@ -406,7 +469,7 @@ A common example of this is a link of `рaypal.com` which when converted to ASCI
|
|
|
406
469
|
|
|
407
470
|
This method checks against [Cloudflare for Families](https://developers.cloudflare.com/1.1.1.1/1.1.1.1-for-families) servers for adult-related content, malware, and phishing. This means we do two separate DNS over HTTPS requests to `1.1.1.2` for malware and `1.1.1.3` for adult-related content. You can parse the messages results Array for messages that contain "adult-related content" if you need to decide whether to flag adult-related content in your application.
|
|
408
471
|
|
|
409
|
-
If you are using Cloudflare for Families DNS servers as mentioned in [Requirements](#requirements), then if there are any DNS over HTTPS request errors, it will fall back to using the DNS servers set on the system for lookups, which would in turn use Cloudflare for Families DNS. (using DNS over HTTPS with a fallback of [dns.resolve4](https://nodejs.org/api/dns.html#
|
|
472
|
+
If you are using Cloudflare for Families DNS servers as mentioned in [Requirements](#requirements), then if there are any DNS over HTTPS request errors, it will fall back to using the DNS servers set on the system for lookups, which would in turn use Cloudflare for Families DNS. (using DNS over HTTPS with a fallback of [dns.resolve4](https://nodejs.org/api/dns.html#dns_dns_resolve4_hostname_options_callback)) – and if it returns `0.0.0.0` then it is considered to be phishing.
|
|
410
473
|
|
|
411
474
|
We actually helped Cloudflare in August 2020 to update their documentation to note that this result of `0.0.0.0` is returned for maliciously found content on FQDN and IP lookups.
|
|
412
475
|
|
|
@@ -501,6 +564,13 @@ const scanner = new SpamScanner({
|
|
|
501
564
|
Note that in [Forward Email][forward-email] we use the `client` approach as we have multiple threads across multiple servers running, and in-memory caching would not be efficient.
|
|
502
565
|
|
|
503
566
|
|
|
567
|
+
## Debugging
|
|
568
|
+
|
|
569
|
+
Spam Scanner has built-in debug output via `util.debuglog('spamscanner')`.
|
|
570
|
+
|
|
571
|
+
This means you can run your app with `NODE_DEBUG=spamscanner node app.js` to get useful debug output to your console.
|
|
572
|
+
|
|
573
|
+
|
|
504
574
|
## Contributors
|
|
505
575
|
|
|
506
576
|
| Name | Website |
|
package/index.js
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
|
+
const process = require('process');
|
|
1
2
|
const dns = require('dns');
|
|
2
3
|
const fs = require('fs');
|
|
3
|
-
const {
|
|
4
|
+
const { debuglog } = require('util');
|
|
4
5
|
|
|
5
|
-
// eslint-disable-next-line
|
|
6
|
+
// eslint-disable-next-line n/no-deprecated-api
|
|
6
7
|
const punycode = require('punycode');
|
|
7
8
|
|
|
8
9
|
const ClamScan = require('clamscan');
|
|
@@ -12,7 +13,6 @@ const RE2 = require('re2');
|
|
|
12
13
|
const bitcoinRegex = require('bitcoin-regex');
|
|
13
14
|
const contractions = require('expand-contractions');
|
|
14
15
|
const creditCardRegex = require('credit-card-regex');
|
|
15
|
-
const debug = require('debug')('spamscanner');
|
|
16
16
|
const emailRegexSafe = require('email-regex-safe');
|
|
17
17
|
const emojiPatterns = require('emoji-patterns');
|
|
18
18
|
const escapeStringRegexp = require('escape-string-regexp');
|
|
@@ -46,12 +46,15 @@ const toEmoji = require('gemoji/name-to-emoji');
|
|
|
46
46
|
const universalify = require('universalify');
|
|
47
47
|
const urlRegexSafe = require('url-regex-safe');
|
|
48
48
|
const validator = require('validator');
|
|
49
|
+
const which = require('which');
|
|
49
50
|
const { Iconv } = require('iconv');
|
|
50
51
|
const { codes } = require('currency-codes');
|
|
51
52
|
const { fromUrl, NO_HOSTNAME } = require('parse-domain');
|
|
52
53
|
const { parse } = require('node-html-parser');
|
|
53
54
|
const { simpleParser } = require('mailparser');
|
|
54
55
|
|
|
56
|
+
const debug = debuglog('spamscanner');
|
|
57
|
+
|
|
55
58
|
const aggressiveTokenizer = new natural.AggressiveTokenizer();
|
|
56
59
|
const orthographyTokenizer = new natural.OrthographyTokenizer({
|
|
57
60
|
language: 'fi'
|
|
@@ -69,20 +72,115 @@ const aggressiveTokenizerSv = new natural.AggressiveTokenizerSv();
|
|
|
69
72
|
const aggressiveTokenizerRu = new natural.AggressiveTokenizerRu();
|
|
70
73
|
const aggressiveTokenizerVi = new natural.AggressiveTokenizerVi();
|
|
71
74
|
|
|
72
|
-
const stopwordsEn =
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
const
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
const
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
const
|
|
85
|
-
|
|
75
|
+
const stopwordsEn = new Set([
|
|
76
|
+
...require('natural/lib/natural/util/stopwords').words,
|
|
77
|
+
...sw.eng
|
|
78
|
+
]);
|
|
79
|
+
const stopwordsEs = new Set([
|
|
80
|
+
...require('natural/lib/natural/util/stopwords_es').words,
|
|
81
|
+
...sw.spa
|
|
82
|
+
]);
|
|
83
|
+
const stopwordsFa = new Set([
|
|
84
|
+
...require('natural/lib/natural/util/stopwords_fa').words,
|
|
85
|
+
...sw.fas
|
|
86
|
+
]);
|
|
87
|
+
const stopwordsFr = new Set([
|
|
88
|
+
...require('natural/lib/natural/util/stopwords_fr').words,
|
|
89
|
+
...sw.fra
|
|
90
|
+
]);
|
|
91
|
+
const stopwordsId = new Set([
|
|
92
|
+
...require('natural/lib/natural/util/stopwords_id').words,
|
|
93
|
+
...sw.ind
|
|
94
|
+
]);
|
|
95
|
+
const stopwordsJa = new Set([
|
|
96
|
+
...require('natural/lib/natural/util/stopwords_ja').words,
|
|
97
|
+
...sw.jpn
|
|
98
|
+
]);
|
|
99
|
+
const stopwordsIt = new Set([
|
|
100
|
+
...require('natural/lib/natural/util/stopwords_it').words,
|
|
101
|
+
...sw.ita
|
|
102
|
+
]);
|
|
103
|
+
const stopwordsNl = new Set([
|
|
104
|
+
...require('natural/lib/natural/util/stopwords_nl').words,
|
|
105
|
+
...sw.nld
|
|
106
|
+
]);
|
|
107
|
+
const stopwordsNo = new Set([
|
|
108
|
+
...require('natural/lib/natural/util/stopwords_no').words,
|
|
109
|
+
...sw.nob
|
|
110
|
+
]);
|
|
111
|
+
const stopwordsPl = new Set([
|
|
112
|
+
...require('natural/lib/natural/util/stopwords_pl').words,
|
|
113
|
+
...sw.pol
|
|
114
|
+
]);
|
|
115
|
+
const stopwordsPt = new Set([
|
|
116
|
+
...require('natural/lib/natural/util/stopwords_pt').words,
|
|
117
|
+
...sw.por,
|
|
118
|
+
...sw.porBr
|
|
119
|
+
]);
|
|
120
|
+
const stopwordsRu = new Set([
|
|
121
|
+
...require('natural/lib/natural/util/stopwords_ru').words,
|
|
122
|
+
...sw.rus
|
|
123
|
+
]);
|
|
124
|
+
const stopwordsSv = new Set([
|
|
125
|
+
...require('natural/lib/natural/util/stopwords_sv').words,
|
|
126
|
+
...sw.swe
|
|
127
|
+
]);
|
|
128
|
+
const stopwordsZh = new Set([
|
|
129
|
+
...require('natural/lib/natural/util/stopwords_zh').words,
|
|
130
|
+
...sw.zho
|
|
131
|
+
]);
|
|
132
|
+
|
|
133
|
+
const stopwordsRon = new Set(sw.ron);
|
|
134
|
+
const stopwordsTur = new Set(sw.tur);
|
|
135
|
+
const stopwordsVie = new Set(sw.vie);
|
|
136
|
+
const stopwordsDeu = new Set(sw.deu);
|
|
137
|
+
const stopwordsHun = new Set(sw.hun);
|
|
138
|
+
const stopwordsAra = new Set(sw.ara);
|
|
139
|
+
const stopwordsDan = new Set(sw.dan);
|
|
140
|
+
const stopwordsFin = new Set(sw.fin);
|
|
141
|
+
|
|
142
|
+
// TODO: add stopword pairing for these langs:
|
|
143
|
+
// afr
|
|
144
|
+
// ben
|
|
145
|
+
// bre
|
|
146
|
+
// bul
|
|
147
|
+
// cat
|
|
148
|
+
// ces
|
|
149
|
+
// ell
|
|
150
|
+
// epo
|
|
151
|
+
// est
|
|
152
|
+
// eus
|
|
153
|
+
// fra
|
|
154
|
+
// gle
|
|
155
|
+
// glg
|
|
156
|
+
// guj
|
|
157
|
+
// hau
|
|
158
|
+
// heb
|
|
159
|
+
// hin
|
|
160
|
+
// hrv
|
|
161
|
+
// hye
|
|
162
|
+
// kor
|
|
163
|
+
// kur
|
|
164
|
+
// lat
|
|
165
|
+
// lav
|
|
166
|
+
// lgg
|
|
167
|
+
// lggNd
|
|
168
|
+
// lit
|
|
169
|
+
// mar
|
|
170
|
+
// msa
|
|
171
|
+
// mya
|
|
172
|
+
// panGu
|
|
173
|
+
// slk
|
|
174
|
+
// slv
|
|
175
|
+
// som
|
|
176
|
+
// sot
|
|
177
|
+
// swa
|
|
178
|
+
// tgl
|
|
179
|
+
// tha
|
|
180
|
+
// ukr
|
|
181
|
+
// urd
|
|
182
|
+
// yor
|
|
183
|
+
// zul
|
|
86
184
|
|
|
87
185
|
// <https://stackoverflow.com/a/41353282>
|
|
88
186
|
// <https://www.ietf.org/rfc/rfc3986.txt>
|
|
@@ -94,17 +192,16 @@ const PKG = require('./package.json');
|
|
|
94
192
|
|
|
95
193
|
const VOCABULARY_LIMIT = require('./vocabulary-limit.js');
|
|
96
194
|
|
|
195
|
+
// TODO: convert this into a Map
|
|
97
196
|
const ISO_CODE_MAPPING = require('./iso-code-mapping.json');
|
|
98
197
|
|
|
99
198
|
// <https://kb.smarshmail.com/Article/23567>
|
|
100
|
-
const EXECUTABLES = require('./executables.json');
|
|
199
|
+
const EXECUTABLES = new Set(require('./executables.json'));
|
|
101
200
|
|
|
102
201
|
const REPLACEMENT_WORDS = require('./replacement-words.json');
|
|
103
202
|
|
|
104
203
|
const locales = new Set(i18nLocales.map((l) => l.toLowerCase()));
|
|
105
204
|
|
|
106
|
-
const readFile = promisify(fs.readFile);
|
|
107
|
-
|
|
108
205
|
const normalizeUrlOptions = {
|
|
109
206
|
stripProtocol: true,
|
|
110
207
|
stripWWW: false,
|
|
@@ -154,7 +251,8 @@ for (const code of codes()) {
|
|
|
154
251
|
const symbol = getSymbolFromCurrency(code);
|
|
155
252
|
if (
|
|
156
253
|
typeof symbol === 'string' &&
|
|
157
|
-
|
|
254
|
+
// eslint-disable-next-line unicorn/prefer-includes
|
|
255
|
+
currencySymbols.indexOf(symbol) === -1 &&
|
|
158
256
|
!new RE2(/^[a-z]+$/i).test(symbol)
|
|
159
257
|
)
|
|
160
258
|
currencySymbols.push(escapeStringRegexp(symbol));
|
|
@@ -187,7 +285,9 @@ const isURLOptions = {
|
|
|
187
285
|
class SpamScanner {
|
|
188
286
|
constructor(config = {}) {
|
|
189
287
|
this.config = {
|
|
190
|
-
debug:
|
|
288
|
+
debug:
|
|
289
|
+
process.env.NODE_ENV === 'test' ||
|
|
290
|
+
process.env.NODE_ENV === 'development',
|
|
191
291
|
checkIDNHomographAttack: false,
|
|
192
292
|
// note that if you attempt to train an existing `scanner.classifier`
|
|
193
293
|
// then you will need to re-use these, so we suggest you store them
|
|
@@ -312,8 +412,15 @@ class SpamScanner {
|
|
|
312
412
|
userAgent: `${PKG.name}/${PKG.version}`,
|
|
313
413
|
timeout: ms('10s'),
|
|
314
414
|
clamscan: {
|
|
415
|
+
debugMode:
|
|
416
|
+
process.env.NODE_ENV === 'test' ||
|
|
417
|
+
process.env.NODE_ENV === 'development',
|
|
418
|
+
clamscan: {
|
|
419
|
+
path: which.sync('clamscan', { nothrow: true })
|
|
420
|
+
},
|
|
315
421
|
clamdscan: {
|
|
316
422
|
timeout: ms('10s'),
|
|
423
|
+
path: which.sync('clamdscan', { nothrow: true }),
|
|
317
424
|
socket: macosVersion.isMacOS
|
|
318
425
|
? '/tmp/clamd.socket'
|
|
319
426
|
: '/var/run/clamav/clamd.ctl'
|
|
@@ -416,9 +523,7 @@ class SpamScanner {
|
|
|
416
523
|
// cache in the background
|
|
417
524
|
this.config.client
|
|
418
525
|
.set(key, `${isAdult}:${isMalware}`, 'PX', this.config.ttlMs)
|
|
419
|
-
// eslint-disable-next-line promise/prefer-await-to-then
|
|
420
526
|
.then(this.config.logger.info)
|
|
421
|
-
// eslint-disable-next-line promise/prefer-await-to-then
|
|
422
527
|
.catch(this.config.logger.error);
|
|
423
528
|
return { isAdult, isMalware };
|
|
424
529
|
};
|
|
@@ -432,6 +537,27 @@ class SpamScanner {
|
|
|
432
537
|
throw new Error(
|
|
433
538
|
`Locale of ${this.config.locale} was not valid according to locales list.`
|
|
434
539
|
);
|
|
540
|
+
|
|
541
|
+
//
|
|
542
|
+
// set up regex helpers
|
|
543
|
+
//
|
|
544
|
+
this.EMAIL_REPLACEMENT_REGEX = new RE2(this.config.replacements.email, 'g');
|
|
545
|
+
const replacementRegexes = [];
|
|
546
|
+
for (const key of Object.keys(this.config.replacements)) {
|
|
547
|
+
replacementRegexes.push(
|
|
548
|
+
escapeStringRegexp(this.config.replacements[key])
|
|
549
|
+
);
|
|
550
|
+
}
|
|
551
|
+
|
|
552
|
+
this.REPLACEMENTS_REGEX = new RE2(
|
|
553
|
+
new RegExp(replacementRegexes.join('|'), 'g')
|
|
554
|
+
);
|
|
555
|
+
|
|
556
|
+
//
|
|
557
|
+
// set up helper Map and Sets for fast lookup
|
|
558
|
+
// (Set.has is 2x faster than includes, and 50% faster than indexOf)
|
|
559
|
+
//
|
|
560
|
+
this.WHITELISTED_WORDS = new Set(Object.values(this.config.replacements));
|
|
435
561
|
}
|
|
436
562
|
|
|
437
563
|
getHostname(link) {
|
|
@@ -521,15 +647,12 @@ class SpamScanner {
|
|
|
521
647
|
const stream = isStream(attachment.content)
|
|
522
648
|
? attachment.content
|
|
523
649
|
: intoStream(attachment.content);
|
|
524
|
-
const {
|
|
525
|
-
await clamscan.scan_stream(stream);
|
|
650
|
+
const { isInfected, viruses } = await clamscan.scanStream(stream);
|
|
526
651
|
const name = isSANB(attachment.filename)
|
|
527
652
|
? `"${attachment.filename}"`
|
|
528
653
|
: `#${i + 1}`;
|
|
529
654
|
if (isInfected)
|
|
530
|
-
messages.push(
|
|
531
|
-
`Attachment ${name} was infected with "${viruses}".`
|
|
532
|
-
);
|
|
655
|
+
messages.push(`Attachment ${name} was infected with ${viruses}.`);
|
|
533
656
|
} catch (err) {
|
|
534
657
|
this.config.logger.error(err);
|
|
535
658
|
}
|
|
@@ -547,13 +670,16 @@ class SpamScanner {
|
|
|
547
670
|
|
|
548
671
|
let gtube = false;
|
|
549
672
|
|
|
550
|
-
|
|
673
|
+
// eslint-disable-next-line unicorn/prefer-includes
|
|
674
|
+
if (isSANB(mail.html) && mail.html.indexOf(GTUBE) !== -1) gtube = true;
|
|
551
675
|
|
|
552
|
-
|
|
676
|
+
// eslint-disable-next-line unicorn/prefer-includes
|
|
677
|
+
if (isSANB(mail.text) && !gtube && mail.text.indexOf(GTUBE) !== -1)
|
|
678
|
+
gtube = true;
|
|
553
679
|
|
|
554
680
|
if (gtube)
|
|
555
681
|
messages.push(
|
|
556
|
-
'Message detected to contain the GTUBE test from
|
|
682
|
+
'Message detected to contain the GTUBE test from https://spamassassin.apache.org/gtube/.'
|
|
557
683
|
);
|
|
558
684
|
|
|
559
685
|
return messages;
|
|
@@ -619,8 +745,6 @@ class SpamScanner {
|
|
|
619
745
|
//
|
|
620
746
|
// However we don't recommend this and therefore have our servers set to standard Cloudflare DNS
|
|
621
747
|
//
|
|
622
|
-
// TODO: we need to do two lookups in parallel, one against adult and one against malware
|
|
623
|
-
// and also make sure the messages aren't duplicated when we concatenate final array of messages
|
|
624
748
|
const [isAdult, isMalware] = await Promise.all([
|
|
625
749
|
this.malwareLookup('https://family.cloudflare-dns.com/dns-query', name),
|
|
626
750
|
this.malwareLookup('https://security.cloudflare-dns.com/dns-query', name)
|
|
@@ -742,14 +866,14 @@ class SpamScanner {
|
|
|
742
866
|
})
|
|
743
867
|
.match(URL_REGEX) || [];
|
|
744
868
|
|
|
745
|
-
const array =
|
|
869
|
+
const array = new Set();
|
|
746
870
|
for (const url of urls) {
|
|
747
871
|
const normalized = this.getNormalizedUrl(url);
|
|
748
872
|
|
|
749
|
-
if (normalized
|
|
873
|
+
if (normalized) array.add(normalized);
|
|
750
874
|
}
|
|
751
875
|
|
|
752
|
-
return array;
|
|
876
|
+
return [...array];
|
|
753
877
|
}
|
|
754
878
|
|
|
755
879
|
parseLocale(locale) {
|
|
@@ -763,12 +887,6 @@ class SpamScanner {
|
|
|
763
887
|
// <https://github.com/NaturalNode/natural#stemmers>
|
|
764
888
|
// eslint-disable-next-line complexity
|
|
765
889
|
async getTokens(string, locale, isHTML = false) {
|
|
766
|
-
// get the current email replacement regex
|
|
767
|
-
const EMAIL_REPLACEMENT_REGEX = new RE2(
|
|
768
|
-
this.config.replacements.email,
|
|
769
|
-
'g'
|
|
770
|
-
);
|
|
771
|
-
|
|
772
890
|
//
|
|
773
891
|
// parse HTML for <html> tag with lang attr
|
|
774
892
|
// otherwise if that wasn't found then look for this
|
|
@@ -816,17 +934,6 @@ class SpamScanner {
|
|
|
816
934
|
|
|
817
935
|
if (isHTML) string = sanitizeHtml(string, this.config.sanitizeHtml);
|
|
818
936
|
|
|
819
|
-
const replacementRegexes = [];
|
|
820
|
-
for (const key of Object.keys(this.config.replacements)) {
|
|
821
|
-
replacementRegexes.push(
|
|
822
|
-
escapeStringRegexp(this.config.replacements[key])
|
|
823
|
-
);
|
|
824
|
-
}
|
|
825
|
-
|
|
826
|
-
const REPLACEMENTS_REGEX = new RE2(
|
|
827
|
-
new RegExp(replacementRegexes.join('|'), 'g')
|
|
828
|
-
);
|
|
829
|
-
|
|
830
937
|
string = striptags(string, [], ' ')
|
|
831
938
|
.trim()
|
|
832
939
|
// replace newlines
|
|
@@ -835,7 +942,7 @@ class SpamScanner {
|
|
|
835
942
|
// attackers may try to inject our replacements into the message
|
|
836
943
|
// therefore we should strip all of them before doing any replacements
|
|
837
944
|
//
|
|
838
|
-
.replace(REPLACEMENTS_REGEX, ' ');
|
|
945
|
+
.replace(this.REPLACEMENTS_REGEX, ' ');
|
|
839
946
|
|
|
840
947
|
//
|
|
841
948
|
// we should instead use language detection to determine
|
|
@@ -853,7 +960,8 @@ class SpamScanner {
|
|
|
853
960
|
|
|
854
961
|
locale = this.parseLocale(isSANB(locale) ? locale : this.config.locale);
|
|
855
962
|
|
|
856
|
-
|
|
963
|
+
// NOTE: "in" and "po" are valid locales but not from i18n
|
|
964
|
+
if (!locales.has(locale) && locale !== 'in' && locale !== 'po') {
|
|
857
965
|
debug(`Locale ${locale} was not valid and will use default`);
|
|
858
966
|
locale = this.parseLocale(this.config.locale);
|
|
859
967
|
}
|
|
@@ -865,103 +973,145 @@ class SpamScanner {
|
|
|
865
973
|
let stopwords = stopwordsEn;
|
|
866
974
|
let language = 'english';
|
|
867
975
|
let stemword = 'default';
|
|
976
|
+
|
|
868
977
|
switch (locale) {
|
|
869
978
|
case 'ar':
|
|
979
|
+
// arb
|
|
980
|
+
// ISO 639-3 = ara
|
|
981
|
+
stopwords = stopwordsAra;
|
|
870
982
|
language = 'arabic';
|
|
871
983
|
break;
|
|
872
984
|
case 'da':
|
|
985
|
+
// dan
|
|
873
986
|
language = 'danish';
|
|
987
|
+
stopwords = stopwordsDan;
|
|
874
988
|
break;
|
|
875
989
|
case 'nl':
|
|
990
|
+
// nld
|
|
876
991
|
stopwords = stopwordsNl;
|
|
877
992
|
language = 'dutch';
|
|
878
993
|
break;
|
|
879
994
|
case 'en':
|
|
995
|
+
// eng
|
|
880
996
|
language = 'english';
|
|
881
997
|
break;
|
|
882
998
|
case 'fi':
|
|
999
|
+
// fin
|
|
883
1000
|
language = 'finnish';
|
|
884
1001
|
tokenizer = orthographyTokenizer;
|
|
1002
|
+
stopwords = stopwordsFin;
|
|
885
1003
|
break;
|
|
886
1004
|
case 'fa':
|
|
1005
|
+
// fas (Persian/Farsi)
|
|
887
1006
|
language = 'farsi';
|
|
888
1007
|
tokenizer = aggressiveTokenizerFa;
|
|
889
1008
|
stopwords = stopwordsFa;
|
|
890
1009
|
stemword = natural.PorterStemmerFa.stem.bind(natural.PorterStemmerFa);
|
|
891
1010
|
break;
|
|
892
1011
|
case 'fr':
|
|
1012
|
+
// fra
|
|
893
1013
|
language = 'french';
|
|
894
1014
|
tokenizer = aggressiveTokenizerFr;
|
|
895
1015
|
stopwords = stopwordsFr;
|
|
896
1016
|
break;
|
|
897
1017
|
case 'de':
|
|
1018
|
+
// deu
|
|
898
1019
|
language = 'german';
|
|
1020
|
+
stopwords = stopwordsDeu;
|
|
899
1021
|
break;
|
|
900
1022
|
case 'hu':
|
|
1023
|
+
// hun
|
|
901
1024
|
language = 'hungarian';
|
|
1025
|
+
stopwords = stopwordsHun;
|
|
902
1026
|
break;
|
|
903
1027
|
case 'in':
|
|
1028
|
+
// ind
|
|
904
1029
|
language = 'indonesian';
|
|
905
1030
|
tokenizer = aggressiveTokenizerId;
|
|
906
1031
|
stopwords = stopwordsId;
|
|
907
1032
|
break;
|
|
908
1033
|
case 'it':
|
|
1034
|
+
// ita
|
|
909
1035
|
language = 'italian';
|
|
910
1036
|
tokenizer = aggressiveTokenizerIt;
|
|
911
1037
|
stopwords = stopwordsIt;
|
|
912
1038
|
break;
|
|
913
1039
|
case 'ja':
|
|
1040
|
+
// jpn
|
|
914
1041
|
tokenizer = tokenizerJa;
|
|
915
1042
|
stopwords = stopwordsJa;
|
|
916
1043
|
stemword = natural.StemmerJa.stem.bind(natural.StemmerJa);
|
|
917
1044
|
break;
|
|
918
1045
|
case 'nb':
|
|
1046
|
+
// nob
|
|
1047
|
+
language = 'norwegian';
|
|
1048
|
+
tokenizer = aggressiveTokenizerNo;
|
|
1049
|
+
stopwords = stopwordsNo;
|
|
1050
|
+
break;
|
|
919
1051
|
case 'nn':
|
|
1052
|
+
// nno
|
|
1053
|
+
// ISO 639-3 = nob
|
|
920
1054
|
language = 'norwegian';
|
|
921
1055
|
tokenizer = aggressiveTokenizerNo;
|
|
922
1056
|
stopwords = stopwordsNo;
|
|
923
1057
|
break;
|
|
924
1058
|
case 'po':
|
|
1059
|
+
// pol
|
|
925
1060
|
language = 'polish';
|
|
926
1061
|
tokenizer = aggressiveTokenizerPl;
|
|
927
1062
|
stopwords = stopwordsPl;
|
|
928
1063
|
stemword = false;
|
|
929
1064
|
break;
|
|
930
1065
|
case 'pt':
|
|
1066
|
+
// por
|
|
931
1067
|
language = 'portuguese';
|
|
932
1068
|
tokenizer = aggressiveTokenizerPt;
|
|
933
1069
|
stopwords = stopwordsPt;
|
|
934
1070
|
break;
|
|
935
1071
|
case 'es':
|
|
1072
|
+
// spa
|
|
936
1073
|
language = 'spanish';
|
|
937
1074
|
tokenizer = aggressiveTokenizerEs;
|
|
938
1075
|
stopwords = stopwordsEs;
|
|
939
1076
|
break;
|
|
940
1077
|
case 'sv':
|
|
1078
|
+
// swe
|
|
941
1079
|
language = 'swedish';
|
|
942
1080
|
tokenizer = aggressiveTokenizerSv;
|
|
943
1081
|
stopwords = stopwordsSv;
|
|
944
1082
|
break;
|
|
945
1083
|
case 'ro':
|
|
1084
|
+
// ron
|
|
946
1085
|
language = 'romanian';
|
|
1086
|
+
stopwords = stopwordsRon;
|
|
947
1087
|
break;
|
|
948
1088
|
case 'ru':
|
|
1089
|
+
// rus
|
|
949
1090
|
language = 'russian';
|
|
950
1091
|
tokenizer = aggressiveTokenizerRu;
|
|
951
1092
|
stopwords = stopwordsRu;
|
|
952
1093
|
break;
|
|
953
1094
|
case 'ta':
|
|
1095
|
+
// tam
|
|
1096
|
+
// NOTE: no stopwords available
|
|
954
1097
|
language = 'tamil';
|
|
955
1098
|
break;
|
|
956
1099
|
case 'tr':
|
|
1100
|
+
// tur
|
|
957
1101
|
language = 'turkish';
|
|
1102
|
+
stopwords = stopwordsTur;
|
|
958
1103
|
break;
|
|
959
1104
|
case 'vi':
|
|
1105
|
+
// vie
|
|
960
1106
|
language = 'vietnamese';
|
|
961
1107
|
tokenizer = aggressiveTokenizerVi;
|
|
1108
|
+
stopwords = stopwordsVie;
|
|
962
1109
|
stemword = false;
|
|
963
1110
|
break;
|
|
964
1111
|
case 'zh':
|
|
1112
|
+
// cmn
|
|
1113
|
+
// TODO: use this instead https://github.com/yishn/chinese-tokenizer
|
|
1114
|
+
// ISO 639-3 = zho (Chinese, Macrolanguage)
|
|
965
1115
|
language = 'chinese';
|
|
966
1116
|
stopwords = stopwordsZh;
|
|
967
1117
|
stemword = false;
|
|
@@ -979,7 +1129,7 @@ class SpamScanner {
|
|
|
979
1129
|
string
|
|
980
1130
|
.split(' ')
|
|
981
1131
|
.map((_string) =>
|
|
982
|
-
_string.
|
|
1132
|
+
_string.indexOf(':') === 0 &&
|
|
983
1133
|
_string.endsWith(':') &&
|
|
984
1134
|
typeof toEmoji[_string.slice(1, -1)] === 'string'
|
|
985
1135
|
? toEmoji[_string.slice(1, -1)]
|
|
@@ -1027,7 +1177,10 @@ class SpamScanner {
|
|
|
1027
1177
|
|
|
1028
1178
|
// now we ensure that URL's and EMAIL's are properly spaced out
|
|
1029
1179
|
// (e.g. in case ?email=some@email.com was in a URL)
|
|
1030
|
-
.replace(
|
|
1180
|
+
.replace(
|
|
1181
|
+
this.EMAIL_REPLACEMENT_REGEX,
|
|
1182
|
+
` ${this.config.replacements.email} `
|
|
1183
|
+
)
|
|
1031
1184
|
|
|
1032
1185
|
// TODO: replace file paths, file dirs, dotfiles, and dotdirs
|
|
1033
1186
|
|
|
@@ -1042,12 +1195,14 @@ class SpamScanner {
|
|
|
1042
1195
|
// replace currency
|
|
1043
1196
|
.replace(CURRENCY_REGEX, ` ${this.config.replacements.currency} `);
|
|
1044
1197
|
|
|
1198
|
+
//
|
|
1045
1199
|
// expand contractions so "they're" -> [ they, are ] vs. [ they, re ]
|
|
1046
1200
|
// <https://github.com/NaturalNode/natural/issues/533>
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
//
|
|
1050
|
-
|
|
1201
|
+
//
|
|
1202
|
+
// NOTE: we're doing this for all languages now, not just en
|
|
1203
|
+
// if (locale === 'en')
|
|
1204
|
+
//
|
|
1205
|
+
string = contractions.expand(string);
|
|
1051
1206
|
|
|
1052
1207
|
//
|
|
1053
1208
|
// Future research:
|
|
@@ -1061,43 +1216,32 @@ class SpamScanner {
|
|
|
1061
1216
|
for (const token of tokenizer.tokenize(string.toLowerCase())) {
|
|
1062
1217
|
// whitelist words from being stemmed (safeguard)
|
|
1063
1218
|
if (
|
|
1064
|
-
|
|
1065
|
-
token.
|
|
1066
|
-
token.
|
|
1219
|
+
this.WHITELISTED_WORDS.has(token) ||
|
|
1220
|
+
token.indexOf(this.config.replacements.initialism) === 0 ||
|
|
1221
|
+
token.indexOf(this.config.replacements.abbrevation) === 0
|
|
1067
1222
|
) {
|
|
1068
1223
|
tokens.push(token);
|
|
1069
1224
|
continue;
|
|
1070
1225
|
}
|
|
1071
1226
|
|
|
1072
|
-
if (
|
|
1073
|
-
stopwords.includes(token) ||
|
|
1074
|
-
(sw[locale] && sw[locale].includes(token)) ||
|
|
1075
|
-
(locale !== 'en' &&
|
|
1076
|
-
(stopwordsEn.includes(token) || sw.en.includes(token)))
|
|
1077
|
-
)
|
|
1227
|
+
if (stopwords.has(token) || (locale !== 'en' && stopwordsEn.has(token))) {
|
|
1078
1228
|
continue;
|
|
1229
|
+
}
|
|
1079
1230
|
|
|
1080
1231
|
// locale specific stopwords to ignore
|
|
1081
1232
|
let localeStem;
|
|
1082
1233
|
if (typeof stemword === 'function') {
|
|
1083
1234
|
localeStem = stemword(token);
|
|
1084
|
-
if (
|
|
1085
|
-
localeStem &&
|
|
1086
|
-
(stopwords.includes(localeStem) ||
|
|
1087
|
-
(sw[locale] && sw[locale].includes(localeStem)))
|
|
1088
|
-
)
|
|
1235
|
+
if (localeStem && stopwords.has(localeStem)) {
|
|
1089
1236
|
continue;
|
|
1237
|
+
}
|
|
1090
1238
|
}
|
|
1091
1239
|
|
|
1092
1240
|
// always check against English stemwords
|
|
1093
1241
|
let englishStem;
|
|
1094
1242
|
if (locale !== 'en') {
|
|
1095
1243
|
englishStem = snowball.stemword(token, 'english');
|
|
1096
|
-
if (
|
|
1097
|
-
englishStem &&
|
|
1098
|
-
(stopwordsEn.includes(englishStem) || sw.en.includes(englishStem))
|
|
1099
|
-
)
|
|
1100
|
-
continue;
|
|
1244
|
+
if (englishStem && stopwordsEn.has(englishStem)) continue;
|
|
1101
1245
|
}
|
|
1102
1246
|
|
|
1103
1247
|
tokens.push(
|
|
@@ -1105,6 +1249,8 @@ class SpamScanner {
|
|
|
1105
1249
|
);
|
|
1106
1250
|
}
|
|
1107
1251
|
|
|
1252
|
+
debug('locale', locale, 'tokens', tokens);
|
|
1253
|
+
|
|
1108
1254
|
if (this.config.debug) return tokens;
|
|
1109
1255
|
|
|
1110
1256
|
// we should sha256 all tokens with hasha if not in debug mode
|
|
@@ -1117,7 +1263,7 @@ class SpamScanner {
|
|
|
1117
1263
|
let source = string;
|
|
1118
1264
|
if (isBuffer(string)) source = string.toString();
|
|
1119
1265
|
else if (typeof string === 'string' && isValidPath(string))
|
|
1120
|
-
source = await readFile(string);
|
|
1266
|
+
source = await fs.promises.readFile(string);
|
|
1121
1267
|
|
|
1122
1268
|
const tokens = [];
|
|
1123
1269
|
const mail = await simpleParser(source, this.config.simpleParser);
|
|
@@ -1155,12 +1301,11 @@ class SpamScanner {
|
|
|
1155
1301
|
|
|
1156
1302
|
// eslint-disable-next-line complexity
|
|
1157
1303
|
async getPhishingResults(mail) {
|
|
1158
|
-
const messages = [];
|
|
1159
|
-
|
|
1304
|
+
const messages = new Set();
|
|
1160
1305
|
//
|
|
1161
1306
|
// NOTE: all links pushed are lowercased
|
|
1162
1307
|
//
|
|
1163
|
-
const links = [];
|
|
1308
|
+
const links = new Set();
|
|
1164
1309
|
|
|
1165
1310
|
// parse <a> tags with different org domain in text vs the link
|
|
1166
1311
|
if (isSANB(mail.html)) {
|
|
@@ -1170,7 +1315,7 @@ class SpamScanner {
|
|
|
1170
1315
|
// elements concatenate to form a URL which is malicious or phishing
|
|
1171
1316
|
//
|
|
1172
1317
|
for (const link of this.getUrls(striptags(mail.html, [], ' ').trim())) {
|
|
1173
|
-
|
|
1318
|
+
links.add(link);
|
|
1174
1319
|
}
|
|
1175
1320
|
|
|
1176
1321
|
//
|
|
@@ -1212,7 +1357,7 @@ class SpamScanner {
|
|
|
1212
1357
|
// (this is needed because some have "Web:%20http://google.com" for example in href tags)
|
|
1213
1358
|
[href] = this.getUrls(href);
|
|
1214
1359
|
// eslint-disable-next-line max-depth
|
|
1215
|
-
if (href
|
|
1360
|
+
if (href) links.add(href);
|
|
1216
1361
|
}
|
|
1217
1362
|
|
|
1218
1363
|
// the text content could contain multiple URL's
|
|
@@ -1222,7 +1367,7 @@ class SpamScanner {
|
|
|
1222
1367
|
isSANB(href) &&
|
|
1223
1368
|
validator.isURL(href, isURLOptions)
|
|
1224
1369
|
) {
|
|
1225
|
-
const string = `Anchor link with href of
|
|
1370
|
+
const string = `Anchor link with href of ${href} and inner text value of "${textContent}"`;
|
|
1226
1371
|
// eslint-disable-next-line max-depth
|
|
1227
1372
|
if (this.config.checkIDNHomographAttack) {
|
|
1228
1373
|
const anchorUrlHostname = this.getHostname(href);
|
|
@@ -1231,8 +1376,8 @@ class SpamScanner {
|
|
|
1231
1376
|
const anchorUrlHostnameToASCII =
|
|
1232
1377
|
punycode.toASCII(anchorUrlHostname);
|
|
1233
1378
|
// eslint-disable-next-line max-depth
|
|
1234
|
-
if (anchorUrlHostnameToASCII.
|
|
1235
|
-
messages.
|
|
1379
|
+
if (anchorUrlHostnameToASCII.indexOf('xn--') === 0)
|
|
1380
|
+
messages.add(
|
|
1236
1381
|
`${string} has possible IDN homograph attack from anchor hostname.`
|
|
1237
1382
|
);
|
|
1238
1383
|
}
|
|
@@ -1241,8 +1386,8 @@ class SpamScanner {
|
|
|
1241
1386
|
// eslint-disable-next-line max-depth
|
|
1242
1387
|
for (const link of this.getUrls(textContent)) {
|
|
1243
1388
|
// this link should have already been included but just in case
|
|
1244
|
-
|
|
1245
|
-
|
|
1389
|
+
|
|
1390
|
+
links.add(link);
|
|
1246
1391
|
|
|
1247
1392
|
// eslint-disable-next-line max-depth
|
|
1248
1393
|
if (this.config.checkIDNHomographAttack) {
|
|
@@ -1252,8 +1397,8 @@ class SpamScanner {
|
|
|
1252
1397
|
const innerTextUrlHostnameToASCII =
|
|
1253
1398
|
punycode.toASCII(innerTextUrlHostname);
|
|
1254
1399
|
// eslint-disable-next-line max-depth
|
|
1255
|
-
if (innerTextUrlHostnameToASCII.
|
|
1256
|
-
messages.
|
|
1400
|
+
if (innerTextUrlHostnameToASCII.indexOf('xn--') === 0)
|
|
1401
|
+
messages.add(
|
|
1257
1402
|
`${string} has possible IDN homograph attack from inner text hostname.`
|
|
1258
1403
|
);
|
|
1259
1404
|
}
|
|
@@ -1269,7 +1414,7 @@ class SpamScanner {
|
|
|
1269
1414
|
for (const prop of MAIL_PHISHING_PROPS) {
|
|
1270
1415
|
if (isSANB(mail[prop])) {
|
|
1271
1416
|
for (const link of this.getUrls(mail[prop])) {
|
|
1272
|
-
|
|
1417
|
+
links.add(link);
|
|
1273
1418
|
}
|
|
1274
1419
|
}
|
|
1275
1420
|
}
|
|
@@ -1279,9 +1424,9 @@ class SpamScanner {
|
|
|
1279
1424
|
const urlHostname = this.getHostname(link);
|
|
1280
1425
|
if (urlHostname) {
|
|
1281
1426
|
const toASCII = punycode.toASCII(urlHostname);
|
|
1282
|
-
if (toASCII.
|
|
1283
|
-
messages.
|
|
1284
|
-
`Possible IDN homograph attack from link of
|
|
1427
|
+
if (toASCII.indexOf('xn--') === 0)
|
|
1428
|
+
messages.add(
|
|
1429
|
+
`Possible IDN homograph attack from link of ${link} with punycode converted hostname of ${toASCII}.`
|
|
1285
1430
|
);
|
|
1286
1431
|
}
|
|
1287
1432
|
}
|
|
@@ -1290,28 +1435,25 @@ class SpamScanner {
|
|
|
1290
1435
|
// check against Cloudflare malware/phishing/adult DNS lookup
|
|
1291
1436
|
// if it returns `0.0.0.0` it means it was flagged
|
|
1292
1437
|
await Promise.all(
|
|
1293
|
-
links.map(async (link) => {
|
|
1438
|
+
[...links].map(async (link) => {
|
|
1294
1439
|
try {
|
|
1295
1440
|
const urlHostname = this.getHostname(link);
|
|
1296
1441
|
if (urlHostname) {
|
|
1297
1442
|
const toASCII = punycode.toASCII(urlHostname);
|
|
1298
|
-
const adultMessage = `Link hostname of
|
|
1299
|
-
const malwareMessage = `Link hostname of ${toASCII}
|
|
1443
|
+
const adultMessage = `Link hostname of ${toASCII} was detected by Cloudflare's Family DNS to contain adult-related content, phishing, and/or malware.`;
|
|
1444
|
+
const malwareMessage = `Link hostname of ${toASCII} was detected by Cloudflare's Security DNS to contain phishing and/or malware.`;
|
|
1300
1445
|
|
|
1301
1446
|
// if it already included both messages then return early
|
|
1302
|
-
if (
|
|
1303
|
-
messages.includes(adultMessage) &&
|
|
1304
|
-
messages.includes(malwareMessage)
|
|
1305
|
-
)
|
|
1447
|
+
if (messages.has(adultMessage) && messages.has(malwareMessage))
|
|
1306
1448
|
return;
|
|
1307
1449
|
|
|
1308
1450
|
const { isAdult, isMalware } =
|
|
1309
1451
|
await this.memoizedIsCloudflareBlocked(toASCII);
|
|
1310
1452
|
|
|
1311
|
-
if (isAdult && !messages.
|
|
1312
|
-
messages.
|
|
1313
|
-
if (isMalware && !messages.
|
|
1314
|
-
messages.
|
|
1453
|
+
if (isAdult && !messages.has(adultMessage))
|
|
1454
|
+
messages.add(adultMessage);
|
|
1455
|
+
if (isMalware && !messages.has(malwareMessage))
|
|
1456
|
+
messages.add(malwareMessage);
|
|
1315
1457
|
}
|
|
1316
1458
|
} catch (err) {
|
|
1317
1459
|
this.config.logger.error(err);
|
|
@@ -1319,7 +1461,7 @@ class SpamScanner {
|
|
|
1319
1461
|
})
|
|
1320
1462
|
);
|
|
1321
1463
|
|
|
1322
|
-
return { messages, links };
|
|
1464
|
+
return { messages: [...messages], links: [...links] };
|
|
1323
1465
|
}
|
|
1324
1466
|
|
|
1325
1467
|
// getNSFWResults() {
|
|
@@ -1340,7 +1482,7 @@ class SpamScanner {
|
|
|
1340
1482
|
try {
|
|
1341
1483
|
const fileType = await FileType.fromBuffer(attachment.content);
|
|
1342
1484
|
|
|
1343
|
-
if (fileType && fileType.ext && EXECUTABLES.
|
|
1485
|
+
if (fileType && fileType.ext && EXECUTABLES.has(fileType.ext))
|
|
1344
1486
|
messages.push(
|
|
1345
1487
|
`Attachment's "magic number" indicated it was a dangerous executable with a ".${fileType.ext}" extension.`
|
|
1346
1488
|
);
|
|
@@ -1355,7 +1497,7 @@ class SpamScanner {
|
|
|
1355
1497
|
punycode.toUnicode(attachment.filename.split('?')[0])
|
|
1356
1498
|
);
|
|
1357
1499
|
const ext = fileExtension(filename);
|
|
1358
|
-
if (ext && EXECUTABLES.
|
|
1500
|
+
if (ext && EXECUTABLES.has(ext))
|
|
1359
1501
|
messages.push(
|
|
1360
1502
|
`Attachment's file name indicated it was a dangerous executable with a ".${ext}" extension.`
|
|
1361
1503
|
);
|
|
@@ -1363,7 +1505,7 @@ class SpamScanner {
|
|
|
1363
1505
|
|
|
1364
1506
|
if (isSANB(attachment.contentType)) {
|
|
1365
1507
|
const ext = mime.extension(attachment.contentType);
|
|
1366
|
-
if (isSANB(ext) && EXECUTABLES.
|
|
1508
|
+
if (isSANB(ext) && EXECUTABLES.has(ext))
|
|
1367
1509
|
messages.push(
|
|
1368
1510
|
`Attachment's Content-Type was a dangerous executable with a ".${ext}" extension.`
|
|
1369
1511
|
);
|
package/package.json
CHANGED
|
@@ -1,22 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "spamscanner",
|
|
3
3
|
"description": "Spam Scanner - The Best Anti-Spam Scanning Service and Anti-Spam API",
|
|
4
|
-
"version": "4.0.0",
|
|
4
|
+
"version": "5.0.0",
|
|
5
5
|
"author": "Niftylettuce, LLC. <niftylettuce@gmail.com> (https://niftylettuce.com/)",
|
|
6
|
-
"ava": {
|
|
7
|
-
"timeout": "30s",
|
|
8
|
-
"verbose": true,
|
|
9
|
-
"serial": true
|
|
10
|
-
},
|
|
11
6
|
"bugs": {
|
|
12
7
|
"url": "https://github.com/spamscanner/spamscanner/issues",
|
|
13
8
|
"email": "niftylettuce@gmail.com"
|
|
14
9
|
},
|
|
15
|
-
"commitlint": {
|
|
16
|
-
"extends": [
|
|
17
|
-
"@commitlint/config-conventional"
|
|
18
|
-
]
|
|
19
|
-
},
|
|
20
10
|
"contributors": [
|
|
21
11
|
"Nick Baugh <niftylettuce@gmail.com> (http://niftylettuce.com/)",
|
|
22
12
|
"Shaun Warman <shaunwarman1@gmail.com> (http://shaunwarman.com/)"
|
|
@@ -24,69 +14,68 @@
|
|
|
24
14
|
"dependencies": {
|
|
25
15
|
"@ladjs/naivebayes": "^0.1.0",
|
|
26
16
|
"bitcoin-regex": "^2.0.0",
|
|
27
|
-
"clamscan": "^1.
|
|
17
|
+
"clamscan": "^2.1.2",
|
|
28
18
|
"credit-card-regex": "^3.0.0",
|
|
29
19
|
"crypto-random-string": "3",
|
|
30
20
|
"currency-codes": "^2.1.0",
|
|
31
|
-
"currency-symbol-map": "^5.0
|
|
32
|
-
"debug": "^4.3.2",
|
|
21
|
+
"currency-symbol-map": "^5.1.0",
|
|
33
22
|
"email-regex-safe": "^1.0.2",
|
|
34
|
-
"emoji-patterns": "^
|
|
23
|
+
"emoji-patterns": "^14.0.1",
|
|
35
24
|
"escape-string-regexp": "4",
|
|
36
25
|
"expand-contractions": "^1.0.1",
|
|
37
26
|
"file-extension": "^4.0.5",
|
|
38
|
-
"file-type": "
|
|
27
|
+
"file-type": "16",
|
|
39
28
|
"floating-point-regex": "^0.1.0",
|
|
40
29
|
"franc": "5",
|
|
41
30
|
"gemoji": "6",
|
|
42
31
|
"hasha": "^5.2.2",
|
|
43
32
|
"hexa-color-regex": "^1.0.0",
|
|
44
33
|
"i18n-locales": "^0.0.5",
|
|
45
|
-
"iconv": "^3.0.
|
|
34
|
+
"iconv": "^3.0.1",
|
|
46
35
|
"into-stream": "6",
|
|
47
|
-
"ip-regex": "
|
|
36
|
+
"ip-regex": "4",
|
|
48
37
|
"is-buffer": "^2.0.5",
|
|
49
38
|
"is-stream": "2",
|
|
50
39
|
"is-string-and-not-blank": "^0.0.2",
|
|
51
40
|
"is-valid-path": "^0.1.1",
|
|
52
41
|
"mac-regex": "^1.0.0",
|
|
53
42
|
"macos-version": "5",
|
|
54
|
-
"mailparser": "^3.
|
|
43
|
+
"mailparser": "^3.5.0",
|
|
55
44
|
"memoizee": "^0.4.15",
|
|
56
|
-
"mime-types": "^2.1.
|
|
45
|
+
"mime-types": "^2.1.35",
|
|
57
46
|
"ms": "^2.1.3",
|
|
58
|
-
"natural": "^5.
|
|
47
|
+
"natural": "^5.2.2",
|
|
59
48
|
"newline-remove": "^1.0.2",
|
|
60
|
-
"node-html-parser": "
|
|
49
|
+
"node-html-parser": "4",
|
|
61
50
|
"node-snowball": "^0.6.0",
|
|
62
51
|
"normalize-url": "5",
|
|
63
|
-
"parse-domain": "
|
|
52
|
+
"parse-domain": "5",
|
|
64
53
|
"phone-regex": "^2.1.0",
|
|
65
54
|
"punycode": "^2.1.1",
|
|
66
|
-
"re2": "^1.
|
|
67
|
-
"sanitize-html": "^2.
|
|
68
|
-
"stopword": "^
|
|
55
|
+
"re2": "^1.17.6",
|
|
56
|
+
"sanitize-html": "^2.7.0",
|
|
57
|
+
"stopword": "^2.0.2",
|
|
69
58
|
"striptags": "^3.2.0",
|
|
70
|
-
"superagent": "^
|
|
59
|
+
"superagent": "^7.1.6",
|
|
71
60
|
"trim-leading-whitespace": "^0.1.1",
|
|
72
61
|
"universalify": "^2.0.0",
|
|
73
|
-
"url-regex-safe": "^
|
|
74
|
-
"validator": "^13.
|
|
62
|
+
"url-regex-safe": "^3.0.0",
|
|
63
|
+
"validator": "^13.7.0",
|
|
64
|
+
"which": "^2.0.2"
|
|
75
65
|
},
|
|
76
66
|
"devDependencies": {
|
|
77
|
-
"@commitlint/cli": "^
|
|
78
|
-
"@commitlint/config-conventional": "^
|
|
67
|
+
"@commitlint/cli": "^17.0.2",
|
|
68
|
+
"@commitlint/config-conventional": "^17.0.2",
|
|
79
69
|
"@ladjs/redis": "^1.0.7",
|
|
80
|
-
"ava": "^3.
|
|
81
|
-
"codecov": "^3.8.3",
|
|
70
|
+
"ava": "^4.3.0",
|
|
82
71
|
"cross-env": "^7.0.3",
|
|
83
72
|
"delay": "^5.0.0",
|
|
84
|
-
"eslint": "^
|
|
85
|
-
"eslint-config-xo-lass": "^
|
|
73
|
+
"eslint": "^8.17.0",
|
|
74
|
+
"eslint-config-xo-lass": "^2.0.1",
|
|
86
75
|
"fixpack": "^4.0.0",
|
|
87
|
-
"husky": "^
|
|
88
|
-
"is-ci": "^3.0.
|
|
89
|
-
"lint-staged": "^
|
|
76
|
+
"husky": "^8.0.1",
|
|
77
|
+
"is-ci": "^3.0.1",
|
|
78
|
+
"lint-staged": "^13.0.1",
|
|
90
79
|
"lookpath": "^1.2.2",
|
|
91
80
|
"make-dir": "^3.1.0",
|
|
92
81
|
"node-mbox": "^1.0.0",
|
|
@@ -94,12 +83,12 @@
|
|
|
94
83
|
"nyc": "^15.1.0",
|
|
95
84
|
"p-map": "4",
|
|
96
85
|
"read-dir-deep": "^7.0.1",
|
|
97
|
-
"remark-cli": "^10.0.
|
|
98
|
-
"remark-preset-github": "^4.0.
|
|
99
|
-
"xo": "0.
|
|
86
|
+
"remark-cli": "^10.0.1",
|
|
87
|
+
"remark-preset-github": "^4.0.4",
|
|
88
|
+
"xo": "^0.50.0"
|
|
100
89
|
},
|
|
101
90
|
"engines": {
|
|
102
|
-
"node": ">=
|
|
91
|
+
"node": ">=14"
|
|
103
92
|
},
|
|
104
93
|
"files": [
|
|
105
94
|
"package.json",
|
|
@@ -114,12 +103,6 @@
|
|
|
114
103
|
"classifier.json"
|
|
115
104
|
],
|
|
116
105
|
"homepage": "https://github.com/spamscanner/spamscanner",
|
|
117
|
-
"husky": {
|
|
118
|
-
"hooks": {
|
|
119
|
-
"pre-commit": "lint-staged",
|
|
120
|
-
"commit-msg": "commitlint -E HUSKY_GIT_PARAMS"
|
|
121
|
-
}
|
|
122
|
-
},
|
|
123
106
|
"keywords": [
|
|
124
107
|
"adult",
|
|
125
108
|
"api",
|
|
@@ -172,38 +155,17 @@
|
|
|
172
155
|
],
|
|
173
156
|
"license": "Business Source License 1.1",
|
|
174
157
|
"main": "index.js",
|
|
175
|
-
"prettier": {
|
|
176
|
-
"singleQuote": true,
|
|
177
|
-
"bracketSpacing": true,
|
|
178
|
-
"trailingComma": "none"
|
|
179
|
-
},
|
|
180
|
-
"remarkConfig": {
|
|
181
|
-
"plugins": [
|
|
182
|
-
"preset-github"
|
|
183
|
-
]
|
|
184
|
-
},
|
|
185
158
|
"repository": {
|
|
186
159
|
"type": "git",
|
|
187
160
|
"url": "https://github.com/spamscanner/spamscanner"
|
|
188
161
|
},
|
|
189
162
|
"scripts": {
|
|
190
163
|
"ava": "cross-env NODE_ENV=test ava",
|
|
191
|
-
"
|
|
192
|
-
"lint": "xo && remark . -qfo",
|
|
164
|
+
"lint": "xo --fix && remark . -qfo && fixpack",
|
|
193
165
|
"nyc": "cross-env NODE_ENV=test nyc ava",
|
|
194
|
-
"
|
|
166
|
+
"prepare": "husky install",
|
|
167
|
+
"pretest": "npm run lint",
|
|
168
|
+
"test": "npm run test-coverage",
|
|
195
169
|
"test-coverage": "npm run lint && npm run nyc"
|
|
196
|
-
},
|
|
197
|
-
"xo": {
|
|
198
|
-
"prettier": true,
|
|
199
|
-
"space": true,
|
|
200
|
-
"extends": [
|
|
201
|
-
"xo-lass"
|
|
202
|
-
],
|
|
203
|
-
"ignores": [
|
|
204
|
-
"data",
|
|
205
|
-
"classifier.json",
|
|
206
|
-
"bag-of-words.json"
|
|
207
|
-
]
|
|
208
170
|
}
|
|
209
171
|
}
|
package/vocabulary-limit.js
CHANGED