spamscanner 5.1.1 → 5.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -3
- package/index.js +44 -8
- package/package.json +4 -2
package/README.md
CHANGED
|
@@ -54,7 +54,7 @@
|
|
|
54
54
|
|
|
55
55
|
## Foreword
|
|
56
56
|
|
|
57
|
-
Spam Scanner is a tool and service
|
|
57
|
+
Spam Scanner is a tool and service created after hitting countless roadblocks with existing spam-detection solutions. In other words, it's our current [plan][plan-for-spam] for [spam][better-plan-for-spam].
|
|
58
58
|
|
|
59
59
|
Our goal is to build and utilize a scalable, performant, simple, easy to maintain, and powerful API for use in our service at [Forward Email][forward-email] to limit spam and provide other measures to prevent attacks on our users.
|
|
60
60
|
|
|
@@ -606,8 +606,6 @@ This means you can run your app with `NODE_DEBUG=spamscanner node app.js` to get
|
|
|
606
606
|
|
|
607
607
|
[homograph-attack]: https://en.wikipedia.org/wiki/IDN_homograph_attack
|
|
608
608
|
|
|
609
|
-
[niftylettuce]: https://github.com/niftylettuce
|
|
610
|
-
|
|
611
609
|
[forward-email]: https://forwardemail.net
|
|
612
610
|
|
|
613
611
|
[rspamd]: https://rspamd.com/
|
package/index.js
CHANGED
|
@@ -11,6 +11,7 @@ const ClamScan = require('clamscan');
|
|
|
11
11
|
const FileType = require('file-type');
|
|
12
12
|
const NaiveBayes = require('@ladjs/naivebayes');
|
|
13
13
|
const RE2 = require('re2');
|
|
14
|
+
const arrayJoinConjunction = require('array-join-conjunction');
|
|
14
15
|
const bitcoinRegex = require('bitcoin-regex');
|
|
15
16
|
const contractions = require('expand-contractions');
|
|
16
17
|
const creditCardRegex = require('credit-card-regex');
|
|
@@ -461,6 +462,10 @@ class SpamScanner {
|
|
|
461
462
|
minLength: 5,
|
|
462
463
|
only: ISO_CODE_MAPPING_KEYS
|
|
463
464
|
},
|
|
465
|
+
// if franc detects multiple languages that meet the probability threshold
|
|
466
|
+
// then if the locale detected was one of them, what is the probability
|
|
467
|
+
// it must have in order to override all the other matches
|
|
468
|
+
detectedLocaleOverrideProbability: 0.9,
|
|
464
469
|
...config
|
|
465
470
|
};
|
|
466
471
|
|
|
@@ -667,7 +672,11 @@ class SpamScanner {
|
|
|
667
672
|
? `"${attachment.filename}"`
|
|
668
673
|
: `#${i + 1}`;
|
|
669
674
|
if (isInfected)
|
|
670
|
-
messages.push(
|
|
675
|
+
messages.push(
|
|
676
|
+
`Attachment ${name} was infected with ${arrayJoinConjunction(
|
|
677
|
+
viruses
|
|
678
|
+
)}.`
|
|
679
|
+
);
|
|
671
680
|
} catch (err) {
|
|
672
681
|
this.config.logger.error(err);
|
|
673
682
|
}
|
|
@@ -966,13 +975,41 @@ class SpamScanner {
|
|
|
966
975
|
// <https://github.com/wooorm/franc/issues/86> (accurate with min length)
|
|
967
976
|
// <https://github.com/FGRibreau/node-language-detect> (not too accurate)
|
|
968
977
|
//
|
|
969
|
-
const
|
|
978
|
+
const detectedLanguages = franc.all(string, this.config.franc);
|
|
979
|
+
if (Array.isArray(detectedLanguages) && detectedLanguages.length > 0) {
|
|
980
|
+
let detected = this.config.locale;
|
|
981
|
+
let probability = 0;
|
|
982
|
+
for (const lang of detectedLanguages) {
|
|
983
|
+
// if it was undetermined then break out and revert to default (English)
|
|
984
|
+
if (lang[0] && lang[0] === 'und') break;
|
|
970
985
|
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
986
|
+
//
|
|
987
|
+
// otherwise only use detected languages that have >= 90% accuracy
|
|
988
|
+
// and if no matches were found, then revert to use English as it's most likely spam
|
|
989
|
+
// (we can assume that users would understand a different language sent to them is spam)
|
|
990
|
+
// (so we can assume that language is spoofed to bypass English, the most widely spoken)
|
|
991
|
+
//
|
|
992
|
+
if (lang[0] && ISO_CODE_MAPPING[lang[0]] && lang[1]) {
|
|
993
|
+
// we don't want to check anything lower than our threshold
|
|
994
|
+
if (lang[1] < this.config.detectedLocaleOverrideProbability) break;
|
|
995
|
+
if (probability >= lang[1]) {
|
|
996
|
+
// exit early since we found a match that matched the passed locale
|
|
997
|
+
// eslint-disable-next-line max-depth
|
|
998
|
+
if (locale && locale === ISO_CODE_MAPPING[lang[0]]) {
|
|
999
|
+
detected = locale;
|
|
1000
|
+
probability = lang[1];
|
|
1001
|
+
break;
|
|
1002
|
+
}
|
|
1003
|
+
} else {
|
|
1004
|
+
detected = ISO_CODE_MAPPING[lang[0]];
|
|
1005
|
+
probability = lang[1];
|
|
1006
|
+
}
|
|
1007
|
+
}
|
|
1008
|
+
}
|
|
1009
|
+
|
|
1010
|
+
// override the locale based off detected
|
|
1011
|
+
locale = detected;
|
|
1012
|
+
}
|
|
976
1013
|
|
|
977
1014
|
locale = this.parseLocale(isSANB(locale) ? locale : this.config.locale);
|
|
978
1015
|
|
|
@@ -1220,7 +1257,6 @@ class SpamScanner {
|
|
|
1220
1257
|
// <https://github.com/NaturalNode/natural/issues/533>
|
|
1221
1258
|
//
|
|
1222
1259
|
// NOTE: we're doing this for all languages now, not just en
|
|
1223
|
-
// if (locale === 'en')
|
|
1224
1260
|
//
|
|
1225
1261
|
string = contractions.expand(string);
|
|
1226
1262
|
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "spamscanner",
|
|
3
3
|
"description": "Spam Scanner - The Best Anti-Spam Scanning Service and Anti-Spam API",
|
|
4
|
-
"version": "5.1.1",
|
|
4
|
+
"version": "5.1.2",
|
|
5
5
|
"author": "Niftylettuce, LLC. <niftylettuce@gmail.com> (https://niftylettuce.com/)",
|
|
6
6
|
"bugs": {
|
|
7
7
|
"url": "https://github.com/spamscanner/spamscanner/issues",
|
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
],
|
|
14
14
|
"dependencies": {
|
|
15
15
|
"@ladjs/naivebayes": "^0.1.0",
|
|
16
|
+
"array-join-conjunction": "^1.0.0",
|
|
16
17
|
"bitcoin-regex": "^2.0.0",
|
|
17
18
|
"chinese-tokenizer": "^2.4.0",
|
|
18
19
|
"clamscan": "^2.1.2",
|
|
@@ -67,7 +68,6 @@
|
|
|
67
68
|
"devDependencies": {
|
|
68
69
|
"@commitlint/cli": "^17.0.2",
|
|
69
70
|
"@commitlint/config-conventional": "^17.0.2",
|
|
70
|
-
"@ladjs/redis": "^1.0.7",
|
|
71
71
|
"ava": "^4.3.0",
|
|
72
72
|
"cross-env": "^7.0.3",
|
|
73
73
|
"delay": "^5.0.0",
|
|
@@ -75,6 +75,8 @@
|
|
|
75
75
|
"eslint-config-xo-lass": "^2.0.1",
|
|
76
76
|
"fixpack": "^4.0.0",
|
|
77
77
|
"husky": "^8.0.1",
|
|
78
|
+
"ioredis": "^5.0.6",
|
|
79
|
+
"ioredis-mock": "^8.2.2",
|
|
78
80
|
"is-ci": "^3.0.1",
|
|
79
81
|
"lint-staged": "^13.0.1",
|
|
80
82
|
"lookpath": "^1.2.2",
|