spamscanner 6.0.0 → 6.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -55,14 +55,14 @@ var replacements_exports = {};
55
55
  __export(replacements_exports, {
56
56
  default: () => replacements_default
57
57
  });
58
- var import_node_util, import_node_fs, import_crypto_random_string, debug, randomOptions, replacements, replacements_default;
58
+ var import_node_util5, import_node_fs, import_crypto_random_string, debug5, randomOptions, replacements, replacements_default;
59
59
  var init_replacements = __esm({
60
60
  "replacements.js"() {
61
- import_node_util = require("node:util");
61
+ import_node_util5 = require("node:util");
62
62
  import_node_fs = require("node:fs");
63
63
  import_crypto_random_string = __toESM(require("crypto-random-string"), 1);
64
64
  init_replacement_words();
65
- debug = (0, import_node_util.debuglog)("spamscanner");
65
+ debug5 = (0, import_node_util5.debuglog)("spamscanner");
66
66
  randomOptions = {
67
67
  length: 10,
68
68
  characters: "abcdefghijklmnopqrstuvwxyz"
@@ -71,7 +71,7 @@ var init_replacements = __esm({
71
71
  try {
72
72
  replacements = JSON.parse((0, import_node_fs.readFileSync)("./replacements.json", "utf8"));
73
73
  } catch (error) {
74
- debug(error);
74
+ debug5(error);
75
75
  for (const replacement of replacement_words_default) {
76
76
  replacements[replacement] = `${replacement}${(0, import_crypto_random_string.default)(randomOptions)}`;
77
77
  }
@@ -85,18 +85,18 @@ var get_classifier_exports = {};
85
85
  __export(get_classifier_exports, {
86
86
  default: () => get_classifier_default
87
87
  });
88
- var import_node_util2, import_node_fs2, import_naivebayes, debug2, classifier, get_classifier_default;
88
+ var import_node_util6, import_node_fs2, import_naivebayes, debug6, classifier, get_classifier_default;
89
89
  var init_get_classifier = __esm({
90
90
  "get-classifier.js"() {
91
- import_node_util2 = require("node:util");
91
+ import_node_util6 = require("node:util");
92
92
  import_node_fs2 = require("node:fs");
93
93
  import_naivebayes = __toESM(require("@ladjs/naivebayes"), 1);
94
- debug2 = (0, import_node_util2.debuglog)("spamscanner");
94
+ debug6 = (0, import_node_util6.debuglog)("spamscanner");
95
95
  classifier = new import_naivebayes.default().toJsonObject();
96
96
  try {
97
97
  classifier = JSON.parse((0, import_node_fs2.readFileSync)("./classifier.json", "utf8"));
98
98
  } catch (error) {
99
- debug2(error);
99
+ debug6(error);
100
100
  }
101
101
  get_classifier_default = classifier;
102
102
  }
@@ -546,11 +546,12 @@ __export(index_exports, {
546
546
  default: () => index_default
547
547
  });
548
548
  module.exports = __toCommonJS(index_exports);
549
+ var import_node_buffer2 = require("node:buffer");
549
550
  var import_node_fs3 = __toESM(require("node:fs"), 1);
550
551
  var import_node_path = __toESM(require("node:path"), 1);
551
552
  var import_node_process = __toESM(require("node:process"), 1);
552
553
  var import_node_crypto2 = require("node:crypto");
553
- var import_node_util3 = require("node:util");
554
+ var import_node_util7 = require("node:util");
554
555
  var import_node_url = require("node:url");
555
556
  var import_auto_bind = __toESM(require("auto-bind"), 1);
556
557
  var import_ascii_fullwidth_halfwidth_convert = __toESM(require("ascii-fullwidth-halfwidth-convert"), 1);
@@ -581,10 +582,1461 @@ var import_stopword = __toESM(require("stopword"), 1);
581
582
  var import_url_regex_safe = __toESM(require("url-regex-safe"), 1);
582
583
  var import_mailparser = require("mailparser");
583
584
  var import_file_type = require("file-type");
585
+
586
+ // src/auth.js
587
+ var import_node_buffer = require("node:buffer");
588
+ var import_node_util = require("node:util");
589
+ var import_node_dns = __toESM(require("node:dns"), 1);
590
+ var debug = (0, import_node_util.debuglog)("spamscanner:auth");
591
// Holds the "mailauth" module namespace once it has been imported; undefined before that.
var mailauth;
/**
 * Return the "mailauth" module namespace, importing it on the first call and
 * reusing the stored namespace on later calls.
 *
 * @returns {Promise<object>} The imported module namespace.
 */
var getMailauth = async () => {
  if (!mailauth) {
    mailauth = await import("mailauth");
  }
  return mailauth;
};
596
/**
 * Build a DNS lookup function backed by a dedicated resolver that queries
 * 8.8.8.8 and 1.1.1.1.
 *
 * The returned function dispatches on the record type to the matching
 * Resolver method (TXT, MX, A, AAAA, PTR, CNAME) and falls back to the generic
 * `resolve(name, type)` for anything else. TXT records are returned with
 * their chunks joined into one string per record.
 *
 * The previous implementation created an AbortController that nothing was
 * listening to, so `timeout` had no effect, and its timer was left running
 * when a lookup threw. The lookup is now raced against the timer and the
 * timer is always cleared.
 *
 * @param {number} [timeout=10000] Maximum time in milliseconds to wait for a lookup.
 * @returns {(name: string, type: string) => Promise<Array>} Lookup function; it
 *   rejects with the resolver's error, or with an error whose `code` is
 *   "ETIMEOUT" when the deadline passes first.
 */
var createResolver = (timeout = 1e4) => {
  const resolver = new import_node_dns.default.promises.Resolver();
  resolver.setServers(["8.8.8.8", "1.1.1.1"]);
  const lookup = async (name, type) => {
    switch (type) {
      case "TXT": {
        const records = await resolver.resolveTxt(name);
        // Each TXT record arrives as an array of chunks; callers get one string per record.
        return records.map((r) => Array.isArray(r) ? r.join("") : r);
      }
      case "MX": {
        return resolver.resolveMx(name);
      }
      case "A": {
        return resolver.resolve4(name);
      }
      case "AAAA": {
        return resolver.resolve6(name);
      }
      case "PTR": {
        return resolver.resolvePtr(name);
      }
      case "CNAME": {
        return resolver.resolveCname(name);
      }
      default: {
        return resolver.resolve(name, type);
      }
    }
  };
  return async (name, type) => {
    let timeoutId;
    const deadline = new Promise((_resolve, reject) => {
      timeoutId = setTimeout(() => {
        const error = new Error(`DNS lookup timed out after ${timeout}ms`);
        error.code = "ETIMEOUT";
        reject(error);
      }, timeout);
    });
    try {
      // The resolver is shared between concurrent lookups, so an overdue query
      // is abandoned rather than cancelled (cancel() would abort the others too).
      return await Promise.race([lookup(name, type), deadline]);
    } catch (error) {
      debug("DNS lookup failed for %s %s: %s", type, name, error.message);
      throw error;
    } finally {
      clearTimeout(timeoutId);
    }
  };
};
642
/**
 * Run mailauth's `authenticate` over a raw message and return its DKIM, SPF,
 * DMARC, ARC and BIMI results in normalized form.
 *
 * A placeholder result (every mechanism reported as "none") is returned when
 * no `ip` is supplied and also when anything inside the authentication
 * attempt throws; the error is only written to the debug log.
 *
 * @param {Buffer|string} message Raw message; non-Buffer input is passed to `Buffer.from`.
 * @param {object} [options]
 * @param {string} [options.ip] Connecting client IP. Authentication is skipped without it.
 * @param {string} [options.helo] HELO name, "unknown" when falsy.
 * @param {string} [options.mta] Receiving MTA name, "spamscanner" when falsy.
 * @param {string} [options.sender] Forwarded to mailauth unchanged.
 * @param {Function} [options.resolver] DNS lookup function; built with `createResolver` when undefined.
 * @param {number} [options.timeout] Timeout handed to `createResolver` (10000 when falsy).
 * @returns {Promise<object>} `{ dkim, spf, dmarc, arc, bimi, receivedChain, headers }`.
 */
async function authenticate(message, options = {}) {
  const { ip, helo, mta, sender } = options;
  const resolver = options.resolver === undefined ? createResolver(options.timeout || 1e4) : options.resolver;
  const none = (comment) => ({ result: "none", comment });
  const fallback = {
    dkim: { results: [], status: none("No DKIM signature found") },
    spf: { status: none("SPF check not performed"), domain: null },
    dmarc: { status: none("DMARC check not performed"), policy: null, domain: null },
    arc: { status: none("No ARC chain found"), chain: [] },
    bimi: { status: none("No BIMI record found"), location: null, authority: null },
    receivedChain: [],
    headers: {}
  };
  if (!ip) {
    debug("No IP address provided, skipping authentication");
    return fallback;
  }
  try {
    const { authenticate: mailauthAuthenticate } = await getMailauth();
    const messageBuffer = import_node_buffer.Buffer.isBuffer(message) ? message : import_node_buffer.Buffer.from(message);
    const mailauthOptions = {
      ip,
      helo: helo || "unknown",
      mta: mta || "spamscanner",
      sender,
      resolver
    };
    const authResult = await mailauthAuthenticate(messageBuffer, mailauthOptions);
    debug("Authentication result: %o", authResult);
    const normalized = {};
    for (const mechanism of ["dkim", "spf", "dmarc", "arc", "bimi"]) {
      normalized[mechanism] = normalizeResult(authResult[mechanism], mechanism);
    }
    normalized.receivedChain = authResult.receivedChain || [];
    normalized.headers = authResult.headers || {};
    return normalized;
  } catch (error) {
    debug("Authentication failed: %s", error.message);
    return fallback;
  }
}
705
/**
 * Reduce one mechanism's authentication result to a fixed set of fields,
 * filling in defaults for anything missing.
 *
 * @param {object|null|undefined} result Raw result for one mechanism.
 * @param {string} type One of "dkim", "spf", "dmarc", "arc", "bimi"; any other
 *   value returns `result` untouched.
 * @returns {object} For a falsy `result`, `{ status }` with result "none";
 *   otherwise the mechanism-specific shape built below.
 */
function normalizeResult(result, type) {
  if (!result) {
    return {
      status: { result: "none", comment: `No ${type.toUpperCase()} result` }
    };
  }
  switch (type) {
    case "dkim": {
      return {
        results: result.results || [],
        status: result.status || { result: "none", comment: "No DKIM signature found" }
      };
    }
    case "spf": {
      return {
        status: result.status || { result: "none", comment: "SPF check not performed" },
        domain: result.domain || null,
        explanation: result.explanation || null
      };
    }
    case "dmarc": {
      return {
        status: result.status || { result: "none", comment: "DMARC check not performed" },
        policy: result.policy || null,
        domain: result.domain || null,
        p: result.p || null,
        sp: result.sp || null,
        // `??` rather than `||`: a reported pct of 0 is a real value and must
        // not be replaced by null.
        pct: result.pct ?? null
      };
    }
    case "arc": {
      return {
        status: result.status || { result: "none", comment: "No ARC chain found" },
        chain: result.chain || [],
        i: result.i || null
      };
    }
    case "bimi": {
      return {
        status: result.status || { result: "none", comment: "No BIMI record found" },
        location: result.location || null,
        authority: result.authority || null,
        selector: result.selector || null
      };
    }
    default: {
      return result;
    }
  }
}
755
/**
 * Convert DKIM/SPF/DMARC/ARC outcomes into a spam-score adjustment.
 *
 * Negative weights lower the spam score (a passing check) and positive
 * weights raise it (a failing check). Outcomes with no weight — "none",
 * "neutral", missing, and so on — contribute nothing.
 *
 * @param {object} authResult Object with optional `dkim`, `spf`, `dmarc`, `arc`
 *   members, each exposing `status.result`.
 * @param {object} [weights] Overrides for any of dkimPass, dkimFail, spfPass,
 *   spfFail, spfSoftfail, dmarcPass, dmarcFail, arcPass, arcFail.
 * @returns {{score: number, tests: string[], details: object}} Total
 *   adjustment, one `NAME(weight)` label per applied rule, and the outcome seen
 *   for each mechanism ("none" when absent).
 */
function calculateAuthScore(authResult, weights = {}) {
  const appliedWeights = {
    dkimPass: -2,
    dkimFail: 3,
    spfPass: -1,
    spfFail: 2,
    spfSoftfail: 1,
    dmarcPass: -2,
    dmarcFail: 4,
    arcPass: -1,
    arcFail: 1,
    ...weights
  };
  // Mechanisms in evaluation order, each with the outcomes that carry a weight.
  const mechanisms = [
    ["dkim", new Map([["pass", "dkimPass"], ["fail", "dkimFail"]])],
    ["spf", new Map([["pass", "spfPass"], ["fail", "spfFail"], ["softfail", "spfSoftfail"]])],
    ["dmarc", new Map([["pass", "dmarcPass"], ["fail", "dmarcFail"]])],
    ["arc", new Map([["pass", "arcPass"], ["fail", "arcFail"]])]
  ];
  const tests = [];
  const details = {};
  let score = 0;
  for (const [mechanism, weightKeys] of mechanisms) {
    const outcome = authResult[mechanism]?.status?.result;
    details[mechanism] = outcome || "none";
    const weightKey = weightKeys.get(outcome);
    if (weightKey === undefined) {
      continue;
    }
    const weight = appliedWeights[weightKey];
    score += weight;
    tests.push(`${mechanism.toUpperCase()}_${outcome.toUpperCase()}(${weight})`);
  }
  return { score, tests, details };
}
825
/**
 * Render authentication results as the value of an Authentication-Results
 * style header.
 *
 * The value starts with `hostname`, followed by one `method=result` segment
 * per mechanism that has a result (dkim, spf, dmarc, arc, in that order).
 * Segments are joined with ";\n " so each continues on a folded line.
 *
 * @param {object} authResult Object with optional `dkim`, `spf`, `dmarc`, `arc` members.
 * @param {string} [hostname="spamscanner"] Identifier placed first in the value.
 * @returns {string} The formatted header value.
 */
function formatAuthResultsHeader(authResult, hostname = "spamscanner") {
  const segments = [hostname];
  const dkimOutcome = authResult.dkim?.status?.result;
  if (dkimOutcome) {
    // Only the first DKIM signature's domain is reported.
    const signingDomain = authResult.dkim.results?.[0]?.signingDomain;
    segments.push(signingDomain ? `dkim=${dkimOutcome} header.d=${signingDomain}` : `dkim=${dkimOutcome}`);
  }
  const spfOutcome = authResult.spf?.status?.result;
  if (spfOutcome) {
    const spfDomain = authResult.spf.domain;
    segments.push(spfDomain ? `spf=${spfOutcome} smtp.mailfrom=${spfDomain}` : `spf=${spfOutcome}`);
  }
  const dmarcOutcome = authResult.dmarc?.status?.result;
  if (dmarcOutcome) {
    const dmarcDomain = authResult.dmarc.domain;
    segments.push(dmarcDomain ? `dmarc=${dmarcOutcome} header.from=${dmarcDomain}` : `dmarc=${dmarcOutcome}`);
  }
  const arcOutcome = authResult.arc?.status?.result;
  if (arcOutcome) {
    segments.push(`arc=${arcOutcome}`);
  }
  return segments.join(";\n ");
}
854
+
855
+ // src/reputation.js
856
+ var import_node_util2 = require("node:util");
857
+ var debug2 = (0, import_node_util2.debuglog)("spamscanner:reputation");
858
// Endpoint queried by checkReputation when the caller does not pass `apiUrl`.
var DEFAULT_API_URL = "https://api.forwardemail.net/v1/reputation";
// How long a cached lookup stays valid, in milliseconds (five minutes).
var CACHE_TTL = 1e3 * 60 * 5;
// Module-level store of lookup results, keyed by `${apiUrl}:${value}`.
var cache = new Map();
861
/**
 * Look up the reputation of a single value through the reputation API,
 * caching successful answers for CACHE_TTL milliseconds.
 *
 * The function never rejects: invalid input, a non-2xx response, a timeout or
 * any other error yields a result with every flag false and every value null.
 * Only successful responses are cached.
 *
 * Changes from the previous version: the abort timer is cleared on every path
 * (it used to keep running after a failed request) and now also covers reading
 * the response body; a cache entry found to be expired is removed instead of
 * being left in the map.
 *
 * @param {string} value Value to look up; sent as the `q` query parameter.
 * @param {object} [options]
 * @param {string} [options.apiUrl] Endpoint URL, DEFAULT_API_URL by default.
 * @param {number} [options.timeout=10000] Request timeout in milliseconds.
 * @returns {Promise<{isTruthSource: boolean, truthSourceValue: *, isAllowlisted: boolean,
 *   allowlistValue: *, isDenylisted: boolean, denylistValue: *}>}
 */
async function checkReputation(value, options = {}) {
  const {
    apiUrl = DEFAULT_API_URL,
    timeout = 1e4
  } = options;
  // A fresh object per call so callers can never mutate a shared default.
  const emptyResult = () => ({
    isTruthSource: false,
    truthSourceValue: null,
    isAllowlisted: false,
    allowlistValue: null,
    isDenylisted: false,
    denylistValue: null
  });
  if (!value || typeof value !== "string") {
    return emptyResult();
  }
  const cacheKey = `${apiUrl}:${value}`;
  const cached = cache.get(cacheKey);
  if (cached) {
    if (Date.now() - cached.timestamp < CACHE_TTL) {
      debug2("Cache hit for %s", value);
      return cached.result;
    }
    // Expired: drop it now so a failed refresh does not leave it behind.
    cache.delete(cacheKey);
  }
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), timeout);
  try {
    const url = new URL(apiUrl);
    url.searchParams.set("q", value);
    const response = await fetch(url.toString(), {
      method: "GET",
      headers: {
        Accept: "application/json",
        "User-Agent": "SpamScanner/6.0"
      },
      signal: controller.signal
    });
    if (!response.ok) {
      debug2("API returned status %d for %s", response.status, value);
      return emptyResult();
    }
    const result = await response.json();
    const normalizedResult = {
      isTruthSource: Boolean(result.isTruthSource),
      truthSourceValue: result.truthSourceValue || null,
      isAllowlisted: Boolean(result.isAllowlisted),
      allowlistValue: result.allowlistValue || null,
      isDenylisted: Boolean(result.isDenylisted),
      denylistValue: result.denylistValue || null
    };
    cache.set(cacheKey, {
      result: normalizedResult,
      timestamp: Date.now()
    });
    debug2("Reputation check for %s: %o", value, normalizedResult);
    return normalizedResult;
  } catch (error) {
    debug2("Reputation check failed for %s: %s", value, error.message);
    return emptyResult();
  } finally {
    clearTimeout(timeoutId);
  }
}
934
/**
 * Look up several values at once.
 *
 * Falsy entries are dropped and duplicates collapsed before the lookups are
 * started; all lookups run concurrently through `checkReputation`.
 *
 * @param {Array} values Values to look up.
 * @param {object} [options] Passed unchanged to every `checkReputation` call.
 * @returns {Promise<Map>} Map from each distinct value to its reputation result.
 */
async function checkReputationBatch(values, options = {}) {
  const distinct = Array.from(new Set(values.filter((candidate) => Boolean(candidate))));
  const lookupOne = async (candidate) => {
    const reputation = await checkReputation(candidate, options);
    return [candidate, reputation];
  };
  const entries = await Promise.all(distinct.map((candidate) => lookupOne(candidate)));
  return new Map(entries);
}
942
/**
 * Merge several reputation results into one.
 *
 * A flag is true when any input has it set; each `*Value` field keeps the
 * first truthy value seen among the inputs that set the flag.
 *
 * Accepts any iterable of result objects and also a Map whose values are
 * result objects (the shape `checkReputationBatch` returns). Iterating a Map
 * directly yields `[key, value]` pairs, which previously made every flag
 * silently stay false. Nullish input and nullish entries are ignored.
 *
 * @param {Iterable<object>|Map<*, object>|null|undefined} results
 * @returns {{isTruthSource: boolean, truthSourceValue: *, isAllowlisted: boolean,
 *   allowlistValue: *, isDenylisted: boolean, denylistValue: *}}
 */
function aggregateReputationResults(results) {
  const aggregated = {
    isTruthSource: false,
    truthSourceValue: null,
    isAllowlisted: false,
    allowlistValue: null,
    isDenylisted: false,
    denylistValue: null
  };
  const entries = results instanceof Map ? results.values() : results ?? [];
  for (const result of entries) {
    if (!result) {
      continue;
    }
    if (result.isTruthSource) {
      aggregated.isTruthSource = true;
      aggregated.truthSourceValue ||= result.truthSourceValue;
    }
    if (result.isAllowlisted) {
      aggregated.isAllowlisted = true;
      aggregated.allowlistValue ||= result.allowlistValue;
    }
    if (result.isDenylisted) {
      aggregated.isDenylisted = true;
      aggregated.denylistValue ||= result.denylistValue;
    }
  }
  return aggregated;
}
967
+
968
+ // src/is-arbitrary.js
969
+ var import_node_util3 = require("node:util");
970
+ var debug3 = (0, import_node_util3.debuglog)("spamscanner:arbitrary");
971
// Phrases that mark a subject as spam outright (isArbitrary adds 10 points and
// sets category "SPAM" when one matches). Case-insensitive.
var BLOCKED_PHRASES_PATTERN = new RegExp([
  "cheecck y0ur acc0untt",
  "recorded you",
  "you've been hacked",
  "account is hacked",
  "personal data has leaked",
  "private information has been stolen"
].join("|"), "im");
// Subjects that exempt a message from the spoofing check in checkSpoofingAttacks.
// NOTE(review): the alternatives have no word boundaries, so short ones such as
// "ping", "cron" or "yum" also match inside longer words (e.g. "shipping") —
// confirm that this breadth is intended.
var SYSADMIN_SUBJECT_PATTERN = new RegExp([
  "please moderate",
  "mdadm monitoring",
  "weekly report",
  "wordfence",
  "wordpress",
  "wpforms",
  "docker",
  "graylog",
  "digest",
  "event notification",
  "package update manager",
  "event alert",
  "system events",
  "monit alert",
  "ping",
  "monitor",
  "cron",
  "yum",
  "sendmail",
  "exim",
  "backup",
  "logwatch",
  "unattended-upgrades"
].join("|"), "im");
973
// Regular expressions grouped by the part of the message they are written for.
var SPAM_PATTERNS = {
  // Tested against the subject line.
  subjectPatterns: [
    // Pressure to act quickly
    /\b(urgent|immediate|action required|act now|limited time|expires?|deadline)\b/i,
    // Money and winnings
    /\b(free|winner|won|prize|lottery|million|billion|cash|money|investment|profit)\b/i,
    // Account-verification phishing
    /\b(verify|confirm|update|suspend|locked|unusual activity|security alert)\b/i,
    // Pharmaceutical spam
    /\b(viagra|cialis|pharmacy|pills|medication|prescription)\b/i,
    // Cryptocurrency spam
    /\b(bitcoin|crypto|btc|eth|nft|blockchain|wallet)\b/i
  ],
  // Tested against the message body.
  bodyPatterns: [
    // Advance-fee fraud
    /\b(nigerian?|prince|inheritance|beneficiary|next of kin|deceased|unclaimed)\b/i,
    // Lottery scams
    /\b(congratulations.*won|you have been selected|claim your prize)\b/i,
    // Phishing calls to action
    /\b(click here to verify|confirm your identity|update your account|suspended.*account)\b/i,
    // Pressure to act quickly
    /\b(act now|limited time offer|expires in \d+|only \d+ left)\b/i,
    // Payment-transfer scams
    /\b(wire transfer|western union|moneygram|bank transfer|routing number)\b/i,
    // Enhancement and diet-pill spam
    /\b(enlarge|enhancement|erectile|dysfunction|weight loss|diet pills)\b/i
  ],
  // Tested against the sender address.
  senderPatterns: [
    // Letters followed by four or more digits before the "@"
    /^[a-z]+\d{4,}@/i,
    // Thirty or more characters before the "@"
    /^.{30,}@/,
    // Address ending in one of a fixed list of TLDs
    /@.*(\.ru|\.cn|\.tk|\.ml|\.ga|\.cf|\.gq)$/i,
    // Dotted-quad numeric host
    /@(?:\d+\.){3}\d+/
  ]
};
1014
// Top-level domains (without the leading dot) treated as suspicious.
var SUSPICIOUS_TLDS = new Set([
  // Free TLDs often abused
  ...["tk", "ml", "ga", "cf", "gq"],
  // Remaining entries on the list
  ...["xyz", "top", "wang", "win", "bid", "loan", "click", "link", "work"],
  ...["date", "racing", "download", "stream", "trade"]
]);
1036
// Keyword → score weight. Positive weights count towards spam; the single
// negative weight ("unsubscribe") counts against it because legitimate mail
// often contains that word.
var SPAM_KEYWORDS = new Map(Object.entries({
  free: 1,
  winner: 2,
  prize: 2,
  lottery: 3,
  urgent: 1,
  "act now": 2,
  "limited time": 1,
  "click here": 1,
  unsubscribe: -1,
  "verify your account": 2,
  suspended: 2,
  inheritance: 3,
  "million dollars": 3,
  "wire transfer": 3,
  "western union": 3,
  nigerian: 3,
  prince: 2,
  beneficiary: 2,
  congratulations: 1,
  selected: 1,
  viagra: 3,
  cialis: 3,
  pharmacy: 2,
  bitcoin: 1,
  crypto: 1,
  "investment opportunity": 2,
  guaranteed: 1,
  "risk free": 2,
  "no obligation": 1,
  "dear friend": 2,
  "dear customer": 1,
  "dear user": 1
}));
1071
// X-Email-Type-Id values that checkVendorSpam_ blocks on mail from paypal.com.
var PAYPAL_SPAM_TYPE_IDS = new Set([
  "PPC001017",
  "RT000238",
  "RT000542",
  "RT002947"
]);
// Lower-case "cat:" tokens searched for in the X-Forefront-Antispam-Report
// header, grouped in the order checkMicrosoftExchangeHeaders scans them.
var MS_SPAM_CATEGORIES = {
  // Scanned first and given the highest score
  highConfidence: ["cat:malw", "cat:hphsh", "cat:hphish", "cat:hspm"],
  // Impersonation verdicts
  impersonation: ["cat:bimp", "cat:dimp", "cat:gimp", "cat:uimp"],
  // Phishing and spoofing verdicts
  phishingAndSpoofing: ["cat:phsh", "cat:spoof"],
  // Plain spam verdicts
  spam: ["cat:ospm", "cat:spm"]
};
// Lower-case "sfv:" tokens from the same header that are treated as a spam verdict.
var MS_SPAM_VERDICTS = ["sfv:spm", "sfv:skb", "sfv:sks"];
1083
/**
 * Run the rule-based spam checks over a parsed message and report whether
 * the accumulated score reaches the threshold.
 *
 * Two families of checks are applied, in this order:
 *  1. Blocking checks (blocked subject phrase, Microsoft Exchange headers,
 *     vendor-specific spam, spoofing). Each contributes its score, reasons and
 *     category only when it reports `blocked`.
 *  2. Scoring checks (subject, body, sender, header anomalies, links). Each
 *     always contributes its score and reasons and never sets the category.
 *
 * @param {object} parsed Parsed message. Reads `subject`, `from`, `replyTo`,
 *   `text`, `html`, and either `headers.get(name)` or `headerLines`.
 * @param {object} [options]
 * @param {number} [options.threshold=5] Score at or above which the message is flagged.
 * @param {boolean} [options.checkSubject=true]
 * @param {boolean} [options.checkBody=true]
 * @param {boolean} [options.checkSender=true]
 * @param {boolean} [options.checkHeaders=true]
 * @param {boolean} [options.checkLinks=true]
 * @param {boolean} [options.checkMicrosoftHeaders=true]
 * @param {boolean} [options.checkVendorSpam=true]
 * @param {boolean} [options.checkSpoofing=true]
 * @param {object} [options.session={}] Known session facts; copied, not mutated, by buildSessionInfo.
 * @returns {{isArbitrary: boolean, reasons: string[], score: number, category: (string|null)}}
 */
function isArbitrary(parsed, options = {}) {
  const {
    threshold = 5,
    checkSubject = true,
    checkBody = true,
    checkSender = true,
    checkHeaders = true,
    checkLinks = true,
    checkMicrosoftHeaders = true,
    checkVendorSpam = true,
    checkSpoofing = true,
    session = {}
  } = options;
  const reasons = [];
  let score = 0;
  let category = null;

  // Header lookup: prefer the headers collection's get(), otherwise scan the
  // raw header lines and return everything after the first ":".
  const getHeader = (name) => {
    if (parsed.headers?.get) {
      return parsed.headers.get(name);
    }
    if (!parsed.headerLines) {
      return null;
    }
    const match = parsed.headerLines.find((h) => h.key.toLowerCase() === name.toLowerCase());
    return match?.line?.split(":").slice(1).join(":").trim();
  };
  // Blocking checks only count when they actually blocked the message.
  const applyBlocking = (outcome) => {
    if (!outcome.blocked) {
      return;
    }
    reasons.push(...outcome.reasons);
    score += outcome.score;
    category = outcome.category || category;
  };
  // Scoring checks always count.
  const applyScoring = (outcome) => {
    score += outcome.score;
    reasons.push(...outcome.reasons);
  };

  const subject = parsed.subject || getHeader("subject") || "";
  const from = parsed.from?.value?.[0]?.address || parsed.from?.text || getHeader("from") || "";
  const sessionInfo = buildSessionInfo(parsed, session, getHeader);

  if (subject && BLOCKED_PHRASES_PATTERN.test(subject)) {
    reasons.push("BLOCKED_PHRASE_IN_SUBJECT");
    score += 10;
    category = "SPAM";
  }
  if (checkMicrosoftHeaders) {
    applyBlocking(checkMicrosoftExchangeHeaders(getHeader, sessionInfo));
  }
  if (checkVendorSpam) {
    applyBlocking(checkVendorSpam_(parsed, sessionInfo, getHeader, subject, from));
  }
  if (checkSpoofing) {
    applyBlocking(checkSpoofingAttacks(parsed, sessionInfo, getHeader, subject));
  }
  if (checkSubject && subject) {
    applyScoring(checkSubjectLine(subject));
  }
  if (checkBody) {
    applyScoring(checkBodyContent(parsed.text || "", parsed.html || ""));
  }
  if (checkSender) {
    const replyTo = parsed.replyTo?.value?.[0]?.address || parsed.replyTo?.text || "";
    applyScoring(checkSenderPatterns(from, replyTo));
  }
  if (checkHeaders) {
    applyScoring(checkHeaderAnomalies(parsed, getHeader));
  }
  if (checkLinks) {
    // Links are taken from the HTML part when there is one, otherwise from the text part.
    applyScoring(checkSuspiciousLinks(parsed.html || parsed.text || ""));
  }

  const isArbitrarySpam = score >= threshold;
  debug3(
    "Arbitrary check result: score=%d, threshold=%d, isArbitrary=%s, category=%s, reasons=%o",
    score,
    threshold,
    isArbitrarySpam,
    category,
    reasons
  );
  return {
    isArbitrary: isArbitrarySpam,
    reasons,
    score,
    category
  };
}
1186
/**
 * Produce the session facts the checks rely on, starting from a shallow copy
 * of `session` and deriving whatever the caller did not supply.
 *
 * Derived only when absent from `session`:
 *  - `originalFromAddress` (lower-cased), plus `originalFromAddressDomain` and
 *    `originalFromAddressRootDomain` when the address has an "@" past its first character;
 *  - `resolvedClientHostname` and, when one is found, `resolvedRootClientHostname`;
 *  - `remoteAddress`.
 *
 * @param {object} parsed Parsed message.
 * @param {object} session Caller-provided session facts; not mutated.
 * @param {(name: string) => *} getHeader Header lookup used as the last resort for "from".
 * @returns {object} The completed session info.
 */
function buildSessionInfo(parsed, session, getHeader) {
  const info = { ...session };
  const sender = parsed.from?.value?.[0]?.address || parsed.from?.text || getHeader("from") || "";
  const needsSenderDetails = Boolean(sender) && !info.originalFromAddress;
  if (needsSenderDetails) {
    info.originalFromAddress = sender.toLowerCase();
    const at = sender.indexOf("@");
    if (at >= 1) {
      const domain = sender.slice(at + 1).toLowerCase();
      info.originalFromAddressDomain = domain;
      info.originalFromAddressRootDomain = getRootDomain(domain);
    }
  }
  if (!info.resolvedClientHostname) {
    const hostname = extractClientHostname(parsed);
    info.resolvedClientHostname = hostname;
    if (hostname) {
      info.resolvedRootClientHostname = getRootDomain(hostname);
    }
  }
  if (!info.remoteAddress) {
    info.remoteAddress = extractRemoteIp(parsed);
  }
  return info;
}
1206
/**
 * Inspect Microsoft's own verdict headers on mail that arrived from an
 * "*.outbound.protection.outlook.com" host.
 *
 * Mail from any other client hostname is never blocked here. For Microsoft
 * mail the first matching rule wins, in this order:
 *  1. authentication failure in X-MS-Exchange-Authentication-Results (no
 *     spf/dkim/dmarc pass and at least one fail) — skipped when the Forefront
 *     report says "not spam" (SFV:NSPM or SCL <= 2);
 *  2. high-confidence threat category; 3. impersonation category;
 *  4. phishing/spoofing category; 5. spam filtering verdict; 6. spam category;
 *  7. SCL of 5 or more.
 * Rules 2-7 need the X-Forefront-Antispam-Report header; without it only
 * rule 1 applies.
 *
 * @param {(name: string) => (string|undefined|null)} getHeader Header lookup.
 * @param {object} sessionInfo Reads `resolvedClientHostname`.
 * @returns {{blocked: boolean, reasons: string[], score: number, category: (string|null)}}
 */
function checkMicrosoftExchangeHeaders(getHeader, sessionInfo) {
  const result = {
    blocked: false,
    reasons: [],
    score: 0,
    category: null
  };
  const block = (reason, points, category) => {
    result.blocked = true;
    result.reasons.push(reason);
    result.score += points;
    result.category = category;
    return result;
  };
  const clientHostname = sessionInfo.resolvedClientHostname;
  if (!clientHostname || !clientHostname.endsWith(".outbound.protection.outlook.com")) {
    return result;
  }
  const msAuthHeader = getHeader("x-ms-exchange-authentication-results");
  const forefrontHeader = getHeader("x-forefront-antispam-report");
  // True when Microsoft recorded no passing mechanism and at least one failing one.
  const authenticationFailed = () => {
    const verdicts = msAuthHeader.toLowerCase();
    const anyPass = ["spf=pass", "dkim=pass", "dmarc=pass"].some((token) => verdicts.includes(token));
    if (anyPass) {
      return false;
    }
    return ["spf=fail", "dkim=fail", "dmarc=fail"].some((token) => verdicts.includes(token));
  };
  if (!forefrontHeader) {
    if (msAuthHeader && authenticationFailed()) {
      block("MS_EXCHANGE_AUTH_FAILURE", 10, "AUTHENTICATION_FAILURE");
    }
    return result;
  }
  const report = forefrontHeader.toLowerCase();
  const sclMatch = report.match(/scl:(\d+)/);
  const scl = sclMatch ? Number.parseInt(sclMatch[1], 10) : null;
  const microsoftSaysNotSpam = report.includes("sfv:nspm") || scl !== null && scl <= 2;
  if (!microsoftSaysNotSpam && msAuthHeader && authenticationFailed()) {
    return block("MS_EXCHANGE_AUTH_FAILURE", 10, "AUTHENTICATION_FAILURE");
  }
  // Token scans in priority order; the first token found in the report decides.
  const scans = [
    {
      tokens: MS_SPAM_CATEGORIES.highConfidence,
      label: "MS_HIGH_CONFIDENCE_THREAT",
      points: 15,
      categorize: (token) => token.includes("malw") ? "MALWARE" : token.includes("phish") || token.includes("phsh") ? "PHISHING" : "HIGH_CONFIDENCE_SPAM"
    },
    {
      tokens: MS_SPAM_CATEGORIES.impersonation,
      label: "MS_IMPERSONATION",
      points: 12,
      categorize: () => "IMPERSONATION"
    },
    {
      tokens: MS_SPAM_CATEGORIES.phishingAndSpoofing,
      label: "MS_PHISHING_SPOOF",
      points: 12,
      categorize: (token) => token.includes("phsh") ? "PHISHING" : "SPOOFING"
    },
    {
      tokens: MS_SPAM_VERDICTS,
      label: "MS_SPAM_VERDICT",
      points: 10,
      categorize: () => "SPAM"
    },
    {
      tokens: MS_SPAM_CATEGORIES.spam,
      label: "MS_SPAM_CATEGORY",
      points: 10,
      categorize: () => "SPAM"
    }
  ];
  for (const { tokens, label, points, categorize } of scans) {
    const hit = tokens.find((token) => report.includes(token));
    if (hit !== undefined) {
      return block(`${label}: ${hit.toUpperCase()}`, points, categorize(hit));
    }
  }
  if (scl !== null && scl >= 5) {
    return block(`MS_HIGH_SCL: ${scl}`, 8, "SPAM");
  }
  return result;
}
1314
/**
 * Apply hand-written rules for specific, known vendor-related spam and
 * impersonation campaigns. Rules are tried in order and the first match wins:
 * PayPal invoice spam, Authorize.net mail relayed via visa.com, Amazon Japan
 * impersonation, pCloud impersonation, Microsoft bounce spam, 163.com bounce
 * spam, and DocuSign mail whose SPF domain is under onmicrosoft.com.
 *
 * @param {object} parsed Parsed message (not read by the current rules).
 * @param {object} sessionInfo Reads `originalFromAddress`,
 *   `originalFromAddressRootDomain`, `resolvedClientHostname`,
 *   `resolvedRootClientHostname` and `spf.domain`.
 * @param {(name: string) => *} getHeader Header lookup.
 * @param {string} subject Message subject.
 * @param {string} from Sender address; compared lower-cased.
 * @returns {{blocked: boolean, reasons: string[], score: number, category: (string|null)}}
 */
function checkVendorSpam_(parsed, sessionInfo, getHeader, subject, from) {
  const result = {
    blocked: false,
    reasons: [],
    score: 0,
    category: null
  };
  const block = (reason, points, category) => {
    result.blocked = true;
    result.reasons.push(reason);
    result.score += points;
    result.category = category;
    return result;
  };
  const fromLower = from.toLowerCase();
  const sender = sessionInfo.originalFromAddress;
  const senderRoot = sessionInfo.originalFromAddressRootDomain;
  const clientHost = sessionInfo.resolvedClientHostname;
  const clientRoot = sessionInfo.resolvedRootClientHostname;

  // PayPal notifications whose type id is on the spam list.
  if (senderRoot === "paypal.com" && getHeader("x-email-type-id")) {
    const typeId = getHeader("x-email-type-id");
    if (PAYPAL_SPAM_TYPE_IDS.has(typeId)) {
      return block(`PAYPAL_INVOICE_SPAM: ${typeId}`, 15, "VENDOR_SPAM");
    }
  }
  // Authorize.net invoices delivered from a visa.com host.
  if (sender === "invoice@authorize.net" && clientRoot === "visa.com") {
    return block("AUTHORIZE_VISA_PHISHING", 15, "PHISHING");
  }
  // amazon.co.jp sender not delivered from an "amazon." root hostname.
  if (fromLower.includes("amazon.co.jp") && !(clientRoot && clientRoot.startsWith("amazon."))) {
    return block("AMAZON_JP_IMPERSONATION", 12, "IMPERSONATION");
  }
  // "pCloud" in subject and sender, but the sender is not on pcloud.com.
  if (subject && subject.includes("pCloud") && senderRoot !== "pcloud.com" && fromLower.includes("pcloud")) {
    return block("PCLOUD_IMPERSONATION", 12, "IMPERSONATION");
  }
  // Auto-generated "undeliverable" bounces coming out of Microsoft.
  const looksLikeMicrosoftPostmaster = sender === "postmaster@outlook.com" || clientHost && clientHost.endsWith(".outbound.protection.outlook.com") || sender?.startsWith("postmaster@") && sender?.endsWith(".onmicrosoft.com");
  if (looksLikeMicrosoftPostmaster && isAutoReply(getHeader) && subject && ["Undeliverable: ", "No se puede entregar: "].some((prefix) => subject.startsWith(prefix))) {
    return block("MS_BOUNCE_SPAM", 10, "BOUNCE_SPAM");
  }
  // Bounces from 163.com's postmaster; the subject marker is kept as escapes.
  if (sender === "postmaster@163.com" && subject && subject.includes("\u7CFB\u7EDF\u9000\u4FE1")) {
    return block("163_BOUNCE_SPAM", 10, "BOUNCE_SPAM");
  }
  // DocuSign sender whose SPF domain is onmicrosoft.com or a subdomain of it.
  const spfDomain = sessionInfo.spf?.domain;
  if (sender === "dse_na4@docusign.net" && spfDomain && (spfDomain.endsWith(".onmicrosoft.com") || spfDomain === "onmicrosoft.com")) {
    return block("DOCUSIGN_MS_SCAM", 12, "PHISHING");
  }
  return result;
}
1376
/**
 * Detect mail that claims to come from the recipient's own domain without
 * evidence that it really does.
 *
 * The message is left alone when any of these holds: aligned passing DKIM,
 * allowlisted sender, client hostname matching the From domain, no From root
 * domain, no envelope recipients, no recipient sharing the From root domain,
 * or a passing SPF result for the From header.
 *
 * Past that point `sessionInfo.isPotentialPhishing` is set to true (a side
 * effect on the caller's object). The message is then blocked unless it looks
 * like locally generated mail: an X-PHP-Script header, an X-Mailer mentioning
 * "php" or "drupal", or a subject matching SYSADMIN_SUBJECT_PATTERN.
 *
 * @param {object} parsed Parsed message (not read by this check).
 * @param {object} sessionInfo Session facts; `isPotentialPhishing` may be written.
 * @param {(name: string) => *} getHeader Header lookup.
 * @param {string} subject Message subject.
 * @returns {{blocked: boolean, reasons: string[], score: number, category: (string|null)}}
 */
function checkSpoofingAttacks(parsed, sessionInfo, getHeader, subject) {
  const result = {
    blocked: false,
    reasons: [],
    score: 0,
    category: null
  };
  const senderIsTrusted = sessionInfo.hadAlignedAndPassingDKIM || sessionInfo.isAllowlisted || sessionInfo.hasSameHostnameAsFrom;
  if (senderIsTrusted) {
    return result;
  }
  const recipients = sessionInfo.envelope?.rcptTo || [];
  const fromRootDomain = sessionInfo.originalFromAddressRootDomain;
  if (!fromRootDomain || recipients.length === 0) {
    return result;
  }
  const sentToOwnDomain = recipients.some((recipient) => Boolean(recipient.address) && getRootDomain(parseHostFromAddress(recipient.address)) === fromRootDomain);
  if (!sentToOwnDomain) {
    return result;
  }
  if (sessionInfo.spfFromHeader?.status?.result === "pass") {
    return result;
  }
  sessionInfo.isPotentialPhishing = true;
  const xPhpScript = getHeader("x-php-script");
  const xMailer = getHeader("x-mailer");
  const mailer = xMailer ? xMailer.toLowerCase() : "";
  const looksLocallyGenerated = Boolean(xPhpScript) || mailer.includes("php") || mailer.includes("drupal") || Boolean(subject) && SYSADMIN_SUBJECT_PATTERN.test(subject);
  if (looksLocallyGenerated) {
    return result;
  }
  result.blocked = true;
  result.reasons.push("SPOOFING_ATTACK");
  result.score += 12;
  result.category = "SPOOFING";
  return result;
}
1429
/**
 * Decides whether a message looks automatically generated, based on its
 * Auto-Submitted, X-Auto-Response-Suppress, Precedence and List-Unsubscribe
 * headers.
 *
 * The Auto-Submitted keyword is compared case-insensitively, ignoring
 * surrounding whitespace and any `;`-separated parameters, so "No" or
 * "no; x=y" are correctly treated as "not auto-submitted".
 *
 * @param {Function} getHeader - Looks up a header value by lowercase name.
 * @returns {boolean} True when any of the auto-generated indicators is present.
 */
function isAutoReply(getHeader) {
  const autoSubmitted = getHeader("auto-submitted");
  if (autoSubmitted) {
    // Only the leading keyword matters; parameters follow after ";".
    const keyword = String(autoSubmitted).split(";")[0].trim().toLowerCase();
    if (keyword !== "no") {
      return true;
    }
  }

  if (getHeader("x-auto-response-suppress")) {
    return true;
  }

  const precedence = getHeader("precedence");
  if (precedence && ["bulk", "junk", "list", "auto_reply"].includes(String(precedence).trim().toLowerCase())) {
    return true;
  }

  // Any List-Unsubscribe header is treated as a sign of automated mail.
  return Boolean(getHeader("list-unsubscribe"));
}
1447
/**
 * Scores a subject line against the configured spam patterns plus a few
 * structural heuristics (mostly upper-case, heavy punctuation, short fake
 * reply/forward prefix).
 *
 * Each pattern is evaluated once via `String.prototype.match` with its
 * `lastIndex` reset first. The previous `test()` + `match()` pair evaluated
 * every pattern twice and could dereference `null` for sticky patterns,
 * because `test()` advances `lastIndex` before `match()` runs.
 *
 * @param {string} subject - Subject line; non-string input yields an empty result.
 * @returns {{score: number, reasons: string[]}}
 */
function checkSubjectLine(subject) {
  const reasons = [];
  let score = 0;
  if (typeof subject !== "string") {
    return { score, reasons };
  }

  for (const pattern of SPAM_PATTERNS.subjectPatterns) {
    pattern.lastIndex = 0;
    const match = subject.match(pattern);
    if (match) {
      reasons.push(`SUBJECT_SPAM_PATTERN: ${match[0]}`);
      score += 1;
    }
  }

  // More than 70% upper-case among at least 11 ASCII letters.
  const upperCount = (subject.match(/[A-Z]/g) || []).length;
  const letterCount = (subject.match(/[a-zA-Z]/g) || []).length;
  if (letterCount > 10 && upperCount / letterCount > 0.7) {
    reasons.push("SUBJECT_ALL_CAPS");
    score += 2;
  }

  const punctCount = (subject.match(/[!?$]/g) || []).length;
  if (punctCount >= 3) {
    reasons.push("SUBJECT_EXCESSIVE_PUNCTUATION");
    score += 1;
  }

  // A reply/forward prefix on a very short subject.
  if (/^(re|fw|fwd):/i.test(subject) && subject.length < 20) {
    reasons.push("SUBJECT_FAKE_REPLY");
    score += 1;
  }

  return { score, reasons };
}
1474
/**
 * Scores message body content against spam patterns, weighted keywords and
 * HTML-specific heuristics.
 *
 * Pattern and keyword checks run on `text` when present, otherwise on `html`.
 *
 * Fixes over the previous version:
 *  - each pattern is evaluated once with `lastIndex` reset, instead of a
 *    `test()` + `match()` pair that could dereference `null` for sticky
 *    patterns;
 *  - the hidden-text heuristic no longer fires on `background-color: #fff`
 *    or `background-color: white`, only on a `color` property proper.
 *
 * @param {string} [text] - Plain-text body.
 * @param {string} [html] - HTML body.
 * @returns {{score: number, reasons: string[]}}
 */
function checkBodyContent(text, html) {
  const reasons = [];
  let score = 0;
  const content = text || html || "";
  const contentLower = content.toLowerCase();

  for (const pattern of SPAM_PATTERNS.bodyPatterns) {
    pattern.lastIndex = 0;
    const match = content.match(pattern);
    if (match) {
      // Cap the echoed match so reasons stay short.
      reasons.push(`BODY_SPAM_PATTERN: ${match[0].slice(0, 50)}`);
      score += 1;
    }
  }

  for (const [keyword, weight] of SPAM_KEYWORDS) {
    if (contentLower.includes(keyword.toLowerCase())) {
      reasons.push(`SPAM_KEYWORD: ${keyword}`);
      score += weight;
    }
  }

  if (html) {
    // The lookbehind rejects compound properties such as "background-color".
    if (/(?<![\w-])color:\s*(?:#fff|white)|font-size:\s*[01]px/i.test(html)) {
      reasons.push("HIDDEN_TEXT");
      score += 3;
    }

    const imgCount = (html.match(/<img/gi) || []).length;
    const textLength = (text || "").length;
    if (imgCount > 5 && textLength < 100) {
      reasons.push("IMAGE_HEAVY_LOW_TEXT");
      score += 2;
    }
  }

  if (/data:image\/[^;]+;base64,/i.test(html || "")) {
    reasons.push("BASE64_IMAGES");
    score += 1;
  }

  const shortenerPatterns = /\b(bit\.ly|tinyurl|goo\.gl|t\.co|ow\.ly|is\.gd|buff\.ly|adf\.ly|j\.mp)\b/i;
  if (shortenerPatterns.test(content)) {
    reasons.push("URL_SHORTENER");
    score += 2;
  }

  return { score, reasons };
}
1515
/**
 * Scores the sender address (and optional Reply-To) for suspicious traits.
 *
 * The TLD is taken from the part after the last "@", so senders on
 * multi-label domains (e.g. `user@mail.example.xyz`) are now checked against
 * SUSPICIOUS_TLDS. The previous regex only matched `label.tld` domains and
 * silently skipped every address with a subdomain.
 *
 * @param {string} [from] - Sender address.
 * @param {string} [replyTo] - Reply-To address.
 * @returns {{score: number, reasons: string[]}}
 */
function checkSenderPatterns(from, replyTo) {
  const reasons = [];
  let score = 0;
  if (!from) {
    reasons.push("MISSING_FROM");
    score += 2;
    return { score, reasons };
  }

  // At most one hit is counted, no matter how many patterns match.
  for (const pattern of SPAM_PATTERNS.senderPatterns) {
    if (pattern.test(from)) {
      reasons.push("SUSPICIOUS_SENDER_PATTERN");
      score += 2;
      break;
    }
  }

  const atIndex = from.lastIndexOf("@");
  const tldMatch = atIndex === -1 ? null : from.slice(atIndex + 1).match(/[^.]\.([a-z]+)$/i);
  if (tldMatch && SUSPICIOUS_TLDS.has(tldMatch[1].toLowerCase())) {
    reasons.push(`SUSPICIOUS_TLD: ${tldMatch[1]}`);
    score += 2;
  }

  if (replyTo) {
    const fromDomain = from.split("@")[1]?.toLowerCase();
    const replyDomain = replyTo.split("@")[1]?.toLowerCase();
    if (fromDomain && replyDomain && fromDomain !== replyDomain) {
      reasons.push("FROM_REPLY_TO_MISMATCH");
      score += 2;
    }
  }

  // Brand-like prefix on an address that is not on the brand's own .com domain.
  const spoofPatterns = /^(paypal|amazon|apple|microsoft|google|bank|security)/i;
  if (spoofPatterns.test(from) && !/@(paypal|amazon|apple|microsoft|google)\.com$/i.test(from)) {
    reasons.push("DISPLAY_NAME_SPOOFING");
    score += 3;
  }

  return { score, reasons };
}
1550
/**
 * Scores structural header anomalies: missing Message-ID or Date, dates more
 * than a day in the future or more than a year in the past, bulk-mailer
 * X-Mailer strings, missing MIME-Version on HTML/attachment mail, and more
 * than 50 To+Cc recipients.
 *
 * @param {object} parsed - Parsed message (messageId, date, html, attachments, to, cc).
 * @param {Function} getHeader - Looks up a header value by lowercase name.
 * @returns {{score: number, reasons: string[]}}
 */
function checkHeaderAnomalies(parsed, getHeader) {
  const reasons = [];
  let score = 0;
  const flag = (reason, points) => {
    reasons.push(reason);
    score += points;
  };
  const MS_PER_HOUR = 1e3 * 60 * 60;
  const MS_PER_DAY = MS_PER_HOUR * 24;

  const hasMessageId = parsed.messageId || getHeader("message-id");
  if (!hasMessageId) {
    flag("MISSING_MESSAGE_ID", 1);
  }

  if (!parsed.date) {
    flag("MISSING_DATE", 1);
  } else {
    const sentAt = new Date(parsed.date);
    const current = /* @__PURE__ */ new Date();
    // An unparsable date makes every comparison below false.
    if (sentAt > current && (sentAt - current) / MS_PER_HOUR > 24) {
      flag("FUTURE_DATE", 2);
    }
    if ((current - sentAt) / MS_PER_DAY > 365) {
      flag("VERY_OLD_DATE", 1);
    }
  }

  const mailer = getHeader("x-mailer") || "";
  if (mailer && /mass mail|bulk mail|email blast/i.test(mailer)) {
    flag("SUSPICIOUS_MAILER", 1);
  }

  const mimeVersion = getHeader("mime-version");
  const needsMime = parsed.html || parsed.attachments?.length > 0;
  if (!mimeVersion && needsMime) {
    flag("MISSING_MIME_VERSION", 1);
  }

  const recipientTotal = (parsed.to?.value?.length || 0) + (parsed.cc?.value?.length || 0);
  if (recipientTotal > 50) {
    flag("EXCESSIVE_RECIPIENTS", 2);
  }

  return { score, reasons };
}
1597
/**
 * Scores the http(s) URLs found in `content`.
 *
 * Each distinct URL-level finding (IP host, suspicious TLD, non-default
 * port, length over 200, more than three subdomains, two or more
 * percent-escapes, unparsable URL) adds one point, counted once per message.
 * A single anchor whose visible text is a URL on a different host than its
 * href adds three points.
 *
 * @param {string} content - Text or HTML to scan.
 * @returns {{score: number, reasons: string[]}}
 */
function checkSuspiciousLinks(content) {
  const reasons = [];
  let score = 0;

  const foundUrls = content.match(/https?:\/\/[^\s<>"']+/gi) || [];
  if (foundUrls.length === 0) {
    return { score, reasons };
  }

  // A Set keeps one entry per finding type and preserves insertion order.
  const findings = /* @__PURE__ */ new Set();
  for (const rawUrl of foundUrls) {
    try {
      const target = new URL(rawUrl);
      const host = target.hostname.toLowerCase();
      const labels = host.split(".");
      const tld = labels[labels.length - 1];

      if (/^(?:\d+\.){3}\d+$/.test(host)) {
        findings.add("IP_ADDRESS_URL");
      }
      if (SUSPICIOUS_TLDS.has(tld)) {
        findings.add(`SUSPICIOUS_URL_TLD: ${tld}`);
      }
      // URL#port is "" for the scheme default, so any other value is explicit.
      if (target.port !== "" && target.port !== "80" && target.port !== "443") {
        findings.add("URL_WITH_PORT");
      }
      if (rawUrl.length > 200) {
        findings.add("VERY_LONG_URL");
      }
      if (labels.length - 2 > 3) {
        findings.add("EXCESSIVE_SUBDOMAINS");
      }
      // Two percent-escapes anywhere in the URL.
      if (/%[\da-f]{2}.*%[\da-f]{2}/i.test(rawUrl)) {
        findings.add("URL_OBFUSCATION");
      }
    } catch {
      findings.add("INVALID_URL");
    }
  }

  reasons.push(...findings);
  score += findings.size;

  const anchorPattern = /<a[^>]+href=["']([^"']+)["'][^>]*>([^<]+)<\/a>/gi;
  for (const [, href, label] of content.matchAll(anchorPattern)) {
    if (!/^https?:\/\//i.test(label)) {
      continue;
    }
    let hostsDiffer = false;
    try {
      hostsDiffer = new URL(label).hostname.toLowerCase() !== new URL(href).hostname.toLowerCase();
    } catch {
      // Unparsable label or href: skip this anchor.
    }
    if (hostsDiffer) {
      reasons.push("LINK_TEXT_URL_MISMATCH");
      score += 3;
      break;
    }
  }

  return { score, reasons };
}
1658
/**
 * Reduces a hostname to its registrable root domain using a small built-in
 * list of two-label public suffixes.
 *
 * The suffix list matches the one used by `parseRootDomain`, so both helpers
 * return the same root for the same host. With the shorter list used before,
 * `mail.example.org.uk` became `org.uk` here but `example.org.uk` there, so
 * comparisons between the two results could never match for such domains.
 *
 * @param {string} [hostname] - Hostname to reduce.
 * @returns {string} Lower-cased root domain, or "" for empty input.
 */
function getRootDomain(hostname) {
  if (!hostname) {
    return "";
  }

  const normalized = hostname.toLowerCase();
  const parts = normalized.split(".");
  if (parts.length <= 2) {
    return normalized;
  }

  const multiPartTlds = /* @__PURE__ */ new Set([
    "co.uk",
    "com.au",
    "co.nz",
    "co.jp",
    "com.br",
    "co.in",
    "org.uk",
    "net.au",
    "com.mx",
    "com.cn",
    "com.tw",
    "com.hk",
    "co.za",
    "com.sg"
  ]);
  const lastTwo = parts.slice(-2).join(".");
  // Keep one extra label when the last two form a known public suffix.
  const keep = multiPartTlds.has(lastTwo) ? 3 : 2;
  return parts.slice(-keep).join(".");
}
1673
/**
 * Returns the lower-cased domain part of an email address.
 *
 * The domain is everything after the LAST "@": a quoted local part may
 * itself contain "@" (e.g. `"a@b"@example.com`), and splitting on the first
 * one would return part of the local part as the host.
 *
 * @param {string} [address] - Email address.
 * @returns {string} Domain part, or "" when the input is empty or has no "@".
 */
function parseHostFromAddress(address) {
  if (!address) {
    return "";
  }

  const atIndex = address.lastIndexOf("@");
  if (atIndex === -1) {
    return "";
  }

  return address.slice(atIndex + 1).toLowerCase();
}
1683
/**
 * Pulls the host named after "from" out of the first Received header.
 *
 * Headers are read from `parsed.headers.get("received")` when `headers`
 * exposes a `get` method, otherwise from `parsed.headerLines`.
 *
 * @param {object} parsed - Parsed message.
 * @returns {(string|null)} Lower-cased hostname, or null when none is found.
 */
function extractClientHostname(parsed) {
  let rawReceived = null;
  if (parsed.headers?.get) {
    rawReceived = parsed.headers.get("received");
  } else if (parsed.headerLines) {
    // Drop the "Received:" field name and keep the value of each matching line.
    rawReceived = parsed.headerLines
      .filter((entry) => entry.key.toLowerCase() === "received")
      .map((entry) => entry.line?.split(":").slice(1).join(":").trim());
  }

  const firstReceived = Array.isArray(rawReceived) ? rawReceived[0] : rawReceived;
  if (!firstReceived) {
    return null;
  }

  const hit = firstReceived.match(/from\s+([^\s(]+)/i);
  return hit ? hit[1].toLowerCase() : null;
}
1704
/**
 * Pulls the bracketed client IP address out of the first Received header.
 *
 * Headers are read from `parsed.headers.get("received")` when `headers`
 * exposes a `get` method, otherwise from `parsed.headerLines`.
 *
 * IPv6 handling now accepts the `[IPv6:...]` address-literal form as well as
 * a bare `[2001:db8::1]`, and requires at least one colon so bracketed
 * non-address tokens such as `[12345]` are no longer reported as an IP.
 *
 * @param {object} parsed - Parsed message.
 * @returns {(string|null)} The address without brackets or prefix, or null.
 */
function extractRemoteIp(parsed) {
  let receivedHeaders = null;
  if (parsed.headers?.get) {
    receivedHeaders = parsed.headers.get("received");
  } else if (parsed.headerLines) {
    const headers = parsed.headerLines.filter((h) => h.key.toLowerCase() === "received");
    receivedHeaders = headers.map((h) => h.line?.split(":").slice(1).join(":").trim());
  }

  if (!receivedHeaders) {
    return null;
  }

  const received = Array.isArray(receivedHeaders) ? receivedHeaders[0] : receivedHeaders;
  if (!received) {
    return null;
  }

  const ipv4Match = received.match(/\[((?:\d+\.){3}\d+)]/);
  if (ipv4Match) {
    return ipv4Match[1];
  }

  // Optional "IPv6:" tag, then hex groups with at least one ":"; "." allows
  // an embedded IPv4 tail such as ::ffff:192.0.2.1.
  const ipv6Match = received.match(/\[(?:ipv6:)?([a-f\d:.]*:[a-f\d:.]*)]/i);
  return ipv6Match ? ipv6Match[1] : null;
}
1729
+
1730
+ // src/get-attributes.js
1731
+ var import_node_util4 = require("node:util");
1732
+ var debug4 = (0, import_node_util4.debuglog)("spamscanner:attributes");
1733
/**
 * Undoes an SRS0/SRS1 rewrite on an envelope address.
 *
 * For `SRS0=hash=tt=domain=local@forwarder` and the corresponding SRS1 form,
 * returns `local@domain`; any other address is returned unchanged.
 *
 * @param {string} [address] - Possibly SRS-rewritten address.
 * @returns {string} The original address, the input itself, or "" for empty input.
 */
function checkSRS(address) {
  if (!address) {
    return "";
  }

  // In both shapes capture 2 is the original domain and capture 3 the local part.
  const srsShapes = [
    /^srs0=[^=]+=([^=]+)=([^=]+)=([^@]+)@/i,
    /^srs1=[^=]+=[^=]+==[^=]+=([^=]+)=([^=]+)=([^@]+)@/i
  ];
  for (const shape of srsShapes) {
    const parts = address.match(shape);
    if (parts) {
      return `${parts[3]}@${parts[2]}`;
    }
  }

  return address;
}
1747
/**
 * Returns the lower-cased host for either a bare domain or an email address.
 *
 * For addresses, the host is everything after the LAST "@": a quoted local
 * part may itself contain "@", and splitting on the first one would return
 * part of the local part as the host.
 *
 * @param {string} [addressOrDomain] - Email address or domain name.
 * @returns {string} Lower-cased host, or "" for empty input.
 */
function parseHostFromDomainOrAddress(addressOrDomain) {
  if (!addressOrDomain) {
    return "";
  }

  const atIndex = addressOrDomain.lastIndexOf("@");
  const host = atIndex === -1 ? addressOrDomain : addressOrDomain.slice(atIndex + 1);
  return host.toLowerCase();
}
1757
/**
 * Reduces a hostname to its registrable root domain using a built-in list of
 * two-label public suffixes (co.uk, com.au, ...).
 *
 * @param {string} [hostname] - Hostname to reduce.
 * @returns {string} Lower-cased root domain, or "" for empty input.
 */
function parseRootDomain(hostname) {
  if (!hostname) {
    return "";
  }

  const lowered = hostname.toLowerCase();
  const labels = lowered.split(".");
  if (labels.length <= 2) {
    return lowered;
  }

  const twoLabelSuffixes = /* @__PURE__ */ new Set([
    "co.uk", "com.au", "co.nz", "co.jp", "com.br", "co.in", "org.uk",
    "net.au", "com.mx", "com.cn", "com.tw", "com.hk", "co.za", "com.sg"
  ]);
  const suffix = `${labels[labels.length - 2]}.${labels[labels.length - 1]}`;
  // Known suffix: keep three labels; otherwise keep the last two.
  const labelsToKeep = twoLabelSuffixes.has(suffix) ? 3 : 2;
  return labels.slice(-labelsToKeep).join(".");
}
1787
/**
 * Normalises the different shapes an address header can take into a flat
 * list of address values.
 *
 * Accepted shapes: an array of strings / `{address}` objects / `{value: [...]}`
 * groups, a single `{value: [{address}, ...]}` object, or a raw header string
 * (addresses are then picked out with a regex). Falsy entries are dropped.
 *
 * @param {*} headerValue - Header value in any of the shapes above.
 * @returns {Array} Addresses found; empty when none or the shape is unknown.
 */
function parseAddresses(headerValue) {
  if (!headerValue) {
    return [];
  }

  const addressesOf = (group) => group.map((member) => member.address).filter(Boolean);

  if (Array.isArray(headerValue)) {
    const pick = (entry) => {
      if (typeof entry === "string") {
        return entry;
      }
      if (entry.address) {
        return entry.address;
      }
      return entry.value && Array.isArray(entry.value) ? addressesOf(entry.value) : null;
    };
    // map + flat(1) flattens the per-group arrays exactly one level.
    return headerValue.map(pick).flat().filter(Boolean);
  }

  if (typeof headerValue === "string") {
    return headerValue.match(/[\w.+-]+@[\w.-]+\.[a-z]{2,}/gi) || [];
  }

  if (headerValue.value && Array.isArray(headerValue.value)) {
    return addressesOf(headerValue.value);
  }

  return [];
}
1814
/**
 * Reads one header as a string from any of the supported header containers.
 *
 * Lookup order:
 *  1. `headers.get(name)` when the container has a `get` method;
 *  2. `headers.headerLines` (raw `Key: value` lines);
 *  3. own keys of a plain object, compared case-insensitively.
 *
 * Array values in the `get` path (a header that occurs more than once, such
 * as Received) now resolve to their first entry, matching the plain-object
 * path. Previously they fell through every check and yielded null.
 *
 * @param {object} [headers] - Header container.
 * @param {string} name - Header name.
 * @returns {(string|null|undefined)} Header text, or null when absent
 *   (undefined only if a matching headerLines entry has no `line`).
 */
function getHeaders(headers, name) {
  if (!headers) {
    return null;
  }

  if (headers.get) {
    const raw = headers.get(name);
    // Repeated headers arrive as an array; use the first occurrence.
    const value = Array.isArray(raw) ? raw[0] : raw;
    if (value) {
      if (typeof value === "string") {
        return value;
      }

      if (value.text) {
        return value.text;
      }

      if (value.value && Array.isArray(value.value)) {
        return value.value.map((v) => v.address || v.text || v).join(", ");
      }
    }

    return null;
  }

  if (headers.headerLines) {
    const header = headers.headerLines.find((h) => h.key.toLowerCase() === name.toLowerCase());
    if (header) {
      // Strip the field name; later colons belong to the value.
      return header.line?.split(":").slice(1).join(":").trim();
    }
  }

  if (typeof headers === "object") {
    const key = Object.keys(headers).find((k) => k.toLowerCase() === name.toLowerCase());
    if (key) {
      const value = headers[key];
      if (typeof value === "string") {
        return value;
      }

      if (Array.isArray(value)) {
        return value[0];
      }
    }
  }

  return null;
}
1853
/**
 * Collects the identifiers of a message: client hostname, root hostname and
 * IP, plus From, Reply-To and envelope MAIL FROM addresses with their domains
 * and root domains.
 *
 * With `isAligned` false every identifier is included. With `isAligned` true
 * the From / Reply-To / MAIL FROM groups are only included when backed by a
 * passing SPF result or a DKIM signing domain for that group.
 *
 * @param {object} parsed - Parsed message (or its header container).
 * @param {object} [session] - Session state (client info, envelope, signingDomains, spfFromHeader).
 * @param {object} [options]
 * @param {boolean} [options.isAligned=false] - Restrict to authenticated identifiers.
 * @param {object|null} [options.authResults=null] - Auth results; `spf` is searched by domain.
 * @returns {Promise<string[]>} Unique, lower-cased, trimmed, non-empty identifiers.
 */
async function getAttributes(parsed, session = {}, options = {}) {
  const { isAligned = false, authResults = null } = options;
  const headers = parsed.headers || parsed;

  // One address -> its SRS-unwrapped form plus host and root domain.
  const describe = (address) => {
    const unwrapped = checkSRS(address);
    const host = parseHostFromDomainOrAddress(unwrapped);
    const root = parseRootDomain(host);
    return { host, root, values: [unwrapped.toLowerCase(), host, root] };
  };

  const replyToHeader = getHeaders(headers, "reply-to");
  const replyToSource = parsed.replyTo || (replyToHeader ? { value: [{ address: replyToHeader }] } : null);
  const replyToEntries = parseAddresses(replyToSource).map((address) => describe(address));
  const replyToValues = replyToEntries.flatMap((entry) => entry.values);

  const mailFromAddress = session.envelope?.mailFrom?.address;
  const mailFromEntry = mailFromAddress ? describe(mailFromAddress) : null;
  const mailFromValues = mailFromEntry ? mailFromEntry.values : [];

  const fromValues = [
    session.originalFromAddress,
    session.originalFromAddressDomain,
    session.originalFromAddressRootDomain
  ];

  const collected = [
    session.resolvedClientHostname,
    session.resolvedRootClientHostname,
    session.remoteAddress
  ];

  if (!isAligned) {
    collected.push(...fromValues, ...replyToValues, ...mailFromValues);
  } else {
    const signingDomains = session.signingDomains || /* @__PURE__ */ new Set();
    const isSignedBy = (host, root) => signingDomains.size > 0 && (signingDomains.has(host) || signingDomains.has(root));
    const hasSpfPass = (host, root) => {
      if (!authResults?.spf) {
        return false;
      }
      const record = authResults.spf.find((r) => r.domain === host || r.domain === root);
      return record?.result === "pass";
    };

    const fromIsAuthenticated = session.spfFromHeader?.status?.result === "pass"
      || isSignedBy(session.originalFromAddressDomain, session.originalFromAddressRootDomain);
    if (fromIsAuthenticated) {
      collected.push(...fromValues);
    }

    // One authenticated Reply-To address admits the whole Reply-To group.
    if (replyToEntries.some(({ host, root }) => isSignedBy(host, root) || hasSpfPass(host, root))) {
      collected.push(...replyToValues);
    }

    if (mailFromEntry && (isSignedBy(mailFromEntry.host, mailFromEntry.root) || hasSpfPass(mailFromEntry.host, mailFromEntry.root))) {
      collected.push(...mailFromValues);
    }
  }

  const unique = [
    ...new Set(
      collected
        .filter((candidate) => typeof candidate === "string" && candidate.length > 0)
        .map((candidate) => candidate.toLowerCase().trim())
    )
  ];
  debug4("Extracted %d unique attributes (isAligned=%s): %o", unique.length, isAligned, unique);
  return unique;
}
1943
/**
 * Builds a session object from a parsed message, filling in only the fields
 * that `existingSession` does not already provide.
 *
 * Derived fields: From address / domain / root domain, client hostname and
 * IP from the first Received header, and a synthetic envelope (MAIL FROM =
 * From address, RCPT TO = To + Cc addresses) when no envelope exists.
 *
 * Changes over the previous version: the Received header is looked up once
 * and shared by the hostname and IP extraction, and IPv6 clients written as
 * `[IPv6:...]` address literals are recognised (a colon is required, so
 * bracketed non-address tokens are ignored).
 *
 * @param {object} parsed - Parsed message (or its header container).
 * @param {object} [existingSession] - Known session fields; not mutated.
 * @returns {object} A new session object.
 */
function buildSessionFromParsed(parsed, existingSession = {}) {
  const session = { ...existingSession };
  const headers = parsed.headers || parsed;

  const fromHeader = getHeaders(headers, "from");
  const fromAddress = parseAddresses(parsed.from || fromHeader)[0];
  if (fromAddress && !session.originalFromAddress) {
    session.originalFromAddress = checkSRS(fromAddress).toLowerCase();
    session.originalFromAddressDomain = parseHostFromDomainOrAddress(session.originalFromAddress);
    session.originalFromAddressRootDomain = parseRootDomain(session.originalFromAddressDomain);
  }

  if (!session.resolvedClientHostname || !session.remoteAddress) {
    const receivedHeader = getHeaders(headers, "received");
    const received = Array.isArray(receivedHeader) ? receivedHeader[0] : receivedHeader;
    if (received) {
      if (!session.resolvedClientHostname) {
        const fromMatch = received.match(/from\s+([^\s(]+)/i);
        if (fromMatch) {
          session.resolvedClientHostname = fromMatch[1].toLowerCase();
          session.resolvedRootClientHostname = parseRootDomain(session.resolvedClientHostname);
        }
      }

      if (!session.remoteAddress) {
        // IPv4 first; then IPv6 with an optional "IPv6:" tag.
        const ipMatch = received.match(/\[((?:\d+\.){3}\d+)]/)
          || received.match(/\[(?:ipv6:)?([a-f\d:.]*:[a-f\d:.]*)]/i);
        if (ipMatch) {
          session.remoteAddress = ipMatch[1];
        }
      }
    }
  }

  if (!session.envelope) {
    // No SMTP envelope available: approximate it from the message headers.
    session.envelope = {
      mailFrom: { address: session.originalFromAddress || "" },
      rcptTo: []
    };
    const toAddresses = parseAddresses(parsed.to || getHeaders(headers, "to"));
    const ccAddresses = parseAddresses(parsed.cc || getHeaders(headers, "cc"));
    for (const addr of [...toAddresses, ...ccAddresses]) {
      if (addr) {
        session.envelope.rcptTo.push({ address: addr });
      }
    }
  }

  return session;
}
1995
/**
 * Convenience entry point: builds a session for `parsed`, folds DKIM/SPF
 * results into it, and returns the identifiers computed by `getAttributes`.
 *
 * @param {object} parsed - Parsed message.
 * @param {object} [options]
 * @param {boolean} [options.isAligned=false] - Forwarded to `getAttributes`.
 * @param {string} [options.senderIp] - Seeds `session.remoteAddress`.
 * @param {string} [options.senderHostname] - Seeds the client hostname fields.
 * @param {object} [options.authResults] - `dkim` / `spf` result lists.
 * @returns {Promise<{attributes: string[], session: object}>}
 */
async function extractAttributes(parsed, options = {}) {
  const { isAligned = false, senderIp, senderHostname, authResults } = options;

  const seed = {
    remoteAddress: senderIp,
    resolvedClientHostname: senderHostname,
    resolvedRootClientHostname: senderHostname ? parseRootDomain(senderHostname) : void 0
  };
  const session = buildSessionFromParsed(parsed, seed);

  if (authResults?.dkim) {
    // Record every passing signature's domain and its root domain.
    const signers = /* @__PURE__ */ new Set();
    session.signingDomains = signers;
    for (const entry of authResults.dkim) {
      if (entry.result === "pass" && entry.domain) {
        signers.add(entry.domain);
        signers.add(parseRootDomain(entry.domain));
      }
    }
    session.hadAlignedAndPassingDKIM = signers.has(session.originalFromAddressDomain)
      || signers.has(session.originalFromAddressRootDomain);
  }

  if (authResults?.spf) {
    const matchesFrom = (record) => record.domain === session.originalFromAddressDomain
      || record.domain === session.originalFromAddressRootDomain;
    const spfRecord = authResults.spf.find(matchesFrom);
    if (spfRecord) {
      session.spfFromHeader = { status: { result: spfRecord.result } };
    }
  }

  const attributes = await getAttributes(parsed, session, { isAligned, authResults });
  return { attributes, session };
}
2023
+
2024
+ // src/index.js
584
2025
  var import_meta = {};
585
- var __filename = (0, import_node_url.fileURLToPath)(import_meta.url);
586
- var __dirname = import_node_path.default.dirname(__filename);
587
- var executablesData = JSON.parse(import_node_fs3.default.readFileSync(import_node_path.default.join(__dirname, "..", "executables.json"), "utf8"));
2026
+ var __filename = import_meta.url ? (0, import_node_url.fileURLToPath)(import_meta.url) : "";
2027
+ var __dirname = __filename ? import_node_path.default.dirname(__filename) : import_node_process.default.cwd();
2028
/**
 * Walks up from `startDir` and returns the first directory containing a
 * package.json. The filesystem root itself is never inspected; when no
 * package.json is found below it, `startDir` is returned unchanged.
 *
 * @param {string} startDir - Directory to start from.
 * @returns {string} Nearest ancestor holding a package.json, or `startDir`.
 */
var findPackageRoot = (startDir) => {
  const pathApi = import_node_path.default;
  let candidate = startDir;
  let parent = pathApi.dirname(candidate);
  // dirname() of the root is the root itself, which ends the walk.
  while (candidate !== parent) {
    const manifest = pathApi.join(candidate, "package.json");
    if (import_node_fs3.default.existsSync(manifest)) {
      return candidate;
    }
    candidate = parent;
    parent = pathApi.dirname(candidate);
  }
  return startDir;
};
2038
+ var packageRoot = findPackageRoot(__dirname);
2039
+ var executablesData = JSON.parse(import_node_fs3.default.readFileSync(import_node_path.default.join(packageRoot, "executables.json"), "utf8"));
588
2040
  var EXECUTABLES = new Set(executablesData);
589
2041
  var getReplacements = async () => {
590
2042
  const { default: replacements2 } = await Promise.resolve().then(() => (init_replacements(), replacements_exports));
@@ -594,7 +2046,7 @@ var getClassifier = async () => {
594
2046
  const { default: classifier2 } = await Promise.resolve().then(() => (init_get_classifier(), get_classifier_exports));
595
2047
  return classifier2;
596
2048
  };
597
- var debug3 = (0, import_node_util3.debuglog)("spamscanner");
2049
+ var debug7 = (0, import_node_util7.debuglog)("spamscanner");
598
2050
  var GENERIC_TOKENIZER = /[^a-zá-úÁ-Úà-úÀ-Úñü\dа-яёæøåàáảãạăắằẳẵặâấầẩẫậéèẻẽẹêếềểễệíìỉĩịóòỏõọôốồổỗộơớờởỡợúùủũụưứừửữựýỳỷỹỵđäöëïîûœçążśźęćńł-]+/i;
599
2051
  var converter = new import_ascii_fullwidth_halfwidth_convert.default();
600
2052
  var chineseTokenizer = { tokenize: (text) => text.split(/\s+/) };
@@ -680,6 +2132,41 @@ var SpamScanner = class {
680
2132
  supportedLanguages: ["en"],
681
2133
  enableMixedLanguageDetection: false,
682
2134
  enableAdvancedPatternRecognition: true,
2135
+ // Authentication options (mailauth)
2136
+ enableAuthentication: false,
2137
+ authOptions: {
2138
+ ip: null,
2139
+ // Remote IP address (required for auth)
2140
+ helo: null,
2141
+ // HELO/EHLO hostname
2142
+ mta: "spamscanner",
2143
+ // MTA hostname
2144
+ sender: null,
2145
+ // Envelope sender (MAIL FROM)
2146
+ timeout: 1e4
2147
+ // DNS lookup timeout
2148
+ },
2149
+ authScoreWeights: {
2150
+ dkimPass: -2,
2151
+ dkimFail: 3,
2152
+ spfPass: -1,
2153
+ spfFail: 2,
2154
+ spfSoftfail: 1,
2155
+ dmarcPass: -2,
2156
+ dmarcFail: 4,
2157
+ arcPass: -1,
2158
+ arcFail: 1
2159
+ },
2160
+ // Reputation API options (Forward Email)
2161
+ enableReputation: false,
2162
+ reputationOptions: {
2163
+ apiUrl: "https://api.forwardemail.net/v1/reputation",
2164
+ timeout: 1e4,
2165
+ onlyAligned: true
2166
+ },
2167
+ // Arbitrary spam detection options
2168
+ enableArbitraryDetection: true,
2169
+ arbitraryThreshold: 5,
683
2170
  // Existing options
684
2171
  debug: false,
685
2172
  logger: console,
@@ -727,7 +2214,7 @@ var SpamScanner = class {
727
2214
  return Array.isArray(tokens) ? tokens : [];
728
2215
  };
729
2216
  } catch (error) {
730
- debug3("Failed to initialize classifier:", error);
2217
+ debug7("Failed to initialize classifier:", error);
731
2218
  this.classifier = new import_naivebayes2.default();
732
2219
  }
733
2220
  }
@@ -746,7 +2233,7 @@ var SpamScanner = class {
746
2233
  throw new Error("Invalid replacements format");
747
2234
  }
748
2235
  } catch (error) {
749
- debug3("Failed to initialize replacements:", error);
2236
+ debug7("Failed to initialize replacements:", error);
750
2237
  this.replacements = /* @__PURE__ */ new Map();
751
2238
  const basicReplacements = {
752
2239
  u: "you",
@@ -774,7 +2261,7 @@ var SpamScanner = class {
774
2261
  try {
775
2262
  this.clamscan = await new import_clamscan.default().init(this.config.clamscan);
776
2263
  } catch (error) {
777
- debug3("ClamScan initialization failed:", error);
2264
+ debug7("ClamScan initialization failed:", error);
778
2265
  return [];
779
2266
  }
780
2267
  }
@@ -798,7 +2285,7 @@ var SpamScanner = class {
798
2285
  }
799
2286
  }
800
2287
  } catch (error) {
801
- debug3("Virus scan error:", error);
2288
+ debug7("Virus scan error:", error);
802
2289
  }
803
2290
  }
804
2291
  return results;
@@ -921,7 +2408,7 @@ var SpamScanner = class {
921
2408
  });
922
2409
  }
923
2410
  } catch (error) {
924
- debug3("PDF JavaScript detection error:", error);
2411
+ debug7("PDF JavaScript detection error:", error);
925
2412
  }
926
2413
  }
927
2414
  }
@@ -1164,7 +2651,7 @@ var SpamScanner = class {
1164
2651
  isPrivate: parsed.isPrivate
1165
2652
  };
1166
2653
  } catch (error) {
1167
- debug3("tldts parsing error:", error);
2654
+ debug7("tldts parsing error:", error);
1168
2655
  return null;
1169
2656
  }
1170
2657
  }
@@ -1256,13 +2743,39 @@ var SpamScanner = class {
1256
2743
  }
1257
2744
  return text;
1258
2745
  }
1259
- // Main scan method - enhanced with performance metrics and new features
1260
- async scan(source) {
2746
+ // Main scan method - enhanced with performance metrics, auth, and reputation
2747
+ async scan(source, scanOptions = {}) {
1261
2748
  const startTime = Date.now();
1262
2749
  try {
1263
2750
  await this.initializeClassifier();
1264
2751
  await this.initializeReplacements();
1265
2752
  const { tokens, mail } = await this.getTokensAndMailFromSource(source);
2753
+ const authOptions = { ...this.config.authOptions, ...scanOptions.authOptions };
2754
+ const reputationOptions = { ...this.config.reputationOptions, ...scanOptions.reputationOptions };
2755
+ const detectionPromises = [
2756
+ this.getClassification(tokens),
2757
+ this.getPhishingResults(mail),
2758
+ this.getExecutableResults(mail),
2759
+ this.config.enableMacroDetection ? this.getMacroResults(mail) : [],
2760
+ this.config.enableArbitraryDetection ? this.getArbitraryResults(mail, { remoteAddress: authOptions.ip, resolvedClientHostname: authOptions.hostname }) : [],
2761
+ this.getVirusResults(mail),
2762
+ this.getPatternResults(mail),
2763
+ this.getIDNHomographResults(mail),
2764
+ this.getToxicityResults(mail),
2765
+ this.getNSFWResults(mail)
2766
+ ];
2767
+ const enableAuth = scanOptions.enableAuthentication ?? this.config.enableAuthentication;
2768
+ if (enableAuth && authOptions.ip) {
2769
+ detectionPromises.push(this.getAuthenticationResults(source, mail, authOptions));
2770
+ } else {
2771
+ detectionPromises.push(Promise.resolve(null));
2772
+ }
2773
+ const enableReputation = scanOptions.enableReputation ?? this.config.enableReputation;
2774
+ if (enableReputation) {
2775
+ detectionPromises.push(this.getReputationResults(mail, authOptions, reputationOptions));
2776
+ } else {
2777
+ detectionPromises.push(Promise.resolve(null));
2778
+ }
1266
2779
  const [
1267
2780
  classification,
1268
2781
  phishing,
@@ -1273,20 +2786,14 @@ var SpamScanner = class {
1273
2786
  patterns,
1274
2787
  idnHomographAttack,
1275
2788
  toxicity,
1276
- nsfw
1277
- ] = await Promise.all([
1278
- this.getClassification(tokens),
1279
- this.getPhishingResults(mail),
1280
- this.getExecutableResults(mail),
1281
- this.config.enableMacroDetection ? this.getMacroResults(mail) : [],
1282
- this.getArbitraryResults(mail),
1283
- this.getVirusResults(mail),
1284
- this.getPatternResults(mail),
1285
- this.getIDNHomographResults(mail),
1286
- this.getToxicityResults(mail),
1287
- this.getNSFWResults(mail)
1288
- ]);
1289
- const isSpam = classification.category === "spam" || phishing.length > 0 || executables.length > 0 || macros.length > 0 || arbitrary.length > 0 || viruses.length > 0 || patterns.length > 0 || idnHomographAttack && idnHomographAttack.detected || toxicity.length > 0 || nsfw.length > 0;
2789
+ nsfw,
2790
+ authResult,
2791
+ reputationResult
2792
+ ] = await Promise.all(detectionPromises);
2793
+ let isSpam = classification.category === "spam" || phishing.length > 0 || executables.length > 0 || macros.length > 0 || arbitrary.length > 0 || viruses.length > 0 || patterns.length > 0 || idnHomographAttack && idnHomographAttack.detected || toxicity.length > 0 || nsfw.length > 0;
2794
+ if (reputationResult && reputationResult.isDenylisted) {
2795
+ isSpam = true;
2796
+ }
1290
2797
  let message = "Ham";
1291
2798
  if (isSpam) {
1292
2799
  const reasons = [];
@@ -1320,7 +2827,14 @@ var SpamScanner = class {
1320
2827
  if (nsfw.length > 0) {
1321
2828
  reasons.push("NSFW content");
1322
2829
  }
2830
+ if (reputationResult?.isDenylisted) {
2831
+ reasons.push("denylisted sender");
2832
+ }
1323
2833
  message = `Spam (${(0, import_array_join_conjunction.default)(reasons)})`;
2834
+ } else if (reputationResult?.isTruthSource) {
2835
+ message = "Ham (truth source)";
2836
+ } else if (reputationResult?.isAllowlisted) {
2837
+ message = "Ham (allowlisted)";
1324
2838
  }
1325
2839
  const endTime = Date.now();
1326
2840
  const processingTime = endTime - startTime;
@@ -1340,7 +2854,9 @@ var SpamScanner = class {
1340
2854
  patterns,
1341
2855
  idnHomographAttack,
1342
2856
  toxicity,
1343
- nsfw
2857
+ nsfw,
2858
+ authentication: authResult,
2859
+ reputation: reputationResult
1344
2860
  },
1345
2861
  links: this.extractAllUrls(mail, source),
1346
2862
  tokens,
@@ -1362,10 +2878,85 @@ var SpamScanner = class {
1362
2878
  }
1363
2879
  return result;
1364
2880
  } catch (error) {
1365
- debug3("Scan error:", error);
2881
+ debug7("Scan error:", error);
1366
2882
  throw error;
1367
2883
  }
1368
2884
  }
2885
+ // Get authentication results using mailauth. Resolves to the authenticate() result merged with `score` and `authResultsHeader`, or to null when anything inside the try block throws.
2886
+ async getAuthenticationResults(source, mail, options = {}) { // options keys read below: sender, ip, helo, mta, timeout
2887
+ try {
2888
+ const messageBuffer = typeof source === "string" ? import_node_buffer2.Buffer.from(source) : source; // strings are wrapped in a Buffer; any other source is passed through unchanged
2889
+ const sender = options.sender || mail.from?.value?.[0]?.address || mail.from?.text; // explicit sender wins, then the first parsed From address, then the raw From text
2890
+ const authResult = await authenticate(messageBuffer, {
2891
+ ip: options.ip,
2892
+ helo: options.helo,
2893
+ mta: options.mta || "spamscanner", // same fallback is reused for the header formatted below
2894
+ sender,
2895
+ timeout: options.timeout || 1e4 // any falsy timeout (including 0) falls back to 10000; unit is not visible here — presumably milliseconds, confirm against authenticate()
2896
+ });
2897
+ const scoreResult = calculateAuthScore(authResult, this.config.authScoreWeights); // weights come from the scanner config
2898
+ return {
2899
+ ...authResult,
2900
+ score: scoreResult, // spread first, so this overrides any `score` key already on authResult
2901
+ authResultsHeader: formatAuthResultsHeader(authResult, options.mta || "spamscanner")
2902
+ };
2903
+ } catch (error) {
2904
+ debug7("Authentication error:", error); // failure is logged, not rethrown
2905
+ return null; // callers must handle a null authentication result
2906
+ }
2907
+ }
2908
+ // Get reputation results from Forward Email API. Resolves to the aggregated result plus `checkedValues`, `details` and `session`, or to null when there is nothing to check or an error is thrown.
2909
+ // Uses get-attributes module to extract comprehensive attributes for checking; the envelope sender and Reply-To addresses (and their domains) are appended to that list below.
2910
+ async getReputationResults(mail, authOptions = {}, reputationOptions = {}) { // authOptions keys read: ip, hostname, authResults, sender; reputationOptions: onlyAligned, and the whole object is forwarded to checkReputationBatch
2911
+ try {
2912
+ const { attributes, session } = await extractAttributes(mail, {
2913
+ isAligned: reputationOptions.onlyAligned ?? true, // defaults to true only when onlyAligned is null/undefined; an explicit false is kept
2914
+ senderIp: authOptions.ip,
2915
+ senderHostname: authOptions.hostname,
2916
+ authResults: authOptions.authResults
2917
+ });
2918
+ const valuesToCheck = [...attributes]; // copy, so the pushes below do not mutate the extracted list
2919
+ if (authOptions.sender) {
2920
+ const senderLower = authOptions.sender.toLowerCase(); // assumes sender is a string; a non-string would throw here and end up in the catch below
2921
+ if (!valuesToCheck.includes(senderLower)) {
2922
+ valuesToCheck.push(senderLower);
2923
+ }
2924
+ const envelopeDomain = senderLower.split("@")[1]; // NOTE(review): this is the segment between the first and second "@"; an address with "@" inside a quoted local part would yield the wrong domain
2925
+ if (envelopeDomain && !valuesToCheck.includes(envelopeDomain)) {
2926
+ valuesToCheck.push(envelopeDomain);
2927
+ }
2928
+ }
2929
+ const replyTo = mail.replyTo?.value || []; // no Reply-To header (or no parsed value) means nothing extra to add
2930
+ for (const addr of replyTo) {
2931
+ if (addr.address) {
2932
+ const addrLower = addr.address.toLowerCase();
2933
+ if (!valuesToCheck.includes(addrLower)) {
2934
+ valuesToCheck.push(addrLower);
2935
+ }
2936
+ const domain = addrLower.split("@")[1]; // same first-"@" split as for the envelope sender above
2937
+ if (domain && !valuesToCheck.includes(domain)) {
2938
+ valuesToCheck.push(domain);
2939
+ }
2940
+ }
2941
+ }
2942
+ if (valuesToCheck.length === 0) {
2943
+ return null; // nothing to look up, so no batch call is made
2944
+ }
2945
+ debug7("Checking reputation for %d attributes: %o", valuesToCheck.length, valuesToCheck);
2946
+ const resultsMap = await checkReputationBatch(valuesToCheck, reputationOptions); // used below via .values() and Object.fromEntries, i.e. an iterable of [key, result] pairs — presumably a Map
2947
+ const aggregated = aggregateReputationResults([...resultsMap.values()]);
2948
+ return {
2949
+ ...aggregated,
2950
+ checkedValues: valuesToCheck,
2951
+ details: Object.fromEntries(resultsMap), // per-value results as a plain object
2952
+ session
2953
+ // Include session info for debugging
2954
+ };
2955
+ } catch (error) {
2956
+ debug7("Reputation check error:", error); // failure is logged, not rethrown
2957
+ return null; // callers must handle a null reputation result
2958
+ }
2959
+ }
1369
2960
  // Get pattern recognition results
1370
2961
  async getPatternResults(mail) {
1371
2962
  const results = [];
@@ -1402,7 +2993,7 @@ var SpamScanner = class {
1402
2993
  try {
1403
2994
  mail = await (0, import_mailparser.simpleParser)(source);
1404
2995
  } catch (error) {
1405
- debug3("Mail parsing error:", error);
2996
+ debug7("Mail parsing error:", error);
1406
2997
  mail = {
1407
2998
  text: source,
1408
2999
  html: "",
@@ -1433,7 +3024,7 @@ var SpamScanner = class {
1433
3024
  // Default probability
1434
3025
  };
1435
3026
  } catch (error) {
1436
- debug3("Classification error:", error);
3027
+ debug7("Classification error:", error);
1437
3028
  return {
1438
3029
  category: "ham",
1439
3030
  probability: 0.5
@@ -1489,7 +3080,7 @@ var SpamScanner = class {
1489
3080
  }
1490
3081
  }
1491
3082
  } catch (error) {
1492
- debug3("Phishing check error:", error);
3083
+ debug7("Phishing check error:", error);
1493
3084
  }
1494
3085
  }
1495
3086
  return results;
@@ -1533,14 +3124,15 @@ var SpamScanner = class {
1533
3124
  });
1534
3125
  }
1535
3126
  } catch (error) {
1536
- debug3("File type detection error:", error);
3127
+ debug7("File type detection error:", error);
1537
3128
  }
1538
3129
  }
1539
3130
  }
1540
3131
  return results;
1541
3132
  }
1542
- // Arbitrary results (GTUBE, etc.)
1543
- async getArbitraryResults(mail) {
3133
+ // Arbitrary results (GTUBE, spam patterns, etc.)
3134
+ // Updated to use session info for Microsoft Exchange spam detection
3135
+ async getArbitraryResults(mail, sessionInfo = {}) {
1544
3136
  const results = [];
1545
3137
  let content = (mail.text || "") + (mail.html || "");
1546
3138
  if (mail.headerLines && Array.isArray(mail.headerLines)) {
@@ -1553,9 +3145,42 @@ var SpamScanner = class {
1553
3145
  if (content.includes("XJS*C4JDBQADN1.NSBN3*2IDNEN*GTUBE-STANDARD-ANTI-UBE-TEST-EMAIL*C.34X")) {
1554
3146
  results.push({
1555
3147
  type: "arbitrary",
1556
- description: "GTUBE spam test pattern detected"
3148
+ subtype: "gtube",
3149
+ description: "GTUBE spam test pattern detected",
3150
+ score: 100
1557
3151
  });
1558
3152
  }
3153
+ try {
3154
+ const session = buildSessionInfo(mail, sessionInfo);
3155
+ const arbitraryResult = isArbitrary(mail, {
3156
+ threshold: this.config.arbitraryThreshold || 5,
3157
+ checkSubject: true,
3158
+ checkBody: true,
3159
+ checkSender: true,
3160
+ checkHeaders: true,
3161
+ checkLinks: true,
3162
+ checkMicrosoftHeaders: true,
3163
+ // Enable Microsoft Exchange spam detection
3164
+ checkVendorSpam: true,
3165
+ // Enable vendor-specific spam detection
3166
+ checkSpoofing: true,
3167
+ // Enable spoofing attack detection
3168
+ session
3169
+ // Pass session info for advanced checks
3170
+ });
3171
+ if (arbitraryResult.isArbitrary) {
3172
+ results.push({
3173
+ type: "arbitrary",
3174
+ subtype: arbitraryResult.category ? arbitraryResult.category.toLowerCase() : "pattern",
3175
+ description: `Arbitrary spam patterns detected (score: ${arbitraryResult.score})`,
3176
+ score: arbitraryResult.score,
3177
+ reasons: arbitraryResult.reasons,
3178
+ category: arbitraryResult.category
3179
+ });
3180
+ }
3181
+ } catch (error) {
3182
+ debug7("Arbitrary detection error:", error);
3183
+ }
1559
3184
  return results;
1560
3185
  }
1561
3186
  // Parse and normalize locale
@@ -1621,7 +3246,7 @@ var SpamScanner = class {
1621
3246
  result.riskScore = Math.max(result.riskScore, analysis.riskScore);
1622
3247
  }
1623
3248
  } catch (error) {
1624
- debug3("IDN analysis error for URL:", url, error);
3249
+ debug7("IDN analysis error for URL:", url, error);
1625
3250
  }
1626
3251
  }
1627
3252
  if (result.detected) {
@@ -1638,7 +3263,7 @@ var SpamScanner = class {
1638
3263
  result.details.push(...allRiskFactors);
1639
3264
  }
1640
3265
  } catch (error) {
1641
- debug3("IDN homograph detection error:", error);
3266
+ debug7("IDN homograph detection error:", error);
1642
3267
  }
1643
3268
  return result;
1644
3269
  }
@@ -1654,7 +3279,7 @@ var SpamScanner = class {
1654
3279
  enableContextAnalysis: true
1655
3280
  });
1656
3281
  } catch (error) {
1657
- debug3("Failed to load IDN detector:", error);
3282
+ debug7("Failed to load IDN detector:", error);
1658
3283
  return null;
1659
3284
  }
1660
3285
  }
@@ -1706,7 +3331,7 @@ var SpamScanner = class {
1706
3331
  }
1707
3332
  return detected;
1708
3333
  } catch (error) {
1709
- debug3("Language detection error:", error);
3334
+ debug7("Language detection error:", error);
1710
3335
  try {
1711
3336
  const landeResult = (0, import_lande.default)(text);
1712
3337
  if (landeResult && landeResult.length > 0) {
@@ -1767,7 +3392,7 @@ var SpamScanner = class {
1767
3392
  }
1768
3393
  }
1769
3394
  } catch (error) {
1770
- debug3("Toxicity detection error:", error);
3395
+ debug7("Toxicity detection error:", error);
1771
3396
  }
1772
3397
  return results;
1773
3398
  }
@@ -1817,11 +3442,11 @@ var SpamScanner = class {
1817
3442
  }
1818
3443
  }
1819
3444
  } catch (error) {
1820
- debug3("NSFW detection error for attachment:", attachment.filename, error);
3445
+ debug7("NSFW detection error for attachment:", attachment.filename, error);
1821
3446
  }
1822
3447
  }
1823
3448
  } catch (error) {
1824
- debug3("NSFW detection error:", error);
3449
+ debug7("NSFW detection error:", error);
1825
3450
  }
1826
3451
  return results;
1827
3452
  }
@@ -2009,4 +3634,4 @@ var SpamScanner = class {
2009
3634
  }
2010
3635
  };
2011
3636
  var index_default = SpamScanner;
2012
- //# sourceMappingURL=index.js.map
3637
+ //# sourceMappingURL=index.cjs.map