@agenticmail/core 0.5.42 → 0.5.44

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,623 @@
1
+ // src/mail/spam-filter.ts
2
/** Total score at or above which `scoreEmail` reports a warning (while still below the spam cut-off). */
var WARNING_THRESHOLD = 20;
/** Total score at or above which `scoreEmail` reports the message as spam. */
var SPAM_THRESHOLD = 40;
4
/**
 * Decide whether a message comes from one of the local (internal) domains.
 *
 * A message is internal when the domain of its first From address is
 * "localhost" or one of `localDomains` (compared case-insensitively) AND no
 * Reply-To address points at a non-internal domain.
 *
 * @param {{from?: Array<{address?: string}>, replyTo?: Array<{address?: string}>}} email
 *   Parsed message; only `from` and `replyTo` are read.
 * @param {string[]} [localDomains] Additional domains to treat as internal.
 * @returns {boolean} `true` when the sender is internal and replies stay internal.
 */
function isInternalEmail(email, localDomains) {
  // The domain is whatever follows the LAST "@". Taking the segment after the
  // first "@" would classify "x@localhost@evil.com" as coming from localhost.
  const domainOf = (address) => {
    if (typeof address !== "string") return void 0;
    const at = address.lastIndexOf("@");
    if (at === -1) return void 0;
    const domain = address.slice(at + 1).toLowerCase();
    return domain || void 0;
  };
  const fromDomain = domainOf(email.from?.[0]?.address);
  if (!fromDomain) return false;
  const internals = /* @__PURE__ */ new Set(["localhost", ...(localDomains ?? []).map((d) => d.toLowerCase())]);
  if (!internals.has(fromDomain)) return false;
  // An internal From whose replies would leave the local domains is not
  // trusted as internal. Every Reply-To entry is checked, not just the first,
  // so an external address cannot hide behind a leading internal one.
  const hasExternalReplyTo = (email.replyTo ?? []).some((entry) => {
    const replyDomain = domainOf(entry?.address);
    return replyDomain !== void 0 && !internals.has(replyDomain);
  });
  return !hasExternalReplyTo;
}
14
// --- Prompt-injection patterns ---
var RE_IGNORE_INSTRUCTIONS = /ignore\s+(all\s+)?(previous|prior|above)\s+(instructions|prompts|rules)/i;
var RE_YOU_ARE_NOW = /you\s+are\s+now\s+(a|an|the|my)\b/i;
var RE_SYSTEM_DELIMITER = /\[SYSTEM\]|\[INST\]|<<SYS>>|<\|im_start\|>/i;
var RE_NEW_INSTRUCTIONS = /new\s+instructions?:|override\s+instructions?:/i;
var RE_ACT_AS = /act\s+as\s+(a|an|if)|pretend\s+(to be|you\s+are)/i;
var RE_DO_NOT_MENTION = /do\s+not\s+(mention|tell|reveal|disclose)\s+(that|this)/i;
// Unicode "tag" code points (U+E0001..U+E007F); needs the `u` flag for astral ranges.
var RE_TAG_CHARS = /[\u{E0001}-\u{E007F}]/u;
// Three or more consecutive zero-width characters.
var RE_DENSE_ZWC = /[\u200B\u200C\u200D\uFEFF]{3,}/;
// "DAN" is matched case-sensitively so that the first name "Dan" does not trip
// the jailbreak rule; the remaining phrases stay case-insensitive. A single
// regex cannot mix both, so RE_JAILBREAK is a small object exposing the same
// `.test(text)` method the rules call.
var RE_JAILBREAK_DAN = /\bDAN\b/;
var RE_JAILBREAK_PHRASES = /\b(jailbreak|bypass\s+(safety|filter|restriction)|unlimited\s+mode)\b/i;
var RE_JAILBREAK = {
  test: (text) => RE_JAILBREAK_DAN.test(text) || RE_JAILBREAK_PHRASES.test(text)
};
var RE_BASE64_BLOCK = /[A-Za-z0-9+/]{100,}={0,2}/;
var RE_MARKDOWN_INJECTION = /```(?:system|python\s+exec|bash\s+exec)/i;

// --- Social-engineering patterns ---
var RE_OWNER_IMPERSONATION = /your\s+(owner|creator|admin|boss|master|human)\s+(asked|told|wants|said|instructed|needs)/i;
var RE_SECRET_REQUEST = /share\s+(your|the)\s+(api.?key|password|secret|credential|token)/i;
var RE_IMPERSONATE_SYSTEM = /this\s+is\s+(a|an)\s+(system|security|admin|automated)\s+(message|alert|notification)/i;
var RE_URGENCY = /\b(urgent|immediately|right now|asap|deadline|expires?|last chance|act now|time.?sensitive)\b/i;
// The alternatives are word stems ("deactivat", "unauthori[zs]"), so the match
// must be allowed to run to the end of the word. With a trailing `\b` directly
// after the group, "deactivated", "unauthorized", "suspended", ... never matched.
var RE_AUTHORITY = /\b(suspend|terminate|deactivat|unauthori[zs]|locked|compromised|breach|violation|legal action)\w*/i;
var RE_MONEY_REQUEST = /send\s+(me|us)\s+\$?\d|wire\s+transfer|western\s+union|money\s*gram/i;
var RE_GIFT_CARD = /buy\s+(me\s+)?gift\s*cards?|itunes\s+cards?|google\s+play\s+cards?/i;
var RE_CEO_FRAUD = /\b(CEO|CFO|CTO|director|executive)\b.*\b(wire|transfer|payment|urgent)\b/i;

// --- Data-exfiltration patterns ---
var RE_FORWARD_ALL = /forward\s+(all|every)\s+(email|message)/i;
var RE_SEARCH_CREDS = /search\s+(inbox|email|mailbox).*password|find.*credential/i;
var RE_SEND_TO_EXTERNAL = /send\s+(the|all|every).*to\s+\S+@\S+/i;
var RE_DUMP_INSTRUCTIONS = /reveal.*system\s+prompt|dump.*instructions|show.*system\s+prompt|print.*instructions/i;
// The keyword must appear inside the URL's host part. `[^/\s]*` (rather than
// `[^/]*`) stops the host at whitespace, so prose following an unrelated URL
// ("https://example.com ... our webhook docs") is not treated as a match.
var RE_WEBHOOK_EXFIL = /https?:\/\/[^/\s]*(webhook|ngrok|pipedream|requestbin|hookbin)/i;

// --- Phishing / link patterns ---
var RE_CREDENTIAL_HARVEST = /verify\s+your\s+(account|identity|password|credentials?)/i;
// NOTE: the next three carry the `g` flag and are therefore stateful
// (`lastIndex`) when used with `.test()` / `.exec()`.
var RE_LINK_TAG = /<a\s[^>]*href\s*=\s*["']([^"']+)["']/gi;
var RE_LINK_TAG_WITH_TEXT = /<a\s[^>]*href\s*=\s*["']([^"']+)["'][^>]*>(.*?)<\/a>/gi;
var RE_URL_IN_TEXT = /https?:\/\/[^\s<>"]+/gi;
var RE_IP_URL = /https?:\/\/\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/i;
var RE_URL_SHORTENER = /https?:\/\/(bit\.ly|t\.co|tinyurl\.com|goo\.gl|ow\.ly|is\.gd|buff\.ly|rebrand\.ly|shorturl\.at)\//i;
var RE_DATA_URI = /(?:data:text\/html|javascript:)/i;
var RE_LOGIN_URGENCY = /(click\s+here|sign\s+in|log\s*in).*\b(urgent|immediately|expire|suspend|locked)/i;

// --- Content-spam patterns ---
var RE_PHARMACY_SPAM = /\b(viagra|cialis|pharmacy|prescription|cheap\s+meds|online\s+pharmacy)\b/i;
var RE_WEIGHT_LOSS = /\b(weight\s+loss|diet\s+pill|lose\s+\d+\s+(lbs?|pounds|kg)|fat\s+burn)\b/i;
var RE_LOTTERY_SCAM = /you\s+(have\s+)?(won|been\s+selected)|lottery|million\s+dollars|nigerian?\s+prince/i;
var RE_CRYPTO_SCAM = /(bitcoin|crypto|ethereum).*invest(ment)?|guaranteed\s+returns|double\s+your\s+(money|bitcoin|crypto)/i;

// --- Attachment filename patterns ---
var RE_EXECUTABLE_EXT = /\.(exe|bat|cmd|ps1|sh|dll|scr|vbs|js|msi|com)$/i;
var RE_DOUBLE_EXT = /\.\w{2,5}\.(exe|bat|cmd|ps1|sh|dll|scr|vbs|js|msi|com)$/i;
var RE_ARCHIVE_EXT = /\.(zip|rar|7z|tar\.gz|tgz)$/i;
var RE_HTML_ATTACHMENT_EXT = /\.(html?|svg)$/i;
54
// Brand keyword -> domains that brand may legitimately send from.
// Used by the "ph_spoofed_sender" rule: a display name containing the keyword
// must come from one of the listed domains (or a subdomain of one).
var BRAND_DOMAINS = {
  "google": [
    "google.com",
    "gmail.com",
    "googlemail.com"
  ],
  "microsoft": [
    "microsoft.com",
    "outlook.com",
    "hotmail.com",
    "live.com"
  ],
  "apple": [
    "apple.com",
    "icloud.com"
  ],
  "amazon": [
    "amazon.com",
    "amazon.co.uk",
    "amazon.de"
  ],
  "paypal": [
    "paypal.com"
  ],
  "meta": [
    "facebook.com",
    "meta.com",
    "instagram.com"
  ],
  "netflix": [
    "netflix.com"
  ],
  "bank": [
    "chase.com",
    "wellsfargo.com",
    "bankofamerica.com",
    "citibank.com"
  ]
};
64
// Lower-case words and phrases counted by countSpamWords(); each entry is
// counted at most once per message.
var SPAM_WORDS = [
  // prizes and giveaways
  "congratulations", "winner", "prize", "claim", "free", "offer",
  // pressure and "no risk" phrasing
  "limited time", "act now", "click here", "no obligation", "risk free", "guaranteed",
  // advance-fee vocabulary
  "million", "billion", "inheritance", "beneficiary", "wire transfer", "western union",
  // stock salutations and phrasing
  "dear friend", "dear sir", "kindly", "revert back", "do the needful", "humbly", "esteemed",
  // money-making pitches
  "investment opportunity", "double your", "earn money", "work from home", "make money", "cash bonus",
  // pricing bait
  "discount", "lowest price"
];
99
/**
 * Count how many distinct SPAM_WORDS entries occur in `text`.
 *
 * Matching is a case-insensitive substring test, and each entry contributes
 * at most 1 no matter how often it appears.
 *
 * @param {string} text Text to scan.
 * @returns {number} Number of SPAM_WORDS entries found.
 */
function countSpamWords(text) {
  const haystack = text.toLowerCase();
  return SPAM_WORDS.reduce(
    (hits, phrase) => (haystack.includes(phrase) ? hits + 1 : hits),
    0
  );
}
107
/**
 * Heuristic check for look-alike (homograph) domains.
 *
 * Flags a domain when any of its dot-separated labels is punycode-encoded
 * (starts with "xn--", in any letter case) or when the domain mixes Cyrillic
 * and Latin letters.
 *
 * @param {string} domain Domain part of an address (no "@").
 * @returns {boolean} `true` when the domain looks like a homograph candidate.
 */
function hasHomographChars(domain) {
  // The "xn--" prefix applies per label, so it can appear on a subdomain or
  // the TLD ("mail.xn--pple-43d.com"), not only at the start of the domain.
  const labels = domain.toLowerCase().split(".");
  if (labels.some((label) => label.startsWith("xn--"))) return true;
  const hasCyrillic = /[\u0400-\u04FF]/.test(domain);
  const hasLatin = /[a-zA-Z]/.test(domain);
  return hasCyrillic && hasLatin;
}
113
/**
 * Scoring rules applied to every message by scoreEmail().
 *
 * Each rule has:
 *   id          - stable rule identifier reported in match results
 *   category    - category the rule's score is attributed to
 *   score       - points added when the rule fires
 *   description - human-readable explanation
 *   test(email, text, html) - returns truthy when the rule fires; `text` and
 *                 `html` are the strings scoreEmail() passes as 2nd/3rd args
 *
 * Several module-level patterns (RE_LINK_TAG, RE_LINK_TAG_WITH_TEXT,
 * RE_URL_IN_TEXT) carry the `g` flag and therefore keep `lastIndex` state
 * between `.test()` / `.exec()` calls. Rules below never call `.test()` on
 * them and never leave `lastIndex` modified, so one message cannot influence
 * the result for the next.
 */
var RULES = [
  // === Prompt injection ===
  {
    id: "pi_ignore_instructions",
    category: "prompt_injection",
    score: 25,
    description: 'Contains "ignore previous instructions" pattern',
    test: (_e, text) => RE_IGNORE_INSTRUCTIONS.test(text)
  },
  {
    id: "pi_you_are_now",
    category: "prompt_injection",
    score: 25,
    description: 'Contains "you are now a..." roleplay injection',
    test: (_e, text) => RE_YOU_ARE_NOW.test(text)
  },
  {
    id: "pi_system_delimiter",
    category: "prompt_injection",
    score: 20,
    description: "Contains LLM system delimiters ([SYSTEM], [INST], etc.)",
    test: (_e, text, html) => RE_SYSTEM_DELIMITER.test(text) || RE_SYSTEM_DELIMITER.test(html)
  },
  {
    id: "pi_new_instructions",
    category: "prompt_injection",
    score: 20,
    description: 'Contains "new instructions:" or "override instructions:"',
    test: (_e, text) => RE_NEW_INSTRUCTIONS.test(text)
  },
  {
    id: "pi_act_as",
    category: "prompt_injection",
    score: 15,
    description: 'Contains "act as" or "pretend to be" injection',
    test: (_e, text) => RE_ACT_AS.test(text)
  },
  {
    id: "pi_do_not_mention",
    category: "prompt_injection",
    score: 15,
    description: 'Contains "do not mention/tell/reveal" suppression',
    test: (_e, text) => RE_DO_NOT_MENTION.test(text)
  },
  {
    id: "pi_invisible_unicode",
    category: "prompt_injection",
    score: 20,
    description: "Contains invisible Unicode tag characters or dense zero-width chars",
    test: (_e, text, html) => RE_TAG_CHARS.test(text) || RE_TAG_CHARS.test(html) || RE_DENSE_ZWC.test(text) || RE_DENSE_ZWC.test(html)
  },
  {
    id: "pi_jailbreak",
    category: "prompt_injection",
    score: 20,
    description: "Contains jailbreak/DAN/bypass safety language",
    test: (_e, text) => RE_JAILBREAK.test(text)
  },
  {
    id: "pi_base64_injection",
    category: "prompt_injection",
    score: 15,
    description: "Contains long base64-encoded blocks (potential hidden instructions)",
    test: (_e, text) => RE_BASE64_BLOCK.test(text)
  },
  {
    id: "pi_markdown_injection",
    category: "prompt_injection",
    score: 10,
    description: "Contains code block injection attempts (```system, ```python exec)",
    test: (_e, text) => RE_MARKDOWN_INJECTION.test(text)
  },
  // === Social engineering ===
  {
    id: "se_owner_impersonation",
    category: "social_engineering",
    score: 20,
    description: "Claims to speak on behalf of the agent's owner",
    test: (_e, text) => RE_OWNER_IMPERSONATION.test(text)
  },
  {
    id: "se_secret_request",
    category: "social_engineering",
    score: 15,
    description: "Requests API keys, passwords, or credentials",
    test: (_e, text) => RE_SECRET_REQUEST.test(text)
  },
  {
    id: "se_impersonate_system",
    category: "social_engineering",
    score: 15,
    description: "Impersonates a system/security message",
    test: (_e, text) => RE_IMPERSONATE_SYSTEM.test(text)
  },
  {
    id: "se_urgency_authority",
    category: "social_engineering",
    score: 10,
    description: "Combines urgency language with authority/threat language",
    test: (_e, text) => RE_URGENCY.test(text) && RE_AUTHORITY.test(text)
  },
  {
    id: "se_money_request",
    category: "social_engineering",
    score: 15,
    description: "Requests money transfer or wire",
    test: (_e, text) => RE_MONEY_REQUEST.test(text)
  },
  {
    id: "se_gift_card",
    category: "social_engineering",
    score: 20,
    description: "Requests purchase of gift cards",
    test: (_e, text) => RE_GIFT_CARD.test(text)
  },
  {
    id: "se_ceo_fraud",
    category: "social_engineering",
    score: 15,
    description: "BEC pattern: executive title + payment/wire/urgent",
    test: (_e, text) => RE_CEO_FRAUD.test(text)
  },
  // === Data exfiltration ===
  {
    id: "de_forward_all",
    category: "data_exfiltration",
    score: 20,
    description: "Requests forwarding all emails",
    test: (_e, text) => RE_FORWARD_ALL.test(text)
  },
  {
    id: "de_search_credentials",
    category: "data_exfiltration",
    score: 20,
    description: "Requests searching inbox for passwords/credentials",
    test: (_e, text) => RE_SEARCH_CREDS.test(text)
  },
  {
    id: "de_send_to_external",
    category: "data_exfiltration",
    score: 15,
    description: "Instructs sending data to an external email address",
    test: (_e, text) => RE_SEND_TO_EXTERNAL.test(text)
  },
  {
    id: "de_dump_instructions",
    category: "data_exfiltration",
    score: 15,
    description: "Attempts to extract system prompt or instructions",
    test: (_e, text) => RE_DUMP_INSTRUCTIONS.test(text)
  },
  {
    id: "de_webhook_exfil",
    category: "data_exfiltration",
    score: 15,
    description: "Contains webhook/ngrok/pipedream exfiltration URLs",
    test: (_e, text) => RE_WEBHOOK_EXFIL.test(text)
  },
  // === Phishing ===
  {
    id: "ph_spoofed_sender",
    category: "phishing",
    score: 10,
    description: "Sender name contains brand but domain doesn't match",
    test: (email) => {
      const from = email.from[0];
      if (!from) return false;
      const name = (from.name ?? "").toLowerCase();
      const domain = (from.address ?? "").split("@")[1]?.toLowerCase() ?? "";
      for (const [brand, domains] of Object.entries(BRAND_DOMAINS)) {
        if (name.includes(brand) && !domains.some((d) => domain === d || domain.endsWith("." + d))) {
          return true;
        }
      }
      return false;
    }
  },
  {
    id: "ph_credential_harvest",
    category: "phishing",
    score: 15,
    description: 'Asks to "verify your account/password" with links present',
    test: (_e, text, html) => {
      if (!RE_CREDENTIAL_HARVEST.test(text)) return false;
      // Both link patterns are global (`g`) regexes. `.test()` on them resumes
      // from the previous call's `lastIndex`, which made this rule miss links
      // on later messages. `String.prototype.search` always scans from index 0
      // and leaves `lastIndex` untouched.
      return text.search(RE_URL_IN_TEXT) !== -1 || html.search(RE_LINK_TAG) !== -1;
    }
  },
  {
    id: "ph_suspicious_links",
    category: "phishing",
    score: 10,
    description: "Contains links with IP addresses, URL shorteners, or excessive subdomains",
    test: (_e, text, html) => {
      const allText = text + " " + html;
      if (RE_IP_URL.test(allText)) return true;
      if (RE_URL_SHORTENER.test(allText)) return true;
      const urls = allText.match(RE_URL_IN_TEXT) ?? [];
      for (const url of urls) {
        try {
          const hostname = new URL(url).hostname;
          // More than four dot-separated labels counts as "excessive subdomains".
          if (hostname.split(".").length > 4) return true;
        } catch {
          // Not parseable as a URL; skip this candidate.
        }
      }
      return false;
    }
  },
  {
    id: "ph_data_uri",
    category: "phishing",
    score: 15,
    description: "Contains data: or javascript: URIs in links",
    test: (_e, _text, html) => {
      // matchAll() iterates over a copy of the regex that starts at the
      // original's lastIndex, so reset it first; the shared regex itself is
      // never advanced, even when we return early.
      RE_LINK_TAG.lastIndex = 0;
      for (const match of html.matchAll(RE_LINK_TAG)) {
        if (RE_DATA_URI.test(match[1])) return true;
      }
      return false;
    }
  },
  {
    id: "ph_homograph",
    category: "phishing",
    score: 15,
    description: "From domain contains mixed-script or punycode characters",
    test: (email) => {
      const domain = email.from[0]?.address?.split("@")[1] ?? "";
      if (!domain) return false;
      return hasHomographChars(domain);
    }
  },
  {
    id: "ph_mismatched_display_url",
    category: "phishing",
    score: 10,
    description: "HTML link text shows one URL but href points to a different domain",
    test: (_e, _text, html) => {
      // See ph_data_uri: reset, then iterate a copy so no state leaks out.
      RE_LINK_TAG_WITH_TEXT.lastIndex = 0;
      for (const match of html.matchAll(RE_LINK_TAG_WITH_TEXT)) {
        const href = match[1];
        const linkText = match[2].replace(/<[^>]*>/g, "").trim();
        // Only link text that itself looks like a URL can "mismatch".
        if (!/^https?:\/\//i.test(linkText)) continue;
        try {
          const hrefHost = new URL(href).hostname.replace(/^www\./, "");
          const textHost = new URL(linkText).hostname.replace(/^www\./, "");
          if (hrefHost !== textHost) return true;
        } catch {
          // href or link text is not a parseable URL; skip this link.
        }
      }
      return false;
    }
  },
  {
    id: "ph_login_urgency",
    category: "phishing",
    score: 10,
    description: "Combines login/click-here language with urgency",
    test: (_e, text) => RE_LOGIN_URGENCY.test(text)
  },
  {
    id: "ph_unsubscribe_missing",
    category: "phishing",
    score: 3,
    description: "Marketing-like email with many links but no List-Unsubscribe header",
    test: (email, text, html) => {
      const allText = text + " " + html;
      const urls = new Set(allText.match(RE_URL_IN_TEXT) ?? []);
      if (urls.size < 5) return false;
      return !email.headers.get("list-unsubscribe");
    }
  },
  // === Authentication (SPF/DKIM/DMARC from headers) ===
  {
    id: "auth_spf_fail",
    category: "authentication",
    score: 15,
    description: "SPF authentication failed",
    test: (email) => {
      const authResults = email.headers.get("authentication-results") ?? "";
      return /spf=(fail|softfail)/i.test(authResults);
    }
  },
  {
    id: "auth_dkim_fail",
    category: "authentication",
    score: 15,
    description: "DKIM authentication failed",
    test: (email) => {
      const authResults = email.headers.get("authentication-results") ?? "";
      return /dkim=fail/i.test(authResults);
    }
  },
  {
    id: "auth_dmarc_fail",
    category: "authentication",
    score: 20,
    description: "DMARC authentication failed",
    test: (email) => {
      const authResults = email.headers.get("authentication-results") ?? "";
      return /dmarc=fail/i.test(authResults);
    }
  },
  {
    id: "auth_no_auth_results",
    category: "authentication",
    score: 3,
    description: "No Authentication-Results header present",
    test: (email) => {
      return !email.headers.has("authentication-results");
    }
  },
  // === Attachment risk ===
  {
    id: "at_executable",
    category: "attachment_risk",
    score: 25,
    description: "Attachment has executable file extension",
    test: (email) => {
      return email.attachments.some((a) => RE_EXECUTABLE_EXT.test(a.filename));
    }
  },
  {
    id: "at_double_extension",
    category: "attachment_risk",
    score: 20,
    description: "Attachment has double extension (e.g. document.pdf.exe)",
    test: (email) => {
      return email.attachments.some((a) => RE_DOUBLE_EXT.test(a.filename));
    }
  },
  {
    id: "at_archive_carrier",
    category: "attachment_risk",
    score: 15,
    description: "Attachment is an archive (potential payload carrier)",
    test: (email) => {
      return email.attachments.some((a) => RE_ARCHIVE_EXT.test(a.filename));
    }
  },
  {
    id: "at_html_attachment",
    category: "attachment_risk",
    score: 10,
    description: "HTML/SVG file attachment (phishing vector)",
    test: (email) => {
      return email.attachments.some((a) => RE_HTML_ATTACHMENT_EXT.test(a.filename));
    }
  },
  // === Header anomalies ===
  {
    id: "ha_missing_message_id",
    category: "header_anomaly",
    score: 5,
    description: "Missing Message-ID header",
    test: (email) => !email.messageId
  },
  {
    id: "ha_empty_from",
    category: "header_anomaly",
    score: 10,
    description: "Missing or empty From address",
    test: (email) => !email.from.length || !email.from[0].address
  },
  {
    id: "ha_reply_to_mismatch",
    category: "header_anomaly",
    score: 5,
    description: "Reply-To domain differs from From domain",
    test: (email) => {
      if (!email.replyTo?.length || !email.from.length) return false;
      const fromDomain = email.from[0].address?.split("@")[1]?.toLowerCase();
      const replyDomain = email.replyTo[0].address?.split("@")[1]?.toLowerCase();
      return !!fromDomain && !!replyDomain && fromDomain !== replyDomain;
    }
  },
  // === Content spam ===
  {
    id: "cs_all_caps_subject",
    category: "content_spam",
    score: 5,
    description: "Subject is mostly uppercase",
    test: (email) => {
      const s = email.subject;
      if (s.length < 10) return false;
      const letters = s.replace(/[^a-zA-Z]/g, "");
      if (letters.length < 5) return false;
      const upper = letters.replace(/[^A-Z]/g, "").length;
      return upper / letters.length > 0.8;
    }
  },
  {
    id: "cs_lottery_scam",
    category: "content_spam",
    score: 25,
    description: "Contains lottery/prize scam language",
    test: (_e, text) => RE_LOTTERY_SCAM.test(text)
  },
  {
    id: "cs_crypto_scam",
    category: "content_spam",
    score: 10,
    description: "Contains crypto/investment scam language",
    test: (_e, text) => RE_CRYPTO_SCAM.test(text)
  },
  {
    id: "cs_excessive_punctuation",
    category: "content_spam",
    score: 3,
    description: "Subject has excessive punctuation (!!!!, ????)",
    test: (email) => /[!]{4,}|[?]{4,}/.test(email.subject)
  },
  {
    id: "cs_pharmacy_spam",
    category: "content_spam",
    score: 15,
    description: "Contains pharmacy/prescription drug spam language",
    test: (_e, text) => RE_PHARMACY_SPAM.test(text)
  },
  {
    id: "cs_weight_loss",
    category: "content_spam",
    score: 10,
    description: "Contains weight loss scam language",
    test: (_e, text) => RE_WEIGHT_LOSS.test(text)
  },
  {
    id: "cs_html_only_no_text",
    category: "content_spam",
    score: 5,
    description: "Email has HTML body but empty/missing text body",
    test: (email) => {
      const hasHtml = !!email.html && email.html.trim().length > 0;
      const hasText = !!email.text && email.text.trim().length > 0;
      return hasHtml && !hasText;
    }
  },
  {
    id: "cs_spam_word_density",
    category: "content_spam",
    // Placeholder: scoreEmail() replaces this with 10 or 20 depending on the
    // spam-word count when the rule fires.
    score: 0,
    description: "High density of common spam words",
    test: (_e, text) => countSpamWords(text) > 5
  },
  // === Link analysis ===
  {
    id: "la_excessive_links",
    category: "link_analysis",
    score: 5,
    description: "Contains more than 10 unique links",
    test: (_e, text, html) => {
      const allText = text + " " + html;
      const urls = new Set(allText.match(RE_URL_IN_TEXT) ?? []);
      return urls.size > 10;
    }
  }
];
572
/**
 * Run every entry of RULES against a message and aggregate the result.
 *
 * Rules receive the message, its subject and text body joined by a newline,
 * and its HTML body ("" when absent). A rule whose test throws is skipped so
 * a single misbehaving rule cannot abort scoring.
 *
 * @param {object} email Parsed message; `subject`, `text` and `html` are read
 *   here, the individual rules read further fields.
 * @returns {{score: number, isSpam: boolean, isWarning: boolean,
 *   matches: Array<{ruleId: string, category: string, score: number, description: string}>,
 *   topCategory: (string|null)}}
 *   Summed score, threshold flags, the rules that fired (in RULES order) and
 *   the category with the highest summed score (null when nothing fired).
 */
function scoreEmail(email) {
  const textPart = [email.subject, email.text ?? ""].join("\n");
  const htmlPart = email.html ?? "";

  const matches = [];
  for (const rule of RULES) {
    try {
      if (!rule.test(email, textPart, htmlPart)) continue;
      // The spam-word rule is scored dynamically: 20 points above ten
      // distinct spam words, otherwise 10.
      const points = rule.id === "cs_spam_word_density"
        ? (countSpamWords(textPart) > 10 ? 20 : 10)
        : rule.score;
      matches.push({
        ruleId: rule.id,
        category: rule.category,
        score: points,
        description: rule.description
      });
    } catch {
      // Best effort: a rule that throws simply does not contribute.
    }
  }

  let score = 0;
  const totalsByCategory = /* @__PURE__ */ new Map();
  for (const hit of matches) {
    score += hit.score;
    totalsByCategory.set(hit.category, (totalsByCategory.get(hit.category) ?? 0) + hit.score);
  }

  // First category (in match order) with the strictly highest total wins.
  let topCategory = null;
  let best = 0;
  for (const [category, subtotal] of totalsByCategory) {
    if (subtotal > best) {
      best = subtotal;
      topCategory = category;
    }
  }

  return {
    score,
    isSpam: score >= SPAM_THRESHOLD,
    isWarning: score >= WARNING_THRESHOLD && score < SPAM_THRESHOLD,
    matches,
    topCategory
  };
}
617
+
618
+ export {
619
+ SPAM_THRESHOLD,
620
+ WARNING_THRESHOLD,
621
+ isInternalEmail,
622
+ scoreEmail
623
+ };