bekindprofanityfilter 0.0.5 → 0.0.7
This diff compares the contents of two publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- package/dist/cjs/index.js +6 -6
- package/dist/esm/index.d.ts +15 -0
- package/dist/esm/languages/english-primary-all-languages.d.ts +0 -17
- package/dist/esm.min.js +8 -0
- package/package.json +5 -6
- package/dist/esm/algos/aho-corasick.js +0 -238
- package/dist/esm/algos/bloom-filter.js +0 -208
- package/dist/esm/algos/context-patterns.js +0 -415
- package/dist/esm/index.js +0 -2640
- package/dist/esm/innocence-scoring.js +0 -118
- package/dist/esm/language-detector.js +0 -952
- package/dist/esm/language-dicts.js +0 -2718
- package/dist/esm/languages/english-primary-all-languages.js +0 -36894
- package/dist/esm/romanization-detector.js +0 -779
package/dist/esm/index.d.ts
CHANGED
|
@@ -644,6 +644,21 @@ export declare class BeKind {
|
|
|
644
644
|
* @param text - The input text.
|
|
645
645
|
* @returns Normalized text.
|
|
646
646
|
*/
|
|
647
|
+
/**
|
|
648
|
+
* Regex matching numeric-punctuation clusters that should never be leet-decoded.
|
|
649
|
+
* Covers phone numbers "(206) 366-3311", prices "$8,575!", quantities "1,000",
|
|
650
|
+
* percentages "5%", times "7:30", and other numeric expressions.
|
|
651
|
+
* Matches any token that starts with or contains digits mixed with common
|
|
652
|
+
* numeric punctuation (,.!?:;$%#()/-) and has no adjacent letters.
|
|
653
|
+
*/
|
|
654
|
+
/**
|
|
655
|
+
* Matches whitespace-delimited tokens that contain NO letters — only
|
|
656
|
+
* digits, punctuation, and symbols. These are pure numeric expressions
|
|
657
|
+
* (phone numbers, prices, years, quantities) and should never be
|
|
658
|
+
* leet-decoded. Tokens containing at least one letter (like "f0ck",
|
|
659
|
+
* "6006s", "b00bs") are left for leet-speak processing.
|
|
660
|
+
*/
|
|
661
|
+
private static readonly PURE_NUMERIC_TOKEN_RE;
|
|
647
662
|
private normalizeLeetSpeak;
|
|
648
663
|
/**
|
|
649
664
|
* Conservative leet normalization — only replaces non-letter characters
|
|
package/dist/esm/languages/english-primary-all-languages.d.ts
CHANGED
|
@@ -1,20 +1,3 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Consolidated profanity words list across all supported languages.
|
|
3
|
-
* Contains the full word lists from all individual language files.
|
|
4
|
-
*
|
|
5
|
-
* Cross-language false positives removed (normal English words that are profane elsewhere):
|
|
6
|
-
* bite (FR: dick), con (FR: cunt), gland (FR: idiot), laid (FR: ugly),
|
|
7
|
-
* pet (FR: fart), pine (FR: dick), sale (FR: dirty), beaver (EN: vulgar slang),
|
|
8
|
-
* Mist (DE: shit), mist (DE: crap), Sack (DE: scrotum), Pack (DE: scum),
|
|
9
|
-
* pica (BR: penis), pinto (BR: penis), peru (BR: penis + country name),
|
|
10
|
-
* bal (BN: pubic hair), paal (TA: contextually vulgar), memo (ES: dumb),
|
|
11
|
-
* laura (HI: penis variant), gay (BR: listed as profanity but isn't)
|
|
12
|
-
*
|
|
13
|
-
* Languages included: English, German, French, Spanish, Brazilian Portuguese,
|
|
14
|
-
* Hindi/Hinglish, Tamil, Bengali, Telugu, Arabic, Chinese, Italian, Japanese, Korean, Russian
|
|
15
|
-
*
|
|
16
|
-
* ⚠️ Warning: This file contains explicit language in multiple languages
|
|
17
|
-
*/
|
|
18
1
|
declare const allLanguagesBadWords: Record<string, {
|
|
19
2
|
severity: number;
|
|
20
3
|
certainty: number;
|