npm - glin-profanity - Versions diffs - 3.1.5 → 3.2.0 - Mend

glin-profanity 3.1.5 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

package/README.md +84 -566
package/dist/index.cjs +151 -57
package/dist/index.d.cts +2 -2
package/dist/index.d.ts +2 -2
package/dist/index.js +151 -57
package/dist/ml/index.cjs +151 -56
package/dist/ml/index.d.cts +2 -2
package/dist/ml/index.d.ts +2 -2
package/dist/ml/index.js +151 -56
package/dist/{types-CdDqSZY7.d.cts → types-Dj5vaoch.d.cts} +2 -0
package/dist/{types-CdDqSZY7.d.ts → types-Dj5vaoch.d.ts} +2 -0
package/package.json +1 -1

package/dist/index.js CHANGED

@@ -535,7 +535,7 @@ var danish_default = {
 // ../../shared/dictionaries/dutch.json
 var dutch_default = {
   words: [
-    "aardappels afgieteng",
+    "aardappels afgieten",
     "achter het raam zitten",
     "afberen",
     "aflebberen",
@@ -552,7 +552,7 @@ var dutch_default = {
     "bagger schijten",
     "balen",
     "bedonderen",
-    "befborstelg",
+    "befborstel",
     "beffen",
     "bekken",
     "belazeren",
@@ -561,11 +561,11 @@ var dutch_default = {
     "beurt",
     "boemelen",
     "boerelul",
-    "boerenpummelg",
+    "boerenpummel",
     "bokkelul",
     "botergeil",
     "broekhoesten",
-    "brugpieperg",
+    "brugpieper",
     "buffelen",
     "buiten de pot piesen",
     "da's kloten van de bok",
@@ -573,13 +573,13 @@ var dutch_default = {
     "de hoer spelen",
     "de hond uitlaten",
     "de koffer induiken",
-    "delg",
+    "del",
     "de pijp aan maarten geven",
     "de pijp uitgaan",
     "dombo",
-    "draaikontg",
+    "draaikont",
     "driehoog achter wonen",
-    "drolg",
+    "drol",
     "drooggeiler",
     "droogkloot",
     "een beurt geven",
@@ -599,7 +599,7 @@ var dutch_default = {
     "godverdomme",
     "graftak",
     "gras maaien",
-    "gratenkutg",
+    "gratenkut",
     "greppeldel",
     "griet",
     "hoempert",
@@ -612,7 +612,7 @@ var dutch_default = {
     "huisdealer",
     "johny",
     "kanen",
-    "kettingzeugg",
+    "kettingzeug",
     "klaarkomen",
     "klerebeer",
     "klojo",
@@ -622,22 +622,22 @@ var dutch_default = {
     "klootzak",
     "kloten",
     "knor",
-    "kontg",
+    "kont",
     "kontneuken",
     "krentekakker",
     "kut",
     "kuttelikkertje",
-    "kwakkieg",
+    "kwakkie",
     "liefdesgrot",
     "lul",
     "lul-de-behanger",
     "lulhannes",
     "lummel",
     "mafketel",
-    "matennaaierg",
+    "matennaaier",
     "matje",
     "mof",
-    "mutsg",
+    "muts",
     "naaien",
     "naakt",
     "neuken",
@@ -657,9 +657,9 @@ var dutch_default = {
     "paal",
     "paardelul",
     "palen",
-    "penozeg",
+    "penoze",
     "piesen",
-    "pijpbekkieg",
+    "pijpbekkie",
     "pijpen",
     "pik",
     "pleurislaaier",
@@ -674,7 +674,7 @@ var dutch_default = {
     "reet",
     "reetridder",
     "reet trappen, voor zijn",
-    "remsporeng",
+    "remsporen",
     "reutelen",
     "rothoer",
     "rotzak",
@@ -687,25 +687,25 @@ var dutch_default = {
     "schuinsmarcheerder",
     "shit",
     "slempen",
-    "sletg",
+    "slet",
     "sletterig",
     "slik mijn zaad",
-    "snolg",
+    "snol",
     "spuiten",
     "standje",
-    "standje-69g",
+    "standje-69",
     "stoephoer",
     "stootje",
-    "strontg",
-    "sufferdg",
+    "stront",
+    "sufferd",
     "tapijtnek",
-    "teefg",
+    "teef",
     "temeier",
     "teringlijer",
     "toeter",
-    "tongzoeng",
-    "triootjeg",
-    "trottoir prostitu\xE9e",
+    "tongzoen",
+    "triootje",
+    "trottoir prostituee",
     "trottoirteef",
     "vergallen",
     "verkloten",
@@ -778,6 +778,8 @@ var english_default = {
     "2 girls 1 cup",
     "2g1c",
     "a$$",
+    "@ss",
+    "4ss",
     "acrotomophilia",
     "alabama hot pocket",
     "alaskan pipeline",
@@ -917,6 +919,10 @@ var english_default = {
     "eunuch",
     "f*ck",
     "f@ck",
+    "f4ck",
+    "fvck",
+    "phuck",
+    "fuk",
     "faggot",
     "fecal",
     "felch",
@@ -1098,6 +1104,9 @@ var english_default = {
     "shemale",
     "shibari",
     "shit",
+    "sh1t",
+    "$hit",
+    "$h!t",
     "shitblimp",
     "shithead",
     "shitshow",
@@ -2201,7 +2210,7 @@ var italian_default = {
     "di merda",
     "ditalino",
     "duro",
-    "fare una\u0160",
+    "fare una sega",
     "fava",
     "femminuccia",
     "fica",
@@ -2449,7 +2458,6 @@ var japanese_default = {
     "\u7389\u8210\u3081",
     "\u7DCA\u7E1B",
     "\u8FD1\u89AA\u76F8\u59E6",
-    "\u5ACC\u3044",
     "\u5F8C\u80CC\u4F4D",
     "\u5408\u610F\u306E\u6027\u4EA4",
     "\u62F7\u554F",
@@ -2462,7 +2470,6 @@ var japanese_default = {
     "\u5C04\u7CBE",
     "\u624B\u30B3\u30AD",
     "\u7363\u59E6",
-    "\u5973\u306E\u5B50",
     "\u5973\u738B\u69D8",
     "\u5973\u5B50\u9AD8\u751F",
     "\u5973\u88C5",
@@ -2539,7 +2546,6 @@ var turkish_default = {
     "ak",
     "akp",
     "al a\u011Fz\u0131na",
-    "allah",
     "allahs\u0131z",
     "am",
     "am biti",
@@ -2634,7 +2640,6 @@ var turkish_default = {
     "am\u0131n\u0131 s",
     "am\u0131s\u0131na",
     "am\u0131s\u0131n\u0131",
-    "ana",
     "anaaann",
     "anal",
     "analarn",
@@ -2766,8 +2771,6 @@ var turkish_default = {
     "cikar",
     "cim",
     "cm",
-    "coca cola",
-    "cola",
     "dalaks\u0131z",
     "dallama",
     "daltassak",
@@ -3565,7 +3568,7 @@ var turkish_default = {
 // ../../shared/dictionaries/spanish.json
 var spanish_default = {
   words: [
-    "sesinato",
+    "asesinato",
     "asno",
     "bastardo",
     "Bollera",
@@ -3872,6 +3875,34 @@ var GAMING_POSITIVE = /* @__PURE__ */ new Set([
   "move",
   "combo"
 ]);
+var GAMING_ACCEPTABLE_WORDS = /* @__PURE__ */ new Set([
+  "kill",
+  "killer",
+  "killed",
+  "killing",
+  "shoot",
+  "shot",
+  "shooting",
+  "die",
+  "dying",
+  "died",
+  "dead",
+  "death",
+  "badass",
+  "sick",
+  "insane",
+  "crazy",
+  "mad",
+  "beast",
+  "savage",
+  "suck",
+  "sucks",
+  "wtf",
+  "omg",
+  "hell",
+  "damn",
+  "crap"
+]);
 var POSITIVE_PHRASES = /* @__PURE__ */ new Map([
   ["the bomb", 0.9],
   // "this movie is the bomb"
@@ -3904,7 +3935,9 @@ var ContextAnalyzer = class {
   constructor(config) {
     this.contextWindow = config.contextWindow;
     this.language = config.language;
-    this.domainWhitelists = new Set(config.domainWhitelists || []);
+    this.domainWhitelists = new Set(
+      (config.domainWhitelists || []).map((word) => word.toLowerCase())
+    );
   }
   /**
    * Analyzes the context around a profanity match to determine if it should be flagged
@@ -3941,10 +3974,9 @@ var ContextAnalyzer = class {
       isWhitelisted: false
     };
   }
-  // eslint-disable-next-line @typescript-eslint/no-unused-vars
   checkPhraseContext(contextText, matchWord) {
     for (const [phrase, score] of POSITIVE_PHRASES.entries()) {
-      if (contextText.includes(phrase)) {
+      if (phrase.includes(matchWord) && contextText.includes(phrase)) {
         return {
           contextScore: score,
           reason: `Positive phrase detected: "${phrase}"`,
@@ -3963,21 +3995,29 @@ var ContextAnalyzer = class {
     }
     return null;
   }
-  // eslint-disable-next-line @typescript-eslint/no-unused-vars
   isDomainWhitelisted(contextWords, matchWord) {
+    const normalizedMatchWord = matchWord.toLowerCase();
     for (const word of contextWords) {
-      if (this.domainWhitelists.has(word) || GAMING_POSITIVE.has(word)) {
+      if (this.domainWhitelists.has(word)) {
         return true;
       }
+      if (GAMING_POSITIVE.has(word)) {
+        if (GAMING_ACCEPTABLE_WORDS.has(normalizedMatchWord)) {
+          return true;
+        }
+      }
     }
     return false;
   }
-  // eslint-disable-next-line @typescript-eslint/no-unused-vars
   generateReason(score, contextWords) {
+    const foundPositive = Array.from(new Set(contextWords.filter((word) => POSITIVE_INDICATORS.has(word))));
+    const foundNegative = Array.from(new Set(contextWords.filter((word) => NEGATIVE_INDICATORS.has(word))));
     if (score >= 0.7) {
-      return "Positive context detected - likely not profanity";
+      const details = foundPositive.length > 0 ? ` (found: ${foundPositive.join(", ")})` : "";
+      return `Positive context detected${details} - likely not profanity`;
     } else if (score <= 0.3) {
-      return "Negative context detected - likely profanity";
+      const details = foundNegative.length > 0 ? ` (found: ${foundNegative.join(", ")})` : "";
+      return `Negative context detected${details} - likely profanity`;
     } else {
       return "Neutral context - uncertain classification";
     }
@@ -4035,7 +4075,7 @@ var ContextAnalyzer = class {
    * Updates the domain whitelist for this analyzer instance
    */
   updateDomainWhitelist(newWhitelist) {
-    this.domainWhitelists = new Set(newWhitelist);
+    this.domainWhitelists = new Set(newWhitelist.map((word) => word.toLowerCase()));
   }
   /**
    * Adds words to the domain whitelist
@@ -4235,6 +4275,10 @@ var HOMOGLYPHS = {
   // Cyrillic small e
   "\u0415": "E",
   // Cyrillic capital E
+  "\u043A": "k",
+  // Cyrillic small ka
+  "\u041A": "K",
+  // Cyrillic capital Ka
   "\u043E": "o",
   // Cyrillic small o
   "\u041E": "O",
@@ -4247,9 +4291,9 @@ var HOMOGLYPHS = {
   // Cyrillic small es
   "\u0421": "C",
   // Cyrillic capital Es
-  "\u0443": "y",
-  // Cyrillic small u
-  "\u0423": "Y",
+  "\u0443": "u",
+  // Cyrillic small u (map to u, not y)
+  "\u0423": "U",
   // Cyrillic capital U
   "\u0445": "x",
   // Cyrillic small ha
@@ -4267,6 +4311,11 @@ var HOMOGLYPHS = {
   // Cyrillic small dze
   "\u0405": "S",
   // Cyrillic capital Dze
+  // Currency and special symbols that look like letters
+  "\xA2": "c",
+  // Cent sign
+  "\u0192": "f",
+  // Latin small f with hook (florin)
   // Greek homoglyphs
   "\u03B1": "a",
   // Greek small alpha
@@ -4638,6 +4687,7 @@ var Filter = class {
     this.cacheResults = config?.cacheResults ?? false;
     this.maxCacheSize = config?.maxCacheSize ?? 1e3;
     this.cache = /* @__PURE__ */ new Map();
+    this.regexCache = /* @__PURE__ */ new Map();
     let words = [];
     if (config?.allLanguages) {
       for (const lang in dictionary_default) {
@@ -4667,9 +4717,10 @@ var Filter = class {
    * Applies Unicode normalization, leetspeak detection, and obfuscation handling.
    *
    * @param text - The input text to normalize
+   * @param aggressive - If true, collapses to single chars (for repeated char detection)
    * @returns The normalized text
    */
-  normalizeText(text) {
+  normalizeText(text, aggressive = false) {
     let normalized = text;
     if (this.normalizeUnicodeEnabled) {
       normalized = normalizeUnicode(normalized);
@@ -4678,6 +4729,8 @@ var Filter = class {
       normalized = normalizeLeetspeak(normalized, {
         level: this.leetspeakLevel,
         collapseRepeated: true,
+        // Keep double letters like "ss" for normal check, collapse all for aggressive
+        maxRepeated: aggressive ? 1 : 2,
         removeSpacedChars: true
       });
     }
@@ -4708,6 +4761,7 @@ var Filter = class {
    */
   clearCache() {
     this.cache.clear();
+    this.regexCache.clear();
   }
   /**
    * Gets the current cache size.
@@ -4789,10 +4843,17 @@ var Filter = class {
     return this.cache.get(key);
   }
   getRegex(word) {
+    if (this.regexCache.has(word)) {
+      const regex2 = this.regexCache.get(word);
+      regex2.lastIndex = 0;
+      return regex2;
+    }
     const flags = this.caseSensitive ? "g" : "gi";
     const escapedWord = word.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
     const boundary = this.wordBoundaries ? "\\b" : "";
-    return new RegExp(`${boundary}${escapedWord}${boundary}`, flags);
+    const regex = new RegExp(`${boundary}${escapedWord}${boundary}`, flags);
+    this.regexCache.set(word, regex);
+    return regex;
   }
   isFuzzyToleranceMatch(word, text) {
     const simplifiedText = text.toLowerCase().replace(/[^a-z]/g, "");
@@ -4810,11 +4871,12 @@ var Filter = class {
     return score >= this.fuzzyToleranceLevel;
   }
   evaluateSeverity(word, text) {
-    if (this.wordBoundaries && this.getRegex(word).test(text)) {
+    if (this.getRegex(word).test(text)) {
       return 1 /* EXACT */;
     }
-    if (this.getRegex(word).test(text)) return 1 /* EXACT */;
-    if (this.isFuzzyToleranceMatch(word, text)) return 2 /* FUZZY */;
+    if (!this.wordBoundaries && this.isFuzzyToleranceMatch(word, text)) {
+      return 2 /* FUZZY */;
+    }
     return void 0;
   }
   /**
@@ -4834,9 +4896,20 @@ var Filter = class {
    * ```
    */
   isProfane(value) {
-    const input = this.normalizeText(value);
+    const originalInput = value;
+    const normalizedInput = this.normalizeText(value);
+    const aggressiveInput = this.normalizeText(value, true);
     for (const word of this.words.keys()) {
-      if (!this.ignoreWords.has(word.toLowerCase()) && this.evaluateSeverity(word, input) !== void 0) {
+      if (this.ignoreWords.has(word.toLowerCase())) {
+        continue;
+      }
+      if (this.evaluateSeverity(word, originalInput) !== void 0) {
+        return true;
+      }
+      if (this.evaluateSeverity(word, normalizedInput) !== void 0) {
+        return true;
+      }
+      if (this.evaluateSeverity(word, aggressiveInput) !== void 0) {
         return true;
       }
     }
@@ -4877,23 +4950,45 @@ var Filter = class {
       return cachedResult;
     }
     if (!this.enableContextAware) {
-      let input2 = this.normalizeText(text);
-      input2 = input2.toLowerCase();
+      const originalInput = text.toLowerCase();
+      const normalizedInput = this.normalizeText(text).toLowerCase();
+      const aggressiveInput = this.normalizeText(text, true).toLowerCase();
       const profaneWords2 = [];
       const severityMap2 = {};
       for (const dictWord of this.words.keys()) {
         if (this.ignoreWords.has(dictWord.toLowerCase())) continue;
-        const severity = this.evaluateSeverity(dictWord, input2);
+        let severity = this.evaluateSeverity(dictWord, originalInput);
         if (severity !== void 0) {
           const regex = this.getRegex(dictWord);
           let match;
-          while ((match = regex.exec(input2)) !== null) {
+          while ((match = regex.exec(originalInput)) !== null) {
             profaneWords2.push(match[0]);
             if (severityMap2[match[0]] === void 0) {
               severityMap2[match[0]] = severity;
             }
           }
         }
+        severity = this.evaluateSeverity(dictWord, normalizedInput);
+        if (severity !== void 0) {
+          const regex = this.getRegex(dictWord);
+          while ((regex.exec(normalizedInput)) !== null) {
+            if (!profaneWords2.includes(dictWord)) {
+              profaneWords2.push(dictWord);
+              if (severityMap2[dictWord] === void 0) {
+                severityMap2[dictWord] = severity;
+              }
+            }
+          }
+        }
+        severity = this.evaluateSeverity(dictWord, aggressiveInput);
+        if (severity !== void 0) {
+          if (!profaneWords2.includes(dictWord)) {
+            profaneWords2.push(dictWord);
+            if (severityMap2[dictWord] === void 0) {
+              severityMap2[dictWord] = severity;
+            }
+          }
+        }
       }
       let processedText2 = text;
       if (this.replaceWith && profaneWords2.length > 0) {
@@ -5028,7 +5123,6 @@ var globalWhitelist_default = {
     "Cucumber",
     "Analysis",
     "Japan",
-    "Analytics",
     "Manipulate",
     "Shoot",
     "Button",