glin-profanity 3.1.2 → 3.2.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -535,7 +535,7 @@ var danish_default = {
535
535
  // ../../shared/dictionaries/dutch.json
536
536
  var dutch_default = {
537
537
  words: [
538
- "aardappels afgieteng",
538
+ "aardappels afgieten",
539
539
  "achter het raam zitten",
540
540
  "afberen",
541
541
  "aflebberen",
@@ -552,7 +552,7 @@ var dutch_default = {
552
552
  "bagger schijten",
553
553
  "balen",
554
554
  "bedonderen",
555
- "befborstelg",
555
+ "befborstel",
556
556
  "beffen",
557
557
  "bekken",
558
558
  "belazeren",
@@ -561,11 +561,11 @@ var dutch_default = {
561
561
  "beurt",
562
562
  "boemelen",
563
563
  "boerelul",
564
- "boerenpummelg",
564
+ "boerenpummel",
565
565
  "bokkelul",
566
566
  "botergeil",
567
567
  "broekhoesten",
568
- "brugpieperg",
568
+ "brugpieper",
569
569
  "buffelen",
570
570
  "buiten de pot piesen",
571
571
  "da's kloten van de bok",
@@ -573,13 +573,13 @@ var dutch_default = {
573
573
  "de hoer spelen",
574
574
  "de hond uitlaten",
575
575
  "de koffer induiken",
576
- "delg",
576
+ "del",
577
577
  "de pijp aan maarten geven",
578
578
  "de pijp uitgaan",
579
579
  "dombo",
580
- "draaikontg",
580
+ "draaikont",
581
581
  "driehoog achter wonen",
582
- "drolg",
582
+ "drol",
583
583
  "drooggeiler",
584
584
  "droogkloot",
585
585
  "een beurt geven",
@@ -599,7 +599,7 @@ var dutch_default = {
599
599
  "godverdomme",
600
600
  "graftak",
601
601
  "gras maaien",
602
- "gratenkutg",
602
+ "gratenkut",
603
603
  "greppeldel",
604
604
  "griet",
605
605
  "hoempert",
@@ -612,7 +612,7 @@ var dutch_default = {
612
612
  "huisdealer",
613
613
  "johny",
614
614
  "kanen",
615
- "kettingzeugg",
615
+ "kettingzeug",
616
616
  "klaarkomen",
617
617
  "klerebeer",
618
618
  "klojo",
@@ -622,22 +622,22 @@ var dutch_default = {
622
622
  "klootzak",
623
623
  "kloten",
624
624
  "knor",
625
- "kontg",
625
+ "kont",
626
626
  "kontneuken",
627
627
  "krentekakker",
628
628
  "kut",
629
629
  "kuttelikkertje",
630
- "kwakkieg",
630
+ "kwakkie",
631
631
  "liefdesgrot",
632
632
  "lul",
633
633
  "lul-de-behanger",
634
634
  "lulhannes",
635
635
  "lummel",
636
636
  "mafketel",
637
- "matennaaierg",
637
+ "matennaaier",
638
638
  "matje",
639
639
  "mof",
640
- "mutsg",
640
+ "muts",
641
641
  "naaien",
642
642
  "naakt",
643
643
  "neuken",
@@ -657,9 +657,9 @@ var dutch_default = {
657
657
  "paal",
658
658
  "paardelul",
659
659
  "palen",
660
- "penozeg",
660
+ "penoze",
661
661
  "piesen",
662
- "pijpbekkieg",
662
+ "pijpbekkie",
663
663
  "pijpen",
664
664
  "pik",
665
665
  "pleurislaaier",
@@ -674,7 +674,7 @@ var dutch_default = {
674
674
  "reet",
675
675
  "reetridder",
676
676
  "reet trappen, voor zijn",
677
- "remsporeng",
677
+ "remsporen",
678
678
  "reutelen",
679
679
  "rothoer",
680
680
  "rotzak",
@@ -687,25 +687,25 @@ var dutch_default = {
687
687
  "schuinsmarcheerder",
688
688
  "shit",
689
689
  "slempen",
690
- "sletg",
690
+ "slet",
691
691
  "sletterig",
692
692
  "slik mijn zaad",
693
- "snolg",
693
+ "snol",
694
694
  "spuiten",
695
695
  "standje",
696
- "standje-69g",
696
+ "standje-69",
697
697
  "stoephoer",
698
698
  "stootje",
699
- "strontg",
700
- "sufferdg",
699
+ "stront",
700
+ "sufferd",
701
701
  "tapijtnek",
702
- "teefg",
702
+ "teef",
703
703
  "temeier",
704
704
  "teringlijer",
705
705
  "toeter",
706
- "tongzoeng",
707
- "triootjeg",
708
- "trottoir prostitu\xE9e",
706
+ "tongzoen",
707
+ "triootje",
708
+ "trottoir prostituee",
709
709
  "trottoirteef",
710
710
  "vergallen",
711
711
  "verkloten",
@@ -778,6 +778,8 @@ var english_default = {
778
778
  "2 girls 1 cup",
779
779
  "2g1c",
780
780
  "a$$",
781
+ "@ss",
782
+ "4ss",
781
783
  "acrotomophilia",
782
784
  "alabama hot pocket",
783
785
  "alaskan pipeline",
@@ -917,6 +919,10 @@ var english_default = {
917
919
  "eunuch",
918
920
  "f*ck",
919
921
  "f@ck",
922
+ "f4ck",
923
+ "fvck",
924
+ "phuck",
925
+ "fuk",
920
926
  "faggot",
921
927
  "fecal",
922
928
  "felch",
@@ -1098,6 +1104,9 @@ var english_default = {
1098
1104
  "shemale",
1099
1105
  "shibari",
1100
1106
  "shit",
1107
+ "sh1t",
1108
+ "$hit",
1109
+ "$h!t",
1101
1110
  "shitblimp",
1102
1111
  "shithead",
1103
1112
  "shitshow",
@@ -2201,7 +2210,7 @@ var italian_default = {
2201
2210
  "di merda",
2202
2211
  "ditalino",
2203
2212
  "duro",
2204
- "fare una\u0160",
2213
+ "fare una sega",
2205
2214
  "fava",
2206
2215
  "femminuccia",
2207
2216
  "fica",
@@ -2449,7 +2458,6 @@ var japanese_default = {
2449
2458
  "\u7389\u8210\u3081",
2450
2459
  "\u7DCA\u7E1B",
2451
2460
  "\u8FD1\u89AA\u76F8\u59E6",
2452
- "\u5ACC\u3044",
2453
2461
  "\u5F8C\u80CC\u4F4D",
2454
2462
  "\u5408\u610F\u306E\u6027\u4EA4",
2455
2463
  "\u62F7\u554F",
@@ -2462,7 +2470,6 @@ var japanese_default = {
2462
2470
  "\u5C04\u7CBE",
2463
2471
  "\u624B\u30B3\u30AD",
2464
2472
  "\u7363\u59E6",
2465
- "\u5973\u306E\u5B50",
2466
2473
  "\u5973\u738B\u69D8",
2467
2474
  "\u5973\u5B50\u9AD8\u751F",
2468
2475
  "\u5973\u88C5",
@@ -2539,7 +2546,6 @@ var turkish_default = {
2539
2546
  "ak",
2540
2547
  "akp",
2541
2548
  "al a\u011Fz\u0131na",
2542
- "allah",
2543
2549
  "allahs\u0131z",
2544
2550
  "am",
2545
2551
  "am biti",
@@ -2634,7 +2640,6 @@ var turkish_default = {
2634
2640
  "am\u0131n\u0131 s",
2635
2641
  "am\u0131s\u0131na",
2636
2642
  "am\u0131s\u0131n\u0131",
2637
- "ana",
2638
2643
  "anaaann",
2639
2644
  "anal",
2640
2645
  "analarn",
@@ -2766,8 +2771,6 @@ var turkish_default = {
2766
2771
  "cikar",
2767
2772
  "cim",
2768
2773
  "cm",
2769
- "coca cola",
2770
- "cola",
2771
2774
  "dalaks\u0131z",
2772
2775
  "dallama",
2773
2776
  "daltassak",
@@ -3565,7 +3568,7 @@ var turkish_default = {
3565
3568
  // ../../shared/dictionaries/spanish.json
3566
3569
  var spanish_default = {
3567
3570
  words: [
3568
- "sesinato",
3571
+ "asesinato",
3569
3572
  "asno",
3570
3573
  "bastardo",
3571
3574
  "Bollera",
@@ -3872,6 +3875,34 @@ var GAMING_POSITIVE = /* @__PURE__ */ new Set([
3872
3875
  "move",
3873
3876
  "combo"
3874
3877
  ]);
3878
+ var GAMING_ACCEPTABLE_WORDS = /* @__PURE__ */ new Set([
3879
+ "kill",
3880
+ "killer",
3881
+ "killed",
3882
+ "killing",
3883
+ "shoot",
3884
+ "shot",
3885
+ "shooting",
3886
+ "die",
3887
+ "dying",
3888
+ "died",
3889
+ "dead",
3890
+ "death",
3891
+ "badass",
3892
+ "sick",
3893
+ "insane",
3894
+ "crazy",
3895
+ "mad",
3896
+ "beast",
3897
+ "savage",
3898
+ "suck",
3899
+ "sucks",
3900
+ "wtf",
3901
+ "omg",
3902
+ "hell",
3903
+ "damn",
3904
+ "crap"
3905
+ ]);
3875
3906
  var POSITIVE_PHRASES = /* @__PURE__ */ new Map([
3876
3907
  ["the bomb", 0.9],
3877
3908
  // "this movie is the bomb"
@@ -3904,7 +3935,9 @@ var ContextAnalyzer = class {
3904
3935
  constructor(config) {
3905
3936
  this.contextWindow = config.contextWindow;
3906
3937
  this.language = config.language;
3907
- this.domainWhitelists = new Set(config.domainWhitelists || []);
3938
+ this.domainWhitelists = new Set(
3939
+ (config.domainWhitelists || []).map((word) => word.toLowerCase())
3940
+ );
3908
3941
  }
3909
3942
  /**
3910
3943
  * Analyzes the context around a profanity match to determine if it should be flagged
@@ -3941,10 +3974,9 @@ var ContextAnalyzer = class {
3941
3974
  isWhitelisted: false
3942
3975
  };
3943
3976
  }
3944
- // eslint-disable-next-line @typescript-eslint/no-unused-vars
3945
3977
  checkPhraseContext(contextText, matchWord) {
3946
3978
  for (const [phrase, score] of POSITIVE_PHRASES.entries()) {
3947
- if (contextText.includes(phrase)) {
3979
+ if (phrase.includes(matchWord) && contextText.includes(phrase)) {
3948
3980
  return {
3949
3981
  contextScore: score,
3950
3982
  reason: `Positive phrase detected: "${phrase}"`,
@@ -3963,21 +3995,29 @@ var ContextAnalyzer = class {
3963
3995
  }
3964
3996
  return null;
3965
3997
  }
3966
- // eslint-disable-next-line @typescript-eslint/no-unused-vars
3967
3998
  isDomainWhitelisted(contextWords, matchWord) {
3999
+ const normalizedMatchWord = matchWord.toLowerCase();
3968
4000
  for (const word of contextWords) {
3969
- if (this.domainWhitelists.has(word) || GAMING_POSITIVE.has(word)) {
4001
+ if (this.domainWhitelists.has(word)) {
3970
4002
  return true;
3971
4003
  }
4004
+ if (GAMING_POSITIVE.has(word)) {
4005
+ if (GAMING_ACCEPTABLE_WORDS.has(normalizedMatchWord)) {
4006
+ return true;
4007
+ }
4008
+ }
3972
4009
  }
3973
4010
  return false;
3974
4011
  }
3975
- // eslint-disable-next-line @typescript-eslint/no-unused-vars
3976
4012
  generateReason(score, contextWords) {
4013
+ const foundPositive = Array.from(new Set(contextWords.filter((word) => POSITIVE_INDICATORS.has(word))));
4014
+ const foundNegative = Array.from(new Set(contextWords.filter((word) => NEGATIVE_INDICATORS.has(word))));
3977
4015
  if (score >= 0.7) {
3978
- return "Positive context detected - likely not profanity";
4016
+ const details = foundPositive.length > 0 ? ` (found: ${foundPositive.join(", ")})` : "";
4017
+ return `Positive context detected${details} - likely not profanity`;
3979
4018
  } else if (score <= 0.3) {
3980
- return "Negative context detected - likely profanity";
4019
+ const details = foundNegative.length > 0 ? ` (found: ${foundNegative.join(", ")})` : "";
4020
+ return `Negative context detected${details} - likely profanity`;
3981
4021
  } else {
3982
4022
  return "Neutral context - uncertain classification";
3983
4023
  }
@@ -4035,7 +4075,7 @@ var ContextAnalyzer = class {
4035
4075
  * Updates the domain whitelist for this analyzer instance
4036
4076
  */
4037
4077
  updateDomainWhitelist(newWhitelist) {
4038
- this.domainWhitelists = new Set(newWhitelist);
4078
+ this.domainWhitelists = new Set(newWhitelist.map((word) => word.toLowerCase()));
4039
4079
  }
4040
4080
  /**
4041
4081
  * Adds words to the domain whitelist
@@ -4235,6 +4275,10 @@ var HOMOGLYPHS = {
4235
4275
  // Cyrillic small e
4236
4276
  "\u0415": "E",
4237
4277
  // Cyrillic capital E
4278
+ "\u043A": "k",
4279
+ // Cyrillic small ka
4280
+ "\u041A": "K",
4281
+ // Cyrillic capital Ka
4238
4282
  "\u043E": "o",
4239
4283
  // Cyrillic small o
4240
4284
  "\u041E": "O",
@@ -4247,9 +4291,9 @@ var HOMOGLYPHS = {
4247
4291
  // Cyrillic small es
4248
4292
  "\u0421": "C",
4249
4293
  // Cyrillic capital Es
4250
- "\u0443": "y",
4251
- // Cyrillic small u
4252
- "\u0423": "Y",
4294
+ "\u0443": "u",
4295
+ // Cyrillic small u (map to u, not y)
4296
+ "\u0423": "U",
4253
4297
  // Cyrillic capital U
4254
4298
  "\u0445": "x",
4255
4299
  // Cyrillic small ha
@@ -4267,6 +4311,11 @@ var HOMOGLYPHS = {
4267
4311
  // Cyrillic small dze
4268
4312
  "\u0405": "S",
4269
4313
  // Cyrillic capital Dze
4314
+ // Currency and special symbols that look like letters
4315
+ "\xA2": "c",
4316
+ // Cent sign
4317
+ "\u0192": "f",
4318
+ // Latin small f with hook (florin)
4270
4319
  // Greek homoglyphs
4271
4320
  "\u03B1": "a",
4272
4321
  // Greek small alpha
@@ -4638,6 +4687,7 @@ var Filter = class {
4638
4687
  this.cacheResults = config?.cacheResults ?? false;
4639
4688
  this.maxCacheSize = config?.maxCacheSize ?? 1e3;
4640
4689
  this.cache = /* @__PURE__ */ new Map();
4690
+ this.regexCache = /* @__PURE__ */ new Map();
4641
4691
  let words = [];
4642
4692
  if (config?.allLanguages) {
4643
4693
  for (const lang in dictionary_default) {
@@ -4667,9 +4717,10 @@ var Filter = class {
4667
4717
  * Applies Unicode normalization, leetspeak detection, and obfuscation handling.
4668
4718
  *
4669
4719
  * @param text - The input text to normalize
4720
+ * @param aggressive - If true, collapses to single chars (for repeated char detection)
4670
4721
  * @returns The normalized text
4671
4722
  */
4672
- normalizeText(text) {
4723
+ normalizeText(text, aggressive = false) {
4673
4724
  let normalized = text;
4674
4725
  if (this.normalizeUnicodeEnabled) {
4675
4726
  normalized = normalizeUnicode(normalized);
@@ -4678,6 +4729,8 @@ var Filter = class {
4678
4729
  normalized = normalizeLeetspeak(normalized, {
4679
4730
  level: this.leetspeakLevel,
4680
4731
  collapseRepeated: true,
4732
+ // Keep double letters like "ss" for normal check, collapse all for aggressive
4733
+ maxRepeated: aggressive ? 1 : 2,
4681
4734
  removeSpacedChars: true
4682
4735
  });
4683
4736
  }
@@ -4708,6 +4761,7 @@ var Filter = class {
4708
4761
  */
4709
4762
  clearCache() {
4710
4763
  this.cache.clear();
4764
+ this.regexCache.clear();
4711
4765
  }
4712
4766
  /**
4713
4767
  * Gets the current cache size.
@@ -4789,10 +4843,17 @@ var Filter = class {
4789
4843
  return this.cache.get(key);
4790
4844
  }
4791
4845
  getRegex(word) {
4846
+ if (this.regexCache.has(word)) {
4847
+ const regex2 = this.regexCache.get(word);
4848
+ regex2.lastIndex = 0;
4849
+ return regex2;
4850
+ }
4792
4851
  const flags = this.caseSensitive ? "g" : "gi";
4793
4852
  const escapedWord = word.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
4794
4853
  const boundary = this.wordBoundaries ? "\\b" : "";
4795
- return new RegExp(`${boundary}${escapedWord}${boundary}`, flags);
4854
+ const regex = new RegExp(`${boundary}${escapedWord}${boundary}`, flags);
4855
+ this.regexCache.set(word, regex);
4856
+ return regex;
4796
4857
  }
4797
4858
  isFuzzyToleranceMatch(word, text) {
4798
4859
  const simplifiedText = text.toLowerCase().replace(/[^a-z]/g, "");
@@ -4810,11 +4871,12 @@ var Filter = class {
4810
4871
  return score >= this.fuzzyToleranceLevel;
4811
4872
  }
4812
4873
  evaluateSeverity(word, text) {
4813
- if (this.wordBoundaries && this.getRegex(word).test(text)) {
4874
+ if (this.getRegex(word).test(text)) {
4814
4875
  return 1 /* EXACT */;
4815
4876
  }
4816
- if (this.getRegex(word).test(text)) return 1 /* EXACT */;
4817
- if (this.isFuzzyToleranceMatch(word, text)) return 2 /* FUZZY */;
4877
+ if (!this.wordBoundaries && this.isFuzzyToleranceMatch(word, text)) {
4878
+ return 2 /* FUZZY */;
4879
+ }
4818
4880
  return void 0;
4819
4881
  }
4820
4882
  /**
@@ -4834,9 +4896,20 @@ var Filter = class {
4834
4896
  * ```
4835
4897
  */
4836
4898
  isProfane(value) {
4837
- const input = this.normalizeText(value);
4899
+ const originalInput = value;
4900
+ const normalizedInput = this.normalizeText(value);
4901
+ const aggressiveInput = this.normalizeText(value, true);
4838
4902
  for (const word of this.words.keys()) {
4839
- if (!this.ignoreWords.has(word.toLowerCase()) && this.evaluateSeverity(word, input) !== void 0) {
4903
+ if (this.ignoreWords.has(word.toLowerCase())) {
4904
+ continue;
4905
+ }
4906
+ if (this.evaluateSeverity(word, originalInput) !== void 0) {
4907
+ return true;
4908
+ }
4909
+ if (this.evaluateSeverity(word, normalizedInput) !== void 0) {
4910
+ return true;
4911
+ }
4912
+ if (this.evaluateSeverity(word, aggressiveInput) !== void 0) {
4840
4913
  return true;
4841
4914
  }
4842
4915
  }
@@ -4877,23 +4950,45 @@ var Filter = class {
4877
4950
  return cachedResult;
4878
4951
  }
4879
4952
  if (!this.enableContextAware) {
4880
- let input2 = this.normalizeText(text);
4881
- input2 = input2.toLowerCase();
4953
+ const originalInput = text.toLowerCase();
4954
+ const normalizedInput = this.normalizeText(text).toLowerCase();
4955
+ const aggressiveInput = this.normalizeText(text, true).toLowerCase();
4882
4956
  const profaneWords2 = [];
4883
4957
  const severityMap2 = {};
4884
4958
  for (const dictWord of this.words.keys()) {
4885
4959
  if (this.ignoreWords.has(dictWord.toLowerCase())) continue;
4886
- const severity = this.evaluateSeverity(dictWord, input2);
4960
+ let severity = this.evaluateSeverity(dictWord, originalInput);
4887
4961
  if (severity !== void 0) {
4888
4962
  const regex = this.getRegex(dictWord);
4889
4963
  let match;
4890
- while ((match = regex.exec(input2)) !== null) {
4964
+ while ((match = regex.exec(originalInput)) !== null) {
4891
4965
  profaneWords2.push(match[0]);
4892
4966
  if (severityMap2[match[0]] === void 0) {
4893
4967
  severityMap2[match[0]] = severity;
4894
4968
  }
4895
4969
  }
4896
4970
  }
4971
+ severity = this.evaluateSeverity(dictWord, normalizedInput);
4972
+ if (severity !== void 0) {
4973
+ const regex = this.getRegex(dictWord);
4974
+ while ((regex.exec(normalizedInput)) !== null) {
4975
+ if (!profaneWords2.includes(dictWord)) {
4976
+ profaneWords2.push(dictWord);
4977
+ if (severityMap2[dictWord] === void 0) {
4978
+ severityMap2[dictWord] = severity;
4979
+ }
4980
+ }
4981
+ }
4982
+ }
4983
+ severity = this.evaluateSeverity(dictWord, aggressiveInput);
4984
+ if (severity !== void 0) {
4985
+ if (!profaneWords2.includes(dictWord)) {
4986
+ profaneWords2.push(dictWord);
4987
+ if (severityMap2[dictWord] === void 0) {
4988
+ severityMap2[dictWord] = severity;
4989
+ }
4990
+ }
4991
+ }
4897
4992
  }
4898
4993
  let processedText2 = text;
4899
4994
  if (this.replaceWith && profaneWords2.length > 0) {
@@ -5028,7 +5123,6 @@ var globalWhitelist_default = {
5028
5123
  "Cucumber",
5029
5124
  "Analysis",
5030
5125
  "Japan",
5031
- "Analytics",
5032
5126
  "Manipulate",
5033
5127
  "Shoot",
5034
5128
  "Button",