glin-profanity 3.1.5 → 3.2.0

This diff shows the content of publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
package/dist/index.cjs CHANGED
@@ -537,7 +537,7 @@ var danish_default = {
  // ../../shared/dictionaries/dutch.json
  var dutch_default = {
  words: [
- "aardappels afgieteng",
+ "aardappels afgieten",
  "achter het raam zitten",
  "afberen",
  "aflebberen",
@@ -554,7 +554,7 @@ var dutch_default = {
  "bagger schijten",
  "balen",
  "bedonderen",
- "befborstelg",
+ "befborstel",
  "beffen",
  "bekken",
  "belazeren",
@@ -563,11 +563,11 @@ var dutch_default = {
  "beurt",
  "boemelen",
  "boerelul",
- "boerenpummelg",
+ "boerenpummel",
  "bokkelul",
  "botergeil",
  "broekhoesten",
- "brugpieperg",
+ "brugpieper",
  "buffelen",
  "buiten de pot piesen",
  "da's kloten van de bok",
@@ -575,13 +575,13 @@ var dutch_default = {
  "de hoer spelen",
  "de hond uitlaten",
  "de koffer induiken",
- "delg",
+ "del",
  "de pijp aan maarten geven",
  "de pijp uitgaan",
  "dombo",
- "draaikontg",
+ "draaikont",
  "driehoog achter wonen",
- "drolg",
+ "drol",
  "drooggeiler",
  "droogkloot",
  "een beurt geven",
@@ -601,7 +601,7 @@ var dutch_default = {
  "godverdomme",
  "graftak",
  "gras maaien",
- "gratenkutg",
+ "gratenkut",
  "greppeldel",
  "griet",
  "hoempert",
@@ -614,7 +614,7 @@ var dutch_default = {
  "huisdealer",
  "johny",
  "kanen",
- "kettingzeugg",
+ "kettingzeug",
  "klaarkomen",
  "klerebeer",
  "klojo",
@@ -624,22 +624,22 @@ var dutch_default = {
  "klootzak",
  "kloten",
  "knor",
- "kontg",
+ "kont",
  "kontneuken",
  "krentekakker",
  "kut",
  "kuttelikkertje",
- "kwakkieg",
+ "kwakkie",
  "liefdesgrot",
  "lul",
  "lul-de-behanger",
  "lulhannes",
  "lummel",
  "mafketel",
- "matennaaierg",
+ "matennaaier",
  "matje",
  "mof",
- "mutsg",
+ "muts",
  "naaien",
  "naakt",
  "neuken",
@@ -659,9 +659,9 @@ var dutch_default = {
  "paal",
  "paardelul",
  "palen",
- "penozeg",
+ "penoze",
  "piesen",
- "pijpbekkieg",
+ "pijpbekkie",
  "pijpen",
  "pik",
  "pleurislaaier",
@@ -676,7 +676,7 @@ var dutch_default = {
  "reet",
  "reetridder",
  "reet trappen, voor zijn",
- "remsporeng",
+ "remsporen",
  "reutelen",
  "rothoer",
  "rotzak",
@@ -689,25 +689,25 @@ var dutch_default = {
  "schuinsmarcheerder",
  "shit",
  "slempen",
- "sletg",
+ "slet",
  "sletterig",
  "slik mijn zaad",
- "snolg",
+ "snol",
  "spuiten",
  "standje",
- "standje-69g",
+ "standje-69",
  "stoephoer",
  "stootje",
- "strontg",
- "sufferdg",
+ "stront",
+ "sufferd",
  "tapijtnek",
- "teefg",
+ "teef",
  "temeier",
  "teringlijer",
  "toeter",
- "tongzoeng",
- "triootjeg",
- "trottoir prostitu\xE9e",
+ "tongzoen",
+ "triootje",
+ "trottoir prostituee",
  "trottoirteef",
  "vergallen",
  "verkloten",
@@ -780,6 +780,8 @@ var english_default = {
  "2 girls 1 cup",
  "2g1c",
  "a$$",
+ "@ss",
+ "4ss",
  "acrotomophilia",
  "alabama hot pocket",
  "alaskan pipeline",
@@ -919,6 +921,10 @@ var english_default = {
  "eunuch",
  "f*ck",
  "f@ck",
+ "f4ck",
+ "fvck",
+ "phuck",
+ "fuk",
  "faggot",
  "fecal",
  "felch",
@@ -1100,6 +1106,9 @@ var english_default = {
  "shemale",
  "shibari",
  "shit",
+ "sh1t",
+ "$hit",
+ "$h!t",
  "shitblimp",
  "shithead",
  "shitshow",
@@ -2203,7 +2212,7 @@ var italian_default = {
  "di merda",
  "ditalino",
  "duro",
- "fare una\u0160",
+ "fare una sega",
  "fava",
  "femminuccia",
  "fica",
@@ -2451,7 +2460,6 @@ var japanese_default = {
  "\u7389\u8210\u3081",
  "\u7DCA\u7E1B",
  "\u8FD1\u89AA\u76F8\u59E6",
- "\u5ACC\u3044",
  "\u5F8C\u80CC\u4F4D",
  "\u5408\u610F\u306E\u6027\u4EA4",
  "\u62F7\u554F",
@@ -2464,7 +2472,6 @@ var japanese_default = {
  "\u5C04\u7CBE",
  "\u624B\u30B3\u30AD",
  "\u7363\u59E6",
- "\u5973\u306E\u5B50",
  "\u5973\u738B\u69D8",
  "\u5973\u5B50\u9AD8\u751F",
  "\u5973\u88C5",
@@ -2541,7 +2548,6 @@ var turkish_default = {
  "ak",
  "akp",
  "al a\u011Fz\u0131na",
- "allah",
  "allahs\u0131z",
  "am",
  "am biti",
@@ -2636,7 +2642,6 @@ var turkish_default = {
  "am\u0131n\u0131 s",
  "am\u0131s\u0131na",
  "am\u0131s\u0131n\u0131",
- "ana",
  "anaaann",
  "anal",
  "analarn",
@@ -2768,8 +2773,6 @@ var turkish_default = {
  "cikar",
  "cim",
  "cm",
- "coca cola",
- "cola",
  "dalaks\u0131z",
  "dallama",
  "daltassak",
@@ -3567,7 +3570,7 @@ var turkish_default = {
  // ../../shared/dictionaries/spanish.json
  var spanish_default = {
  words: [
- "sesinato",
+ "asesinato",
  "asno",
  "bastardo",
  "Bollera",
@@ -3874,6 +3877,34 @@ var GAMING_POSITIVE = /* @__PURE__ */ new Set([
  "move",
  "combo"
  ]);
+ var GAMING_ACCEPTABLE_WORDS = /* @__PURE__ */ new Set([
+ "kill",
+ "killer",
+ "killed",
+ "killing",
+ "shoot",
+ "shot",
+ "shooting",
+ "die",
+ "dying",
+ "died",
+ "dead",
+ "death",
+ "badass",
+ "sick",
+ "insane",
+ "crazy",
+ "mad",
+ "beast",
+ "savage",
+ "suck",
+ "sucks",
+ "wtf",
+ "omg",
+ "hell",
+ "damn",
+ "crap"
+ ]);
  var POSITIVE_PHRASES = /* @__PURE__ */ new Map([
  ["the bomb", 0.9],
  // "this movie is the bomb"
@@ -3906,7 +3937,9 @@ var ContextAnalyzer = class {
  constructor(config) {
  this.contextWindow = config.contextWindow;
  this.language = config.language;
- this.domainWhitelists = new Set(config.domainWhitelists || []);
+ this.domainWhitelists = new Set(
+ (config.domainWhitelists || []).map((word) => word.toLowerCase())
+ );
  }
  /**
  * Analyzes the context around a profanity match to determine if it should be flagged
@@ -3943,10 +3976,9 @@ var ContextAnalyzer = class {
  isWhitelisted: false
  };
  }
- // eslint-disable-next-line @typescript-eslint/no-unused-vars
  checkPhraseContext(contextText, matchWord) {
  for (const [phrase, score] of POSITIVE_PHRASES.entries()) {
- if (contextText.includes(phrase)) {
+ if (phrase.includes(matchWord) && contextText.includes(phrase)) {
  return {
  contextScore: score,
  reason: `Positive phrase detected: "${phrase}"`,
@@ -3965,21 +3997,29 @@ var ContextAnalyzer = class {
  }
  return null;
  }
- // eslint-disable-next-line @typescript-eslint/no-unused-vars
  isDomainWhitelisted(contextWords, matchWord) {
+ const normalizedMatchWord = matchWord.toLowerCase();
  for (const word of contextWords) {
- if (this.domainWhitelists.has(word) || GAMING_POSITIVE.has(word)) {
+ if (this.domainWhitelists.has(word)) {
  return true;
  }
+ if (GAMING_POSITIVE.has(word)) {
+ if (GAMING_ACCEPTABLE_WORDS.has(normalizedMatchWord)) {
+ return true;
+ }
+ }
  }
  return false;
  }
- // eslint-disable-next-line @typescript-eslint/no-unused-vars
  generateReason(score, contextWords) {
+ const foundPositive = Array.from(new Set(contextWords.filter((word) => POSITIVE_INDICATORS.has(word))));
+ const foundNegative = Array.from(new Set(contextWords.filter((word) => NEGATIVE_INDICATORS.has(word))));
  if (score >= 0.7) {
- return "Positive context detected - likely not profanity";
+ const details = foundPositive.length > 0 ? ` (found: ${foundPositive.join(", ")})` : "";
+ return `Positive context detected${details} - likely not profanity`;
  } else if (score <= 0.3) {
- return "Negative context detected - likely profanity";
+ const details = foundNegative.length > 0 ? ` (found: ${foundNegative.join(", ")})` : "";
+ return `Negative context detected${details} - likely profanity`;
  } else {
  return "Neutral context - uncertain classification";
  }
@@ -4037,7 +4077,7 @@ var ContextAnalyzer = class {
  * Updates the domain whitelist for this analyzer instance
  */
  updateDomainWhitelist(newWhitelist) {
- this.domainWhitelists = new Set(newWhitelist);
+ this.domainWhitelists = new Set(newWhitelist.map((word) => word.toLowerCase()));
  }
  /**
  * Adds words to the domain whitelist
@@ -4237,6 +4277,10 @@ var HOMOGLYPHS = {
  // Cyrillic small e
  "\u0415": "E",
  // Cyrillic capital E
+ "\u043A": "k",
+ // Cyrillic small ka
+ "\u041A": "K",
+ // Cyrillic capital Ka
  "\u043E": "o",
  // Cyrillic small o
  "\u041E": "O",
@@ -4249,9 +4293,9 @@ var HOMOGLYPHS = {
  // Cyrillic small es
  "\u0421": "C",
  // Cyrillic capital Es
- "\u0443": "y",
- // Cyrillic small u
- "\u0423": "Y",
+ "\u0443": "u",
+ // Cyrillic small u (map to u, not y)
+ "\u0423": "U",
  // Cyrillic capital U
  "\u0445": "x",
  // Cyrillic small ha
@@ -4269,6 +4313,11 @@ var HOMOGLYPHS = {
  // Cyrillic small dze
  "\u0405": "S",
  // Cyrillic capital Dze
+ // Currency and special symbols that look like letters
+ "\xA2": "c",
+ // Cent sign
+ "\u0192": "f",
+ // Latin small f with hook (florin)
  // Greek homoglyphs
  "\u03B1": "a",
  // Greek small alpha
@@ -4640,6 +4689,7 @@ var Filter = class {
  this.cacheResults = config?.cacheResults ?? false;
  this.maxCacheSize = config?.maxCacheSize ?? 1e3;
  this.cache = /* @__PURE__ */ new Map();
+ this.regexCache = /* @__PURE__ */ new Map();
  let words = [];
  if (config?.allLanguages) {
  for (const lang in dictionary_default) {
@@ -4669,9 +4719,10 @@ var Filter = class {
  * Applies Unicode normalization, leetspeak detection, and obfuscation handling.
  *
  * @param text - The input text to normalize
+ * @param aggressive - If true, collapses to single chars (for repeated char detection)
  * @returns The normalized text
  */
- normalizeText(text) {
+ normalizeText(text, aggressive = false) {
  let normalized = text;
  if (this.normalizeUnicodeEnabled) {
  normalized = normalizeUnicode(normalized);
@@ -4680,6 +4731,8 @@ var Filter = class {
  normalized = normalizeLeetspeak(normalized, {
  level: this.leetspeakLevel,
  collapseRepeated: true,
+ // Keep double letters like "ss" for normal check, collapse all for aggressive
+ maxRepeated: aggressive ? 1 : 2,
  removeSpacedChars: true
  });
  }
@@ -4710,6 +4763,7 @@ var Filter = class {
  */
  clearCache() {
  this.cache.clear();
+ this.regexCache.clear();
  }
  /**
  * Gets the current cache size.
@@ -4791,10 +4845,17 @@ var Filter = class {
  return this.cache.get(key);
  }
  getRegex(word) {
+ if (this.regexCache.has(word)) {
+ const regex2 = this.regexCache.get(word);
+ regex2.lastIndex = 0;
+ return regex2;
+ }
  const flags = this.caseSensitive ? "g" : "gi";
  const escapedWord = word.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const boundary = this.wordBoundaries ? "\\b" : "";
- return new RegExp(`${boundary}${escapedWord}${boundary}`, flags);
+ const regex = new RegExp(`${boundary}${escapedWord}${boundary}`, flags);
+ this.regexCache.set(word, regex);
+ return regex;
  }
  isFuzzyToleranceMatch(word, text) {
  const simplifiedText = text.toLowerCase().replace(/[^a-z]/g, "");
@@ -4812,11 +4873,12 @@ var Filter = class {
  return score >= this.fuzzyToleranceLevel;
  }
  evaluateSeverity(word, text) {
- if (this.wordBoundaries && this.getRegex(word).test(text)) {
+ if (this.getRegex(word).test(text)) {
  return 1 /* EXACT */;
  }
- if (this.getRegex(word).test(text)) return 1 /* EXACT */;
- if (this.isFuzzyToleranceMatch(word, text)) return 2 /* FUZZY */;
+ if (!this.wordBoundaries && this.isFuzzyToleranceMatch(word, text)) {
+ return 2 /* FUZZY */;
+ }
  return void 0;
  }
  /**
@@ -4836,9 +4898,20 @@ var Filter = class {
  * ```
  */
  isProfane(value) {
- const input = this.normalizeText(value);
+ const originalInput = value;
+ const normalizedInput = this.normalizeText(value);
+ const aggressiveInput = this.normalizeText(value, true);
  for (const word of this.words.keys()) {
- if (!this.ignoreWords.has(word.toLowerCase()) && this.evaluateSeverity(word, input) !== void 0) {
+ if (this.ignoreWords.has(word.toLowerCase())) {
+ continue;
+ }
+ if (this.evaluateSeverity(word, originalInput) !== void 0) {
+ return true;
+ }
+ if (this.evaluateSeverity(word, normalizedInput) !== void 0) {
+ return true;
+ }
+ if (this.evaluateSeverity(word, aggressiveInput) !== void 0) {
  return true;
  }
  }
@@ -4879,23 +4952,45 @@ var Filter = class {
  return cachedResult;
  }
  if (!this.enableContextAware) {
- let input2 = this.normalizeText(text);
- input2 = input2.toLowerCase();
+ const originalInput = text.toLowerCase();
+ const normalizedInput = this.normalizeText(text).toLowerCase();
+ const aggressiveInput = this.normalizeText(text, true).toLowerCase();
  const profaneWords2 = [];
  const severityMap2 = {};
  for (const dictWord of this.words.keys()) {
  if (this.ignoreWords.has(dictWord.toLowerCase())) continue;
- const severity = this.evaluateSeverity(dictWord, input2);
+ let severity = this.evaluateSeverity(dictWord, originalInput);
  if (severity !== void 0) {
  const regex = this.getRegex(dictWord);
  let match;
- while ((match = regex.exec(input2)) !== null) {
+ while ((match = regex.exec(originalInput)) !== null) {
  profaneWords2.push(match[0]);
  if (severityMap2[match[0]] === void 0) {
  severityMap2[match[0]] = severity;
  }
  }
  }
+ severity = this.evaluateSeverity(dictWord, normalizedInput);
+ if (severity !== void 0) {
+ const regex = this.getRegex(dictWord);
+ while ((regex.exec(normalizedInput)) !== null) {
+ if (!profaneWords2.includes(dictWord)) {
+ profaneWords2.push(dictWord);
+ if (severityMap2[dictWord] === void 0) {
+ severityMap2[dictWord] = severity;
+ }
+ }
+ }
+ }
+ severity = this.evaluateSeverity(dictWord, aggressiveInput);
+ if (severity !== void 0) {
+ if (!profaneWords2.includes(dictWord)) {
+ profaneWords2.push(dictWord);
+ if (severityMap2[dictWord] === void 0) {
+ severityMap2[dictWord] = severity;
+ }
+ }
+ }
  }
  let processedText2 = text;
  if (this.replaceWith && profaneWords2.length > 0) {
@@ -5030,7 +5125,6 @@ var globalWhitelist_default = {
  "Cucumber",
  "Analysis",
  "Japan",
- "Analytics",
  "Manipulate",
  "Shoot",
  "Button",
package/dist/index.d.cts CHANGED
@@ -1,5 +1,5 @@
- import { F as FilterConfig, S as SeverityLevel, C as CheckProfanityResult } from './types-CdDqSZY7.cjs';
- export { c as ContextAwareConfig, a as Filter, b as FilteredProfanityResult, H as HybridAnalysisResult, L as Language, e as MLAnalysisResult, f as MLDetectorConfig, M as Match, a as ProfanityFilter, T as ToxicityLabel, d as ToxicityPrediction } from './types-CdDqSZY7.cjs';
+ import { F as FilterConfig, S as SeverityLevel, C as CheckProfanityResult } from './types-Dj5vaoch.cjs';
+ export { c as ContextAwareConfig, a as Filter, b as FilteredProfanityResult, H as HybridAnalysisResult, L as Language, e as MLAnalysisResult, f as MLDetectorConfig, M as Match, a as ProfanityFilter, T as ToxicityLabel, d as ToxicityPrediction } from './types-Dj5vaoch.cjs';

  /**
  * Configuration options for the profanity checker hook and functions.
package/dist/index.d.ts CHANGED
@@ -1,5 +1,5 @@
- import { F as FilterConfig, S as SeverityLevel, C as CheckProfanityResult } from './types-CdDqSZY7.js';
- export { c as ContextAwareConfig, a as Filter, b as FilteredProfanityResult, H as HybridAnalysisResult, L as Language, e as MLAnalysisResult, f as MLDetectorConfig, M as Match, a as ProfanityFilter, T as ToxicityLabel, d as ToxicityPrediction } from './types-CdDqSZY7.js';
+ import { F as FilterConfig, S as SeverityLevel, C as CheckProfanityResult } from './types-Dj5vaoch.js';
+ export { c as ContextAwareConfig, a as Filter, b as FilteredProfanityResult, H as HybridAnalysisResult, L as Language, e as MLAnalysisResult, f as MLDetectorConfig, M as Match, a as ProfanityFilter, T as ToxicityLabel, d as ToxicityPrediction } from './types-Dj5vaoch.js';

  /**
  * Configuration options for the profanity checker hook and functions.