glin-profanity 3.1.5 → 3.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/README.md +84 -566
  2. package/dist/{types-CdDqSZY7.d.cts → Filter-BGcyIAvO.d.ts} +4 -162
  3. package/dist/{types-CdDqSZY7.d.ts → Filter-D34Wsmrj.d.cts} +4 -162
  4. package/dist/frameworks/index.cjs +5257 -0
  5. package/dist/frameworks/index.d.cts +2 -0
  6. package/dist/frameworks/index.d.ts +2 -0
  7. package/dist/frameworks/index.js +5252 -0
  8. package/dist/frameworks/nextjs.cjs +5257 -0
  9. package/dist/frameworks/nextjs.d.cts +173 -0
  10. package/dist/frameworks/nextjs.d.ts +173 -0
  11. package/dist/frameworks/nextjs.js +5252 -0
  12. package/dist/index.cjs +151 -85
  13. package/dist/index.d.cts +5 -29
  14. package/dist/index.d.ts +5 -29
  15. package/dist/index.js +152 -85
  16. package/dist/integrations/index.cjs +6110 -0
  17. package/dist/integrations/index.d.cts +5 -0
  18. package/dist/integrations/index.d.ts +5 -0
  19. package/dist/integrations/index.js +6082 -0
  20. package/dist/integrations/langchain.cjs +5252 -0
  21. package/dist/integrations/langchain.d.cts +231 -0
  22. package/dist/integrations/langchain.d.ts +231 -0
  23. package/dist/integrations/langchain.js +5239 -0
  24. package/dist/integrations/openai.cjs +5367 -0
  25. package/dist/integrations/openai.d.cts +167 -0
  26. package/dist/integrations/openai.d.ts +167 -0
  27. package/dist/integrations/openai.js +5362 -0
  28. package/dist/integrations/semantic.cjs +5314 -0
  29. package/dist/integrations/semantic.d.cts +268 -0
  30. package/dist/integrations/semantic.d.ts +268 -0
  31. package/dist/integrations/semantic.js +5309 -0
  32. package/dist/integrations/vercel-ai.cjs +5282 -0
  33. package/dist/integrations/vercel-ai.d.cts +224 -0
  34. package/dist/integrations/vercel-ai.d.ts +224 -0
  35. package/dist/integrations/vercel-ai.js +5273 -0
  36. package/dist/ml/index.cjs +358 -56
  37. package/dist/ml/index.d.cts +5 -2
  38. package/dist/ml/index.d.ts +5 -2
  39. package/dist/ml/index.js +354 -57
  40. package/dist/ml/transformers.cjs +5237 -0
  41. package/dist/ml/transformers.d.cts +232 -0
  42. package/dist/ml/transformers.d.ts +232 -0
  43. package/dist/ml/transformers.js +5231 -0
  44. package/dist/multimodal/audio.cjs +5269 -0
  45. package/dist/multimodal/audio.d.cts +255 -0
  46. package/dist/multimodal/audio.d.ts +255 -0
  47. package/dist/multimodal/audio.js +5264 -0
  48. package/dist/multimodal/index.cjs +5432 -0
  49. package/dist/multimodal/index.d.cts +4 -0
  50. package/dist/multimodal/index.d.ts +4 -0
  51. package/dist/multimodal/index.js +5422 -0
  52. package/dist/multimodal/ocr.cjs +5193 -0
  53. package/dist/multimodal/ocr.d.cts +157 -0
  54. package/dist/multimodal/ocr.d.ts +157 -0
  55. package/dist/multimodal/ocr.js +5187 -0
  56. package/dist/react.cjs +5133 -0
  57. package/dist/react.d.cts +13 -0
  58. package/dist/react.d.ts +13 -0
  59. package/dist/react.js +5131 -0
  60. package/dist/types-B9c_ik4k.d.cts +88 -0
  61. package/dist/types-B9c_ik4k.d.ts +88 -0
  62. package/dist/types-BuKh9tvV.d.ts +20 -0
  63. package/dist/types-Ct_ueYqw.d.cts +76 -0
  64. package/dist/types-Ct_ueYqw.d.ts +76 -0
  65. package/dist/types-DI8nzwWc.d.cts +20 -0
  66. package/package.json +170 -3
package/dist/ml/index.cjs CHANGED
@@ -812,7 +812,7 @@ var danish_default = {
812
812
  // ../../shared/dictionaries/dutch.json
813
813
  var dutch_default = {
814
814
  words: [
815
- "aardappels afgieteng",
815
+ "aardappels afgieten",
816
816
  "achter het raam zitten",
817
817
  "afberen",
818
818
  "aflebberen",
@@ -829,7 +829,7 @@ var dutch_default = {
829
829
  "bagger schijten",
830
830
  "balen",
831
831
  "bedonderen",
832
- "befborstelg",
832
+ "befborstel",
833
833
  "beffen",
834
834
  "bekken",
835
835
  "belazeren",
@@ -838,11 +838,11 @@ var dutch_default = {
838
838
  "beurt",
839
839
  "boemelen",
840
840
  "boerelul",
841
- "boerenpummelg",
841
+ "boerenpummel",
842
842
  "bokkelul",
843
843
  "botergeil",
844
844
  "broekhoesten",
845
- "brugpieperg",
845
+ "brugpieper",
846
846
  "buffelen",
847
847
  "buiten de pot piesen",
848
848
  "da's kloten van de bok",
@@ -850,13 +850,13 @@ var dutch_default = {
850
850
  "de hoer spelen",
851
851
  "de hond uitlaten",
852
852
  "de koffer induiken",
853
- "delg",
853
+ "del",
854
854
  "de pijp aan maarten geven",
855
855
  "de pijp uitgaan",
856
856
  "dombo",
857
- "draaikontg",
857
+ "draaikont",
858
858
  "driehoog achter wonen",
859
- "drolg",
859
+ "drol",
860
860
  "drooggeiler",
861
861
  "droogkloot",
862
862
  "een beurt geven",
@@ -876,7 +876,7 @@ var dutch_default = {
876
876
  "godverdomme",
877
877
  "graftak",
878
878
  "gras maaien",
879
- "gratenkutg",
879
+ "gratenkut",
880
880
  "greppeldel",
881
881
  "griet",
882
882
  "hoempert",
@@ -889,7 +889,7 @@ var dutch_default = {
889
889
  "huisdealer",
890
890
  "johny",
891
891
  "kanen",
892
- "kettingzeugg",
892
+ "kettingzeug",
893
893
  "klaarkomen",
894
894
  "klerebeer",
895
895
  "klojo",
@@ -899,22 +899,22 @@ var dutch_default = {
899
899
  "klootzak",
900
900
  "kloten",
901
901
  "knor",
902
- "kontg",
902
+ "kont",
903
903
  "kontneuken",
904
904
  "krentekakker",
905
905
  "kut",
906
906
  "kuttelikkertje",
907
- "kwakkieg",
907
+ "kwakkie",
908
908
  "liefdesgrot",
909
909
  "lul",
910
910
  "lul-de-behanger",
911
911
  "lulhannes",
912
912
  "lummel",
913
913
  "mafketel",
914
- "matennaaierg",
914
+ "matennaaier",
915
915
  "matje",
916
916
  "mof",
917
- "mutsg",
917
+ "muts",
918
918
  "naaien",
919
919
  "naakt",
920
920
  "neuken",
@@ -934,9 +934,9 @@ var dutch_default = {
934
934
  "paal",
935
935
  "paardelul",
936
936
  "palen",
937
- "penozeg",
937
+ "penoze",
938
938
  "piesen",
939
- "pijpbekkieg",
939
+ "pijpbekkie",
940
940
  "pijpen",
941
941
  "pik",
942
942
  "pleurislaaier",
@@ -951,7 +951,7 @@ var dutch_default = {
951
951
  "reet",
952
952
  "reetridder",
953
953
  "reet trappen, voor zijn",
954
- "remsporeng",
954
+ "remsporen",
955
955
  "reutelen",
956
956
  "rothoer",
957
957
  "rotzak",
@@ -964,25 +964,25 @@ var dutch_default = {
964
964
  "schuinsmarcheerder",
965
965
  "shit",
966
966
  "slempen",
967
- "sletg",
967
+ "slet",
968
968
  "sletterig",
969
969
  "slik mijn zaad",
970
- "snolg",
970
+ "snol",
971
971
  "spuiten",
972
972
  "standje",
973
- "standje-69g",
973
+ "standje-69",
974
974
  "stoephoer",
975
975
  "stootje",
976
- "strontg",
977
- "sufferdg",
976
+ "stront",
977
+ "sufferd",
978
978
  "tapijtnek",
979
- "teefg",
979
+ "teef",
980
980
  "temeier",
981
981
  "teringlijer",
982
982
  "toeter",
983
- "tongzoeng",
984
- "triootjeg",
985
- "trottoir prostitu\xE9e",
983
+ "tongzoen",
984
+ "triootje",
985
+ "trottoir prostituee",
986
986
  "trottoirteef",
987
987
  "vergallen",
988
988
  "verkloten",
@@ -1055,6 +1055,8 @@ var english_default = {
1055
1055
  "2 girls 1 cup",
1056
1056
  "2g1c",
1057
1057
  "a$$",
1058
+ "@ss",
1059
+ "4ss",
1058
1060
  "acrotomophilia",
1059
1061
  "alabama hot pocket",
1060
1062
  "alaskan pipeline",
@@ -1194,6 +1196,10 @@ var english_default = {
1194
1196
  "eunuch",
1195
1197
  "f*ck",
1196
1198
  "f@ck",
1199
+ "f4ck",
1200
+ "fvck",
1201
+ "phuck",
1202
+ "fuk",
1197
1203
  "faggot",
1198
1204
  "fecal",
1199
1205
  "felch",
@@ -1375,6 +1381,9 @@ var english_default = {
1375
1381
  "shemale",
1376
1382
  "shibari",
1377
1383
  "shit",
1384
+ "sh1t",
1385
+ "$hit",
1386
+ "$h!t",
1378
1387
  "shitblimp",
1379
1388
  "shithead",
1380
1389
  "shitshow",
@@ -2478,7 +2487,7 @@ var italian_default = {
2478
2487
  "di merda",
2479
2488
  "ditalino",
2480
2489
  "duro",
2481
- "fare una\u0160",
2490
+ "fare una sega",
2482
2491
  "fava",
2483
2492
  "femminuccia",
2484
2493
  "fica",
@@ -2726,7 +2735,6 @@ var japanese_default = {
2726
2735
  "\u7389\u8210\u3081",
2727
2736
  "\u7DCA\u7E1B",
2728
2737
  "\u8FD1\u89AA\u76F8\u59E6",
2729
- "\u5ACC\u3044",
2730
2738
  "\u5F8C\u80CC\u4F4D",
2731
2739
  "\u5408\u610F\u306E\u6027\u4EA4",
2732
2740
  "\u62F7\u554F",
@@ -2739,7 +2747,6 @@ var japanese_default = {
2739
2747
  "\u5C04\u7CBE",
2740
2748
  "\u624B\u30B3\u30AD",
2741
2749
  "\u7363\u59E6",
2742
- "\u5973\u306E\u5B50",
2743
2750
  "\u5973\u738B\u69D8",
2744
2751
  "\u5973\u5B50\u9AD8\u751F",
2745
2752
  "\u5973\u88C5",
@@ -2816,7 +2823,6 @@ var turkish_default = {
2816
2823
  "ak",
2817
2824
  "akp",
2818
2825
  "al a\u011Fz\u0131na",
2819
- "allah",
2820
2826
  "allahs\u0131z",
2821
2827
  "am",
2822
2828
  "am biti",
@@ -2911,7 +2917,6 @@ var turkish_default = {
2911
2917
  "am\u0131n\u0131 s",
2912
2918
  "am\u0131s\u0131na",
2913
2919
  "am\u0131s\u0131n\u0131",
2914
- "ana",
2915
2920
  "anaaann",
2916
2921
  "anal",
2917
2922
  "analarn",
@@ -3043,8 +3048,6 @@ var turkish_default = {
3043
3048
  "cikar",
3044
3049
  "cim",
3045
3050
  "cm",
3046
- "coca cola",
3047
- "cola",
3048
3051
  "dalaks\u0131z",
3049
3052
  "dallama",
3050
3053
  "daltassak",
@@ -3842,7 +3845,7 @@ var turkish_default = {
3842
3845
  // ../../shared/dictionaries/spanish.json
3843
3846
  var spanish_default = {
3844
3847
  words: [
3845
- "sesinato",
3848
+ "asesinato",
3846
3849
  "asno",
3847
3850
  "bastardo",
3848
3851
  "Bollera",
@@ -4142,6 +4145,34 @@ var GAMING_POSITIVE = /* @__PURE__ */ new Set([
4142
4145
  "move",
4143
4146
  "combo"
4144
4147
  ]);
4148
+ var GAMING_ACCEPTABLE_WORDS = /* @__PURE__ */ new Set([
4149
+ "kill",
4150
+ "killer",
4151
+ "killed",
4152
+ "killing",
4153
+ "shoot",
4154
+ "shot",
4155
+ "shooting",
4156
+ "die",
4157
+ "dying",
4158
+ "died",
4159
+ "dead",
4160
+ "death",
4161
+ "badass",
4162
+ "sick",
4163
+ "insane",
4164
+ "crazy",
4165
+ "mad",
4166
+ "beast",
4167
+ "savage",
4168
+ "suck",
4169
+ "sucks",
4170
+ "wtf",
4171
+ "omg",
4172
+ "hell",
4173
+ "damn",
4174
+ "crap"
4175
+ ]);
4145
4176
  var POSITIVE_PHRASES = /* @__PURE__ */ new Map([
4146
4177
  ["the bomb", 0.9],
4147
4178
  // "this movie is the bomb"
@@ -4174,7 +4205,9 @@ var ContextAnalyzer = class {
4174
4205
  constructor(config) {
4175
4206
  this.contextWindow = config.contextWindow;
4176
4207
  this.language = config.language;
4177
- this.domainWhitelists = new Set(config.domainWhitelists || []);
4208
+ this.domainWhitelists = new Set(
4209
+ (config.domainWhitelists || []).map((word) => word.toLowerCase())
4210
+ );
4178
4211
  }
4179
4212
  /**
4180
4213
  * Analyzes the context around a profanity match to determine if it should be flagged
@@ -4211,10 +4244,9 @@ var ContextAnalyzer = class {
4211
4244
  isWhitelisted: false
4212
4245
  };
4213
4246
  }
4214
- // eslint-disable-next-line @typescript-eslint/no-unused-vars
4215
4247
  checkPhraseContext(contextText, matchWord) {
4216
4248
  for (const [phrase, score] of POSITIVE_PHRASES.entries()) {
4217
- if (contextText.includes(phrase)) {
4249
+ if (phrase.includes(matchWord) && contextText.includes(phrase)) {
4218
4250
  return {
4219
4251
  contextScore: score,
4220
4252
  reason: `Positive phrase detected: "${phrase}"`,
@@ -4233,21 +4265,29 @@ var ContextAnalyzer = class {
4233
4265
  }
4234
4266
  return null;
4235
4267
  }
4236
- // eslint-disable-next-line @typescript-eslint/no-unused-vars
4237
4268
  isDomainWhitelisted(contextWords, matchWord) {
4269
+ const normalizedMatchWord = matchWord.toLowerCase();
4238
4270
  for (const word of contextWords) {
4239
- if (this.domainWhitelists.has(word) || GAMING_POSITIVE.has(word)) {
4271
+ if (this.domainWhitelists.has(word)) {
4240
4272
  return true;
4241
4273
  }
4274
+ if (GAMING_POSITIVE.has(word)) {
4275
+ if (GAMING_ACCEPTABLE_WORDS.has(normalizedMatchWord)) {
4276
+ return true;
4277
+ }
4278
+ }
4242
4279
  }
4243
4280
  return false;
4244
4281
  }
4245
- // eslint-disable-next-line @typescript-eslint/no-unused-vars
4246
4282
  generateReason(score, contextWords) {
4283
+ const foundPositive = Array.from(new Set(contextWords.filter((word) => POSITIVE_INDICATORS.has(word))));
4284
+ const foundNegative = Array.from(new Set(contextWords.filter((word) => NEGATIVE_INDICATORS.has(word))));
4247
4285
  if (score >= 0.7) {
4248
- return "Positive context detected - likely not profanity";
4286
+ const details = foundPositive.length > 0 ? ` (found: ${foundPositive.join(", ")})` : "";
4287
+ return `Positive context detected${details} - likely not profanity`;
4249
4288
  } else if (score <= 0.3) {
4250
- return "Negative context detected - likely profanity";
4289
+ const details = foundNegative.length > 0 ? ` (found: ${foundNegative.join(", ")})` : "";
4290
+ return `Negative context detected${details} - likely profanity`;
4251
4291
  } else {
4252
4292
  return "Neutral context - uncertain classification";
4253
4293
  }
@@ -4305,7 +4345,7 @@ var ContextAnalyzer = class {
4305
4345
  * Updates the domain whitelist for this analyzer instance
4306
4346
  */
4307
4347
  updateDomainWhitelist(newWhitelist) {
4308
- this.domainWhitelists = new Set(newWhitelist);
4348
+ this.domainWhitelists = new Set(newWhitelist.map((word) => word.toLowerCase()));
4309
4349
  }
4310
4350
  /**
4311
4351
  * Adds words to the domain whitelist
@@ -4464,6 +4504,10 @@ var HOMOGLYPHS = {
4464
4504
  // Cyrillic small e
4465
4505
  "\u0415": "E",
4466
4506
  // Cyrillic capital E
4507
+ "\u043A": "k",
4508
+ // Cyrillic small ka
4509
+ "\u041A": "K",
4510
+ // Cyrillic capital Ka
4467
4511
  "\u043E": "o",
4468
4512
  // Cyrillic small o
4469
4513
  "\u041E": "O",
@@ -4476,9 +4520,9 @@ var HOMOGLYPHS = {
4476
4520
  // Cyrillic small es
4477
4521
  "\u0421": "C",
4478
4522
  // Cyrillic capital Es
4479
- "\u0443": "y",
4480
- // Cyrillic small u
4481
- "\u0423": "Y",
4523
+ "\u0443": "u",
4524
+ // Cyrillic small u (map to u, not y)
4525
+ "\u0423": "U",
4482
4526
  // Cyrillic capital U
4483
4527
  "\u0445": "x",
4484
4528
  // Cyrillic small ha
@@ -4496,6 +4540,11 @@ var HOMOGLYPHS = {
4496
4540
  // Cyrillic small dze
4497
4541
  "\u0405": "S",
4498
4542
  // Cyrillic capital Dze
4543
+ // Currency and special symbols that look like letters
4544
+ "\xA2": "c",
4545
+ // Cent sign
4546
+ "\u0192": "f",
4547
+ // Latin small f with hook (florin)
4499
4548
  // Greek homoglyphs
4500
4549
  "\u03B1": "a",
4501
4550
  // Greek small alpha
@@ -4842,6 +4891,7 @@ var Filter = class {
4842
4891
  this.cacheResults = config?.cacheResults ?? false;
4843
4892
  this.maxCacheSize = config?.maxCacheSize ?? 1e3;
4844
4893
  this.cache = /* @__PURE__ */ new Map();
4894
+ this.regexCache = /* @__PURE__ */ new Map();
4845
4895
  let words = [];
4846
4896
  if (config?.allLanguages) {
4847
4897
  for (const lang in dictionary_default) {
@@ -4871,9 +4921,10 @@ var Filter = class {
4871
4921
  * Applies Unicode normalization, leetspeak detection, and obfuscation handling.
4872
4922
  *
4873
4923
  * @param text - The input text to normalize
4924
+ * @param aggressive - If true, collapses to single chars (for repeated char detection)
4874
4925
  * @returns The normalized text
4875
4926
  */
4876
- normalizeText(text) {
4927
+ normalizeText(text, aggressive = false) {
4877
4928
  let normalized = text;
4878
4929
  if (this.normalizeUnicodeEnabled) {
4879
4930
  normalized = normalizeUnicode(normalized);
@@ -4882,6 +4933,8 @@ var Filter = class {
4882
4933
  normalized = normalizeLeetspeak(normalized, {
4883
4934
  level: this.leetspeakLevel,
4884
4935
  collapseRepeated: true,
4936
+ // Keep double letters like "ss" for normal check, collapse all for aggressive
4937
+ maxRepeated: aggressive ? 1 : 2,
4885
4938
  removeSpacedChars: true
4886
4939
  });
4887
4940
  }
@@ -4912,6 +4965,7 @@ var Filter = class {
4912
4965
  */
4913
4966
  clearCache() {
4914
4967
  this.cache.clear();
4968
+ this.regexCache.clear();
4915
4969
  }
4916
4970
  /**
4917
4971
  * Gets the current cache size.
@@ -4993,10 +5047,17 @@ var Filter = class {
4993
5047
  return this.cache.get(key);
4994
5048
  }
4995
5049
  getRegex(word) {
5050
+ if (this.regexCache.has(word)) {
5051
+ const regex2 = this.regexCache.get(word);
5052
+ regex2.lastIndex = 0;
5053
+ return regex2;
5054
+ }
4996
5055
  const flags = this.caseSensitive ? "g" : "gi";
4997
5056
  const escapedWord = word.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
4998
5057
  const boundary = this.wordBoundaries ? "\\b" : "";
4999
- return new RegExp(`${boundary}${escapedWord}${boundary}`, flags);
5058
+ const regex = new RegExp(`${boundary}${escapedWord}${boundary}`, flags);
5059
+ this.regexCache.set(word, regex);
5060
+ return regex;
5000
5061
  }
5001
5062
  isFuzzyToleranceMatch(word, text) {
5002
5063
  const simplifiedText = text.toLowerCase().replace(/[^a-z]/g, "");
@@ -5014,11 +5075,12 @@ var Filter = class {
5014
5075
  return score >= this.fuzzyToleranceLevel;
5015
5076
  }
5016
5077
  evaluateSeverity(word, text) {
5017
- if (this.wordBoundaries && this.getRegex(word).test(text)) {
5078
+ if (this.getRegex(word).test(text)) {
5018
5079
  return 1 /* EXACT */;
5019
5080
  }
5020
- if (this.getRegex(word).test(text)) return 1 /* EXACT */;
5021
- if (this.isFuzzyToleranceMatch(word, text)) return 2 /* FUZZY */;
5081
+ if (!this.wordBoundaries && this.isFuzzyToleranceMatch(word, text)) {
5082
+ return 2 /* FUZZY */;
5083
+ }
5022
5084
  return void 0;
5023
5085
  }
5024
5086
  /**
@@ -5038,9 +5100,20 @@ var Filter = class {
5038
5100
  * ```
5039
5101
  */
5040
5102
  isProfane(value) {
5041
- const input = this.normalizeText(value);
5103
+ const originalInput = value;
5104
+ const normalizedInput = this.normalizeText(value);
5105
+ const aggressiveInput = this.normalizeText(value, true);
5042
5106
  for (const word of this.words.keys()) {
5043
- if (!this.ignoreWords.has(word.toLowerCase()) && this.evaluateSeverity(word, input) !== void 0) {
5107
+ if (this.ignoreWords.has(word.toLowerCase())) {
5108
+ continue;
5109
+ }
5110
+ if (this.evaluateSeverity(word, originalInput) !== void 0) {
5111
+ return true;
5112
+ }
5113
+ if (this.evaluateSeverity(word, normalizedInput) !== void 0) {
5114
+ return true;
5115
+ }
5116
+ if (this.evaluateSeverity(word, aggressiveInput) !== void 0) {
5044
5117
  return true;
5045
5118
  }
5046
5119
  }
@@ -5081,23 +5154,45 @@ var Filter = class {
5081
5154
  return cachedResult;
5082
5155
  }
5083
5156
  if (!this.enableContextAware) {
5084
- let input2 = this.normalizeText(text);
5085
- input2 = input2.toLowerCase();
5157
+ const originalInput = text.toLowerCase();
5158
+ const normalizedInput = this.normalizeText(text).toLowerCase();
5159
+ const aggressiveInput = this.normalizeText(text, true).toLowerCase();
5086
5160
  const profaneWords2 = [];
5087
5161
  const severityMap2 = {};
5088
5162
  for (const dictWord of this.words.keys()) {
5089
5163
  if (this.ignoreWords.has(dictWord.toLowerCase())) continue;
5090
- const severity = this.evaluateSeverity(dictWord, input2);
5164
+ let severity = this.evaluateSeverity(dictWord, originalInput);
5091
5165
  if (severity !== void 0) {
5092
5166
  const regex = this.getRegex(dictWord);
5093
5167
  let match;
5094
- while ((match = regex.exec(input2)) !== null) {
5168
+ while ((match = regex.exec(originalInput)) !== null) {
5095
5169
  profaneWords2.push(match[0]);
5096
5170
  if (severityMap2[match[0]] === void 0) {
5097
5171
  severityMap2[match[0]] = severity;
5098
5172
  }
5099
5173
  }
5100
5174
  }
5175
+ severity = this.evaluateSeverity(dictWord, normalizedInput);
5176
+ if (severity !== void 0) {
5177
+ const regex = this.getRegex(dictWord);
5178
+ while ((regex.exec(normalizedInput)) !== null) {
5179
+ if (!profaneWords2.includes(dictWord)) {
5180
+ profaneWords2.push(dictWord);
5181
+ if (severityMap2[dictWord] === void 0) {
5182
+ severityMap2[dictWord] = severity;
5183
+ }
5184
+ }
5185
+ }
5186
+ }
5187
+ severity = this.evaluateSeverity(dictWord, aggressiveInput);
5188
+ if (severity !== void 0) {
5189
+ if (!profaneWords2.includes(dictWord)) {
5190
+ profaneWords2.push(dictWord);
5191
+ if (severityMap2[dictWord] === void 0) {
5192
+ severityMap2[dictWord] = severity;
5193
+ }
5194
+ }
5195
+ }
5101
5196
  }
5102
5197
  let processedText2 = text;
5103
5198
  if (this.replaceWith && profaneWords2.length > 0) {
@@ -5483,5 +5578,212 @@ var HybridFilter = class {
5483
5578
  }
5484
5579
  };
5485
5580
 
5581
+ // src/ml/transformers.ts
5582
+ var RECOMMENDED_MODELS = {
5583
+ /** High accuracy English model (97.5%) - 67M params */
5584
+ pardonmyai: "tarekziade/pardonmyai",
5585
+ /** Smaller version for constrained environments */
5586
+ pardonmyaiTiny: "tarekziade/pardonmyai-tiny",
5587
+ /** Multilingual toxicity detection (7 languages) */
5588
+ toxicBert: "unitary/toxic-bert",
5589
+ /** Offensive speech detector (DeBERTa-based) */
5590
+ offensiveSpeech: "KoalaAI/OffensiveSpeechDetector"
5591
+ };
5592
+ var MODEL_PROFANE_LABELS = {
5593
+ "tarekziade/pardonmyai": "profane",
5594
+ "tarekziade/pardonmyai-tiny": "profane",
5595
+ "unitary/toxic-bert": "toxic",
5596
+ "KoalaAI/OffensiveSpeechDetector": "LABEL_1",
5597
+ // Offensive
5598
+ default: "LABEL_1"
5599
+ };
5600
+ async function getTransformers() {
5601
+ try {
5602
+ const transformers = await import('@xenova/transformers');
5603
+ return transformers;
5604
+ } catch {
5605
+ throw new Error(
5606
+ "Transformers.js is required for ML features. Install it with: npm install @xenova/transformers"
5607
+ );
5608
+ }
5609
+ }
5610
+ async function createMLChecker(config = {}) {
5611
+ const {
5612
+ model = RECOMMENDED_MODELS.pardonmyai,
5613
+ threshold = 0.5,
5614
+ profaneLabel = MODEL_PROFANE_LABELS[model] || MODEL_PROFANE_LABELS.default,
5615
+ quantized = true,
5616
+ device = "cpu"
5617
+ } = config;
5618
+ const transformers = await getTransformers();
5619
+ const classifier = await transformers.pipeline("text-classification", model, {
5620
+ quantized,
5621
+ device
5622
+ });
5623
+ return {
5624
+ /**
5625
+ * Check a single text for profanity
5626
+ */
5627
+ async check(text) {
5628
+ const startTime = Date.now();
5629
+ const output = await classifier(text);
5630
+ const processingTimeMs = Date.now() - startTime;
5631
+ const profaneScore = output.find((o) => o.label === profaneLabel)?.score || 0;
5632
+ const containsProfanity = profaneScore >= threshold;
5633
+ return {
5634
+ containsProfanity,
5635
+ confidence: profaneScore,
5636
+ rawOutput: output,
5637
+ processingTimeMs
5638
+ };
5639
+ },
5640
+ /**
5641
+ * Check multiple texts
5642
+ */
5643
+ async checkBatch(texts) {
5644
+ return Promise.all(texts.map((text) => this.check(text)));
5645
+ },
5646
+ /**
5647
+ * Get the profanity score for text (0-1)
5648
+ */
5649
+ async getScore(text) {
5650
+ const result = await this.check(text);
5651
+ return result.confidence;
5652
+ },
5653
+ /**
5654
+ * Get current configuration
5655
+ */
5656
+ getConfig() {
5657
+ return { model, threshold, profaneLabel, quantized, device };
5658
+ },
5659
+ /**
5660
+ * Dispose of the model (free memory)
5661
+ */
5662
+ dispose() {
5663
+ }
5664
+ };
5665
+ }
5666
+ async function createHybridChecker(config = {}) {
5667
+ const {
5668
+ model = RECOMMENDED_MODELS.pardonmyai,
5669
+ threshold = 0.5,
5670
+ profaneLabel,
5671
+ quantized = true,
5672
+ device = "cpu",
5673
+ filterConfig = {},
5674
+ mlThreshold = 0.3,
5675
+ dictionaryWeight = 0.6,
5676
+ mlWeight = 0.4
5677
+ } = config;
5678
+ const filter = new Filter({
5679
+ languages: filterConfig.languages || ["english"],
5680
+ detectLeetspeak: filterConfig.detectLeetspeak ?? true,
5681
+ normalizeUnicode: filterConfig.normalizeUnicode ?? true,
5682
+ severityLevels: true,
5683
+ cacheResults: true,
5684
+ ...filterConfig
5685
+ });
5686
+ let mlChecker = null;
5687
+ async function getMLChecker() {
5688
+ if (!mlChecker) {
5689
+ mlChecker = await createMLChecker({
5690
+ model,
5691
+ threshold,
5692
+ profaneLabel,
5693
+ quantized,
5694
+ device
5695
+ });
5696
+ }
5697
+ return mlChecker;
5698
+ }
5699
+ return {
5700
+ /**
5701
+ * Check text using hybrid approach
5702
+ */
5703
+ async check(text) {
5704
+ const startTime = Date.now();
5705
+ const dictionaryResult = filter.checkProfanity(text);
5706
+ if (dictionaryResult.containsProfanity) {
5707
+ return {
5708
+ containsProfanity: true,
5709
+ confidence: 1,
5710
+ dictionaryResult,
5711
+ usedML: false,
5712
+ profaneWords: dictionaryResult.profaneWords,
5713
+ processingTimeMs: Date.now() - startTime
5714
+ };
5715
+ }
5716
+ const ml = await getMLChecker();
5717
+ const mlResult = await ml.check(text);
5718
+ const dictionaryScore = dictionaryResult.containsProfanity ? 1 : 0;
5719
+ const combinedScore = dictionaryScore * dictionaryWeight + mlResult.confidence * mlWeight;
5720
+ const containsProfanity = combinedScore >= mlThreshold;
5721
+ return {
5722
+ containsProfanity,
5723
+ confidence: combinedScore,
5724
+ dictionaryResult,
5725
+ mlResult,
5726
+ usedML: true,
5727
+ profaneWords: dictionaryResult.profaneWords,
5728
+ processingTimeMs: Date.now() - startTime
5729
+ };
5730
+ },
5731
+ /**
5732
+ * Check multiple texts
5733
+ */
5734
+ async checkBatch(texts) {
5735
+ return Promise.all(texts.map((text) => this.check(text)));
5736
+ },
5737
+ /**
5738
+ * Dictionary-only check (fast, no ML)
5739
+ */
5740
+ checkFast(text) {
5741
+ return filter.checkProfanity(text);
5742
+ },
5743
+ /**
5744
+ * ML-only check (slower, more accurate)
5745
+ */
5746
+ async checkML(text) {
5747
+ const ml = await getMLChecker();
5748
+ return ml.check(text);
5749
+ },
5750
+ /**
5751
+ * Get the underlying filter
5752
+ */
5753
+ getFilter() {
5754
+ return filter;
5755
+ },
5756
+ /**
5757
+ * Dispose of resources
5758
+ */
5759
+ async dispose() {
5760
+ if (mlChecker) {
5761
+ mlChecker.dispose();
5762
+ mlChecker = null;
5763
+ }
5764
+ }
5765
+ };
5766
+ }
5767
+ async function isTransformersAvailable() {
5768
+ try {
5769
+ await getTransformers();
5770
+ return true;
5771
+ } catch {
5772
+ return false;
5773
+ }
5774
+ }
5775
+ async function preloadModel(model = RECOMMENDED_MODELS.pardonmyai, options = {}) {
5776
+ const { quantized = true } = options;
5777
+ const transformers = await getTransformers();
5778
+ await transformers.pipeline("text-classification", model, {
5779
+ quantized
5780
+ });
5781
+ }
5782
+
5486
5783
  exports.HybridFilter = HybridFilter;
5784
+ exports.RECOMMENDED_MODELS = RECOMMENDED_MODELS;
5487
5785
  exports.ToxicityDetector = ToxicityDetector;
5786
+ exports.createHybridChecker = createHybridChecker;
5787
+ exports.createMLChecker = createMLChecker;
5788
+ exports.isTransformersAvailable = isTransformersAvailable;
5789
+ exports.preloadModel = preloadModel;