glin-profanity 3.1.5 → 3.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/README.md +84 -566
  2. package/dist/{types-CdDqSZY7.d.cts → Filter-BGcyIAvO.d.ts} +4 -162
  3. package/dist/{types-CdDqSZY7.d.ts → Filter-D34Wsmrj.d.cts} +4 -162
  4. package/dist/frameworks/index.cjs +5257 -0
  5. package/dist/frameworks/index.d.cts +2 -0
  6. package/dist/frameworks/index.d.ts +2 -0
  7. package/dist/frameworks/index.js +5252 -0
  8. package/dist/frameworks/nextjs.cjs +5257 -0
  9. package/dist/frameworks/nextjs.d.cts +173 -0
  10. package/dist/frameworks/nextjs.d.ts +173 -0
  11. package/dist/frameworks/nextjs.js +5252 -0
  12. package/dist/index.cjs +151 -85
  13. package/dist/index.d.cts +5 -29
  14. package/dist/index.d.ts +5 -29
  15. package/dist/index.js +152 -85
  16. package/dist/integrations/index.cjs +6110 -0
  17. package/dist/integrations/index.d.cts +5 -0
  18. package/dist/integrations/index.d.ts +5 -0
  19. package/dist/integrations/index.js +6082 -0
  20. package/dist/integrations/langchain.cjs +5252 -0
  21. package/dist/integrations/langchain.d.cts +231 -0
  22. package/dist/integrations/langchain.d.ts +231 -0
  23. package/dist/integrations/langchain.js +5239 -0
  24. package/dist/integrations/openai.cjs +5367 -0
  25. package/dist/integrations/openai.d.cts +167 -0
  26. package/dist/integrations/openai.d.ts +167 -0
  27. package/dist/integrations/openai.js +5362 -0
  28. package/dist/integrations/semantic.cjs +5314 -0
  29. package/dist/integrations/semantic.d.cts +268 -0
  30. package/dist/integrations/semantic.d.ts +268 -0
  31. package/dist/integrations/semantic.js +5309 -0
  32. package/dist/integrations/vercel-ai.cjs +5282 -0
  33. package/dist/integrations/vercel-ai.d.cts +224 -0
  34. package/dist/integrations/vercel-ai.d.ts +224 -0
  35. package/dist/integrations/vercel-ai.js +5273 -0
  36. package/dist/ml/index.cjs +358 -56
  37. package/dist/ml/index.d.cts +5 -2
  38. package/dist/ml/index.d.ts +5 -2
  39. package/dist/ml/index.js +354 -57
  40. package/dist/ml/transformers.cjs +5237 -0
  41. package/dist/ml/transformers.d.cts +232 -0
  42. package/dist/ml/transformers.d.ts +232 -0
  43. package/dist/ml/transformers.js +5231 -0
  44. package/dist/multimodal/audio.cjs +5269 -0
  45. package/dist/multimodal/audio.d.cts +255 -0
  46. package/dist/multimodal/audio.d.ts +255 -0
  47. package/dist/multimodal/audio.js +5264 -0
  48. package/dist/multimodal/index.cjs +5432 -0
  49. package/dist/multimodal/index.d.cts +4 -0
  50. package/dist/multimodal/index.d.ts +4 -0
  51. package/dist/multimodal/index.js +5422 -0
  52. package/dist/multimodal/ocr.cjs +5193 -0
  53. package/dist/multimodal/ocr.d.cts +157 -0
  54. package/dist/multimodal/ocr.d.ts +157 -0
  55. package/dist/multimodal/ocr.js +5187 -0
  56. package/dist/react.cjs +5133 -0
  57. package/dist/react.d.cts +13 -0
  58. package/dist/react.d.ts +13 -0
  59. package/dist/react.js +5131 -0
  60. package/dist/types-B9c_ik4k.d.cts +88 -0
  61. package/dist/types-B9c_ik4k.d.ts +88 -0
  62. package/dist/types-BuKh9tvV.d.ts +20 -0
  63. package/dist/types-Ct_ueYqw.d.cts +76 -0
  64. package/dist/types-Ct_ueYqw.d.ts +76 -0
  65. package/dist/types-DI8nzwWc.d.cts +20 -0
  66. package/package.json +170 -3
package/dist/ml/index.js CHANGED
@@ -810,7 +810,7 @@ var danish_default = {
 // ../../shared/dictionaries/dutch.json
 var dutch_default = {
   words: [
-    "aardappels afgieteng",
+    "aardappels afgieten",
     "achter het raam zitten",
     "afberen",
     "aflebberen",
@@ -827,7 +827,7 @@ var dutch_default = {
     "bagger schijten",
     "balen",
     "bedonderen",
-    "befborstelg",
+    "befborstel",
     "beffen",
     "bekken",
     "belazeren",
@@ -836,11 +836,11 @@ var dutch_default = {
     "beurt",
     "boemelen",
     "boerelul",
-    "boerenpummelg",
+    "boerenpummel",
     "bokkelul",
     "botergeil",
     "broekhoesten",
-    "brugpieperg",
+    "brugpieper",
     "buffelen",
     "buiten de pot piesen",
     "da's kloten van de bok",
@@ -848,13 +848,13 @@ var dutch_default = {
     "de hoer spelen",
     "de hond uitlaten",
     "de koffer induiken",
-    "delg",
+    "del",
     "de pijp aan maarten geven",
     "de pijp uitgaan",
     "dombo",
-    "draaikontg",
+    "draaikont",
     "driehoog achter wonen",
-    "drolg",
+    "drol",
     "drooggeiler",
     "droogkloot",
     "een beurt geven",
@@ -874,7 +874,7 @@ var dutch_default = {
     "godverdomme",
     "graftak",
     "gras maaien",
-    "gratenkutg",
+    "gratenkut",
     "greppeldel",
     "griet",
     "hoempert",
@@ -887,7 +887,7 @@ var dutch_default = {
     "huisdealer",
     "johny",
     "kanen",
-    "kettingzeugg",
+    "kettingzeug",
     "klaarkomen",
     "klerebeer",
     "klojo",
@@ -897,22 +897,22 @@ var dutch_default = {
     "klootzak",
     "kloten",
     "knor",
-    "kontg",
+    "kont",
     "kontneuken",
     "krentekakker",
     "kut",
     "kuttelikkertje",
-    "kwakkieg",
+    "kwakkie",
     "liefdesgrot",
     "lul",
     "lul-de-behanger",
     "lulhannes",
     "lummel",
     "mafketel",
-    "matennaaierg",
+    "matennaaier",
     "matje",
     "mof",
-    "mutsg",
+    "muts",
     "naaien",
     "naakt",
     "neuken",
@@ -932,9 +932,9 @@ var dutch_default = {
     "paal",
     "paardelul",
     "palen",
-    "penozeg",
+    "penoze",
     "piesen",
-    "pijpbekkieg",
+    "pijpbekkie",
     "pijpen",
     "pik",
     "pleurislaaier",
@@ -949,7 +949,7 @@ var dutch_default = {
     "reet",
     "reetridder",
     "reet trappen, voor zijn",
-    "remsporeng",
+    "remsporen",
     "reutelen",
     "rothoer",
     "rotzak",
@@ -962,25 +962,25 @@ var dutch_default = {
     "schuinsmarcheerder",
     "shit",
     "slempen",
-    "sletg",
+    "slet",
     "sletterig",
     "slik mijn zaad",
-    "snolg",
+    "snol",
     "spuiten",
     "standje",
-    "standje-69g",
+    "standje-69",
     "stoephoer",
     "stootje",
-    "strontg",
-    "sufferdg",
+    "stront",
+    "sufferd",
     "tapijtnek",
-    "teefg",
+    "teef",
     "temeier",
     "teringlijer",
     "toeter",
-    "tongzoeng",
-    "triootjeg",
-    "trottoir prostitu\xE9e",
+    "tongzoen",
+    "triootje",
+    "trottoir prostituee",
     "trottoirteef",
     "vergallen",
     "verkloten",
@@ -1053,6 +1053,8 @@ var english_default = {
     "2 girls 1 cup",
     "2g1c",
     "a$$",
+    "@ss",
+    "4ss",
     "acrotomophilia",
     "alabama hot pocket",
     "alaskan pipeline",
@@ -1192,6 +1194,10 @@ var english_default = {
     "eunuch",
     "f*ck",
     "f@ck",
+    "f4ck",
+    "fvck",
+    "phuck",
+    "fuk",
     "faggot",
     "fecal",
     "felch",
@@ -1373,6 +1379,9 @@ var english_default = {
     "shemale",
     "shibari",
     "shit",
+    "sh1t",
+    "$hit",
+    "$h!t",
     "shitblimp",
     "shithead",
     "shitshow",
@@ -2476,7 +2485,7 @@ var italian_default = {
     "di merda",
     "ditalino",
     "duro",
-    "fare una\u0160",
+    "fare una sega",
     "fava",
     "femminuccia",
     "fica",
@@ -2724,7 +2733,6 @@ var japanese_default = {
     "\u7389\u8210\u3081",
     "\u7DCA\u7E1B",
     "\u8FD1\u89AA\u76F8\u59E6",
-    "\u5ACC\u3044",
     "\u5F8C\u80CC\u4F4D",
     "\u5408\u610F\u306E\u6027\u4EA4",
     "\u62F7\u554F",
@@ -2737,7 +2745,6 @@ var japanese_default = {
     "\u5C04\u7CBE",
     "\u624B\u30B3\u30AD",
     "\u7363\u59E6",
-    "\u5973\u306E\u5B50",
     "\u5973\u738B\u69D8",
     "\u5973\u5B50\u9AD8\u751F",
     "\u5973\u88C5",
@@ -2814,7 +2821,6 @@ var turkish_default = {
     "ak",
     "akp",
     "al a\u011Fz\u0131na",
-    "allah",
     "allahs\u0131z",
     "am",
     "am biti",
@@ -2909,7 +2915,6 @@ var turkish_default = {
     "am\u0131n\u0131 s",
     "am\u0131s\u0131na",
     "am\u0131s\u0131n\u0131",
-    "ana",
     "anaaann",
     "anal",
     "analarn",
@@ -3041,8 +3046,6 @@ var turkish_default = {
     "cikar",
     "cim",
     "cm",
-    "coca cola",
-    "cola",
     "dalaks\u0131z",
     "dallama",
     "daltassak",
@@ -3840,7 +3843,7 @@ var turkish_default = {
 // ../../shared/dictionaries/spanish.json
 var spanish_default = {
   words: [
-    "sesinato",
+    "asesinato",
     "asno",
     "bastardo",
     "Bollera",
@@ -4140,6 +4143,34 @@ var GAMING_POSITIVE = /* @__PURE__ */ new Set([
   "move",
   "combo"
 ]);
+var GAMING_ACCEPTABLE_WORDS = /* @__PURE__ */ new Set([
+  "kill",
+  "killer",
+  "killed",
+  "killing",
+  "shoot",
+  "shot",
+  "shooting",
+  "die",
+  "dying",
+  "died",
+  "dead",
+  "death",
+  "badass",
+  "sick",
+  "insane",
+  "crazy",
+  "mad",
+  "beast",
+  "savage",
+  "suck",
+  "sucks",
+  "wtf",
+  "omg",
+  "hell",
+  "damn",
+  "crap"
+]);
 var POSITIVE_PHRASES = /* @__PURE__ */ new Map([
   ["the bomb", 0.9],
   // "this movie is the bomb"
@@ -4172,7 +4203,9 @@ var ContextAnalyzer = class {
   constructor(config) {
     this.contextWindow = config.contextWindow;
     this.language = config.language;
-    this.domainWhitelists = new Set(config.domainWhitelists || []);
+    this.domainWhitelists = new Set(
+      (config.domainWhitelists || []).map((word) => word.toLowerCase())
+    );
   }
   /**
    * Analyzes the context around a profanity match to determine if it should be flagged
@@ -4209,10 +4242,9 @@ var ContextAnalyzer = class {
       isWhitelisted: false
     };
   }
-  // eslint-disable-next-line @typescript-eslint/no-unused-vars
   checkPhraseContext(contextText, matchWord) {
     for (const [phrase, score] of POSITIVE_PHRASES.entries()) {
-      if (contextText.includes(phrase)) {
+      if (phrase.includes(matchWord) && contextText.includes(phrase)) {
         return {
           contextScore: score,
           reason: `Positive phrase detected: "${phrase}"`,
@@ -4231,21 +4263,29 @@ var ContextAnalyzer = class {
     }
     return null;
   }
-  // eslint-disable-next-line @typescript-eslint/no-unused-vars
   isDomainWhitelisted(contextWords, matchWord) {
+    const normalizedMatchWord = matchWord.toLowerCase();
     for (const word of contextWords) {
-      if (this.domainWhitelists.has(word) || GAMING_POSITIVE.has(word)) {
+      if (this.domainWhitelists.has(word)) {
         return true;
       }
+      if (GAMING_POSITIVE.has(word)) {
+        if (GAMING_ACCEPTABLE_WORDS.has(normalizedMatchWord)) {
+          return true;
+        }
+      }
     }
     return false;
   }
-  // eslint-disable-next-line @typescript-eslint/no-unused-vars
   generateReason(score, contextWords) {
+    const foundPositive = Array.from(new Set(contextWords.filter((word) => POSITIVE_INDICATORS.has(word))));
+    const foundNegative = Array.from(new Set(contextWords.filter((word) => NEGATIVE_INDICATORS.has(word))));
     if (score >= 0.7) {
-      return "Positive context detected - likely not profanity";
+      const details = foundPositive.length > 0 ? ` (found: ${foundPositive.join(", ")})` : "";
+      return `Positive context detected${details} - likely not profanity`;
     } else if (score <= 0.3) {
-      return "Negative context detected - likely profanity";
+      const details = foundNegative.length > 0 ? ` (found: ${foundNegative.join(", ")})` : "";
+      return `Negative context detected${details} - likely profanity`;
     } else {
       return "Neutral context - uncertain classification";
     }
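Net effect of the ContextAnalyzer hunks above: whitelist entries are stored lower-cased, positive phrases only fire when the phrase actually contains the matched word, and the gaming exemption is now two-sided: the matched word itself must be on GAMING_ACCEPTABLE_WORDS and a GAMING_POSITIVE indicator must appear in the surrounding context. A standalone sketch of that gate (set names copied from the diff; the helper is illustrative, not the package's public API):

```ts
// Two-sided gaming gate, mirroring the new isDomainWhitelisted logic.
const GAMING_POSITIVE = new Set(['move', 'combo']);          // context indicators
const GAMING_ACCEPTABLE_WORDS = new Set(['kill', 'savage']); // excusable matches

function isGamingWhitelisted(contextWords: string[], matchWord: string): boolean {
  const normalized = matchWord.toLowerCase();
  // Before: any gaming indicator nearby excused *any* match.
  // After: the match itself must also be a gaming-acceptable word.
  return contextWords.some(
    (w) => GAMING_POSITIVE.has(w) && GAMING_ACCEPTABLE_WORDS.has(normalized)
  );
}

isGamingWhitelisted(['nice', 'combo'], 'kill'); // true - excused in gaming context
isGamingWhitelisted(['nice', 'combo'], 'slur'); // false - no longer excused
```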
@@ -4303,7 +4343,7 @@ var ContextAnalyzer = class {
    * Updates the domain whitelist for this analyzer instance
    */
   updateDomainWhitelist(newWhitelist) {
-    this.domainWhitelists = new Set(newWhitelist);
+    this.domainWhitelists = new Set(newWhitelist.map((word) => word.toLowerCase()));
   }
   /**
    * Adds words to the domain whitelist
@@ -4462,6 +4502,10 @@ var HOMOGLYPHS = {
   // Cyrillic small e
   "\u0415": "E",
   // Cyrillic capital E
+  "\u043A": "k",
+  // Cyrillic small ka
+  "\u041A": "K",
+  // Cyrillic capital Ka
   "\u043E": "o",
   // Cyrillic small o
   "\u041E": "O",
@@ -4474,9 +4518,9 @@ var HOMOGLYPHS = {
   // Cyrillic small es
   "\u0421": "C",
   // Cyrillic capital Es
-  "\u0443": "y",
-  // Cyrillic small u
-  "\u0423": "Y",
+  "\u0443": "u",
+  // Cyrillic small u (map to u, not y)
+  "\u0423": "U",
   // Cyrillic capital U
   "\u0445": "x",
   // Cyrillic small ha
@@ -4494,6 +4538,11 @@ var HOMOGLYPHS = {
   // Cyrillic small dze
   "\u0405": "S",
   // Cyrillic capital Dze
+  // Currency and special symbols that look like letters
+  "\xA2": "c",
+  // Cent sign
+  "\u0192": "f",
+  // Latin small f with hook (florin)
   // Greek homoglyphs
   "\u03B1": "a",
   // Greek small alpha
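The homoglyph table gains Cyrillic к/К plus the cent and florin signs, and Cyrillic у now folds to `u` instead of `y`. The sketch below shows how such a table is applied during normalization; the four entries are copied from this release, while the fold function is illustrative rather than the package's internal one:

```ts
// Homoglyph folding with entries added/changed in 3.2.x (illustrative helper).
const HOMOGLYPHS: Record<string, string> = {
  '\u043A': 'k', // Cyrillic small ka (new)
  '\u0443': 'u', // Cyrillic small u (now folds to 'u', not 'y')
  '\u00A2': 'c', // cent sign (new)
  '\u0192': 'f', // Latin small f with hook, florin (new)
};

function foldHomoglyphs(text: string): string {
  return [...text].map((ch) => HOMOGLYPHS[ch] ?? ch).join('');
}

foldHomoglyphs('\u0192\u0443\u043A'); // 'ƒук' -> 'fuk', itself a new dictionary entry
```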
@@ -4840,6 +4889,7 @@ var Filter = class {
     this.cacheResults = config?.cacheResults ?? false;
     this.maxCacheSize = config?.maxCacheSize ?? 1e3;
     this.cache = /* @__PURE__ */ new Map();
+    this.regexCache = /* @__PURE__ */ new Map();
     let words = [];
     if (config?.allLanguages) {
       for (const lang in dictionary_default) {
@@ -4869,9 +4919,10 @@ var Filter = class {
    * Applies Unicode normalization, leetspeak detection, and obfuscation handling.
    *
    * @param text - The input text to normalize
+   * @param aggressive - If true, collapses to single chars (for repeated char detection)
    * @returns The normalized text
    */
-  normalizeText(text) {
+  normalizeText(text, aggressive = false) {
     let normalized = text;
     if (this.normalizeUnicodeEnabled) {
       normalized = normalizeUnicode(normalized);
@@ -4880,6 +4931,8 @@ var Filter = class {
       normalized = normalizeLeetspeak(normalized, {
         level: this.leetspeakLevel,
         collapseRepeated: true,
+        // Keep double letters like "ss" for normal check, collapse all for aggressive
+        maxRepeated: aggressive ? 1 : 2,
         removeSpacedChars: true
       });
     }
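normalizeText now takes an aggressive flag that is threaded into normalizeLeetspeak as maxRepeated: the normal pass keeps runs of up to two characters so legitimate double letters survive, while the aggressive pass collapses every run to a single character to catch padding. Assumed semantics of maxRepeated, inferred from the comment in the diff:

```ts
// Assumed behavior of the two collapse levels (illustration, not package code).
function collapseRepeats(text: string, maxRepeated: number): string {
  // Shrink any run longer than maxRepeated down to exactly maxRepeated chars.
  return text.replace(
    new RegExp(`(.)\\1{${maxRepeated},}`, 'g'),
    (_run, ch: string) => ch.repeat(maxRepeated)
  );
}

collapseRepeats('fuuuuck', 2);  // 'fuuck'    - normal pass can still miss
collapseRepeats('fuuuuck', 1);  // 'fuck'     - aggressive pass catches it
collapseRepeats('assassin', 2); // 'assassin' - double 's' survives the normal pass
```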
@@ -4910,6 +4963,7 @@ var Filter = class {
    */
   clearCache() {
     this.cache.clear();
+    this.regexCache.clear();
   }
   /**
    * Gets the current cache size.
@@ -4991,10 +5045,17 @@ var Filter = class {
     return this.cache.get(key);
   }
   getRegex(word) {
+    if (this.regexCache.has(word)) {
+      const regex2 = this.regexCache.get(word);
+      regex2.lastIndex = 0;
+      return regex2;
+    }
     const flags = this.caseSensitive ? "g" : "gi";
     const escapedWord = word.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
     const boundary = this.wordBoundaries ? "\\b" : "";
-    return new RegExp(`${boundary}${escapedWord}${boundary}`, flags);
+    const regex = new RegExp(`${boundary}${escapedWord}${boundary}`, flags);
+    this.regexCache.set(word, regex);
+    return regex;
   }
   isFuzzyToleranceMatch(word, text) {
     const simplifiedText = text.toLowerCase().replace(/[^a-z]/g, "");
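getRegex is now memoized per word in the new regexCache. Because the compiled patterns carry the g flag, they are stateful: each exec/test advances lastIndex, which is why the cache-hit path resets it to 0 before returning. A minimal repro of the pitfall the reset avoids:

```ts
const re = /\bdamn\b/gi; // the 'g' flag makes this regex stateful

re.test('damn'); // true  - lastIndex advances to 4
re.test('damn'); // false - the second call resumes searching at index 4

// getRegex sets regex.lastIndex = 0 on every cache hit, so each
// evaluateSeverity call starts from a fresh search position.
```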
@@ -5012,11 +5073,12 @@ var Filter = class {
     return score >= this.fuzzyToleranceLevel;
   }
   evaluateSeverity(word, text) {
-    if (this.wordBoundaries && this.getRegex(word).test(text)) {
+    if (this.getRegex(word).test(text)) {
       return 1 /* EXACT */;
     }
-    if (this.getRegex(word).test(text)) return 1 /* EXACT */;
-    if (this.isFuzzyToleranceMatch(word, text)) return 2 /* FUZZY */;
+    if (!this.wordBoundaries && this.isFuzzyToleranceMatch(word, text)) {
+      return 2 /* FUZZY */;
+    }
     return void 0;
   }
   /**
@@ -5036,9 +5098,20 @@ var Filter = class {
    * ```
    */
   isProfane(value) {
-    const input = this.normalizeText(value);
+    const originalInput = value;
+    const normalizedInput = this.normalizeText(value);
+    const aggressiveInput = this.normalizeText(value, true);
     for (const word of this.words.keys()) {
-      if (!this.ignoreWords.has(word.toLowerCase()) && this.evaluateSeverity(word, input) !== void 0) {
+      if (this.ignoreWords.has(word.toLowerCase())) {
+        continue;
+      }
+      if (this.evaluateSeverity(word, originalInput) !== void 0) {
+        return true;
+      }
+      if (this.evaluateSeverity(word, normalizedInput) !== void 0) {
+        return true;
+      }
+      if (this.evaluateSeverity(word, aggressiveInput) !== void 0) {
         return true;
       }
     }
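evaluateSeverity drops its duplicated exact-match branch and only falls through to fuzzy matching when wordBoundaries is disabled, and isProfane now runs every dictionary word against three views of the input (raw, normalized, and aggressively collapsed), returning on the first hit. Hedged usage sketch (import path assumed; option names taken from the diff itself):

```ts
import { Filter } from 'glin-profanity'; // import path assumed

const filter = new Filter({ languages: ['english'], detectLeetspeak: true });

filter.isProfane('shit');    // true - raw pass, plain dictionary hit
filter.isProfane('sh1t');    // true - normalized pass (also a literal entry now)
filter.isProfane('shiiiit'); // true - aggressive pass collapses the repeats
```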
@@ -5079,23 +5152,45 @@ var Filter = class {
       return cachedResult;
     }
     if (!this.enableContextAware) {
-      let input2 = this.normalizeText(text);
-      input2 = input2.toLowerCase();
+      const originalInput = text.toLowerCase();
+      const normalizedInput = this.normalizeText(text).toLowerCase();
+      const aggressiveInput = this.normalizeText(text, true).toLowerCase();
       const profaneWords2 = [];
       const severityMap2 = {};
       for (const dictWord of this.words.keys()) {
         if (this.ignoreWords.has(dictWord.toLowerCase())) continue;
-        const severity = this.evaluateSeverity(dictWord, input2);
+        let severity = this.evaluateSeverity(dictWord, originalInput);
         if (severity !== void 0) {
           const regex = this.getRegex(dictWord);
           let match;
-          while ((match = regex.exec(input2)) !== null) {
+          while ((match = regex.exec(originalInput)) !== null) {
             profaneWords2.push(match[0]);
             if (severityMap2[match[0]] === void 0) {
               severityMap2[match[0]] = severity;
             }
           }
         }
+        severity = this.evaluateSeverity(dictWord, normalizedInput);
+        if (severity !== void 0) {
+          const regex = this.getRegex(dictWord);
+          while ((regex.exec(normalizedInput)) !== null) {
+            if (!profaneWords2.includes(dictWord)) {
+              profaneWords2.push(dictWord);
+              if (severityMap2[dictWord] === void 0) {
+                severityMap2[dictWord] = severity;
+              }
+            }
+          }
+        }
+        severity = this.evaluateSeverity(dictWord, aggressiveInput);
+        if (severity !== void 0) {
+          if (!profaneWords2.includes(dictWord)) {
+            profaneWords2.push(dictWord);
+            if (severityMap2[dictWord] === void 0) {
+              severityMap2[dictWord] = severity;
+            }
+          }
+        }
       }
       let processedText2 = text;
       if (this.replaceWith && profaneWords2.length > 0) {
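checkProfanity applies the same three passes when context-aware mode is off: exact hits on the raw text are reported with the matched spelling (collected via regex exec), while hits that only surface after normalization are reported under the dictionary headword, deduplicated by the includes guard. A hedged sketch of the call and likely result shape (field names inferred from this code, not from documentation):

```ts
const result = filter.checkProfanity('you sh1t');

// Inferred shape: containsProfanity/profaneWords appear verbatim in the diff;
// severity values come from the EXACT(1)/FUZZY(2) enum comments.
result.containsProfanity; // true
result.profaneWords;      // ['sh1t'] - raw-pass spelling, or the dictionary
                          // headword for normalization-only hits
```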
@@ -5481,4 +5576,206 @@ var HybridFilter = class {
   }
 };
 
-export { HybridFilter, ToxicityDetector };
+// src/ml/transformers.ts
+var RECOMMENDED_MODELS = {
+  /** High accuracy English model (97.5%) - 67M params */
+  pardonmyai: "tarekziade/pardonmyai",
+  /** Smaller version for constrained environments */
+  pardonmyaiTiny: "tarekziade/pardonmyai-tiny",
+  /** Multilingual toxicity detection (7 languages) */
+  toxicBert: "unitary/toxic-bert",
+  /** Offensive speech detector (DeBERTa-based) */
+  offensiveSpeech: "KoalaAI/OffensiveSpeechDetector"
+};
+var MODEL_PROFANE_LABELS = {
+  "tarekziade/pardonmyai": "profane",
+  "tarekziade/pardonmyai-tiny": "profane",
+  "unitary/toxic-bert": "toxic",
+  "KoalaAI/OffensiveSpeechDetector": "LABEL_1",
+  // Offensive
+  default: "LABEL_1"
+};
+async function getTransformers() {
+  try {
+    const transformers = await import('@xenova/transformers');
+    return transformers;
+  } catch {
+    throw new Error(
+      "Transformers.js is required for ML features. Install it with: npm install @xenova/transformers"
+    );
+  }
+}
+async function createMLChecker(config = {}) {
+  const {
+    model = RECOMMENDED_MODELS.pardonmyai,
+    threshold = 0.5,
+    profaneLabel = MODEL_PROFANE_LABELS[model] || MODEL_PROFANE_LABELS.default,
+    quantized = true,
+    device = "cpu"
+  } = config;
+  const transformers = await getTransformers();
+  const classifier = await transformers.pipeline("text-classification", model, {
+    quantized,
+    device
+  });
+  return {
+    /**
+     * Check a single text for profanity
+     */
+    async check(text) {
+      const startTime = Date.now();
+      const output = await classifier(text);
+      const processingTimeMs = Date.now() - startTime;
+      const profaneScore = output.find((o) => o.label === profaneLabel)?.score || 0;
+      const containsProfanity = profaneScore >= threshold;
+      return {
+        containsProfanity,
+        confidence: profaneScore,
+        rawOutput: output,
+        processingTimeMs
+      };
+    },
+    /**
+     * Check multiple texts
+     */
+    async checkBatch(texts) {
+      return Promise.all(texts.map((text) => this.check(text)));
+    },
+    /**
+     * Get the profanity score for text (0-1)
+     */
+    async getScore(text) {
+      const result = await this.check(text);
+      return result.confidence;
+    },
+    /**
+     * Get current configuration
+     */
+    getConfig() {
+      return { model, threshold, profaneLabel, quantized, device };
+    },
+    /**
+     * Dispose of the model (free memory)
+     */
+    dispose() {
+    }
+  };
+}
+async function createHybridChecker(config = {}) {
+  const {
+    model = RECOMMENDED_MODELS.pardonmyai,
+    threshold = 0.5,
+    profaneLabel,
+    quantized = true,
+    device = "cpu",
+    filterConfig = {},
+    mlThreshold = 0.3,
+    dictionaryWeight = 0.6,
+    mlWeight = 0.4
+  } = config;
+  const filter = new Filter({
+    languages: filterConfig.languages || ["english"],
+    detectLeetspeak: filterConfig.detectLeetspeak ?? true,
+    normalizeUnicode: filterConfig.normalizeUnicode ?? true,
+    severityLevels: true,
+    cacheResults: true,
+    ...filterConfig
+  });
+  let mlChecker = null;
+  async function getMLChecker() {
+    if (!mlChecker) {
+      mlChecker = await createMLChecker({
+        model,
+        threshold,
+        profaneLabel,
+        quantized,
+        device
+      });
+    }
+    return mlChecker;
+  }
+  return {
+    /**
+     * Check text using hybrid approach
+     */
+    async check(text) {
+      const startTime = Date.now();
+      const dictionaryResult = filter.checkProfanity(text);
+      if (dictionaryResult.containsProfanity) {
+        return {
+          containsProfanity: true,
+          confidence: 1,
+          dictionaryResult,
+          usedML: false,
+          profaneWords: dictionaryResult.profaneWords,
+          processingTimeMs: Date.now() - startTime
+        };
+      }
+      const ml = await getMLChecker();
+      const mlResult = await ml.check(text);
+      const dictionaryScore = dictionaryResult.containsProfanity ? 1 : 0;
+      const combinedScore = dictionaryScore * dictionaryWeight + mlResult.confidence * mlWeight;
+      const containsProfanity = combinedScore >= mlThreshold;
+      return {
+        containsProfanity,
+        confidence: combinedScore,
+        dictionaryResult,
+        mlResult,
+        usedML: true,
+        profaneWords: dictionaryResult.profaneWords,
+        processingTimeMs: Date.now() - startTime
+      };
+    },
+    /**
+     * Check multiple texts
+     */
+    async checkBatch(texts) {
+      return Promise.all(texts.map((text) => this.check(text)));
+    },
+    /**
+     * Dictionary-only check (fast, no ML)
+     */
+    checkFast(text) {
+      return filter.checkProfanity(text);
+    },
+    /**
+     * ML-only check (slower, more accurate)
+     */
+    async checkML(text) {
+      const ml = await getMLChecker();
+      return ml.check(text);
+    },
+    /**
+     * Get the underlying filter
+     */
+    getFilter() {
+      return filter;
+    },
+    /**
+     * Dispose of resources
+     */
+    async dispose() {
+      if (mlChecker) {
+        mlChecker.dispose();
+        mlChecker = null;
+      }
+    }
+  };
+}
+async function isTransformersAvailable() {
+  try {
+    await getTransformers();
+    return true;
+  } catch {
+    return false;
+  }
+}
+async function preloadModel(model = RECOMMENDED_MODELS.pardonmyai, options = {}) {
+  const { quantized = true } = options;
+  const transformers = await getTransformers();
+  await transformers.pipeline("text-classification", model, {
+    quantized
+  });
+}
+
+export { HybridFilter, RECOMMENDED_MODELS, ToxicityDetector, createHybridChecker, createMLChecker, isTransformersAvailable, preloadModel };
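The new transformers module (bundled into dist/ml/transformers.* and re-exported here) lazily imports Transformers.js, so @xenova/transformers stays an optional dependency: createMLChecker loads a text-classification pipeline, and createHybridChecker consults the ML model only when the dictionary pass misses. A hedged usage sketch (subpath import assumed from the dist layout in the file listing; requires npm install @xenova/transformers):

```ts
import {
  createHybridChecker,
  isTransformersAvailable,
  preloadModel,
  RECOMMENDED_MODELS,
} from 'glin-profanity/ml'; // subpath assumed from the file listing

if (await isTransformersAvailable()) {
  await preloadModel(RECOMMENDED_MODELS.pardonmyaiTiny); // warm the model cache

  const checker = await createHybridChecker({
    model: RECOMMENDED_MODELS.pardonmyaiTiny,
    mlThreshold: 0.3, // combined-score cutoff (the default in the diff)
  });

  const result = await checker.check('you are a badass');
  // Dictionary pass runs first; the ML model is only consulted on a miss.
  console.log(result.containsProfanity, result.usedML, result.confidence);

  await checker.dispose();
}
```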