glin-profanity 3.1.2 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +84 -566
- package/dist/index.cjs +151 -57
- package/dist/index.d.cts +11 -14
- package/dist/index.d.ts +11 -14
- package/dist/index.js +151 -57
- package/dist/ml/index.cjs +151 -56
- package/dist/ml/index.d.cts +2 -2
- package/dist/ml/index.d.ts +2 -2
- package/dist/ml/index.js +151 -56
- package/dist/{types-BgQe4FSE.d.cts → types-Dj5vaoch.d.cts} +3 -1
- package/dist/{types-BgQe4FSE.d.ts → types-Dj5vaoch.d.ts} +3 -1
- package/package.json +1 -1
package/dist/ml/index.cjs
CHANGED
|
@@ -812,7 +812,7 @@ var danish_default = {
|
|
|
812
812
|
// ../../shared/dictionaries/dutch.json
|
|
813
813
|
var dutch_default = {
|
|
814
814
|
words: [
|
|
815
|
-
"aardappels
|
|
815
|
+
"aardappels afgieten",
|
|
816
816
|
"achter het raam zitten",
|
|
817
817
|
"afberen",
|
|
818
818
|
"aflebberen",
|
|
@@ -829,7 +829,7 @@ var dutch_default = {
|
|
|
829
829
|
"bagger schijten",
|
|
830
830
|
"balen",
|
|
831
831
|
"bedonderen",
|
|
832
|
-
"
|
|
832
|
+
"befborstel",
|
|
833
833
|
"beffen",
|
|
834
834
|
"bekken",
|
|
835
835
|
"belazeren",
|
|
@@ -838,11 +838,11 @@ var dutch_default = {
|
|
|
838
838
|
"beurt",
|
|
839
839
|
"boemelen",
|
|
840
840
|
"boerelul",
|
|
841
|
-
"
|
|
841
|
+
"boerenpummel",
|
|
842
842
|
"bokkelul",
|
|
843
843
|
"botergeil",
|
|
844
844
|
"broekhoesten",
|
|
845
|
-
"
|
|
845
|
+
"brugpieper",
|
|
846
846
|
"buffelen",
|
|
847
847
|
"buiten de pot piesen",
|
|
848
848
|
"da's kloten van de bok",
|
|
@@ -850,13 +850,13 @@ var dutch_default = {
|
|
|
850
850
|
"de hoer spelen",
|
|
851
851
|
"de hond uitlaten",
|
|
852
852
|
"de koffer induiken",
|
|
853
|
-
"
|
|
853
|
+
"del",
|
|
854
854
|
"de pijp aan maarten geven",
|
|
855
855
|
"de pijp uitgaan",
|
|
856
856
|
"dombo",
|
|
857
|
-
"
|
|
857
|
+
"draaikont",
|
|
858
858
|
"driehoog achter wonen",
|
|
859
|
-
"
|
|
859
|
+
"drol",
|
|
860
860
|
"drooggeiler",
|
|
861
861
|
"droogkloot",
|
|
862
862
|
"een beurt geven",
|
|
@@ -876,7 +876,7 @@ var dutch_default = {
|
|
|
876
876
|
"godverdomme",
|
|
877
877
|
"graftak",
|
|
878
878
|
"gras maaien",
|
|
879
|
-
"
|
|
879
|
+
"gratenkut",
|
|
880
880
|
"greppeldel",
|
|
881
881
|
"griet",
|
|
882
882
|
"hoempert",
|
|
@@ -889,7 +889,7 @@ var dutch_default = {
|
|
|
889
889
|
"huisdealer",
|
|
890
890
|
"johny",
|
|
891
891
|
"kanen",
|
|
892
|
-
"
|
|
892
|
+
"kettingzeug",
|
|
893
893
|
"klaarkomen",
|
|
894
894
|
"klerebeer",
|
|
895
895
|
"klojo",
|
|
@@ -899,22 +899,22 @@ var dutch_default = {
|
|
|
899
899
|
"klootzak",
|
|
900
900
|
"kloten",
|
|
901
901
|
"knor",
|
|
902
|
-
"
|
|
902
|
+
"kont",
|
|
903
903
|
"kontneuken",
|
|
904
904
|
"krentekakker",
|
|
905
905
|
"kut",
|
|
906
906
|
"kuttelikkertje",
|
|
907
|
-
"
|
|
907
|
+
"kwakkie",
|
|
908
908
|
"liefdesgrot",
|
|
909
909
|
"lul",
|
|
910
910
|
"lul-de-behanger",
|
|
911
911
|
"lulhannes",
|
|
912
912
|
"lummel",
|
|
913
913
|
"mafketel",
|
|
914
|
-
"
|
|
914
|
+
"matennaaier",
|
|
915
915
|
"matje",
|
|
916
916
|
"mof",
|
|
917
|
-
"
|
|
917
|
+
"muts",
|
|
918
918
|
"naaien",
|
|
919
919
|
"naakt",
|
|
920
920
|
"neuken",
|
|
@@ -934,9 +934,9 @@ var dutch_default = {
|
|
|
934
934
|
"paal",
|
|
935
935
|
"paardelul",
|
|
936
936
|
"palen",
|
|
937
|
-
"
|
|
937
|
+
"penoze",
|
|
938
938
|
"piesen",
|
|
939
|
-
"
|
|
939
|
+
"pijpbekkie",
|
|
940
940
|
"pijpen",
|
|
941
941
|
"pik",
|
|
942
942
|
"pleurislaaier",
|
|
@@ -951,7 +951,7 @@ var dutch_default = {
|
|
|
951
951
|
"reet",
|
|
952
952
|
"reetridder",
|
|
953
953
|
"reet trappen, voor zijn",
|
|
954
|
-
"
|
|
954
|
+
"remsporen",
|
|
955
955
|
"reutelen",
|
|
956
956
|
"rothoer",
|
|
957
957
|
"rotzak",
|
|
@@ -964,25 +964,25 @@ var dutch_default = {
|
|
|
964
964
|
"schuinsmarcheerder",
|
|
965
965
|
"shit",
|
|
966
966
|
"slempen",
|
|
967
|
-
"
|
|
967
|
+
"slet",
|
|
968
968
|
"sletterig",
|
|
969
969
|
"slik mijn zaad",
|
|
970
|
-
"
|
|
970
|
+
"snol",
|
|
971
971
|
"spuiten",
|
|
972
972
|
"standje",
|
|
973
|
-
"standje-
|
|
973
|
+
"standje-69",
|
|
974
974
|
"stoephoer",
|
|
975
975
|
"stootje",
|
|
976
|
-
"
|
|
977
|
-
"
|
|
976
|
+
"stront",
|
|
977
|
+
"sufferd",
|
|
978
978
|
"tapijtnek",
|
|
979
|
-
"
|
|
979
|
+
"teef",
|
|
980
980
|
"temeier",
|
|
981
981
|
"teringlijer",
|
|
982
982
|
"toeter",
|
|
983
|
-
"
|
|
984
|
-
"
|
|
985
|
-
"trottoir
|
|
983
|
+
"tongzoen",
|
|
984
|
+
"triootje",
|
|
985
|
+
"trottoir prostituee",
|
|
986
986
|
"trottoirteef",
|
|
987
987
|
"vergallen",
|
|
988
988
|
"verkloten",
|
|
@@ -1055,6 +1055,8 @@ var english_default = {
|
|
|
1055
1055
|
"2 girls 1 cup",
|
|
1056
1056
|
"2g1c",
|
|
1057
1057
|
"a$$",
|
|
1058
|
+
"@ss",
|
|
1059
|
+
"4ss",
|
|
1058
1060
|
"acrotomophilia",
|
|
1059
1061
|
"alabama hot pocket",
|
|
1060
1062
|
"alaskan pipeline",
|
|
@@ -1194,6 +1196,10 @@ var english_default = {
|
|
|
1194
1196
|
"eunuch",
|
|
1195
1197
|
"f*ck",
|
|
1196
1198
|
"f@ck",
|
|
1199
|
+
"f4ck",
|
|
1200
|
+
"fvck",
|
|
1201
|
+
"phuck",
|
|
1202
|
+
"fuk",
|
|
1197
1203
|
"faggot",
|
|
1198
1204
|
"fecal",
|
|
1199
1205
|
"felch",
|
|
@@ -1375,6 +1381,9 @@ var english_default = {
|
|
|
1375
1381
|
"shemale",
|
|
1376
1382
|
"shibari",
|
|
1377
1383
|
"shit",
|
|
1384
|
+
"sh1t",
|
|
1385
|
+
"$hit",
|
|
1386
|
+
"$h!t",
|
|
1378
1387
|
"shitblimp",
|
|
1379
1388
|
"shithead",
|
|
1380
1389
|
"shitshow",
|
|
@@ -2478,7 +2487,7 @@ var italian_default = {
|
|
|
2478
2487
|
"di merda",
|
|
2479
2488
|
"ditalino",
|
|
2480
2489
|
"duro",
|
|
2481
|
-
"fare una
|
|
2490
|
+
"fare una sega",
|
|
2482
2491
|
"fava",
|
|
2483
2492
|
"femminuccia",
|
|
2484
2493
|
"fica",
|
|
@@ -2726,7 +2735,6 @@ var japanese_default = {
|
|
|
2726
2735
|
"\u7389\u8210\u3081",
|
|
2727
2736
|
"\u7DCA\u7E1B",
|
|
2728
2737
|
"\u8FD1\u89AA\u76F8\u59E6",
|
|
2729
|
-
"\u5ACC\u3044",
|
|
2730
2738
|
"\u5F8C\u80CC\u4F4D",
|
|
2731
2739
|
"\u5408\u610F\u306E\u6027\u4EA4",
|
|
2732
2740
|
"\u62F7\u554F",
|
|
@@ -2739,7 +2747,6 @@ var japanese_default = {
|
|
|
2739
2747
|
"\u5C04\u7CBE",
|
|
2740
2748
|
"\u624B\u30B3\u30AD",
|
|
2741
2749
|
"\u7363\u59E6",
|
|
2742
|
-
"\u5973\u306E\u5B50",
|
|
2743
2750
|
"\u5973\u738B\u69D8",
|
|
2744
2751
|
"\u5973\u5B50\u9AD8\u751F",
|
|
2745
2752
|
"\u5973\u88C5",
|
|
@@ -2816,7 +2823,6 @@ var turkish_default = {
|
|
|
2816
2823
|
"ak",
|
|
2817
2824
|
"akp",
|
|
2818
2825
|
"al a\u011Fz\u0131na",
|
|
2819
|
-
"allah",
|
|
2820
2826
|
"allahs\u0131z",
|
|
2821
2827
|
"am",
|
|
2822
2828
|
"am biti",
|
|
@@ -2911,7 +2917,6 @@ var turkish_default = {
|
|
|
2911
2917
|
"am\u0131n\u0131 s",
|
|
2912
2918
|
"am\u0131s\u0131na",
|
|
2913
2919
|
"am\u0131s\u0131n\u0131",
|
|
2914
|
-
"ana",
|
|
2915
2920
|
"anaaann",
|
|
2916
2921
|
"anal",
|
|
2917
2922
|
"analarn",
|
|
@@ -3043,8 +3048,6 @@ var turkish_default = {
|
|
|
3043
3048
|
"cikar",
|
|
3044
3049
|
"cim",
|
|
3045
3050
|
"cm",
|
|
3046
|
-
"coca cola",
|
|
3047
|
-
"cola",
|
|
3048
3051
|
"dalaks\u0131z",
|
|
3049
3052
|
"dallama",
|
|
3050
3053
|
"daltassak",
|
|
@@ -3842,7 +3845,7 @@ var turkish_default = {
|
|
|
3842
3845
|
// ../../shared/dictionaries/spanish.json
|
|
3843
3846
|
var spanish_default = {
|
|
3844
3847
|
words: [
|
|
3845
|
-
"
|
|
3848
|
+
"asesinato",
|
|
3846
3849
|
"asno",
|
|
3847
3850
|
"bastardo",
|
|
3848
3851
|
"Bollera",
|
|
@@ -4142,6 +4145,34 @@ var GAMING_POSITIVE = /* @__PURE__ */ new Set([
|
|
|
4142
4145
|
"move",
|
|
4143
4146
|
"combo"
|
|
4144
4147
|
]);
|
|
4148
|
+
var GAMING_ACCEPTABLE_WORDS = /* @__PURE__ */ new Set([
|
|
4149
|
+
"kill",
|
|
4150
|
+
"killer",
|
|
4151
|
+
"killed",
|
|
4152
|
+
"killing",
|
|
4153
|
+
"shoot",
|
|
4154
|
+
"shot",
|
|
4155
|
+
"shooting",
|
|
4156
|
+
"die",
|
|
4157
|
+
"dying",
|
|
4158
|
+
"died",
|
|
4159
|
+
"dead",
|
|
4160
|
+
"death",
|
|
4161
|
+
"badass",
|
|
4162
|
+
"sick",
|
|
4163
|
+
"insane",
|
|
4164
|
+
"crazy",
|
|
4165
|
+
"mad",
|
|
4166
|
+
"beast",
|
|
4167
|
+
"savage",
|
|
4168
|
+
"suck",
|
|
4169
|
+
"sucks",
|
|
4170
|
+
"wtf",
|
|
4171
|
+
"omg",
|
|
4172
|
+
"hell",
|
|
4173
|
+
"damn",
|
|
4174
|
+
"crap"
|
|
4175
|
+
]);
|
|
4145
4176
|
var POSITIVE_PHRASES = /* @__PURE__ */ new Map([
|
|
4146
4177
|
["the bomb", 0.9],
|
|
4147
4178
|
// "this movie is the bomb"
|
|
@@ -4174,7 +4205,9 @@ var ContextAnalyzer = class {
|
|
|
4174
4205
|
constructor(config) {
|
|
4175
4206
|
this.contextWindow = config.contextWindow;
|
|
4176
4207
|
this.language = config.language;
|
|
4177
|
-
this.domainWhitelists = new Set(
|
|
4208
|
+
this.domainWhitelists = new Set(
|
|
4209
|
+
(config.domainWhitelists || []).map((word) => word.toLowerCase())
|
|
4210
|
+
);
|
|
4178
4211
|
}
|
|
4179
4212
|
/**
|
|
4180
4213
|
* Analyzes the context around a profanity match to determine if it should be flagged
|
|
@@ -4211,10 +4244,9 @@ var ContextAnalyzer = class {
|
|
|
4211
4244
|
isWhitelisted: false
|
|
4212
4245
|
};
|
|
4213
4246
|
}
|
|
4214
|
-
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
4215
4247
|
checkPhraseContext(contextText, matchWord) {
|
|
4216
4248
|
for (const [phrase, score] of POSITIVE_PHRASES.entries()) {
|
|
4217
|
-
if (contextText.includes(phrase)) {
|
|
4249
|
+
if (phrase.includes(matchWord) && contextText.includes(phrase)) {
|
|
4218
4250
|
return {
|
|
4219
4251
|
contextScore: score,
|
|
4220
4252
|
reason: `Positive phrase detected: "${phrase}"`,
|
|
@@ -4233,21 +4265,29 @@ var ContextAnalyzer = class {
|
|
|
4233
4265
|
}
|
|
4234
4266
|
return null;
|
|
4235
4267
|
}
|
|
4236
|
-
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
4237
4268
|
isDomainWhitelisted(contextWords, matchWord) {
|
|
4269
|
+
const normalizedMatchWord = matchWord.toLowerCase();
|
|
4238
4270
|
for (const word of contextWords) {
|
|
4239
|
-
if (this.domainWhitelists.has(word)
|
|
4271
|
+
if (this.domainWhitelists.has(word)) {
|
|
4240
4272
|
return true;
|
|
4241
4273
|
}
|
|
4274
|
+
if (GAMING_POSITIVE.has(word)) {
|
|
4275
|
+
if (GAMING_ACCEPTABLE_WORDS.has(normalizedMatchWord)) {
|
|
4276
|
+
return true;
|
|
4277
|
+
}
|
|
4278
|
+
}
|
|
4242
4279
|
}
|
|
4243
4280
|
return false;
|
|
4244
4281
|
}
|
|
4245
|
-
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
4246
4282
|
generateReason(score, contextWords) {
|
|
4283
|
+
const foundPositive = Array.from(new Set(contextWords.filter((word) => POSITIVE_INDICATORS.has(word))));
|
|
4284
|
+
const foundNegative = Array.from(new Set(contextWords.filter((word) => NEGATIVE_INDICATORS.has(word))));
|
|
4247
4285
|
if (score >= 0.7) {
|
|
4248
|
-
|
|
4286
|
+
const details = foundPositive.length > 0 ? ` (found: ${foundPositive.join(", ")})` : "";
|
|
4287
|
+
return `Positive context detected${details} - likely not profanity`;
|
|
4249
4288
|
} else if (score <= 0.3) {
|
|
4250
|
-
|
|
4289
|
+
const details = foundNegative.length > 0 ? ` (found: ${foundNegative.join(", ")})` : "";
|
|
4290
|
+
return `Negative context detected${details} - likely profanity`;
|
|
4251
4291
|
} else {
|
|
4252
4292
|
return "Neutral context - uncertain classification";
|
|
4253
4293
|
}
|
|
@@ -4305,7 +4345,7 @@ var ContextAnalyzer = class {
|
|
|
4305
4345
|
* Updates the domain whitelist for this analyzer instance
|
|
4306
4346
|
*/
|
|
4307
4347
|
updateDomainWhitelist(newWhitelist) {
|
|
4308
|
-
this.domainWhitelists = new Set(newWhitelist);
|
|
4348
|
+
this.domainWhitelists = new Set(newWhitelist.map((word) => word.toLowerCase()));
|
|
4309
4349
|
}
|
|
4310
4350
|
/**
|
|
4311
4351
|
* Adds words to the domain whitelist
|
|
@@ -4464,6 +4504,10 @@ var HOMOGLYPHS = {
|
|
|
4464
4504
|
// Cyrillic small e
|
|
4465
4505
|
"\u0415": "E",
|
|
4466
4506
|
// Cyrillic capital E
|
|
4507
|
+
"\u043A": "k",
|
|
4508
|
+
// Cyrillic small ka
|
|
4509
|
+
"\u041A": "K",
|
|
4510
|
+
// Cyrillic capital Ka
|
|
4467
4511
|
"\u043E": "o",
|
|
4468
4512
|
// Cyrillic small o
|
|
4469
4513
|
"\u041E": "O",
|
|
@@ -4476,9 +4520,9 @@ var HOMOGLYPHS = {
|
|
|
4476
4520
|
// Cyrillic small es
|
|
4477
4521
|
"\u0421": "C",
|
|
4478
4522
|
// Cyrillic capital Es
|
|
4479
|
-
"\u0443": "
|
|
4480
|
-
// Cyrillic small u
|
|
4481
|
-
"\u0423": "
|
|
4523
|
+
"\u0443": "u",
|
|
4524
|
+
// Cyrillic small u (map to u, not y)
|
|
4525
|
+
"\u0423": "U",
|
|
4482
4526
|
// Cyrillic capital U
|
|
4483
4527
|
"\u0445": "x",
|
|
4484
4528
|
// Cyrillic small ha
|
|
@@ -4496,6 +4540,11 @@ var HOMOGLYPHS = {
|
|
|
4496
4540
|
// Cyrillic small dze
|
|
4497
4541
|
"\u0405": "S",
|
|
4498
4542
|
// Cyrillic capital Dze
|
|
4543
|
+
// Currency and special symbols that look like letters
|
|
4544
|
+
"\xA2": "c",
|
|
4545
|
+
// Cent sign
|
|
4546
|
+
"\u0192": "f",
|
|
4547
|
+
// Latin small f with hook (florin)
|
|
4499
4548
|
// Greek homoglyphs
|
|
4500
4549
|
"\u03B1": "a",
|
|
4501
4550
|
// Greek small alpha
|
|
@@ -4842,6 +4891,7 @@ var Filter = class {
|
|
|
4842
4891
|
this.cacheResults = config?.cacheResults ?? false;
|
|
4843
4892
|
this.maxCacheSize = config?.maxCacheSize ?? 1e3;
|
|
4844
4893
|
this.cache = /* @__PURE__ */ new Map();
|
|
4894
|
+
this.regexCache = /* @__PURE__ */ new Map();
|
|
4845
4895
|
let words = [];
|
|
4846
4896
|
if (config?.allLanguages) {
|
|
4847
4897
|
for (const lang in dictionary_default) {
|
|
@@ -4871,9 +4921,10 @@ var Filter = class {
|
|
|
4871
4921
|
* Applies Unicode normalization, leetspeak detection, and obfuscation handling.
|
|
4872
4922
|
*
|
|
4873
4923
|
* @param text - The input text to normalize
|
|
4924
|
+
* @param aggressive - If true, collapses to single chars (for repeated char detection)
|
|
4874
4925
|
* @returns The normalized text
|
|
4875
4926
|
*/
|
|
4876
|
-
normalizeText(text) {
|
|
4927
|
+
normalizeText(text, aggressive = false) {
|
|
4877
4928
|
let normalized = text;
|
|
4878
4929
|
if (this.normalizeUnicodeEnabled) {
|
|
4879
4930
|
normalized = normalizeUnicode(normalized);
|
|
@@ -4882,6 +4933,8 @@ var Filter = class {
|
|
|
4882
4933
|
normalized = normalizeLeetspeak(normalized, {
|
|
4883
4934
|
level: this.leetspeakLevel,
|
|
4884
4935
|
collapseRepeated: true,
|
|
4936
|
+
// Keep double letters like "ss" for normal check, collapse all for aggressive
|
|
4937
|
+
maxRepeated: aggressive ? 1 : 2,
|
|
4885
4938
|
removeSpacedChars: true
|
|
4886
4939
|
});
|
|
4887
4940
|
}
|
|
@@ -4912,6 +4965,7 @@ var Filter = class {
|
|
|
4912
4965
|
*/
|
|
4913
4966
|
clearCache() {
|
|
4914
4967
|
this.cache.clear();
|
|
4968
|
+
this.regexCache.clear();
|
|
4915
4969
|
}
|
|
4916
4970
|
/**
|
|
4917
4971
|
* Gets the current cache size.
|
|
@@ -4993,10 +5047,17 @@ var Filter = class {
|
|
|
4993
5047
|
return this.cache.get(key);
|
|
4994
5048
|
}
|
|
4995
5049
|
getRegex(word) {
|
|
5050
|
+
if (this.regexCache.has(word)) {
|
|
5051
|
+
const regex2 = this.regexCache.get(word);
|
|
5052
|
+
regex2.lastIndex = 0;
|
|
5053
|
+
return regex2;
|
|
5054
|
+
}
|
|
4996
5055
|
const flags = this.caseSensitive ? "g" : "gi";
|
|
4997
5056
|
const escapedWord = word.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
4998
5057
|
const boundary = this.wordBoundaries ? "\\b" : "";
|
|
4999
|
-
|
|
5058
|
+
const regex = new RegExp(`${boundary}${escapedWord}${boundary}`, flags);
|
|
5059
|
+
this.regexCache.set(word, regex);
|
|
5060
|
+
return regex;
|
|
5000
5061
|
}
|
|
5001
5062
|
isFuzzyToleranceMatch(word, text) {
|
|
5002
5063
|
const simplifiedText = text.toLowerCase().replace(/[^a-z]/g, "");
|
|
@@ -5014,11 +5075,12 @@ var Filter = class {
|
|
|
5014
5075
|
return score >= this.fuzzyToleranceLevel;
|
|
5015
5076
|
}
|
|
5016
5077
|
evaluateSeverity(word, text) {
|
|
5017
|
-
if (this.
|
|
5078
|
+
if (this.getRegex(word).test(text)) {
|
|
5018
5079
|
return 1 /* EXACT */;
|
|
5019
5080
|
}
|
|
5020
|
-
if (this.
|
|
5021
|
-
|
|
5081
|
+
if (!this.wordBoundaries && this.isFuzzyToleranceMatch(word, text)) {
|
|
5082
|
+
return 2 /* FUZZY */;
|
|
5083
|
+
}
|
|
5022
5084
|
return void 0;
|
|
5023
5085
|
}
|
|
5024
5086
|
/**
|
|
@@ -5038,9 +5100,20 @@ var Filter = class {
|
|
|
5038
5100
|
* ```
|
|
5039
5101
|
*/
|
|
5040
5102
|
isProfane(value) {
|
|
5041
|
-
const
|
|
5103
|
+
const originalInput = value;
|
|
5104
|
+
const normalizedInput = this.normalizeText(value);
|
|
5105
|
+
const aggressiveInput = this.normalizeText(value, true);
|
|
5042
5106
|
for (const word of this.words.keys()) {
|
|
5043
|
-
if (
|
|
5107
|
+
if (this.ignoreWords.has(word.toLowerCase())) {
|
|
5108
|
+
continue;
|
|
5109
|
+
}
|
|
5110
|
+
if (this.evaluateSeverity(word, originalInput) !== void 0) {
|
|
5111
|
+
return true;
|
|
5112
|
+
}
|
|
5113
|
+
if (this.evaluateSeverity(word, normalizedInput) !== void 0) {
|
|
5114
|
+
return true;
|
|
5115
|
+
}
|
|
5116
|
+
if (this.evaluateSeverity(word, aggressiveInput) !== void 0) {
|
|
5044
5117
|
return true;
|
|
5045
5118
|
}
|
|
5046
5119
|
}
|
|
@@ -5081,23 +5154,45 @@ var Filter = class {
|
|
|
5081
5154
|
return cachedResult;
|
|
5082
5155
|
}
|
|
5083
5156
|
if (!this.enableContextAware) {
|
|
5084
|
-
|
|
5085
|
-
|
|
5157
|
+
const originalInput = text.toLowerCase();
|
|
5158
|
+
const normalizedInput = this.normalizeText(text).toLowerCase();
|
|
5159
|
+
const aggressiveInput = this.normalizeText(text, true).toLowerCase();
|
|
5086
5160
|
const profaneWords2 = [];
|
|
5087
5161
|
const severityMap2 = {};
|
|
5088
5162
|
for (const dictWord of this.words.keys()) {
|
|
5089
5163
|
if (this.ignoreWords.has(dictWord.toLowerCase())) continue;
|
|
5090
|
-
|
|
5164
|
+
let severity = this.evaluateSeverity(dictWord, originalInput);
|
|
5091
5165
|
if (severity !== void 0) {
|
|
5092
5166
|
const regex = this.getRegex(dictWord);
|
|
5093
5167
|
let match;
|
|
5094
|
-
while ((match = regex.exec(
|
|
5168
|
+
while ((match = regex.exec(originalInput)) !== null) {
|
|
5095
5169
|
profaneWords2.push(match[0]);
|
|
5096
5170
|
if (severityMap2[match[0]] === void 0) {
|
|
5097
5171
|
severityMap2[match[0]] = severity;
|
|
5098
5172
|
}
|
|
5099
5173
|
}
|
|
5100
5174
|
}
|
|
5175
|
+
severity = this.evaluateSeverity(dictWord, normalizedInput);
|
|
5176
|
+
if (severity !== void 0) {
|
|
5177
|
+
const regex = this.getRegex(dictWord);
|
|
5178
|
+
while ((regex.exec(normalizedInput)) !== null) {
|
|
5179
|
+
if (!profaneWords2.includes(dictWord)) {
|
|
5180
|
+
profaneWords2.push(dictWord);
|
|
5181
|
+
if (severityMap2[dictWord] === void 0) {
|
|
5182
|
+
severityMap2[dictWord] = severity;
|
|
5183
|
+
}
|
|
5184
|
+
}
|
|
5185
|
+
}
|
|
5186
|
+
}
|
|
5187
|
+
severity = this.evaluateSeverity(dictWord, aggressiveInput);
|
|
5188
|
+
if (severity !== void 0) {
|
|
5189
|
+
if (!profaneWords2.includes(dictWord)) {
|
|
5190
|
+
profaneWords2.push(dictWord);
|
|
5191
|
+
if (severityMap2[dictWord] === void 0) {
|
|
5192
|
+
severityMap2[dictWord] = severity;
|
|
5193
|
+
}
|
|
5194
|
+
}
|
|
5195
|
+
}
|
|
5101
5196
|
}
|
|
5102
5197
|
let processedText2 = text;
|
|
5103
5198
|
if (this.replaceWith && profaneWords2.length > 0) {
|
package/dist/ml/index.d.cts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { T as ToxicityLabel, f as MLDetectorConfig, e as MLAnalysisResult,
|
|
2
|
-
export { d as ToxicityPrediction } from '../types-
|
|
1
|
+
import { T as ToxicityLabel, f as MLDetectorConfig, e as MLAnalysisResult, F as FilterConfig, C as CheckProfanityResult, H as HybridAnalysisResult, a as Filter } from '../types-Dj5vaoch.cjs';
|
|
2
|
+
export { d as ToxicityPrediction } from '../types-Dj5vaoch.cjs';
|
|
3
3
|
|
|
4
4
|
/**
|
|
5
5
|
* ML-based toxicity detection using TensorFlow.js.
|
package/dist/ml/index.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { T as ToxicityLabel, f as MLDetectorConfig, e as MLAnalysisResult,
|
|
2
|
-
export { d as ToxicityPrediction } from '../types-
|
|
1
|
+
import { T as ToxicityLabel, f as MLDetectorConfig, e as MLAnalysisResult, F as FilterConfig, C as CheckProfanityResult, H as HybridAnalysisResult, a as Filter } from '../types-Dj5vaoch.js';
|
|
2
|
+
export { d as ToxicityPrediction } from '../types-Dj5vaoch.js';
|
|
3
3
|
|
|
4
4
|
/**
|
|
5
5
|
* ML-based toxicity detection using TensorFlow.js.
|