terlik.js 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs ADDED
@@ -0,0 +1,2004 @@
1
// src/dictionary/index.ts
var Dictionary = class {
  entries = /* @__PURE__ */ new Map();
  whitelist;
  allWords = [];
  suffixes;
  /**
   * Creates a new Dictionary from validated dictionary data.
   * @param data - Validated dictionary data (entries, suffixes, whitelist).
   * @param customWords - Additional words to detect.
   * @param customWhitelist - Additional words to exclude.
   */
  constructor(data, customWords, customWhitelist) {
    this.suffixes = data.suffixes;
    this.whitelist = new Set(data.whitelist.map((w) => w.toLowerCase()));
    if (customWhitelist) {
      for (const extra of customWhitelist) this.whitelist.add(extra.toLowerCase());
    }
    for (const { root, variants, severity, category, suffixable } of data.entries) {
      this.addEntry({ root, variants, severity, category, suffixable });
    }
    if (customWords) {
      for (const word of customWords) {
        this.addEntry({ root: word.toLowerCase(), variants: [], severity: "medium" });
      }
    }
  }
  /** Registers an entry under its lowercased root and indexes root + variants. */
  addEntry(entry) {
    const key = entry.root.toLowerCase();
    this.entries.set(key, entry);
    this.allWords.push(key, ...entry.variants.map((v) => v.toLowerCase()));
  }
  /** Returns all dictionary entries keyed by root word. */
  getEntries() {
    return this.entries;
  }
  /** Returns all words (roots + variants) as a flat array. */
  getAllWords() {
    return this.allWords;
  }
  /** Returns the whitelist as a Set of lowercase strings. */
  getWhitelist() {
    return this.whitelist;
  }
  /** Returns available grammatical suffixes for the language. */
  getSuffixes() {
    return this.suffixes;
  }
  /**
   * Adds words to the dictionary at runtime.
   * Empty strings and already-existing words are silently skipped.
   * @param words - Words to add.
   */
  addWords(words) {
    for (const raw of words) {
      const lower = raw.toLowerCase().trim();
      if (lower.length > 0 && !this.entries.has(lower)) {
        this.addEntry({ root: lower, variants: [], severity: "medium" });
      }
    }
  }
  /**
   * Removes words from the dictionary at runtime.
   * @param words - Words to remove.
   */
  removeWords(words) {
    for (const raw of words) {
      const key = raw.toLowerCase();
      const removed = this.entries.get(key);
      if (!removed) continue;
      this.entries.delete(key);
      // Drop the root and every (lowercased) variant from the flat word list.
      const gone = new Set([key, ...removed.variants.map((v) => v.toLowerCase())]);
      this.allWords = this.allWords.filter((w) => !gone.has(w));
    }
  }
  /**
   * Finds the dictionary entry for a given word (checks root and variants).
   * @param word - The word to look up.
   * @returns The matching WordEntry, or undefined if not found.
   */
  findRootForWord(word) {
    const lower = word.toLowerCase();
    const direct = this.entries.get(lower);
    if (direct) return direct;
    for (const entry of this.entries.values()) {
      if (entry.variants.some((v) => v.toLowerCase() === lower)) return entry;
    }
    return void 0;
  }
};
115
+
116
// src/patterns.ts
// Up to 3 non-alphanumeric chars tolerated between letters (e.g. "b.a.d").
var SEPARATOR = "[^\\p{L}\\p{N}]{0,3}";
var MAX_PATTERN_LENGTH = 1e4;
var MAX_SUFFIX_CHAIN = 2;
var REGEX_TIMEOUT_MS = 250;
/** Maps one character to a regex fragment, preferring its obfuscation class. */
function charToPattern(ch, charClasses) {
  const cls = charClasses[ch.toLowerCase()];
  if (cls) return `${cls}+`;
  // No class known: escape regex metacharacters and allow repetition.
  return ch.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") + "+";
}
/** Converts a whole word into a separator-tolerant regex pattern. */
function wordToPattern(word, charClasses, normalizeFn) {
  return [...normalizeFn(word)].map((ch) => charToPattern(ch, charClasses)).join(SEPARATOR);
}
/** Builds one alternation group matching any suffix, longest pattern first. */
function buildSuffixGroup(suffixes, charClasses) {
  if (suffixes.length === 0) return "";
  const suffixPatterns = suffixes.map(
    (suffix) => [...suffix].map((ch) => charToPattern(ch, charClasses)).join(SEPARATOR)
  );
  suffixPatterns.sort((a, b) => b.length - a.length);
  return `(?:${SEPARATOR}(?:${suffixPatterns.join("|")}))`;
}
/**
 * Compiles one regex per dictionary entry, matching root + variants (with
 * optional grammatical suffix chains for suffixable entries).
 * Entries whose pattern fails to compile are skipped with a console warning.
 */
function compilePatterns(entries, suffixes, charClasses, normalizeFn) {
  const patterns = [];
  const suffixGroup = suffixes && suffixes.length > 0 ? buildSuffixGroup(suffixes, charClasses) : "";
  for (const entry of entries.values()) {
    // Deduplicate normalized forms, drop empties, longest first so the regex
    // alternation prefers the longest match.
    const sortedForms = [...new Set([entry.root, ...entry.variants].map((w) => normalizeFn(w)))]
      .filter((w) => w.length > 0)
      .sort((a, b) => b.length - a.length);
    const combined = sortedForms.map((w) => wordToPattern(w, charClasses, normalizeFn)).join("|");
    const useSuffix = Boolean(entry.suffixable) && suffixGroup.length > 0;
    // Word boundaries via lookaround: no letter/digit immediately before/after.
    const plain = `(?<![\\p{L}\\p{N}])(?:${combined})(?![\\p{L}\\p{N}])`;
    let pattern = useSuffix
      ? `(?<![\\p{L}\\p{N}])(?:${combined})${suffixGroup}{0,${MAX_SUFFIX_CHAIN}}(?![\\p{L}\\p{N}])`
      : plain;
    // Oversized suffixed patterns fall back to the plain form.
    if (useSuffix && pattern.length > MAX_PATTERN_LENGTH) pattern = plain;
    try {
      patterns.push({
        root: entry.root,
        severity: entry.severity,
        regex: new RegExp(pattern, "giu"),
        variants: entry.variants
      });
    } catch (err) {
      if (!useSuffix) {
        console.warn(`[terlik] Pattern for "${entry.root}" failed, skipping: ${err instanceof Error ? err.message : String(err)}`);
        continue;
      }
      // Suffixed pattern failed to compile: retry without the suffix group.
      try {
        patterns.push({
          root: entry.root,
          severity: entry.severity,
          regex: new RegExp(plain, "giu"),
          variants: entry.variants
        });
        console.warn(`[terlik] Pattern for "${entry.root}" failed with suffixes, using fallback: ${err instanceof Error ? err.message : String(err)}`);
      } catch (err2) {
        console.warn(`[terlik] Pattern for "${entry.root}" failed completely, skipping: ${err2 instanceof Error ? err2.message : String(err2)}`);
      }
    }
  }
  return patterns;
}
192
+
193
// src/fuzzy.ts
/**
 * Levenshtein edit distance between two strings, computed with two rolling
 * rows (O(n) memory, O(m*n) time).
 */
function levenshteinDistance(a, b) {
  const m = a.length;
  const n = b.length;
  if (m === 0) return n;
  if (n === 0) return m;
  let prev = Array.from({ length: n + 1 }, (_, j) => j);
  let curr = new Array(n + 1);
  for (let i = 1; i <= m; i++) {
    curr[0] = i;
    for (let j = 1; j <= n; j++) {
      const deletion = prev[j] + 1;
      const insertion = curr[j - 1] + 1;
      const substitution = prev[j - 1] + (a[i - 1] === b[j - 1] ? 0 : 1);
      curr[j] = Math.min(deletion, insertion, substitution);
    }
    [prev, curr] = [curr, prev];
  }
  return prev[n];
}
/** Normalized Levenshtein similarity in [0, 1]; empty vs empty scores 1. */
function levenshteinSimilarity(a, b) {
  const maxLen = Math.max(a.length, b.length);
  return maxLen === 0 ? 1 : 1 - levenshteinDistance(a, b) / maxLen;
}
/** Collects the set of adjacent character pairs of a string. */
function bigrams(str) {
  const pairs = /* @__PURE__ */ new Set();
  for (let i = 0; i + 1 < str.length; i++) {
    pairs.add(str.slice(i, i + 2));
  }
  return pairs;
}
/**
 * Sørensen–Dice coefficient over character bigrams.
 * Strings shorter than 2 chars fall back to exact equality (1 or 0).
 */
function diceSimilarity(a, b) {
  if (a.length < 2 || b.length < 2) {
    return a === b ? 1 : 0;
  }
  const setA = bigrams(a);
  const setB = bigrams(b);
  let shared = 0;
  for (const bg of setA) {
    if (setB.has(bg)) shared++;
  }
  return 2 * shared / (setA.size + setB.size);
}
/** Selects the similarity function for the requested algorithm name. */
function getFuzzyMatcher(algorithm) {
  return algorithm === "levenshtein" ? levenshteinSimilarity : diceSimilarity;
}
246
+
247
// src/detector.ts
// Core detection engine combining three strategies: exact normalized word
// lookup ("strict"), compiled obfuscation-tolerant regexes ("balanced"),
// and optional fuzzy similarity matching ("loose" / enableFuzzy).
var Detector = class {
  dictionary;
  // Lazily-compiled patterns; null until first use (see ensureCompiled).
  _patterns = null;
  // All normalized dictionary words (roots + variants) for O(1) exact lookup.
  normalizedWordSet;
  // Maps a normalized word back to its original dictionary spelling.
  normalizedWordToRoot;
  normalizeFn;
  locale;
  charClasses;
  /**
   * @param dictionary - Dictionary providing entries, suffixes and whitelist.
   * @param normalizeFn - Text normalizer (case folding / char mapping — exact
   *   semantics defined by the caller-supplied function).
   * @param locale - Locale tag used for locale-aware lowercasing.
   * @param charClasses - Per-letter regex classes for pattern compilation.
   */
  constructor(dictionary, normalizeFn, locale, charClasses) {
    this.dictionary = dictionary;
    this.normalizeFn = normalizeFn;
    this.locale = locale;
    this.charClasses = charClasses;
    this.normalizedWordSet = /* @__PURE__ */ new Set();
    this.normalizedWordToRoot = /* @__PURE__ */ new Map();
    this.buildNormalizedLookup();
  }
  // Compiles the regex patterns on first use and caches the result.
  ensureCompiled() {
    if (this._patterns === null) {
      this._patterns = compilePatterns(
        this.dictionary.getEntries(),
        this.dictionary.getSuffixes(),
        this.charClasses,
        this.normalizeFn
      );
    }
    return this._patterns;
  }
  /** Forces eager pattern compilation (pay the cost up front). */
  compile() {
    this.ensureCompiled();
  }
  /** Recompiles patterns and rebuilds lookups after dictionary mutations. */
  recompile() {
    this._patterns = compilePatterns(
      this.dictionary.getEntries(),
      this.dictionary.getSuffixes(),
      this.charClasses,
      this.normalizeFn
    );
    this.buildNormalizedLookup();
  }
  // Rebuilds the normalized word set/map from the dictionary's flat word list.
  // Note: if two dictionary words normalize identically, the later one wins
  // in normalizedWordToRoot.
  buildNormalizedLookup() {
    this.normalizedWordSet.clear();
    this.normalizedWordToRoot.clear();
    for (const word of this.dictionary.getAllWords()) {
      const n = this.normalizeFn(word);
      this.normalizedWordSet.add(n);
      this.normalizedWordToRoot.set(n, word);
    }
  }
  /** Returns a Map of root word -> compiled RegExp (compiles if needed). */
  getPatterns() {
    const map = /* @__PURE__ */ new Map();
    for (const p of this.ensureCompiled()) {
      map.set(p.root, p.regex);
    }
    return map;
  }
  /**
   * Detects profanity in the text.
   * @param text - Input text to scan.
   * @param options - Optional: mode ("strict" | "balanced" | "loose"),
   *   enableFuzzy, fuzzyThreshold (default 0.8), fuzzyAlgorithm
   *   (default "levenshtein").
   * @returns Matches deduplicated by index (longest word wins), sorted by index.
   */
  detect(text, options) {
    const mode = options?.mode ?? "balanced";
    const results = [];
    const whitelist = this.dictionary.getWhitelist();
    if (mode === "strict") {
      this.detectStrict(text, whitelist, results);
    } else {
      this.detectPattern(text, whitelist, results);
    }
    // Fuzzy pass runs in loose mode or when explicitly enabled, on top of
    // whichever base pass ran above.
    if (mode === "loose" || options?.enableFuzzy) {
      const threshold = options?.fuzzyThreshold ?? 0.8;
      const algorithm = options?.fuzzyAlgorithm ?? "levenshtein";
      this.detectFuzzy(text, whitelist, results, threshold, algorithm);
    }
    return this.deduplicateResults(results);
  }
  // Exact matching: splits on whitespace and checks each normalized word
  // against the normalized dictionary set.
  // NOTE(review): charIndex assumes exactly one separator char between words;
  // runs of whitespace or leading spaces can skew reported indices — confirm
  // acceptable for strict mode.
  detectStrict(text, whitelist, results) {
    const normalized = this.normalizeFn(text);
    const words = normalized.split(/\s+/);
    const originalWords = text.split(/\s+/);
    let charIndex = 0;
    for (let wi = 0; wi < originalWords.length; wi++) {
      const origWord = originalWords[wi];
      const normWord = wi < words.length ? words[wi] : "";
      if (normWord.length === 0) {
        charIndex += origWord.length + 1;
        continue;
      }
      if (whitelist.has(normWord)) {
        charIndex += origWord.length + 1;
        continue;
      }
      if (this.normalizedWordSet.has(normWord)) {
        // Map normalized form -> dictionary spelling -> owning entry.
        const dictWord = this.normalizedWordToRoot.get(normWord);
        const entry = this.dictionary.findRootForWord(dictWord);
        if (entry) {
          results.push({
            word: origWord,
            root: entry.root,
            index: charIndex,
            severity: entry.severity,
            method: "exact"
          });
        }
      }
      charIndex += origWord.length + 1;
    }
  }
  // Pattern matching: runs on the raw text, then (if normalization changes
  // the text) a second pass on the normalized text with index mapping back
  // to the original.
  detectPattern(text, whitelist, results) {
    this.runPatterns(text, text, whitelist, results, false);
    const normalizedText = this.normalizeFn(text);
    const lowerText = text.toLocaleLowerCase(this.locale);
    if (normalizedText !== lowerText && normalizedText.length > 0) {
      this.runPatterns(normalizedText, text, whitelist, results, true);
    }
  }
  // Executes every compiled pattern against searchText, appending matches.
  // Each pattern gets a REGEX_TIMEOUT_MS budget as a ReDoS guard.
  runPatterns(searchText, originalText, whitelist, results, isNormalized) {
    const existingIndices = new Set(results.map((r) => r.index));
    const patterns = this.ensureCompiled();
    for (const pattern of patterns) {
      const patternStart = Date.now();
      // /g regexes are stateful: reset lastIndex before each text.
      pattern.regex.lastIndex = 0;
      let match;
      while ((match = pattern.regex.exec(searchText)) !== null) {
        if (Date.now() - patternStart > REGEX_TIMEOUT_MS) break;
        const matchedText = match[0];
        const matchIndex = match.index;
        const normalizedMatch = this.normalizeFn(matchedText);
        if (whitelist.has(normalizedMatch)) continue;
        // Also skip when the full surrounding word is whitelisted
        // (e.g. a profane substring inside a harmless word).
        const surrounding = this.getSurroundingWord(searchText, matchIndex, matchedText.length);
        const normalizedSurrounding = this.normalizeFn(surrounding);
        if (whitelist.has(normalizedSurrounding)) continue;
        if (isNormalized) {
          // Second pass: map the normalized-text index back to the original.
          const mapped = this.mapNormalizedToOriginal(originalText, matchIndex, matchedText);
          if (mapped && !existingIndices.has(mapped.index)) {
            results.push({
              word: mapped.word,
              root: pattern.root,
              index: mapped.index,
              severity: pattern.severity,
              method: "pattern"
            });
            existingIndices.add(mapped.index);
          }
        } else {
          if (!existingIndices.has(matchIndex)) {
            results.push({
              word: matchedText,
              root: pattern.root,
              index: matchIndex,
              severity: pattern.severity,
              method: "pattern"
            });
            existingIndices.add(matchIndex);
          }
        }
        // Guard against infinite loops on zero-length matches.
        if (matchedText.length === 0) {
          pattern.regex.lastIndex++;
        }
      }
    }
  }
  // Walks original-text segments (words and whitespace runs) in parallel with
  // their normalized lengths to find which original word contains normIndex.
  // NOTE(review): assumes the normalizer collapses each whitespace run to a
  // single position (normOffset += 1) — confirm against the normalizer.
  mapNormalizedToOriginal(originalText, normIndex, _normMatch) {
    const origWords = originalText.split(/(\s+)/);
    let normOffset = 0;
    let origOffset = 0;
    for (const segment of origWords) {
      if (/^\s+$/.test(segment)) {
        normOffset += 1;
        origOffset += segment.length;
        continue;
      }
      const normWord = this.normalizeFn(segment);
      const normEnd = normOffset + normWord.length;
      if (normIndex >= normOffset && normIndex < normEnd) {
        return { word: segment, index: origOffset };
      }
      normOffset = normEnd;
      origOffset += segment.length;
    }
    return null;
  }
  // Fuzzy matching: compares each normalized input word (length >= 3) to each
  // dictionary word; first dictionary word at/above threshold wins per input
  // word. Bounded by REGEX_TIMEOUT_MS overall.
  detectFuzzy(text, whitelist, existingResults, threshold, algorithm) {
    const normalized = this.normalizeFn(text);
    const normWords = normalized.split(/\s+/);
    const origWords = text.split(/\s+/);
    const matcher = getFuzzyMatcher(algorithm);
    const existingIndices = new Set(existingResults.map((r) => r.index));
    const startTime = Date.now();
    let charIndex = 0;
    for (let wi = 0; wi < origWords.length; wi++) {
      if (Date.now() - startTime > REGEX_TIMEOUT_MS) break;
      const origWord = origWords[wi];
      const word = wi < normWords.length ? normWords[wi] : "";
      // Short words produce too many false positives for fuzzy matching.
      if (word.length < 3 || whitelist.has(word)) {
        charIndex += origWord.length + 1;
        continue;
      }
      for (const normDict of this.normalizedWordSet) {
        if (normDict.length < 3) continue;
        const similarity = matcher(word, normDict);
        if (similarity >= threshold) {
          if (!existingIndices.has(charIndex)) {
            const dictWord = this.normalizedWordToRoot.get(normDict);
            const entry = this.dictionary.findRootForWord(dictWord);
            if (entry) {
              existingResults.push({
                word: origWord,
                root: entry.root,
                index: charIndex,
                severity: entry.severity,
                method: "fuzzy"
              });
              existingIndices.add(charIndex);
            }
          }
          break;
        }
      }
      // Same single-separator assumption as detectStrict (see note there).
      charIndex += origWord.length + 1;
    }
  }
  // Expands a match outward to the full run of letters containing it.
  getSurroundingWord(text, index, length) {
    let start = index;
    let end = index + length;
    while (start > 0 && /\p{L}/u.test(text[start - 1])) start--;
    while (end < text.length && /\p{L}/u.test(text[end])) end++;
    return text.slice(start, end);
  }
  // Keeps one result per index (the longest matched word) and sorts by index.
  deduplicateResults(results) {
    const seen = /* @__PURE__ */ new Map();
    for (const result of results) {
      const existing = seen.get(result.index);
      if (!existing || result.word.length > existing.word.length) {
        seen.set(result.index, result);
      }
    }
    return [...seen.values()].sort((a, b) => a.index - b.index);
  }
};
484
+
485
// src/cleaner.ts
/** Replaces every character of the word with "*". */
function maskStars(word) {
  return "*".repeat(word.length);
}
/** Keeps first and last characters, stars the middle; short words are fully starred. */
function maskPartial(word) {
  const len = word.length;
  if (len <= 2) return "*".repeat(len);
  return word[0] + "*".repeat(len - 2) + word[len - 1];
}
/** Returns the fixed replacement token, ignoring the matched word. */
function maskReplace(replaceMask) {
  return replaceMask;
}
/** Dispatches to the masking function for the chosen style. */
function applyMask(word, style, replaceMask) {
  if (style === "stars") return maskStars(word);
  if (style === "partial") return maskPartial(word);
  if (style === "replace") return maskReplace(replaceMask);
}
/**
 * Masks every match inside the text.
 * Matches are processed right-to-left so earlier indices stay valid even when
 * the replacement length differs from the matched word.
 */
function cleanText(text, matches, style, replaceMask) {
  if (matches.length === 0) return text;
  let result = text;
  for (const match of [...matches].sort((a, b) => b.index - a.index)) {
    const head = result.slice(0, match.index);
    const tail = result.slice(match.index + match.word.length);
    result = head + applyMask(match.word, style, replaceMask) + tail;
  }
  return result;
}
516
+
517
// src/utils.ts
// Hard cap applied to scanned text when callers do not supply their own limit.
var MAX_INPUT_LENGTH = 1e4;
/**
 * Coerces arbitrary input into a bounded string:
 * - null/undefined become ""
 * - non-strings are stringified via String()
 * - anything longer than maxLength is truncated
 * Fix: previously non-string input was returned stringified WITHOUT the
 * maxLength truncation, bypassing the length cap this function enforces.
 * @param text - Raw input of any type.
 * @param maxLength - Maximum allowed length in UTF-16 code units.
 * @returns A string no longer than maxLength.
 */
function validateInput(text, maxLength) {
  if (text == null) return "";
  const str = typeof text === "string" ? text : String(text);
  return str.length > maxLength ? str.slice(0, maxLength) : str;
}
525
+
526
// src/lang/tr/dictionary.json
// Bundled Turkish profanity dictionary (validated at load by schema.ts).
// Fix: removed duplicate forms — "siktiler" was listed twice in the "sik"
// entry, and "amk"/"yarrak" repeated their own root inside variants. The
// pattern compiler deduplicates forms, but the duplicates also inflated
// Dictionary.getAllWords() and the fuzzy-matching word set.
var dictionary_default = {
  version: 1,
  suffixes: [
    "tir", "dir", "il", "in", "ik", "uk", "en", "ici", "di", "ti",
    "dim", "tim", "din", "dik", "tik", "mis", "mus", "iyor", "uyor",
    "ecek", "acak", "ecem", "acam", "icem", "er", "ir", "ar", "eyim",
    "ayim", "elim", "alim", "se", "sa", "im", "um", "sin", "sun",
    "yim", "mek", "mak", "me", "ma", "is", "us", "ler", "lar", "si",
    "e", "a", "de", "da", "den", "dan", "te", "ta", "ten", "tan",
    "i", "lik", "luk", "li", "lu", "ci", "cu", "ce", "ca", "le",
    "la", "ken", "esi", "un", "lari", "leri", "larin", "lerin",
    "misin", "misiniz", "musun", "musunuz", "miyim", "miyiz",
    "cesine", "casina"
  ],
  entries: [
    {
      root: "sik",
      variants: [
        "siktir", "sikicem", "siktim", "sikeyim", "sikerim", "sikis",
        "sikik", "sikim", "sikimle", "sikimin", "sikime", "sike",
        "siken", "siker", "sikti", "siktiler", "sikmis", "sikmek",
        "sikecek", "sikiyor", "sikme", "sikici", "siksin", "siktirler",
        "sikimsonik", "siktirin", "sikerler", "sikiler",
        "sikti\u011Fimin", "sikermisiniz", "sikermisin",
        "siktirmi\u015Fcesine"
      ],
      severity: "high",
      category: "sexual",
      suffixable: false
    },
    {
      root: "amk",
      variants: ["amina", "aminakoyim", "aminakoydugum", "amq"],
      severity: "high",
      category: "sexual",
      suffixable: false
    },
    {
      root: "orospu",
      variants: ["orospucocugu", "orosbucocugu", "orspu", "oruspu", "orosbu"],
      severity: "high",
      category: "insult",
      suffixable: true
    },
    {
      root: "pi\xE7",
      variants: ["pic", "piclik"],
      severity: "high",
      category: "insult",
      suffixable: true
    },
    {
      root: "yarrak",
      variants: ["yarak", "yarakli", "dalyarak", "dalyarrak"],
      severity: "high",
      category: "sexual",
      suffixable: true
    },
    {
      root: "g\xF6t",
      variants: [
        "got", "gotunu", "gotlek", "gotveren", "gotverenler", "gote",
        "gotu", "gotler", "gotlu", "gotunden", "gotune"
      ],
      severity: "high",
      category: "sexual",
      suffixable: false
    },
    {
      root: "am",
      variants: ["amcik", "amcuk"],
      severity: "high",
      category: "sexual",
      suffixable: false
    },
    {
      root: "ta\u015Fak",
      variants: ["tasak", "tassak", "tassakli"],
      severity: "medium",
      category: "sexual",
      suffixable: true
    },
    {
      root: "meme",
      variants: [],
      severity: "medium",
      category: "sexual",
      suffixable: false
    },
    {
      root: "ibne",
      variants: ["ibneler"],
      severity: "high",
      category: "slur",
      suffixable: true
    },
    {
      root: "gavat",
      variants: ["gavatlik"],
      severity: "high",
      category: "insult",
      suffixable: true
    },
    {
      root: "pezevenk",
      variants: ["pezo"],
      severity: "high",
      category: "insult",
      suffixable: true
    },
    {
      root: "bok",
      variants: ["boktan", "boka", "boku", "boklu", "boklar", "boklari"],
      severity: "medium",
      category: "general",
      suffixable: false
    },
    {
      root: "haysiyetsiz",
      variants: [],
      severity: "medium",
      category: "insult",
      suffixable: false
    },
    {
      root: "salak",
      variants: ["salaklik"],
      severity: "low",
      category: "insult",
      suffixable: true
    },
    {
      root: "aptal",
      variants: ["aptallik", "aptalca"],
      severity: "low",
      category: "insult",
      suffixable: true
    },
    {
      root: "gerizekal\u0131",
      variants: ["gerizekali"],
      severity: "low",
      category: "insult",
      suffixable: true
    },
    {
      root: "mal",
      variants: [],
      severity: "low",
      category: "insult",
      suffixable: false
    },
    {
      root: "dangalak",
      variants: [],
      severity: "low",
      category: "insult",
      suffixable: true
    },
    {
      root: "ezik",
      variants: [],
      severity: "low",
      category: "insult",
      suffixable: true
    },
    {
      root: "pu\u015Ft",
      variants: ["pust", "pustt"],
      severity: "high",
      category: "slur",
      suffixable: true
    },
    {
      root: "\u015Ferefsiz",
      variants: ["serefsiz", "serefsizler"],
      severity: "medium",
      category: "insult",
      suffixable: true
    },
    {
      root: "yav\u015Fak",
      variants: ["yavsak"],
      severity: "medium",
      category: "insult",
      suffixable: true
    },
    {
      root: "d\xF6l",
      variants: ["dol", "dolunu", "dolcu"],
      severity: "high",
      category: "sexual",
      suffixable: false
    },
    {
      root: "kahpe",
      variants: ["kahpelik"],
      severity: "high",
      category: "insult",
      suffixable: true
    }
  ],
  whitelist: [
    "amsterdam", "amateur", "amat\xF6r", "sikke", "sikkeler",
    "masikler", "sikilasma", "ambalaj", "ambassador", "ambulans",
    "amel", "ameliyat", "amerika", "amele", "amino", "amonyak",
    "amper", "ampul", "boks\xF6r", "bokser", "bokluk", "malzeme",
    "maliyet", "malik", "malikane", "maliye", "mallorca", "malta",
    "malt", "gotan", "gotik", "gotham", "memento", "memleket",
    "memur", "memorial", "piknik", "pikachu", "tasselled", "siklet",
    "kasim", "kas\u0131m", "yarasa", "dolunay", "dolum", "doluluk",
    "ama", "ami", "amen", "amir", "amil", "dolmen"
  ]
};
899
+
900
// src/dictionary/schema.ts
var VALID_SEVERITIES = ["high", "medium", "low"];
var VALID_CATEGORIES = ["sexual", "insult", "slur", "general"];
var MAX_SUFFIXES = 100;
var SUFFIX_PATTERN = /^[a-z]{1,10}$/;
/**
 * Validates raw dictionary data and returns it unchanged when well-formed.
 * Throws an Error describing the first problem found.
 */
function validateDictionary(data) {
  if (data == null || typeof data !== "object") {
    throw new Error("Dictionary data must be a non-null object");
  }
  const d = data;
  if (typeof d.version !== "number" || d.version < 1) {
    throw new Error("Dictionary version must be a positive number");
  }
  checkSuffixSection(d.suffixes);
  checkEntrySection(d.entries);
  checkWhitelistSection(d.whitelist);
  return data;
}
// Validates the suffix array: size cap plus per-suffix shape.
function checkSuffixSection(suffixes) {
  if (!Array.isArray(suffixes)) {
    throw new Error("Dictionary suffixes must be an array");
  }
  if (suffixes.length > MAX_SUFFIXES) {
    throw new Error(`Dictionary suffixes exceed maximum of ${MAX_SUFFIXES}`);
  }
  for (const suffix of suffixes) {
    if (typeof suffix !== "string" || !SUFFIX_PATTERN.test(suffix)) {
      throw new Error(
        `Invalid suffix "${suffix}": must be 1-10 lowercase letters [a-z]`
      );
    }
  }
}
// Validates every word entry and rejects duplicate roots (case-insensitive).
function checkEntrySection(entries) {
  if (!Array.isArray(entries)) {
    throw new Error("Dictionary entries must be an array");
  }
  const seenRoots = /* @__PURE__ */ new Set();
  entries.forEach((entry, i) => {
    const label = `entries[${i}]`;
    if (entry == null || typeof entry !== "object") {
      throw new Error(`${label}: must be an object`);
    }
    if (typeof entry.root !== "string" || entry.root.length === 0) {
      throw new Error(`${label}: root must be a non-empty string`);
    }
    const rootLower = entry.root.toLowerCase();
    if (seenRoots.has(rootLower)) {
      throw new Error(`${label}: duplicate root "${entry.root}"`);
    }
    seenRoots.add(rootLower);
    if (!Array.isArray(entry.variants)) {
      throw new Error(`${label} (root="${entry.root}"): variants must be an array`);
    }
    if (typeof entry.severity !== "string" || !VALID_SEVERITIES.includes(entry.severity)) {
      throw new Error(
        `${label} (root="${entry.root}"): severity must be one of ${VALID_SEVERITIES.join(", ")}`
      );
    }
    if (typeof entry.category !== "string" || !VALID_CATEGORIES.includes(entry.category)) {
      throw new Error(
        `${label} (root="${entry.root}"): category must be one of ${VALID_CATEGORIES.join(", ")}`
      );
    }
    if (typeof entry.suffixable !== "boolean") {
      throw new Error(`${label} (root="${entry.root}"): suffixable must be a boolean`);
    }
  });
}
// Validates the whitelist: strings, non-empty, no case-insensitive duplicates.
function checkWhitelistSection(whitelist) {
  if (!Array.isArray(whitelist)) {
    throw new Error("Dictionary whitelist must be an array");
  }
  const seenWhitelist = /* @__PURE__ */ new Set();
  whitelist.forEach((word, i) => {
    if (typeof word !== "string") {
      throw new Error(`whitelist[${i}]: must be a string`);
    }
    if (word.length === 0) {
      throw new Error(`whitelist[${i}]: must not be empty`);
    }
    const wlLower = word.toLowerCase();
    if (seenWhitelist.has(wlLower)) {
      throw new Error(`whitelist[${i}]: duplicate entry "${word}"`);
    }
    seenWhitelist.add(wlLower);
  });
}
980
+
981
// src/lang/tr/config.ts
// Validate the bundled Turkish dictionary once at module load; throws on malformed data.
var validatedData = validateDictionary(dictionary_default);
// Locale configuration for Turkish ("tr"): normalization maps, obfuscation
// character classes, and the validated dictionary payload.
var config = {
  locale: "tr",
  // Turkish-specific letters (both cases) folded to ASCII equivalents.
  charMap: {
    \u00E7: "c",
    \u00C7: "c",
    \u011F: "g",
    \u011E: "g",
    \u0131: "i",
    \u0130: "i",
    \u00F6: "o",
    \u00D6: "o",
    \u015F: "s",
    \u015E: "s",
    \u00FC: "u",
    \u00DC: "u"
  },
  // Leetspeak digits/symbols mapped back to the letters they imitate.
  leetMap: {
    "0": "o",
    "1": "i",
    "2": "i",
    "3": "e",
    "4": "a",
    "5": "s",
    "6": "g",
    "7": "t",
    "8": "b",
    "9": "g",
    "@": "a",
    $: "s",
    "!": "i"
  },
  // Per-letter regex character classes covering accents and leet lookalikes;
  // consumed by patterns.ts when building obfuscation-tolerant regexes.
  charClasses: {
    a: "[a4\xE0\xE1\xE2\xE3\xE4\xE5]",
    b: "[b8\xDF]",
    c: "[c\xE7\xC7]",
    d: "[d]",
    e: "[e3\xE8\xE9\xEA\xEB]",
    f: "[f]",
    g: "[g\u011F\u011E69]",
    h: "[h]",
    i: "[i\u0131\u013012\xEC\xED\xEE\xEF]",
    j: "[j]",
    k: "[k]",
    l: "[l1]",
    m: "[m]",
    n: "[n\xF1]",
    o: "[o0\xF6\xD6\xF2\xF3\xF4\xF5]",
    p: "[p]",
    q: "[qk]",
    r: "[r]",
    s: "[s5\u015F\u015E\xDF]",
    t: "[t7]",
    // "u" and "v" deliberately cross-match each other (common substitutions).
    u: "[u\xFC\xDC\xF9\xFA\xFBv]",
    v: "[vu]",
    w: "[w]",
    x: "[x]",
    y: "[y]",
    z: "[z2]"
  },
  // Digit strings paired with their Turkish spellings — presumably expanded
  // during normalization, longest number first; confirm against the normalizer.
  numberExpansions: [
    ["100", "yuz"],
    ["50", "elli"],
    ["10", "on"],
    ["2", "iki"]
  ],
  dictionary: validatedData
};
1050
+
1051
+ // src/lang/en/dictionary.json
1052
+ var dictionary_default2 = {
1053
+ version: 1,
1054
+ suffixes: ["ing", "ed", "er", "ers", "s", "es", "ly", "ness"],
1055
+ entries: [
1056
+ {
1057
+ root: "fuck",
1058
+ variants: ["fucking", "fucker", "fucked", "fuckers", "fucks", "fck", "fuk", "fuking", "fcking", "stfu", "motherfucker", "motherfucking", "fuckface", "fuckwit", "clusterfuck", "mindfuck"],
1059
+ severity: "high",
1060
+ category: "sexual",
1061
+ suffixable: true
1062
+ },
1063
+ {
1064
+ root: "shit",
1065
+ variants: ["shitty", "bullshit", "shitting", "sht", "shits", "shite", "shithead", "shitstorm", "dipshit", "horseshit", "batshit", "apeshit", "shithole", "shitface", "shitshow"],
1066
+ severity: "high",
1067
+ category: "general",
1068
+ suffixable: true
1069
+ },
1070
+ {
1071
+ root: "ass",
1072
+ variants: ["asses", "arse", "arses", "asshat", "asswipe", "smartass", "dumbass", "fatass", "badass", "jackass", "lardass", "kickass"],
1073
+ severity: "medium",
1074
+ category: "insult",
1075
+ suffixable: false
1076
+ },
1077
+ {
1078
+ root: "asshole",
1079
+ variants: ["assholes", "arsehole", "arseholes"],
1080
+ severity: "high",
1081
+ category: "insult",
1082
+ suffixable: false
1083
+ },
1084
+ {
1085
+ root: "bitch",
1086
+ variants: ["bitches", "bitchy", "biatch", "bitching", "bitchass", "sonofabitch"],
1087
+ severity: "high",
1088
+ category: "insult",
1089
+ suffixable: true
1090
+ },
1091
+ {
1092
+ root: "bastard",
1093
+ variants: ["bastards", "bastardy"],
1094
+ severity: "medium",
1095
+ category: "insult",
1096
+ suffixable: true
1097
+ },
1098
+ {
1099
+ root: "dick",
1100
+ variants: ["dickhead", "dickheads", "dicks", "dickwad", "dickweed"],
1101
+ severity: "medium",
1102
+ category: "sexual",
1103
+ suffixable: false
1104
+ },
1105
+ {
1106
+ root: "cock",
1107
+ variants: ["cocks", "cocksucker", "cocksucking", "cockhead"],
1108
+ severity: "high",
1109
+ category: "sexual",
1110
+ suffixable: false
1111
+ },
1112
+ {
1113
+ root: "cunt",
1114
+ variants: ["cunts", "cunty"],
1115
+ severity: "high",
1116
+ category: "sexual",
1117
+ suffixable: true
1118
+ },
1119
+ {
1120
+ root: "whore",
1121
+ variants: ["whores", "whorish", "whorebag"],
1122
+ severity: "high",
1123
+ category: "insult",
1124
+ suffixable: true
1125
+ },
1126
+ {
1127
+ root: "slut",
1128
+ variants: ["sluts", "slutty", "slutbag"],
1129
+ severity: "high",
1130
+ category: "insult",
1131
+ suffixable: true
1132
+ },
1133
+ {
1134
+ root: "piss",
1135
+ variants: ["pissed", "pissing", "pisser", "pissoff", "pisshead"],
1136
+ severity: "medium",
1137
+ category: "general",
1138
+ suffixable: true
1139
+ },
1140
+ {
1141
+ root: "wank",
1142
+ variants: ["wanker", "wankers", "wanking"],
1143
+ severity: "medium",
1144
+ category: "sexual",
1145
+ suffixable: true
1146
+ },
1147
+ {
1148
+ root: "twat",
1149
+ variants: ["twats"],
1150
+ severity: "high",
1151
+ category: "sexual",
1152
+ suffixable: true
1153
+ },
1154
+ {
1155
+ root: "bollocks",
1156
+ variants: ["bollock", "bollocked"],
1157
+ severity: "medium",
1158
+ category: "general",
1159
+ suffixable: false
1160
+ },
1161
+ {
1162
+ root: "crap",
1163
+ variants: ["crappy", "craps"],
1164
+ severity: "low",
1165
+ category: "general",
1166
+ suffixable: true
1167
+ },
1168
+ {
1169
+ root: "damn",
1170
+ variants: ["damned", "damnit", "dammit", "goddamn", "goddamnit"],
1171
+ severity: "low",
1172
+ category: "general",
1173
+ suffixable: false
1174
+ },
1175
+ {
1176
+ root: "retard",
1177
+ variants: ["retards", "retarded", "retardation"],
1178
+ severity: "high",
1179
+ category: "slur",
1180
+ suffixable: false
1181
+ },
1182
+ {
1183
+ root: "nigger",
1184
+ variants: ["niggers", "nigga", "niggas", "nigg3r"],
1185
+ severity: "high",
1186
+ category: "slur",
1187
+ suffixable: false
1188
+ },
1189
+ {
1190
+ root: "faggot",
1191
+ variants: ["faggots", "fag", "fags", "faggy"],
1192
+ severity: "high",
1193
+ category: "slur",
1194
+ suffixable: false
1195
+ },
1196
+ {
1197
+ root: "douche",
1198
+ variants: ["douchebag", "douchebags", "douchy", "douchey"],
1199
+ severity: "medium",
1200
+ category: "insult",
1201
+ suffixable: true
1202
+ },
1203
+ {
1204
+ root: "tosser",
1205
+ variants: ["tossers"],
1206
+ severity: "medium",
1207
+ category: "insult",
1208
+ suffixable: false
1209
+ },
1210
+ {
1211
+ root: "wanker",
1212
+ variants: ["wankers"],
1213
+ severity: "medium",
1214
+ category: "insult",
1215
+ suffixable: false
1216
+ }
1217
+ ],
1218
+ whitelist: [
1219
+ "assembly",
1220
+ "assist",
1221
+ "assassin",
1222
+ "bass",
1223
+ "class",
1224
+ "classic",
1225
+ "classify",
1226
+ "grass",
1227
+ "mass",
1228
+ "massive",
1229
+ "pass",
1230
+ "passage",
1231
+ "passenger",
1232
+ "passion",
1233
+ "passive",
1234
+ "passport",
1235
+ "assume",
1236
+ "assignment",
1237
+ "associate",
1238
+ "assertion",
1239
+ "asset",
1240
+ "assess",
1241
+ "dickens",
1242
+ "cocktail",
1243
+ "cockatoo",
1244
+ "peacock",
1245
+ "hancock",
1246
+ "scrap",
1247
+ "scrappy",
1248
+ "shitake",
1249
+ "document",
1250
+ "buckle",
1251
+ "piston",
1252
+ "bassist",
1253
+ "embassy",
1254
+ "cassette",
1255
+ "hassle",
1256
+ "lasso",
1257
+ "massage",
1258
+ "compass",
1259
+ "trespass",
1260
+ "harass"
1261
+ ]
1262
+ };
1263
+
1264
// src/lang/en/config.ts
// English language pack: validated dictionary plus normalization tables.
var validatedData2 = validateDictionary(dictionary_default2);
var config2 = {
  locale: "en",
  // No diacritic folding needed for English.
  charMap: {},
  // Leetspeak digit/symbol substitutions undone during normalization.
  leetMap: {
    "0": "o",
    "1": "i",
    "3": "e",
    "4": "a",
    "5": "s",
    "7": "t",
    "@": "a",
    $: "s",
    "!": "i"
  },
  // Per-letter regex character classes used when compiling detection
  // patterns (each letter also matches its common look-alikes).
  charClasses: {
    a: "[a4]",
    b: "[b8]",
    c: "[c]",
    d: "[d]",
    e: "[e3]",
    f: "[f]",
    g: "[g9]",
    h: "[h]",
    i: "[i1]",
    j: "[j]",
    k: "[k]",
    l: "[l1]",
    m: "[m]",
    n: "[n]",
    o: "[o0]",
    p: "[p]",
    q: "[q]",
    r: "[r]",
    s: "[s5]",
    t: "[t7]",
    u: "[uv]",
    v: "[vu]",
    w: "[w]",
    x: "[x]",
    y: "[y]",
    z: "[z]"
  },
  dictionary: validatedData2
};
1310
+
1311
// src/lang/es/dictionary.json
// Spanish profanity dictionary (JSON inlined by the bundler).
// Shape: { version, suffixes, entries[], whitelist[] }; validated by
// validateDictionary() before use.
var dictionary_default3 = {
  version: 1,
  // Inflectional endings tried after a root when an entry is `suffixable`.
  suffixes: ["ado", "ando", "ido", "iendo", "ar", "er", "ir", "os", "as", "es", "ito", "ita", "azo"],
  entries: [
    {
      root: "mierda",
      variants: ["mierdas", "mierdo", "mierdero", "mierdoso"],
      severity: "high",
      category: "general",
      suffixable: true
    },
    {
      root: "puta",
      variants: ["putas", "putada", "puto", "putos", "hijoputa", "hijaputa", "putero", "puton", "putear"],
      severity: "high",
      category: "insult",
      suffixable: true
    },
    {
      root: "cabron",
      variants: ["cabrones", "cabrona", "cabronazo", "cabronada"],
      severity: "high",
      category: "insult",
      suffixable: true
    },
    {
      root: "joder",
      variants: ["jodido", "jodida", "jodidos", "jodidas", "joderse", "jodiendo"],
      severity: "high",
      category: "general",
      suffixable: true
    },
    {
      // "\xF1" is ñ; the accent-less form is also listed as a variant.
      root: "co\xF1o",
      variants: ["cono", "conos", "co\xF1os"],
      severity: "high",
      category: "sexual",
      suffixable: false
    },
    {
      root: "verga",
      variants: ["vergas", "vergon", "vergudo", "vergota"],
      severity: "high",
      category: "sexual",
      suffixable: true
    },
    {
      root: "chingar",
      variants: ["chingado", "chingada", "chingados", "chinga", "chingas", "chingo", "chingon", "chingona", "chingadera"],
      severity: "high",
      category: "general",
      suffixable: true
    },
    {
      root: "pendejo",
      variants: ["pendejos", "pendeja", "pendejas", "pendejada", "pendejear"],
      severity: "high",
      category: "insult",
      suffixable: true
    },
    {
      root: "marica",
      variants: ["maricas", "maricon", "maricones", "maricona"],
      severity: "high",
      category: "slur",
      suffixable: false
    },
    {
      root: "carajo",
      variants: ["carajos"],
      severity: "medium",
      category: "general",
      suffixable: false
    },
    {
      root: "idiota",
      variants: ["idiotas", "idiotez"],
      severity: "low",
      category: "insult",
      suffixable: false
    },
    {
      root: "culo",
      variants: ["culos", "culazo", "culear"],
      severity: "medium",
      category: "sexual",
      suffixable: true
    },
    {
      root: "zorra",
      variants: ["zorras", "zorron"],
      severity: "high",
      category: "insult",
      suffixable: true
    },
    {
      root: "estupido",
      variants: ["estupidos", "estupida", "estupidas", "estupidez"],
      severity: "low",
      category: "insult",
      suffixable: false
    },
    {
      root: "imbecil",
      variants: ["imbeciles"],
      severity: "low",
      category: "insult",
      suffixable: false
    },
    {
      root: "gilipollas",
      variants: ["gilipolleces", "gilipollez"],
      severity: "high",
      category: "insult",
      suffixable: false
    },
    {
      root: "huevon",
      variants: ["huevones", "huevona", "huevonazo", "guevon"],
      severity: "medium",
      category: "insult",
      suffixable: false
    },
    {
      root: "mamada",
      variants: ["mamadas", "mamazo", "mamon", "mamona", "mamones"],
      severity: "medium",
      category: "sexual",
      suffixable: false
    },
    {
      root: "pinche",
      variants: ["pinches"],
      severity: "medium",
      category: "general",
      suffixable: false
    }
  ],
  // Innocent words containing a profane substring; excluded from matching.
  whitelist: [
    "putamen",
    "computadora",
    "computar",
    "disputar",
    "disputa",
    "reputacion",
    "imputar",
    "inocular",
    "acular",
    "calcular",
    "icular",
    "vehicular",
    "particular",
    "articulo",
    "maricopa"
  ]
};
1468
+
1469
// src/lang/es/config.ts
// Spanish language pack: validated dictionary plus normalization tables.
var validatedData3 = validateDictionary(dictionary_default3);
var config3 = {
  locale: "es",
  // Accent/diacritic folding (keys are unicode-escaped identifiers,
  // e.g. \u00F1 is ñ); all accented vowels and ñ fold to ASCII.
  charMap: {
    \u00F1: "n",
    \u00D1: "n",
    \u00E1: "a",
    \u00C1: "a",
    \u00E9: "e",
    \u00C9: "e",
    \u00ED: "i",
    \u00CD: "i",
    \u00F3: "o",
    \u00D3: "o",
    \u00FA: "u",
    \u00DA: "u"
  },
  // Leetspeak digit/symbol substitutions undone during normalization.
  leetMap: {
    "0": "o",
    "1": "i",
    "3": "e",
    "4": "a",
    "5": "s",
    "7": "t",
    "@": "a",
    $: "s",
    "!": "i"
  },
  // Per-letter regex character classes (letters also match leet digits
  // and their accented forms, \xE1 = á etc.).
  charClasses: {
    a: "[a4\xE1\xC1]",
    b: "[b8]",
    c: "[c]",
    d: "[d]",
    e: "[e3\xE9\xC9]",
    f: "[f]",
    g: "[g9]",
    h: "[h]",
    i: "[i1\xED\xCD]",
    j: "[j]",
    k: "[k]",
    l: "[l1]",
    m: "[m]",
    n: "[n\xF1\xD1]",
    o: "[o0\xF3\xD3]",
    p: "[p]",
    q: "[q]",
    r: "[r]",
    s: "[s5]",
    t: "[t7]",
    u: "[uv\xFA\xDA]",
    v: "[vu]",
    w: "[w]",
    x: "[x]",
    y: "[y]",
    z: "[z]"
  },
  dictionary: validatedData3
};
1528
+
1529
// src/lang/de/dictionary.json
// German profanity dictionary (JSON inlined by the bundler).
// Shape: { version, suffixes, entries[], whitelist[] }; validated by
// validateDictionary() before use.
var dictionary_default4 = {
  version: 1,
  // Declensional endings tried after a root when an entry is `suffixable`.
  suffixes: ["en", "er", "es", "em", "ung", "e", "te", "st"],
  entries: [
    {
      // "\xDF" is ß; the "ss" spellings are listed as variants.
      root: "schei\xDFe",
      variants: ["scheisse", "scheiss", "scheisser", "scheisserei", "beschissen"],
      severity: "high",
      category: "general",
      suffixable: true
    },
    {
      root: "fick",
      variants: ["ficken", "ficker", "gefickt", "fickend", "fickerei", "abgefickt"],
      severity: "high",
      category: "sexual",
      suffixable: true
    },
    {
      root: "arsch",
      variants: ["arschloch", "arscher", "arschgeige", "arschgesicht", "arschkriecher"],
      severity: "high",
      category: "insult",
      suffixable: true
    },
    {
      root: "hurensohn",
      variants: ["hurensohne", "hurens\xF6hne"],
      severity: "high",
      category: "insult",
      suffixable: false
    },
    {
      root: "hure",
      variants: ["huren", "hurig"],
      severity: "high",
      category: "insult",
      suffixable: true
    },
    {
      root: "fotze",
      variants: ["fotzen"],
      severity: "high",
      category: "sexual",
      suffixable: true
    },
    {
      root: "wichser",
      variants: ["wichsern", "wichse", "wichsen", "gewichst"],
      severity: "high",
      category: "sexual",
      suffixable: false
    },
    {
      root: "schwanz",
      variants: ["schw\xE4nze", "schwanze"],
      severity: "medium",
      category: "sexual",
      suffixable: false
    },
    {
      root: "schlampe",
      variants: ["schlampen", "schlampig"],
      severity: "high",
      category: "insult",
      suffixable: true
    },
    {
      root: "mistkerl",
      variants: ["mistkerle"],
      severity: "medium",
      category: "insult",
      suffixable: false
    },
    {
      root: "idiot",
      variants: ["idioten", "idiotin", "idiotisch"],
      severity: "low",
      category: "insult",
      suffixable: true
    },
    {
      root: "dumm",
      variants: ["dummkopf", "dumme", "dummer", "dummes", "dummheit"],
      severity: "low",
      category: "insult",
      suffixable: true
    },
    {
      root: "depp",
      variants: ["deppen", "deppert"],
      severity: "low",
      category: "insult",
      suffixable: true
    },
    {
      root: "vollidiot",
      variants: ["vollidioten"],
      severity: "medium",
      category: "insult",
      suffixable: false
    },
    {
      root: "missgeburt",
      variants: ["missgeburten"],
      severity: "high",
      category: "insult",
      suffixable: false
    },
    {
      root: "drecksau",
      variants: ["drecks\xE4ue"],
      severity: "high",
      category: "insult",
      suffixable: false
    },
    {
      root: "dreck",
      variants: ["dreckig", "dreckiger", "dreckiges"],
      severity: "medium",
      category: "general",
      suffixable: true
    },
    {
      root: "trottel",
      variants: ["trotteln", "trottelig"],
      severity: "low",
      category: "insult",
      suffixable: false
    }
  ],
  // Innocent words containing a profane substring; excluded from matching.
  whitelist: [
    "ficktion",
    "arschen",
    "schwanzen"
  ]
};
1667
+
1668
// src/lang/de/config.ts
// German language pack: validated dictionary plus normalization tables.
var validatedData4 = validateDictionary(dictionary_default4);
var config4 = {
  locale: "de",
  // Umlaut/eszett folding (keys are unicode-escaped identifiers,
  // e.g. \u00E4 is ä); note \u00DF (ß) expands to "ss".
  charMap: {
    \u00E4: "a",
    \u00C4: "a",
    \u00F6: "o",
    \u00D6: "o",
    \u00FC: "u",
    \u00DC: "u",
    \u00DF: "ss"
  },
  // Leetspeak digit/symbol substitutions undone during normalization.
  leetMap: {
    "0": "o",
    "1": "i",
    "3": "e",
    "4": "a",
    "5": "s",
    "7": "t",
    "@": "a",
    $: "s",
    "!": "i"
  },
  // Per-letter regex character classes (letters also match leet digits
  // and umlaut forms, \xE4 = ä etc.).
  charClasses: {
    a: "[a4\xE4\xC4]",
    b: "[b8]",
    c: "[c]",
    d: "[d]",
    e: "[e3]",
    f: "[f]",
    g: "[g9]",
    h: "[h]",
    i: "[i1]",
    j: "[j]",
    k: "[k]",
    l: "[l1]",
    m: "[m]",
    n: "[n]",
    o: "[o0\xF6\xD6]",
    p: "[p]",
    q: "[q]",
    r: "[r]",
    s: "[s5\xDF]",
    t: "[t7]",
    u: "[uv\xFC\xDC]",
    v: "[vu]",
    w: "[w]",
    x: "[x]",
    y: "[y]",
    z: "[z]"
  },
  dictionary: validatedData4
};
1722
+
1723
// src/lang/index.ts
// Minimum dictionary schema version this core build understands.
var CORE_DICT_VERSION = 1;
// Language-code -> language-pack configuration.
var REGISTRY = {
  tr: config,
  en: config2,
  es: config3,
  de: config4
};
/**
 * Looks up the configuration for a language code.
 * @param lang - A supported language code (e.g. "tr", "en").
 * @returns The language pack configuration.
 * @throws {Error} If the language is unknown or its dictionary version is
 *   older than CORE_DICT_VERSION.
 */
function getLanguageConfig(lang) {
  const entry = REGISTRY[lang];
  if (!entry) {
    const available = getSupportedLanguages().join(", ");
    throw new Error(
      `Unsupported language: "${lang}". Available languages: ${available}`
    );
  }
  const { version } = entry.dictionary;
  if (version < CORE_DICT_VERSION) {
    throw new Error(
      `Dictionary version ${version} for language "${lang}" is below minimum required version ${CORE_DICT_VERSION}. Please update the language pack.`
    );
  }
  return entry;
}
/** Lists all language codes present in the registry. */
function getSupportedLanguages() {
  return Object.keys(REGISTRY);
}
1749
+
1750
// src/normalizer.ts
/**
 * Replaces each character of `text` with its mapping in `map`,
 * leaving unmapped characters untouched. Iterates by code point.
 */
function replaceFromMap(text, map) {
  return [...text].map((ch) => map[ch] ?? ch).join("");
}
1758
/**
 * Builds a function that spells out configured numbers when they appear
 * between letters (an obfuscation trick, e.g. "2" inside a word).
 * @param expansions - Pairs of [numberString, replacementWord].
 * @returns A text-transforming function, or null when there is nothing
 *   to expand.
 */
function buildNumberExpander(expansions) {
  if (expansions.length === 0) return null;
  const escapeRegExp = (s) => s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  // Each number only matches when sandwiched between letters.
  const alternatives = expansions.map(
    ([num]) => `(?<=\\p{L})${escapeRegExp(num)}(?=\\p{L})`
  );
  const regex = new RegExp(alternatives.join("|"), "gu");
  const lookup = new Map(expansions);
  return (text) => text.replace(regex, (m) => lookup.get(m) ?? m);
}
1770
/**
 * Strips punctuation runs that sit between two letters, defeating
 * "f.u.c.k"-style spacing; punctuation at word edges is preserved.
 */
function removePunctuation(text) {
  const intraWordPunct = /(?<=\p{L})[.\-_*,;:!?]+(?=\p{L})/gu;
  return text.replace(intraWordPunct, "");
}
1773
/**
 * Collapses any run of three or more identical characters down to one
 * ("fuuuuck" -> "fuck"); doubled characters are left alone so legitimate
 * spellings survive.
 * Fix: the `u` flag makes `.` match whole code points, so runs of repeated
 * astral characters (emoji, supplementary-plane letters) are collapsed
 * too; previously they were matched as surrogate halves and skipped.
 */
function collapseRepeats(text) {
  return text.replace(/(.)\1{2,}/gu, "$1");
}
1776
/**
 * Collapses every whitespace run to a single space, then strips leading
 * and trailing whitespace.
 */
function trimWhitespace(text) {
  const collapsed = text.replace(/\s+/g, " ");
  return collapsed.trim();
}
1779
/**
 * Builds a text normalizer for one language configuration. The returned
 * function lowercases with the configured locale, folds mapped characters,
 * expands configured in-word numbers, undoes leetspeak, strips intra-word
 * punctuation, collapses character runs and tidies whitespace — in that
 * order.
 * @param cfg - { locale, charMap, leetMap, numberExpansions? }
 * @returns A function mapping raw text to its normalized form.
 */
function createNormalizer(cfg) {
  // Compile the number expander once, outside the returned closure.
  const expandNumbers = cfg.numberExpansions
    ? buildNumberExpander(cfg.numberExpansions)
    : null;
  return function normalize2(text) {
    let out = text.toLocaleLowerCase(cfg.locale);
    out = replaceFromMap(out, cfg.charMap);
    if (expandNumbers) {
      out = expandNumbers(out);
    }
    out = replaceFromMap(out, cfg.leetMap);
    out = removePunctuation(out);
    out = collapseRepeats(out);
    return trimWhitespace(out);
  };
}
1795
// Turkish-specific tables for the standalone `normalize` helper below.
// Keys are unicode-escaped identifiers (\u00E7 = ç, \u011F = ğ, ...).
var TURKISH_CHAR_MAP = {
  \u00E7: "c",
  \u00C7: "c",
  \u011F: "g",
  \u011E: "g",
  \u0131: "i",
  \u0130: "i",
  \u00F6: "o",
  \u00D6: "o",
  \u015F: "s",
  \u015E: "s",
  \u00FC: "u",
  \u00DC: "u"
};
// Digits/symbols commonly used as letter stand-ins (leetspeak).
var LEET_MAP = {
  "0": "o",
  "1": "i",
  "2": "i",
  "3": "e",
  "4": "a",
  "5": "s",
  "6": "g",
  "7": "t",
  "8": "b",
  "9": "g",
  "@": "a",
  $: "s",
  "!": "i"
};
// Numbers spelled out in Turkish when found between letters.
// Ordered longest-first so "100" wins over "10".
var TR_NUMBER_MAP = [
  ["100", "yuz"],
  ["50", "elli"],
  ["10", "on"],
  ["2", "iki"]
];
// Pre-built Turkish normalizer backing the public `normalize` export.
var _turkishNormalize = createNormalizer({
  locale: "tr",
  charMap: TURKISH_CHAR_MAP,
  leetMap: LEET_MAP,
  numberExpansions: TR_NUMBER_MAP
});
/** Standalone Turkish text normalizer (kept as a top-level export). */
function normalize(text) {
  return _turkishNormalize(text);
}
1839
+
1840
// src/terlik.ts
// Main facade: wires a language pack, a Dictionary and a Detector together
// and exposes the check/match/clean API.
var Terlik = class _Terlik {
  dictionary;
  detector;
  // Instance-level defaults; each may be overridden per call via options.
  mode;
  maskStyle;
  enableFuzzy;
  fuzzyThreshold;
  fuzzyAlgorithm;
  maxLength;
  replaceMask;
  /** The language code this instance was created with. */
  language;
  /**
   * Creates a new Terlik instance.
   * @param options - Configuration options.
   * @throws {Error} If the specified language is not supported.
   */
  constructor(options) {
    this.language = options?.language ?? "tr";
    this.mode = options?.mode ?? "balanced";
    this.maskStyle = options?.maskStyle ?? "stars";
    this.enableFuzzy = options?.enableFuzzy ?? false;
    this.fuzzyAlgorithm = options?.fuzzyAlgorithm ?? "levenshtein";
    this.replaceMask = options?.replaceMask ?? "[***]";
    const threshold = options?.fuzzyThreshold ?? 0.8;
    // Validate numeric options eagerly so misconfiguration fails fast.
    if (threshold < 0 || threshold > 1) {
      throw new Error(`fuzzyThreshold must be between 0 and 1, got ${threshold}`);
    }
    this.fuzzyThreshold = threshold;
    const maxLen = options?.maxLength ?? MAX_INPUT_LENGTH;
    if (maxLen < 1) {
      throw new Error(`maxLength must be at least 1, got ${maxLen}`);
    }
    this.maxLength = maxLen;
    // Throws for unknown languages (see getLanguageConfig).
    const langConfig = getLanguageConfig(this.language);
    const normalizeFn = createNormalizer({
      locale: langConfig.locale,
      charMap: langConfig.charMap,
      leetMap: langConfig.leetMap,
      numberExpansions: langConfig.numberExpansions
    });
    this.dictionary = new Dictionary(
      langConfig.dictionary,
      options?.customList,
      options?.whitelist
    );
    this.detector = new Detector(
      this.dictionary,
      normalizeFn,
      langConfig.locale,
      langConfig.charClasses
    );
    if (options?.backgroundWarmup) {
      // Defer pattern compilation plus one dummy detection off the
      // constructor's critical path so the first real call is warm.
      setTimeout(() => {
        this.detector.compile();
        this.containsProfanity("warmup");
      }, 0);
    }
  }
  /**
   * Creates and JIT-warms instances for multiple languages at once.
   * Useful for server deployments to eliminate cold-start latency.
   *
   * @param languages - Language codes to warm up (e.g. `["tr", "en"]`).
   * @param baseOptions - Shared options applied to all instances.
   * @returns A map of language code to warmed-up Terlik instance.
   *
   * @example
   * ```ts
   * const cache = Terlik.warmup(["tr", "en", "es"]);
   * cache.get("en")!.containsProfanity("fuck"); // true, no cold start
   * ```
   */
  static warmup(languages, baseOptions) {
    const map = /* @__PURE__ */ new Map();
    for (const lang of languages) {
      const instance = new _Terlik({ ...baseOptions, language: lang });
      // Synchronous dummy call forces compilation before the map is returned.
      instance.containsProfanity("warmup");
      map.set(lang, instance);
    }
    return map;
  }
  /**
   * Checks whether the text contains profanity.
   * @param text - The text to check.
   * @param options - Per-call detection options (overrides instance defaults).
   * @returns `true` if profanity is detected, `false` otherwise.
   */
  containsProfanity(text, options) {
    const input = validateInput(text, this.maxLength);
    if (input.length === 0) return false;
    const matches = this.detector.detect(input, this.mergeDetectOptions(options));
    return matches.length > 0;
  }
  /**
   * Returns all profanity matches with details (word, root, index, severity, method).
   * @param text - The text to analyze.
   * @param options - Per-call detection options (overrides instance defaults).
   * @returns Array of match results, sorted by index.
   */
  getMatches(text, options) {
    const input = validateInput(text, this.maxLength);
    if (input.length === 0) return [];
    return this.detector.detect(input, this.mergeDetectOptions(options));
  }
  /**
   * Returns the text with detected profanity masked.
   * @param text - The text to clean.
   * @param options - Per-call clean options (overrides instance defaults).
   * @returns The cleaned text with profanity replaced by mask characters.
   */
  clean(text, options) {
    const input = validateInput(text, this.maxLength);
    if (input.length === 0) return input;
    const matches = this.detector.detect(input, this.mergeDetectOptions(options));
    const style = options?.maskStyle ?? this.maskStyle;
    const replaceMask = options?.replaceMask ?? this.replaceMask;
    return cleanText(input, matches, style, replaceMask);
  }
  /**
   * Adds custom words to the detection dictionary at runtime.
   * Triggers pattern recompilation.
   * @param words - Words to add.
   */
  addWords(words) {
    this.dictionary.addWords(words);
    this.detector.recompile();
  }
  /**
   * Removes words from the detection dictionary at runtime.
   * Triggers pattern recompilation.
   * @param words - Words to remove.
   */
  removeWords(words) {
    this.dictionary.removeWords(words);
    this.detector.recompile();
  }
  /**
   * Returns the compiled regex patterns keyed by root word.
   * Useful for debugging or advanced usage.
   * @returns Map of root word to compiled RegExp.
   */
  getPatterns() {
    return this.detector.getPatterns();
  }
  /** Merges per-call options over instance defaults (call site wins). */
  mergeDetectOptions(options) {
    return {
      mode: options?.mode ?? this.mode,
      enableFuzzy: options?.enableFuzzy ?? this.enableFuzzy,
      fuzzyThreshold: options?.fuzzyThreshold ?? this.fuzzyThreshold,
      fuzzyAlgorithm: options?.fuzzyAlgorithm ?? this.fuzzyAlgorithm
    };
  }
};
1995
// Public package API: the Terlik facade plus standalone normalization and
// string-similarity helpers.
export {
  Terlik,
  createNormalizer,
  diceSimilarity,
  getLanguageConfig,
  getSupportedLanguages,
  levenshteinDistance,
  levenshteinSimilarity,
  normalize
};