terlik.js 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +519 -0
- package/dist/index.d.mts +278 -0
- package/dist/index.d.ts +278 -0
- package/dist/index.js +2038 -0
- package/dist/index.mjs +2004 -0
- package/package.json +81 -0
package/dist/index.mjs
ADDED
|
@@ -0,0 +1,2004 @@
|
|
|
1
|
+
// src/dictionary/index.ts
var Dictionary = class {
  /** Map of normalized (lowercase) root -> WordEntry. */
  entries = /* @__PURE__ */ new Map();
  /** Set of lowercase words that must never be flagged. */
  whitelist;
  /** Flat list of every detectable word (roots + variants), lowercase. */
  allWords = [];
  /** Grammatical suffixes available for suffixable entries. */
  suffixes;
  /**
   * Creates a new Dictionary from validated dictionary data.
   * @param data - Validated dictionary data (entries, suffixes, whitelist).
   * @param customWords - Additional words to detect.
   * @param customWhitelist - Additional words to exclude.
   */
  constructor(data, customWords, customWhitelist) {
    this.whitelist = new Set(data.whitelist.map((w) => w.toLowerCase()));
    this.suffixes = data.suffixes;
    if (customWhitelist) {
      for (const w of customWhitelist) {
        this.whitelist.add(w.toLowerCase());
      }
    }
    for (const entry of data.entries) {
      this.addEntry({
        root: entry.root,
        variants: entry.variants,
        severity: entry.severity,
        category: entry.category,
        suffixable: entry.suffixable
      });
    }
    if (customWords) {
      // Custom words get a default "medium" severity and no variants.
      for (const word of customWords) {
        this.addEntry({
          root: word.toLowerCase(),
          variants: [],
          severity: "medium"
        });
      }
    }
  }
  /**
   * Registers an entry under its lowercase root and indexes all of its forms
   * (root + variants) into the flat word list.
   * @param entry - The WordEntry to add.
   */
  addEntry(entry) {
    const normalizedRoot = entry.root.toLowerCase();
    this.entries.set(normalizedRoot, entry);
    this.allWords.push(normalizedRoot);
    for (const v of entry.variants) {
      this.allWords.push(v.toLowerCase());
    }
  }
  /** Returns all dictionary entries keyed by root word. */
  getEntries() {
    return this.entries;
  }
  /** Returns all words (roots + variants) as a flat array. */
  getAllWords() {
    return this.allWords;
  }
  /** Returns the whitelist as a Set of lowercase strings. */
  getWhitelist() {
    return this.whitelist;
  }
  /** Returns available grammatical suffixes for the language. */
  getSuffixes() {
    return this.suffixes;
  }
  /**
   * Adds words to the dictionary at runtime.
   * Empty strings and already-existing words are silently skipped.
   * @param words - Words to add.
   */
  addWords(words) {
    for (const word of words) {
      const lower = word.toLowerCase().trim();
      if (lower.length === 0) continue;
      if (!this.entries.has(lower)) {
        this.addEntry({
          root: lower,
          variants: [],
          severity: "medium"
        });
      }
    }
  }
  /**
   * Removes words from the dictionary at runtime.
   * Removing a root also drops all of its variants from the word list.
   * @param words - Words to remove (matched by root, case-insensitive).
   */
  removeWords(words) {
    for (const word of words) {
      const key = word.toLowerCase();
      const entry = this.entries.get(key);
      if (entry) {
        this.entries.delete(key);
        // Hoist the removed forms into a Set so the filter below is
        // O(allWords) instead of O(allWords * variants) per removal.
        const removed = new Set([
          key,
          ...entry.variants.map((v) => v.toLowerCase())
        ]);
        this.allWords = this.allWords.filter((w) => !removed.has(w));
      }
    }
  }
  /**
   * Finds the dictionary entry for a given word (checks root and variants).
   * @param word - The word to look up.
   * @returns The matching WordEntry, or undefined if not found.
   */
  findRootForWord(word) {
    const lower = word.toLowerCase();
    const direct = this.entries.get(lower);
    if (direct) return direct;
    // Fall back to a linear scan over variants of every entry.
    for (const [, entry] of this.entries) {
      if (entry.variants.some((v) => v.toLowerCase() === lower)) {
        return entry;
      }
    }
    return void 0;
  }
};
|
|
115
|
+
|
|
116
|
+
// src/patterns.ts
// Up to 3 non-letter/non-digit characters may separate the letters of a word
// (catches obfuscations like "s.i.k" or "s-i-k").
var SEPARATOR = "[^\\p{L}\\p{N}]{0,3}";
// Patterns longer than this fall back to their suffix-less form (see compilePatterns).
var MAX_PATTERN_LENGTH = 1e4;
// Maximum number of chained grammatical suffixes matched after a suffixable root.
var MAX_SUFFIX_CHAIN = 2;
// Per-pattern time budget used by the regex scan and the fuzzy pass.
var REGEX_TIMEOUT_MS = 250;
|
|
121
|
+
/**
 * Translates a single character into a regex source fragment that matches one
 * or more occurrences of it, using the configured character class for that
 * letter when one exists (e.g. "a" -> "[a@4]+").
 * @param ch - The character to translate.
 * @param charClasses - Lookup of lowercase character -> regex character class.
 * @returns Regex source fragment for the character.
 */
function charToPattern(ch, charClasses) {
  const mapped = charClasses[ch.toLowerCase()];
  if (mapped) {
    return `${mapped}+`;
  }
  // No class configured: escape regex metacharacters and allow repetition.
  const escaped = ch.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  return `${escaped}+`;
}
|
|
126
|
+
/**
 * Builds the regex source for a whole word: the word is normalized, each
 * character is expanded via charToPattern, and the pieces are joined with
 * SEPARATOR so obfuscating punctuation between letters still matches.
 * @param word - The word to compile.
 * @param charClasses - Lookup of lowercase character -> regex character class.
 * @param normalizeFn - Normalization applied to the word first.
 * @returns Regex source fragment for the word.
 */
function wordToPattern(word, charClasses, normalizeFn) {
  const pieces = [];
  // Spread iterates code points, keeping multi-unit characters intact.
  for (const ch of [...normalizeFn(word)]) {
    pieces.push(charToPattern(ch, charClasses));
  }
  return pieces.join(SEPARATOR);
}
|
|
132
|
+
/**
 * Compiles the list of grammatical suffixes into a single non-capturing regex
 * group, longest alternatives first so the regex engine prefers the longest
 * suffix match.
 * @param suffixes - Suffix strings to compile.
 * @param charClasses - Lookup of lowercase character -> regex character class.
 * @returns Regex source for one optional suffix occurrence, or "" when empty.
 */
function buildSuffixGroup(suffixes, charClasses) {
  if (suffixes.length === 0) return "";
  const alternatives = suffixes.map(
    (suffix) => [...suffix].map((ch) => charToPattern(ch, charClasses)).join(SEPARATOR)
  );
  // Longest patterns first (greedy alternation).
  alternatives.sort((left, right) => right.length - left.length);
  return `(?:${SEPARATOR}(?:${alternatives.join("|")}))`;
}
|
|
142
|
+
/**
 * Compiles every dictionary entry into one case-insensitive Unicode regex.
 * All forms (root + variants) are normalized, deduplicated, sorted longest
 * first and joined into one alternation; suffixable entries additionally allow
 * up to MAX_SUFFIX_CHAIN grammatical suffixes. Entries whose pattern fails to
 * compile fall back to the suffix-less form, or are skipped with a warning.
 * @param entries - Map of root -> WordEntry (see Dictionary.getEntries).
 * @param suffixes - Grammatical suffixes, or empty/undefined for none.
 * @param charClasses - Lookup of lowercase character -> regex character class.
 * @param normalizeFn - Normalization applied to each word form.
 * @returns Array of { root, severity, regex, variants } pattern records.
 */
function compilePatterns(entries, suffixes, charClasses, normalizeFn) {
  const patterns = [];
  const suffixGroup = suffixes && suffixes.length > 0 ? buildSuffixGroup(suffixes, charClasses) : "";
  // Unicode-aware word boundary: \b is ASCII-only, so emulate it with
  // lookarounds that reject adjacent letters/digits. Centralized here instead
  // of being rebuilt inline at every fallback site.
  const bounded = (core) => `(?<![\\p{L}\\p{N}])(?:${core})(?![\\p{L}\\p{N}])`;
  const push = (regex, entry) => {
    patterns.push({
      root: entry.root,
      severity: entry.severity,
      regex,
      variants: entry.variants
    });
  };
  for (const [, entry] of entries) {
    const allForms = [entry.root, ...entry.variants];
    // Normalize, drop empties, dedupe, longest first (greedy alternation).
    const sortedForms = allForms.map((w) => normalizeFn(w)).filter((w) => w.length > 0).filter((w, i, arr) => arr.indexOf(w) === i).sort((a, b) => b.length - a.length);
    const combined = sortedForms.map(
      (w) => wordToPattern(w, charClasses, normalizeFn)
    ).join("|");
    const useSuffix = entry.suffixable && suffixGroup.length > 0;
    let pattern = useSuffix ? `(?<![\\p{L}\\p{N}])(?:${combined})${suffixGroup}{0,${MAX_SUFFIX_CHAIN}}(?![\\p{L}\\p{N}])` : bounded(combined);
    // Oversized suffixed patterns degrade to the plain (suffix-less) form.
    if (pattern.length > MAX_PATTERN_LENGTH && useSuffix) {
      pattern = bounded(combined);
    }
    try {
      push(new RegExp(pattern, "giu"), entry);
    } catch (err) {
      if (useSuffix) {
        // Suffixed pattern failed to compile: retry without suffixes.
        try {
          push(new RegExp(bounded(combined), "giu"), entry);
          console.warn(`[terlik] Pattern for "${entry.root}" failed with suffixes, using fallback: ${err instanceof Error ? err.message : String(err)}`);
        } catch (err2) {
          console.warn(`[terlik] Pattern for "${entry.root}" failed completely, skipping: ${err2 instanceof Error ? err2.message : String(err2)}`);
        }
      } else {
        console.warn(`[terlik] Pattern for "${entry.root}" failed, skipping: ${err instanceof Error ? err.message : String(err)}`);
      }
    }
  }
  return patterns;
}
|
|
192
|
+
|
|
193
|
+
// src/fuzzy.ts
/**
 * Computes the Levenshtein edit distance between two strings using two
 * rolling rows (O(min-row) memory instead of a full matrix).
 * @param a - First string.
 * @param b - Second string.
 * @returns Minimum number of single-character edits turning a into b.
 */
function levenshteinDistance(a, b) {
  const lenA = a.length;
  const lenB = b.length;
  if (lenA === 0) return lenB;
  if (lenB === 0) return lenA;
  // Row for the empty prefix of `a`: distance equals the prefix length of `b`.
  let previous = Array.from({ length: lenB + 1 }, (_, j) => j);
  let current = new Array(lenB + 1);
  for (let i = 1; i <= lenA; i++) {
    current[0] = i;
    for (let j = 1; j <= lenB; j++) {
      const substitutionCost = a[i - 1] === b[j - 1] ? 0 : 1;
      const deletion = previous[j] + 1;
      const insertion = current[j - 1] + 1;
      const substitution = previous[j - 1] + substitutionCost;
      current[j] = Math.min(deletion, insertion, substitution);
    }
    // Reuse the buffers by swapping roles each row.
    [previous, current] = [current, previous];
  }
  return previous[lenB];
}
|
|
219
|
+
/**
 * Normalizes Levenshtein distance into a similarity in [0, 1], where 1 means
 * the strings are identical (two empty strings are treated as identical).
 * @param a - First string.
 * @param b - Second string.
 * @returns Similarity score between 0 and 1.
 */
function levenshteinSimilarity(a, b) {
  const longest = Math.max(a.length, b.length);
  return longest === 0 ? 1 : 1 - levenshteinDistance(a, b) / longest;
}
|
|
224
|
+
/**
 * Collects the set of distinct two-character substrings of a string.
 * Strings shorter than 2 characters yield an empty set.
 * @param str - Input string.
 * @returns Set of unique bigrams.
 */
function bigrams(str) {
  const pairs = new Set();
  for (let i = 1; i < str.length; i++) {
    pairs.add(str.slice(i - 1, i + 1));
  }
  return pairs;
}
|
|
231
|
+
/**
 * Sørensen–Dice similarity over character bigrams, in [0, 1].
 * Inputs shorter than two characters only count as similar when identical.
 * @param a - First string.
 * @param b - Second string.
 * @returns Similarity score between 0 and 1.
 */
function diceSimilarity(a, b) {
  if (a.length < 2 || b.length < 2) {
    return a === b ? 1 : 0;
  }
  const setA = bigrams(a);
  const setB = bigrams(b);
  let shared = 0;
  setA.forEach((bg) => {
    if (setB.has(bg)) shared += 1;
  });
  return 2 * shared / (setA.size + setB.size);
}
|
|
243
|
+
/**
 * Selects the similarity function for the requested fuzzy algorithm.
 * @param algorithm - "levenshtein" or anything else for Dice.
 * @returns The similarity function (string, string) -> number in [0, 1].
 */
function getFuzzyMatcher(algorithm) {
  if (algorithm === "levenshtein") {
    return levenshteinSimilarity;
  }
  return diceSimilarity;
}
|
|
246
|
+
|
|
247
|
+
// src/detector.ts
var Detector = class {
  // Dictionary supplying entries, whitelist and suffixes.
  dictionary;
  // Lazily compiled pattern records; null until first use (see ensureCompiled).
  _patterns = null;
  // Set of normalizeFn(word) for every dictionary word (roots + variants).
  normalizedWordSet;
  // Map of normalizeFn(word) -> original dictionary word.
  normalizedWordToRoot;
  // Language-specific normalization applied to both text and dictionary words.
  normalizeFn;
  // Locale tag used for toLocaleLowerCase in detectPattern.
  locale;
  // Per-character regex classes used when compiling patterns.
  charClasses;
  /**
   * @param dictionary - Dictionary instance to detect against.
   * @param normalizeFn - Function mapping a string to its normalized form.
   * @param locale - Locale used for lowercasing the input text.
   * @param charClasses - Lookup of lowercase char -> regex character class.
   */
  constructor(dictionary, normalizeFn, locale, charClasses) {
    this.dictionary = dictionary;
    this.normalizeFn = normalizeFn;
    this.locale = locale;
    this.charClasses = charClasses;
    this.normalizedWordSet = /* @__PURE__ */ new Set();
    this.normalizedWordToRoot = /* @__PURE__ */ new Map();
    this.buildNormalizedLookup();
  }
  /** Compiles patterns on first use and caches them on this._patterns. */
  ensureCompiled() {
    if (this._patterns === null) {
      this._patterns = compilePatterns(
        this.dictionary.getEntries(),
        this.dictionary.getSuffixes(),
        this.charClasses,
        this.normalizeFn
      );
    }
    return this._patterns;
  }
  /** Forces eager pattern compilation (no-op if already compiled). */
  compile() {
    this.ensureCompiled();
  }
  /** Recompiles patterns and rebuilds lookups, e.g. after dictionary edits. */
  recompile() {
    this._patterns = compilePatterns(
      this.dictionary.getEntries(),
      this.dictionary.getSuffixes(),
      this.charClasses,
      this.normalizeFn
    );
    this.buildNormalizedLookup();
  }
  /** Rebuilds the normalized word set and normalized -> dictionary-word map. */
  buildNormalizedLookup() {
    this.normalizedWordSet.clear();
    this.normalizedWordToRoot.clear();
    for (const word of this.dictionary.getAllWords()) {
      const n = this.normalizeFn(word);
      this.normalizedWordSet.add(n);
      // Later words overwrite earlier ones on normalization collisions.
      this.normalizedWordToRoot.set(n, word);
    }
  }
  /** Returns a Map of root word -> compiled RegExp (compiles if needed). */
  getPatterns() {
    const map = /* @__PURE__ */ new Map();
    for (const p of this.ensureCompiled()) {
      map.set(p.root, p.regex);
    }
    return map;
  }
  /**
   * Runs detection over the text.
   * Modes: "strict" = exact normalized word matching only; otherwise regex
   * pattern matching. "loose" mode (or enableFuzzy) adds a fuzzy pass.
   * @param text - Input text to scan.
   * @param options - { mode, enableFuzzy, fuzzyThreshold, fuzzyAlgorithm }.
   * @returns Deduplicated matches sorted by index.
   */
  detect(text, options) {
    const mode = options?.mode ?? "balanced";
    const results = [];
    const whitelist = this.dictionary.getWhitelist();
    if (mode === "strict") {
      this.detectStrict(text, whitelist, results);
    } else {
      this.detectPattern(text, whitelist, results);
    }
    if (mode === "loose" || options?.enableFuzzy) {
      const threshold = options?.fuzzyThreshold ?? 0.8;
      const algorithm = options?.fuzzyAlgorithm ?? "levenshtein";
      this.detectFuzzy(text, whitelist, results, threshold, algorithm);
    }
    return this.deduplicateResults(results);
  }
  /**
   * Exact matching: splits text on whitespace and flags words whose
   * normalized form is in the dictionary (unless whitelisted).
   * Indices assume one separator character per split boundary.
   */
  detectStrict(text, whitelist, results) {
    const normalized = this.normalizeFn(text);
    const words = normalized.split(/\s+/);
    const originalWords = text.split(/\s+/);
    let charIndex = 0;
    for (let wi = 0; wi < originalWords.length; wi++) {
      const origWord = originalWords[wi];
      // Pair original and normalized words positionally; assumes
      // normalization does not change the word count — TODO confirm.
      const normWord = wi < words.length ? words[wi] : "";
      if (normWord.length === 0) {
        charIndex += origWord.length + 1;
        continue;
      }
      if (whitelist.has(normWord)) {
        charIndex += origWord.length + 1;
        continue;
      }
      if (this.normalizedWordSet.has(normWord)) {
        const dictWord = this.normalizedWordToRoot.get(normWord);
        const entry = this.dictionary.findRootForWord(dictWord);
        if (entry) {
          results.push({
            word: origWord,
            root: entry.root,
            index: charIndex,
            severity: entry.severity,
            method: "exact"
          });
        }
      }
      // +1 accounts for the single separator assumed between words.
      charIndex += origWord.length + 1;
    }
  }
  /**
   * Pattern matching: scans the original text, then (when normalization
   * differs from plain locale-lowercasing) scans the normalized text too,
   * mapping matches back to original positions.
   */
  detectPattern(text, whitelist, results) {
    this.runPatterns(text, text, whitelist, results, false);
    const normalizedText = this.normalizeFn(text);
    const lowerText = text.toLocaleLowerCase(this.locale);
    if (normalizedText !== lowerText && normalizedText.length > 0) {
      this.runPatterns(normalizedText, text, whitelist, results, true);
    }
  }
  /**
   * Executes every compiled pattern against searchText, appending matches
   * to results. Skips whitelisted matches (and matches whose surrounding
   * word is whitelisted). Duplicate indices are suppressed.
   * @param isNormalized - When true, searchText is normalized and match
   *   positions are translated back to originalText coordinates.
   */
  runPatterns(searchText, originalText, whitelist, results, isNormalized) {
    const existingIndices = new Set(results.map((r) => r.index));
    const patterns = this.ensureCompiled();
    for (const pattern of patterns) {
      const patternStart = Date.now();
      // /g regexes are stateful: reset lastIndex before each scan.
      pattern.regex.lastIndex = 0;
      let match;
      while ((match = pattern.regex.exec(searchText)) !== null) {
        // Bail out if a single pattern exceeds its time budget.
        if (Date.now() - patternStart > REGEX_TIMEOUT_MS) break;
        const matchedText = match[0];
        const matchIndex = match.index;
        const normalizedMatch = this.normalizeFn(matchedText);
        if (whitelist.has(normalizedMatch)) continue;
        // Also check the full word containing the match, so e.g. a match
        // inside a whitelisted longer word is ignored.
        const surrounding = this.getSurroundingWord(searchText, matchIndex, matchedText.length);
        const normalizedSurrounding = this.normalizeFn(surrounding);
        if (whitelist.has(normalizedSurrounding)) continue;
        if (isNormalized) {
          const mapped = this.mapNormalizedToOriginal(originalText, matchIndex, matchedText);
          if (mapped && !existingIndices.has(mapped.index)) {
            results.push({
              word: mapped.word,
              root: pattern.root,
              index: mapped.index,
              severity: pattern.severity,
              method: "pattern"
            });
            existingIndices.add(mapped.index);
          }
        } else {
          if (!existingIndices.has(matchIndex)) {
            results.push({
              word: matchedText,
              root: pattern.root,
              index: matchIndex,
              severity: pattern.severity,
              method: "pattern"
            });
            existingIndices.add(matchIndex);
          }
        }
        // Guard against infinite loops on zero-length matches.
        if (matchedText.length === 0) {
          pattern.regex.lastIndex++;
        }
      }
    }
  }
  /**
   * Translates an index in the normalized text back to the original text by
   * walking whitespace-delimited segments in parallel.
   * Assumes each whitespace run normalizes to a single character and that
   * word order is preserved by normalizeFn — TODO confirm.
   * @returns { word, index } in original coordinates, or null if unmappable.
   */
  mapNormalizedToOriginal(originalText, normIndex, _normMatch) {
    // Capturing split keeps the whitespace segments in the array.
    const origWords = originalText.split(/(\s+)/);
    let normOffset = 0;
    let origOffset = 0;
    for (const segment of origWords) {
      if (/^\s+$/.test(segment)) {
        normOffset += 1;
        origOffset += segment.length;
        continue;
      }
      const normWord = this.normalizeFn(segment);
      const normEnd = normOffset + normWord.length;
      if (normIndex >= normOffset && normIndex < normEnd) {
        // Match falls inside this segment: report the whole original word.
        return { word: segment, index: origOffset };
      }
      normOffset = normEnd;
      origOffset += segment.length;
    }
    return null;
  }
  /**
   * Fuzzy pass: compares each normalized input word (length >= 3) against
   * every normalized dictionary word and records the first one whose
   * similarity reaches the threshold. Time-boxed by REGEX_TIMEOUT_MS.
   */
  detectFuzzy(text, whitelist, existingResults, threshold, algorithm) {
    const normalized = this.normalizeFn(text);
    const normWords = normalized.split(/\s+/);
    const origWords = text.split(/\s+/);
    const matcher = getFuzzyMatcher(algorithm);
    const existingIndices = new Set(existingResults.map((r) => r.index));
    const startTime = Date.now();
    let charIndex = 0;
    for (let wi = 0; wi < origWords.length; wi++) {
      if (Date.now() - startTime > REGEX_TIMEOUT_MS) break;
      const origWord = origWords[wi];
      const word = wi < normWords.length ? normWords[wi] : "";
      // Very short words produce too many false positives; skip them.
      if (word.length < 3 || whitelist.has(word)) {
        charIndex += origWord.length + 1;
        continue;
      }
      for (const normDict of this.normalizedWordSet) {
        if (normDict.length < 3) continue;
        const similarity = matcher(word, normDict);
        if (similarity >= threshold) {
          if (!existingIndices.has(charIndex)) {
            const dictWord = this.normalizedWordToRoot.get(normDict);
            const entry = this.dictionary.findRootForWord(dictWord);
            if (entry) {
              existingResults.push({
                word: origWord,
                root: entry.root,
                index: charIndex,
                severity: entry.severity,
                method: "fuzzy"
              });
              existingIndices.add(charIndex);
            }
          }
          // First dictionary word over the threshold wins for this input word.
          break;
        }
      }
      charIndex += origWord.length + 1;
    }
  }
  /**
   * Expands a match span to the full run of letter characters around it.
   * @returns The whole word containing the match.
   */
  getSurroundingWord(text, index, length) {
    let start = index;
    let end = index + length;
    while (start > 0 && /\p{L}/u.test(text[start - 1])) start--;
    while (end < text.length && /\p{L}/u.test(text[end])) end++;
    return text.slice(start, end);
  }
  /**
   * Keeps one result per index (preferring the longest matched word) and
   * returns them sorted by position.
   */
  deduplicateResults(results) {
    const seen = /* @__PURE__ */ new Map();
    for (const result of results) {
      const existing = seen.get(result.index);
      if (!existing || result.word.length > existing.word.length) {
        seen.set(result.index, result);
      }
    }
    return [...seen.values()].sort((a, b) => a.index - b.index);
  }
};
|
|
484
|
+
|
|
485
|
+
// src/cleaner.ts
/**
 * Replaces every character of the word with an asterisk.
 * @param word - Word to mask.
 * @returns A string of "*" with the same length as the input.
 */
function maskStars(word) {
  let masked = "";
  for (let i = 0; i < word.length; i++) {
    masked += "*";
  }
  return masked;
}
|
|
489
|
+
/**
 * Masks the interior of a word, keeping its first and last characters.
 * Words of length <= 2 are fully masked.
 * @param word - Word to mask.
 * @returns The partially masked word, same length as the input.
 */
function maskPartial(word) {
  const len = word.length;
  if (len <= 2) {
    return "*".repeat(len);
  }
  const middle = "*".repeat(len - 2);
  return `${word[0]}${middle}${word[len - 1]}`;
}
|
|
493
|
+
/**
 * "Replace" masking: the matched word is substituted by the configured
 * replacement string as-is (length is not preserved).
 * @param replaceMask - The replacement string to use.
 * @returns The replacement string unchanged.
 */
function maskReplace(replaceMask) {
  return replaceMask;
}
|
|
496
|
+
/**
 * Dispatches to the masking strategy for the given style.
 * @param word - The matched word to mask.
 * @param style - "stars", "partial" or "replace".
 * @param replaceMask - Replacement string used by the "replace" style.
 * @returns The masked word (undefined for an unrecognized style).
 */
function applyMask(word, style, replaceMask) {
  if (style === "stars") {
    return maskStars(word);
  }
  if (style === "partial") {
    return maskPartial(word);
  }
  if (style === "replace") {
    return maskReplace(replaceMask);
  }
}
|
|
506
|
+
/**
 * Produces a cleaned copy of the text with every match masked.
 * Matches are applied right-to-left so earlier indices stay valid while
 * replacements change the string length.
 * @param text - Original text.
 * @param matches - Detection results ({ word, index, ... }).
 * @param style - Masking style passed to applyMask.
 * @param replaceMask - Replacement string for the "replace" style.
 * @returns The masked text (the input itself when there are no matches).
 */
function cleanText(text, matches, style, replaceMask) {
  if (matches.length === 0) return text;
  const descending = [...matches].sort((first, second) => second.index - first.index);
  let cleaned = text;
  for (const hit of descending) {
    const replacement = applyMask(hit.word, style, replaceMask);
    const before = cleaned.slice(0, hit.index);
    const after = cleaned.slice(hit.index + hit.word.length);
    cleaned = before + replacement + after;
  }
  return cleaned;
}
|
|
516
|
+
|
|
517
|
+
// src/utils.ts
// Hard cap on input size processed in one call.
var MAX_INPUT_LENGTH = 1e4;
/**
 * Coerces arbitrary input into a safe string capped at maxLength characters.
 * null/undefined become ""; non-strings are stringified via String().
 * @param text - Raw input value.
 * @param maxLength - Maximum number of UTF-16 code units to keep.
 * @returns The sanitized, length-capped string.
 */
function validateInput(text, maxLength) {
  if (text == null) return "";
  // Coerce FIRST, then truncate, so non-string inputs (whose stringified
  // form may be arbitrarily long) are also subject to the length cap.
  const str = typeof text === "string" ? text : String(text);
  if (str.length > maxLength) return str.slice(0, maxLength);
  return str;
}
|
|
525
|
+
|
|
526
|
+
// src/lang/tr/dictionary.json
// Bundled Turkish profanity dictionary: grammatical suffixes, word entries
// (root + spelling variants, severity, category, suffixable flag) and a
// whitelist of innocent words containing profane substrings.
var dictionary_default = {
  version: 1,
  // Grammatical suffixes appended to suffixable roots at pattern-compile time.
  suffixes: [
    "tir",
    "dir",
    "il",
    "in",
    "ik",
    "uk",
    "en",
    "ici",
    "di",
    "ti",
    "dim",
    "tim",
    "din",
    "dik",
    "tik",
    "mis",
    "mus",
    "iyor",
    "uyor",
    "ecek",
    "acak",
    "ecem",
    "acam",
    "icem",
    "er",
    "ir",
    "ar",
    "eyim",
    "ayim",
    "elim",
    "alim",
    "se",
    "sa",
    "im",
    "um",
    "sin",
    "sun",
    "yim",
    "mek",
    "mak",
    "me",
    "ma",
    "is",
    "us",
    "ler",
    "lar",
    "si",
    "e",
    "a",
    "de",
    "da",
    "den",
    "dan",
    "te",
    "ta",
    "ten",
    "tan",
    "i",
    "lik",
    "luk",
    "li",
    "lu",
    "ci",
    "cu",
    "ce",
    "ca",
    "le",
    "la",
    "ken",
    "esi",
    "un",
    "lari",
    "leri",
    "larin",
    "lerin",
    "misin",
    "misiniz",
    "musun",
    "musunuz",
    "miyim",
    "miyiz",
    "cesine",
    "casina"
  ],
  // One entry per root word; variants cover common misspellings and
  // pre-inflected forms for roots that are not suffixable.
  entries: [
    {
      root: "sik",
      variants: [
        "siktir",
        "sikicem",
        "siktim",
        "sikeyim",
        "sikerim",
        "sikis",
        "sikik",
        "sikim",
        "sikimle",
        "sikimin",
        "sikime",
        "sike",
        "siken",
        "siker",
        "sikti",
        // NOTE(review): "siktiler" appears twice in this list — the duplicate
        // is redundant (patterns dedupe forms) but harmless.
        "siktiler",
        "sikmis",
        "sikmek",
        "sikecek",
        "sikiyor",
        "sikme",
        "sikici",
        "siksin",
        "siktirler",
        "sikimsonik",
        "siktirin",
        "siktiler",
        "sikerler",
        "sikiler",
        "sikti\u011Fimin",
        "sikermisiniz",
        "sikermisin",
        "siktirmi\u015Fcesine"
      ],
      severity: "high",
      category: "sexual",
      suffixable: false
    },
    {
      root: "amk",
      variants: ["amk", "amina", "aminakoyim", "aminakoydugum", "amq"],
      severity: "high",
      category: "sexual",
      suffixable: false
    },
    {
      root: "orospu",
      variants: ["orospucocugu", "orosbucocugu", "orspu", "oruspu", "orosbu"],
      severity: "high",
      category: "insult",
      suffixable: true
    },
    {
      root: "pi\xE7",
      variants: ["pic", "piclik"],
      severity: "high",
      category: "insult",
      suffixable: true
    },
    {
      root: "yarrak",
      variants: ["yarak", "yarrak", "yarakli", "dalyarak", "dalyarrak"],
      severity: "high",
      category: "sexual",
      suffixable: true
    },
    {
      root: "g\xF6t",
      variants: [
        "got",
        "gotunu",
        "gotlek",
        "gotveren",
        "gotverenler",
        "gote",
        "gotu",
        "gotler",
        "gotlu",
        "gotunden",
        "gotune"
      ],
      severity: "high",
      category: "sexual",
      suffixable: false
    },
    {
      root: "am",
      variants: ["amcik", "amcuk"],
      severity: "high",
      category: "sexual",
      suffixable: false
    },
    {
      root: "ta\u015Fak",
      variants: ["tasak", "tassak", "tassakli"],
      severity: "medium",
      category: "sexual",
      suffixable: true
    },
    {
      root: "meme",
      variants: [],
      severity: "medium",
      category: "sexual",
      suffixable: false
    },
    {
      root: "ibne",
      variants: ["ibneler"],
      severity: "high",
      category: "slur",
      suffixable: true
    },
    {
      root: "gavat",
      variants: ["gavatlik"],
      severity: "high",
      category: "insult",
      suffixable: true
    },
    {
      root: "pezevenk",
      variants: ["pezo"],
      severity: "high",
      category: "insult",
      suffixable: true
    },
    {
      root: "bok",
      variants: [
        "boktan",
        "boka",
        "boku",
        "boklu",
        "boklar",
        "boklari"
      ],
      severity: "medium",
      category: "general",
      suffixable: false
    },
    {
      root: "haysiyetsiz",
      variants: [],
      severity: "medium",
      category: "insult",
      suffixable: false
    },
    {
      root: "salak",
      variants: ["salaklik"],
      severity: "low",
      category: "insult",
      suffixable: true
    },
    {
      root: "aptal",
      variants: ["aptallik", "aptalca"],
      severity: "low",
      category: "insult",
      suffixable: true
    },
    {
      root: "gerizekal\u0131",
      variants: ["gerizekali"],
      severity: "low",
      category: "insult",
      suffixable: true
    },
    {
      root: "mal",
      variants: [],
      severity: "low",
      category: "insult",
      suffixable: false
    },
    {
      root: "dangalak",
      variants: [],
      severity: "low",
      category: "insult",
      suffixable: true
    },
    {
      root: "ezik",
      variants: [],
      severity: "low",
      category: "insult",
      suffixable: true
    },
    {
      root: "pu\u015Ft",
      variants: ["pust", "pustt"],
      severity: "high",
      category: "slur",
      suffixable: true
    },
    {
      root: "\u015Ferefsiz",
      variants: ["serefsiz", "serefsizler"],
      severity: "medium",
      category: "insult",
      suffixable: true
    },
    {
      root: "yav\u015Fak",
      variants: ["yavsak"],
      severity: "medium",
      category: "insult",
      suffixable: true
    },
    {
      root: "d\xF6l",
      variants: ["dol", "dolunu", "dolcu"],
      severity: "high",
      category: "sexual",
      suffixable: false
    },
    {
      root: "kahpe",
      variants: ["kahpelik"],
      severity: "high",
      category: "insult",
      suffixable: true
    }
  ],
  // Innocent words that contain a profane root as a substring; detection
  // skips any match whose word is in this list.
  whitelist: [
    "amsterdam",
    "amateur",
    "amat\xF6r",
    "sikke",
    "sikkeler",
    "masikler",
    "sikilasma",
    "ambalaj",
    "ambassador",
    "ambulans",
    "amel",
    "ameliyat",
    "amerika",
    "amele",
    "amino",
    "amonyak",
    "amper",
    "ampul",
    "boks\xF6r",
    "bokser",
    "bokluk",
    "malzeme",
    "maliyet",
    "malik",
    "malikane",
    "maliye",
    "mallorca",
    "malta",
    "malt",
    "gotan",
    "gotik",
    "gotham",
    "memento",
    "memleket",
    "memur",
    "memorial",
    "piknik",
    "pikachu",
    "tasselled",
    "siklet",
    "kasim",
    "kas\u0131m",
    "yarasa",
    "dolunay",
    "dolum",
    "doluluk",
    "ama",
    "ami",
    "amen",
    "amir",
    "amil",
    "dolmen"
  ]
};
|
|
899
|
+
|
|
900
|
+
// src/dictionary/schema.ts
var VALID_SEVERITIES = ["high", "medium", "low"];
var VALID_CATEGORIES = ["sexual", "insult", "slur", "general"];
// Upper bound on the suffix list size.
var MAX_SUFFIXES = 100;
// Suffixes are plain ASCII lowercase, 1-10 letters.
var SUFFIX_PATTERN = /^[a-z]{1,10}$/;
/**
 * Validates raw dictionary data and returns it unchanged when valid.
 * Checks version, suffixes (count + shape), entries (root uniqueness,
 * variants, severity, category, suffixable) and whitelist (string,
 * non-empty, unique).
 * @param data - Untrusted dictionary data.
 * @returns The same object, now known to be structurally valid.
 * @throws {Error} Describing the first validation failure found.
 */
function validateDictionary(data) {
  if (data == null || typeof data !== "object") {
    throw new Error("Dictionary data must be a non-null object");
  }
  const d = data;
  if (typeof d.version !== "number" || d.version < 1) {
    throw new Error("Dictionary version must be a positive number");
  }
  if (!Array.isArray(d.suffixes)) {
    throw new Error("Dictionary suffixes must be an array");
  }
  if (d.suffixes.length > MAX_SUFFIXES) {
    throw new Error(`Dictionary suffixes exceed maximum of ${MAX_SUFFIXES}`);
  }
  for (const suffix of d.suffixes) {
    if (typeof suffix !== "string" || !SUFFIX_PATTERN.test(suffix)) {
      throw new Error(
        `Invalid suffix "${suffix}": must be 1-10 lowercase letters [a-z]`
      );
    }
  }
  if (!Array.isArray(d.entries)) {
    throw new Error("Dictionary entries must be an array");
  }
  const seenRoots = /* @__PURE__ */ new Set();
  for (let i = 0; i < d.entries.length; i++) {
    const entry = d.entries[i];
    const label = `entries[${i}]`;
    if (entry == null || typeof entry !== "object") {
      throw new Error(`${label}: must be an object`);
    }
    if (typeof entry.root !== "string" || entry.root.length === 0) {
      throw new Error(`${label}: root must be a non-empty string`);
    }
    const rootLower = entry.root.toLowerCase();
    if (seenRoots.has(rootLower)) {
      throw new Error(`${label}: duplicate root "${entry.root}"`);
    }
    seenRoots.add(rootLower);
    if (!Array.isArray(entry.variants)) {
      throw new Error(`${label} (root="${entry.root}"): variants must be an array`);
    }
    // Validate each variant too (previously only the whitelist members were
    // type-checked, letting non-string variants slip through validation).
    for (let j = 0; j < entry.variants.length; j++) {
      if (typeof entry.variants[j] !== "string" || entry.variants[j].length === 0) {
        throw new Error(
          `${label} (root="${entry.root}"): variants[${j}] must be a non-empty string`
        );
      }
    }
    if (typeof entry.severity !== "string" || !VALID_SEVERITIES.includes(entry.severity)) {
      throw new Error(
        `${label} (root="${entry.root}"): severity must be one of ${VALID_SEVERITIES.join(", ")}`
      );
    }
    if (typeof entry.category !== "string" || !VALID_CATEGORIES.includes(entry.category)) {
      throw new Error(
        `${label} (root="${entry.root}"): category must be one of ${VALID_CATEGORIES.join(", ")}`
      );
    }
    if (typeof entry.suffixable !== "boolean") {
      throw new Error(`${label} (root="${entry.root}"): suffixable must be a boolean`);
    }
  }
  if (!Array.isArray(d.whitelist)) {
    throw new Error("Dictionary whitelist must be an array");
  }
  const seenWhitelist = /* @__PURE__ */ new Set();
  for (let i = 0; i < d.whitelist.length; i++) {
    if (typeof d.whitelist[i] !== "string") {
      throw new Error(`whitelist[${i}]: must be a string`);
    }
    if (d.whitelist[i].length === 0) {
      throw new Error(`whitelist[${i}]: must not be empty`);
    }
    const wlLower = d.whitelist[i].toLowerCase();
    if (seenWhitelist.has(wlLower)) {
      throw new Error(`whitelist[${i}]: duplicate entry "${d.whitelist[i]}"`);
    }
    seenWhitelist.add(wlLower);
  }
  return data;
}
|
|
980
|
+
|
|
981
|
+
// src/lang/tr/config.ts
// Turkish language pack: validated dictionary plus the normalization tables
// used by the detector for this locale.
var validatedData = validateDictionary(dictionary_default);
var config = {
  locale: "tr",
  // Folds Turkish-specific letters (both cases) to ASCII equivalents.
  charMap: {
    \u00E7: "c",
    \u00C7: "c",
    \u011F: "g",
    \u011E: "g",
    \u0131: "i",
    \u0130: "i",
    \u00F6: "o",
    \u00D6: "o",
    \u015F: "s",
    \u015E: "s",
    \u00FC: "u",
    \u00DC: "u"
  },
  // Leet-speak substitutions applied after the charMap pass.
  leetMap: {
    "0": "o",
    "1": "i",
    "2": "i",
    "3": "e",
    "4": "a",
    "5": "s",
    "6": "g",
    "7": "t",
    "8": "b",
    "9": "g",
    "@": "a",
    $: "s",
    "!": "i"
  },
  // Per-letter regex character classes for fuzzy/obfuscated matching:
  // each ASCII letter maps to the set of glyphs that may stand in for it.
  charClasses: {
    a: "[a4\xE0\xE1\xE2\xE3\xE4\xE5]",
    b: "[b8\xDF]",
    c: "[c\xE7\xC7]",
    d: "[d]",
    e: "[e3\xE8\xE9\xEA\xEB]",
    f: "[f]",
    g: "[g\u011F\u011E69]",
    h: "[h]",
    i: "[i\u0131\u013012\xEC\xED\xEE\xEF]",
    j: "[j]",
    k: "[k]",
    l: "[l1]",
    m: "[m]",
    n: "[n\xF1]",
    o: "[o0\xF6\xD6\xF2\xF3\xF4\xF5]",
    p: "[p]",
    q: "[qk]",
    r: "[r]",
    s: "[s5\u015F\u015E\xDF]",
    t: "[t7]",
    u: "[u\xFC\xDC\xF9\xFA\xFBv]",
    v: "[vu]",
    w: "[w]",
    x: "[x]",
    y: "[y]",
    z: "[z2]"
  },
  // Digit-to-word expansions ([digits, word]) applied only between letters,
  // e.g. "s2x" -> "sikix"; longest prefixes listed first.
  numberExpansions: [
    ["100", "yuz"],
    ["50", "elli"],
    ["10", "on"],
    ["2", "iki"]
  ],
  dictionary: validatedData
};
|
|
1050
|
+
|
|
1051
|
+
// src/lang/en/dictionary.json
// English profanity dictionary (schema version 1). Each entry carries its
// known variants, a severity tier, a category, and whether generic suffixes
// from `suffixes` may be appended to the root when matching.
var dictionary_default2 = {
  version: 1,
  suffixes: ["ing", "ed", "er", "ers", "s", "es", "ly", "ness"],
  entries: [
    {
      root: "fuck",
      variants: ["fucking", "fucker", "fucked", "fuckers", "fucks", "fck", "fuk", "fuking", "fcking", "stfu", "motherfucker", "motherfucking", "fuckface", "fuckwit", "clusterfuck", "mindfuck"],
      severity: "high",
      category: "sexual",
      suffixable: true
    },
    {
      root: "shit",
      variants: ["shitty", "bullshit", "shitting", "sht", "shits", "shite", "shithead", "shitstorm", "dipshit", "horseshit", "batshit", "apeshit", "shithole", "shitface", "shitshow"],
      severity: "high",
      category: "general",
      suffixable: true
    },
    {
      root: "ass",
      variants: ["asses", "arse", "arses", "asshat", "asswipe", "smartass", "dumbass", "fatass", "badass", "jackass", "lardass", "kickass"],
      severity: "medium",
      category: "insult",
      suffixable: false
    },
    {
      root: "asshole",
      variants: ["assholes", "arsehole", "arseholes"],
      severity: "high",
      category: "insult",
      suffixable: false
    },
    {
      root: "bitch",
      variants: ["bitches", "bitchy", "biatch", "bitching", "bitchass", "sonofabitch"],
      severity: "high",
      category: "insult",
      suffixable: true
    },
    {
      root: "bastard",
      variants: ["bastards", "bastardy"],
      severity: "medium",
      category: "insult",
      suffixable: true
    },
    {
      root: "dick",
      variants: ["dickhead", "dickheads", "dicks", "dickwad", "dickweed"],
      severity: "medium",
      category: "sexual",
      suffixable: false
    },
    {
      root: "cock",
      variants: ["cocks", "cocksucker", "cocksucking", "cockhead"],
      severity: "high",
      category: "sexual",
      suffixable: false
    },
    {
      root: "cunt",
      variants: ["cunts", "cunty"],
      severity: "high",
      category: "sexual",
      suffixable: true
    },
    {
      root: "whore",
      variants: ["whores", "whorish", "whorebag"],
      severity: "high",
      category: "insult",
      suffixable: true
    },
    {
      root: "slut",
      variants: ["sluts", "slutty", "slutbag"],
      severity: "high",
      category: "insult",
      suffixable: true
    },
    {
      root: "piss",
      variants: ["pissed", "pissing", "pisser", "pissoff", "pisshead"],
      severity: "medium",
      category: "general",
      suffixable: true
    },
    {
      root: "wank",
      variants: ["wanker", "wankers", "wanking"],
      severity: "medium",
      category: "sexual",
      suffixable: true
    },
    {
      root: "twat",
      variants: ["twats"],
      severity: "high",
      category: "sexual",
      suffixable: true
    },
    {
      root: "bollocks",
      variants: ["bollock", "bollocked"],
      severity: "medium",
      category: "general",
      suffixable: false
    },
    {
      root: "crap",
      variants: ["crappy", "craps"],
      severity: "low",
      category: "general",
      suffixable: true
    },
    {
      root: "damn",
      variants: ["damned", "damnit", "dammit", "goddamn", "goddamnit"],
      severity: "low",
      category: "general",
      suffixable: false
    },
    {
      root: "retard",
      variants: ["retards", "retarded", "retardation"],
      severity: "high",
      category: "slur",
      suffixable: false
    },
    {
      root: "nigger",
      variants: ["niggers", "nigga", "niggas", "nigg3r"],
      severity: "high",
      category: "slur",
      suffixable: false
    },
    {
      root: "faggot",
      variants: ["faggots", "fag", "fags", "faggy"],
      severity: "high",
      category: "slur",
      suffixable: false
    },
    {
      root: "douche",
      variants: ["douchebag", "douchebags", "douchy", "douchey"],
      severity: "medium",
      category: "insult",
      suffixable: true
    },
    {
      root: "tosser",
      variants: ["tossers"],
      severity: "medium",
      category: "insult",
      suffixable: false
    },
    {
      root: "wanker",
      variants: ["wankers"],
      severity: "medium",
      category: "insult",
      suffixable: false
    }
  ],
  // Innocent words that contain a root as a substring; excluded from matches.
  whitelist: [
    "assembly",
    "assist",
    "assassin",
    "bass",
    "class",
    "classic",
    "classify",
    "grass",
    "mass",
    "massive",
    "pass",
    "passage",
    "passenger",
    "passion",
    "passive",
    "passport",
    "assume",
    "assignment",
    "associate",
    "assertion",
    "asset",
    "assess",
    "dickens",
    "cocktail",
    "cockatoo",
    "peacock",
    "hancock",
    "scrap",
    "scrappy",
    "shitake",
    "document",
    "buckle",
    "piston",
    "bassist",
    "embassy",
    "cassette",
    "hassle",
    "lasso",
    "massage",
    "compass",
    "trespass",
    "harass"
  ]
};

// src/lang/en/config.ts
// English language pack. English needs no diacritic folding (empty charMap);
// it also defines no numberExpansions.
var validatedData2 = validateDictionary(dictionary_default2);
var config2 = {
  locale: "en",
  charMap: {},
  // Leet-speak substitutions applied during normalization.
  leetMap: {
    "0": "o",
    "1": "i",
    "3": "e",
    "4": "a",
    "5": "s",
    "7": "t",
    "@": "a",
    $: "s",
    "!": "i"
  },
  // Per-letter regex character classes for obfuscated matching.
  charClasses: {
    a: "[a4]",
    b: "[b8]",
    c: "[c]",
    d: "[d]",
    e: "[e3]",
    f: "[f]",
    g: "[g9]",
    h: "[h]",
    i: "[i1]",
    j: "[j]",
    k: "[k]",
    l: "[l1]",
    m: "[m]",
    n: "[n]",
    o: "[o0]",
    p: "[p]",
    q: "[q]",
    r: "[r]",
    s: "[s5]",
    t: "[t7]",
    u: "[uv]",
    v: "[vu]",
    w: "[w]",
    x: "[x]",
    y: "[y]",
    z: "[z]"
  },
  dictionary: validatedData2
};
|
|
1310
|
+
|
|
1311
|
+
// src/lang/es/dictionary.json
// Spanish profanity dictionary (schema version 1); same entry shape as the
// other language packs (root, variants, severity, category, suffixable).
var dictionary_default3 = {
  version: 1,
  suffixes: ["ado", "ando", "ido", "iendo", "ar", "er", "ir", "os", "as", "es", "ito", "ita", "azo"],
  entries: [
    {
      root: "mierda",
      variants: ["mierdas", "mierdo", "mierdero", "mierdoso"],
      severity: "high",
      category: "general",
      suffixable: true
    },
    {
      root: "puta",
      variants: ["putas", "putada", "puto", "putos", "hijoputa", "hijaputa", "putero", "puton", "putear"],
      severity: "high",
      category: "insult",
      suffixable: true
    },
    {
      root: "cabron",
      variants: ["cabrones", "cabrona", "cabronazo", "cabronada"],
      severity: "high",
      category: "insult",
      suffixable: true
    },
    {
      root: "joder",
      variants: ["jodido", "jodida", "jodidos", "jodidas", "joderse", "jodiendo"],
      severity: "high",
      category: "general",
      suffixable: true
    },
    {
      root: "co\xF1o",
      variants: ["cono", "conos", "co\xF1os"],
      severity: "high",
      category: "sexual",
      suffixable: false
    },
    {
      root: "verga",
      variants: ["vergas", "vergon", "vergudo", "vergota"],
      severity: "high",
      category: "sexual",
      suffixable: true
    },
    {
      root: "chingar",
      variants: ["chingado", "chingada", "chingados", "chinga", "chingas", "chingo", "chingon", "chingona", "chingadera"],
      severity: "high",
      category: "general",
      suffixable: true
    },
    {
      root: "pendejo",
      variants: ["pendejos", "pendeja", "pendejas", "pendejada", "pendejear"],
      severity: "high",
      category: "insult",
      suffixable: true
    },
    {
      root: "marica",
      variants: ["maricas", "maricon", "maricones", "maricona"],
      severity: "high",
      category: "slur",
      suffixable: false
    },
    {
      root: "carajo",
      variants: ["carajos"],
      severity: "medium",
      category: "general",
      suffixable: false
    },
    {
      root: "idiota",
      variants: ["idiotas", "idiotez"],
      severity: "low",
      category: "insult",
      suffixable: false
    },
    {
      root: "culo",
      variants: ["culos", "culazo", "culear"],
      severity: "medium",
      category: "sexual",
      suffixable: true
    },
    {
      root: "zorra",
      variants: ["zorras", "zorron"],
      severity: "high",
      category: "insult",
      suffixable: true
    },
    {
      root: "estupido",
      variants: ["estupidos", "estupida", "estupidas", "estupidez"],
      severity: "low",
      category: "insult",
      suffixable: false
    },
    {
      root: "imbecil",
      variants: ["imbeciles"],
      severity: "low",
      category: "insult",
      suffixable: false
    },
    {
      root: "gilipollas",
      variants: ["gilipolleces", "gilipollez"],
      severity: "high",
      category: "insult",
      suffixable: false
    },
    {
      root: "huevon",
      variants: ["huevones", "huevona", "huevonazo", "guevon"],
      severity: "medium",
      category: "insult",
      suffixable: false
    },
    {
      root: "mamada",
      variants: ["mamadas", "mamazo", "mamon", "mamona", "mamones"],
      severity: "medium",
      category: "sexual",
      suffixable: false
    },
    {
      root: "pinche",
      variants: ["pinches"],
      severity: "medium",
      category: "general",
      suffixable: false
    }
  ],
  // Innocent words containing a root as a substring; excluded from matches.
  whitelist: [
    "putamen",
    "computadora",
    "computar",
    "disputar",
    "disputa",
    "reputacion",
    "imputar",
    "inocular",
    "acular",
    "calcular",
    "icular",
    "vehicular",
    "particular",
    "articulo",
    "maricopa"
  ]
};

// src/lang/es/config.ts
// Spanish language pack: folds accented vowels and enye to ASCII.
var validatedData3 = validateDictionary(dictionary_default3);
var config3 = {
  locale: "es",
  charMap: {
    \u00F1: "n",
    \u00D1: "n",
    \u00E1: "a",
    \u00C1: "a",
    \u00E9: "e",
    \u00C9: "e",
    \u00ED: "i",
    \u00CD: "i",
    \u00F3: "o",
    \u00D3: "o",
    \u00FA: "u",
    \u00DA: "u"
  },
  // Leet-speak substitutions applied during normalization.
  leetMap: {
    "0": "o",
    "1": "i",
    "3": "e",
    "4": "a",
    "5": "s",
    "7": "t",
    "@": "a",
    $: "s",
    "!": "i"
  },
  // Per-letter regex character classes for obfuscated matching.
  charClasses: {
    a: "[a4\xE1\xC1]",
    b: "[b8]",
    c: "[c]",
    d: "[d]",
    e: "[e3\xE9\xC9]",
    f: "[f]",
    g: "[g9]",
    h: "[h]",
    i: "[i1\xED\xCD]",
    j: "[j]",
    k: "[k]",
    l: "[l1]",
    m: "[m]",
    n: "[n\xF1\xD1]",
    o: "[o0\xF3\xD3]",
    p: "[p]",
    q: "[q]",
    r: "[r]",
    s: "[s5]",
    t: "[t7]",
    u: "[uv\xFA\xDA]",
    v: "[vu]",
    w: "[w]",
    x: "[x]",
    y: "[y]",
    z: "[z]"
  },
  dictionary: validatedData3
};
|
|
1528
|
+
|
|
1529
|
+
// src/lang/de/dictionary.json
// German profanity dictionary (schema version 1); same entry shape as the
// other language packs (root, variants, severity, category, suffixable).
var dictionary_default4 = {
  version: 1,
  suffixes: ["en", "er", "es", "em", "ung", "e", "te", "st"],
  entries: [
    {
      root: "schei\xDFe",
      variants: ["scheisse", "scheiss", "scheisser", "scheisserei", "beschissen"],
      severity: "high",
      category: "general",
      suffixable: true
    },
    {
      root: "fick",
      variants: ["ficken", "ficker", "gefickt", "fickend", "fickerei", "abgefickt"],
      severity: "high",
      category: "sexual",
      suffixable: true
    },
    {
      root: "arsch",
      variants: ["arschloch", "arscher", "arschgeige", "arschgesicht", "arschkriecher"],
      severity: "high",
      category: "insult",
      suffixable: true
    },
    {
      root: "hurensohn",
      variants: ["hurensohne", "hurens\xF6hne"],
      severity: "high",
      category: "insult",
      suffixable: false
    },
    {
      root: "hure",
      variants: ["huren", "hurig"],
      severity: "high",
      category: "insult",
      suffixable: true
    },
    {
      root: "fotze",
      variants: ["fotzen"],
      severity: "high",
      category: "sexual",
      suffixable: true
    },
    {
      root: "wichser",
      variants: ["wichsern", "wichse", "wichsen", "gewichst"],
      severity: "high",
      category: "sexual",
      suffixable: false
    },
    {
      root: "schwanz",
      variants: ["schw\xE4nze", "schwanze"],
      severity: "medium",
      category: "sexual",
      suffixable: false
    },
    {
      root: "schlampe",
      variants: ["schlampen", "schlampig"],
      severity: "high",
      category: "insult",
      suffixable: true
    },
    {
      root: "mistkerl",
      variants: ["mistkerle"],
      severity: "medium",
      category: "insult",
      suffixable: false
    },
    {
      root: "idiot",
      variants: ["idioten", "idiotin", "idiotisch"],
      severity: "low",
      category: "insult",
      suffixable: true
    },
    {
      root: "dumm",
      variants: ["dummkopf", "dumme", "dummer", "dummes", "dummheit"],
      severity: "low",
      category: "insult",
      suffixable: true
    },
    {
      root: "depp",
      variants: ["deppen", "deppert"],
      severity: "low",
      category: "insult",
      suffixable: true
    },
    {
      root: "vollidiot",
      variants: ["vollidioten"],
      severity: "medium",
      category: "insult",
      suffixable: false
    },
    {
      root: "missgeburt",
      variants: ["missgeburten"],
      severity: "high",
      category: "insult",
      suffixable: false
    },
    {
      root: "drecksau",
      variants: ["drecks\xE4ue"],
      severity: "high",
      category: "insult",
      suffixable: false
    },
    {
      root: "dreck",
      variants: ["dreckig", "dreckiger", "dreckiges"],
      severity: "medium",
      category: "general",
      suffixable: true
    },
    {
      root: "trottel",
      variants: ["trotteln", "trottelig"],
      severity: "low",
      category: "insult",
      suffixable: false
    }
  ],
  // Innocent words containing a root as a substring; excluded from matches.
  whitelist: [
    "ficktion",
    "arschen",
    "schwanzen"
  ]
};

// src/lang/de/config.ts
// German language pack: folds umlauts to base vowels and eszett to "ss".
var validatedData4 = validateDictionary(dictionary_default4);
var config4 = {
  locale: "de",
  charMap: {
    \u00E4: "a",
    \u00C4: "a",
    \u00F6: "o",
    \u00D6: "o",
    \u00FC: "u",
    \u00DC: "u",
    \u00DF: "ss"
  },
  // Leet-speak substitutions applied during normalization.
  leetMap: {
    "0": "o",
    "1": "i",
    "3": "e",
    "4": "a",
    "5": "s",
    "7": "t",
    "@": "a",
    $: "s",
    "!": "i"
  },
  // Per-letter regex character classes for obfuscated matching.
  charClasses: {
    a: "[a4\xE4\xC4]",
    b: "[b8]",
    c: "[c]",
    d: "[d]",
    e: "[e3]",
    f: "[f]",
    g: "[g9]",
    h: "[h]",
    i: "[i1]",
    j: "[j]",
    k: "[k]",
    l: "[l1]",
    m: "[m]",
    n: "[n]",
    o: "[o0\xF6\xD6]",
    p: "[p]",
    q: "[q]",
    r: "[r]",
    s: "[s5\xDF]",
    t: "[t7]",
    u: "[uv\xFC\xDC]",
    v: "[vu]",
    w: "[w]",
    x: "[x]",
    y: "[y]",
    z: "[z]"
  },
  dictionary: validatedData4
};
|
|
1722
|
+
|
|
1723
|
+
// src/lang/index.ts
/** Minimum dictionary schema version this core build can consume. */
var CORE_DICT_VERSION = 1;
/** Built-in language packs keyed by language code. */
var REGISTRY = {
  tr: config,
  en: config2,
  es: config3,
  de: config4
};
/**
 * Resolves the language pack for a given language code.
 * Rejects unknown codes and packs whose dictionary schema is older than
 * this core supports.
 * @param lang - Language code (e.g. "tr", "en").
 * @returns The registered language configuration.
 * @throws {Error} If the language is unknown or its dictionary is outdated.
 */
function getLanguageConfig(lang) {
  const pack = REGISTRY[lang];
  if (!pack) {
    const available = getSupportedLanguages().join(", ");
    throw new Error(
      `Unsupported language: "${lang}". Available languages: ${available}`
    );
  }
  const { version } = pack.dictionary;
  if (version < CORE_DICT_VERSION) {
    throw new Error(
      `Dictionary version ${version} for language "${lang}" is below minimum required version ${CORE_DICT_VERSION}. Please update the language pack.`
    );
  }
  return pack;
}
/** Returns every language code that has a registered pack. */
function getSupportedLanguages() {
  return Object.keys(REGISTRY);
}
|
|
1749
|
+
|
|
1750
|
+
// src/normalizer.ts
|
|
1751
|
+
/**
 * Replaces each character of `text` using `map`; characters without a
 * mapping pass through unchanged. Iterates by code point, so astral
 * characters are handled as single units.
 * @param text - Input string.
 * @param map - Character-to-replacement lookup table.
 * @returns The substituted string.
 */
function replaceFromMap(text, map) {
  return [...text].map((ch) => map[ch] ?? ch).join("");
}
|
|
1758
|
+
/**
 * Builds a function that expands digit sequences into words, but only when
 * the digits are sandwiched between letters (e.g. "s2x" -> "sikix"),
 * so standalone numbers are left intact.
 * @param expansions - Pairs of [digitString, replacementWord], tried in order.
 * @returns A replacer function, or null when there are no expansions.
 */
function buildNumberExpander(expansions) {
  if (expansions.length === 0) return null;
  const escapeForRegex = (s) => s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  // One lookaround-guarded alternative per expansion, joined into a single pattern.
  const alternatives = expansions.map(
    ([digits]) => `(?<=\\p{L})${escapeForRegex(digits)}(?=\\p{L})`
  );
  const pattern = new RegExp(alternatives.join("|"), "gu");
  const table = new Map(expansions);
  return (text) => text.replace(pattern, (hit) => table.get(hit) ?? hit);
}
|
|
1770
|
+
/**
 * Strips obfuscating punctuation runs that sit between two letters
 * (e.g. "f.u-c_k" -> "fuck"); leading/trailing punctuation is kept.
 * @param text - Input string.
 * @returns Text with inter-letter punctuation removed.
 */
function removePunctuation(text) {
  const innerPunctuation = /(?<=\p{L})[.\-_*,;:!?]+(?=\p{L})/gu;
  return text.replace(innerPunctuation, "");
}
|
|
1773
|
+
/**
 * Collapses any character repeated three or more times in a row down to a
 * single occurrence ("sooo" -> "so"); doubled characters are preserved.
 * @param text - Input string.
 * @returns Text with long repeats collapsed.
 */
function collapseRepeats(text) {
  return text.replace(/(.)\1\1+/g, "$1");
}
|
|
1776
|
+
/**
 * Trims leading/trailing whitespace and squeezes every internal
 * whitespace run (spaces, tabs, newlines) to a single space.
 * @param text - Input string.
 * @returns The normalized string.
 */
function trimWhitespace(text) {
  return text.trim().replace(/\s+/g, " ");
}
|
|
1779
|
+
/**
 * Builds a text normalizer from a language config. The returned function
 * applies, in order: locale-aware lowercasing, diacritic folding (charMap),
 * letter-bounded number expansion, leet substitution (leetMap),
 * inter-letter punctuation removal, repeat collapsing, and whitespace
 * normalization. The number expander is compiled once, up front.
 * @param config5 - { locale, charMap, leetMap, numberExpansions? }.
 * @returns A function mapping raw text to normalized text.
 */
function createNormalizer(config5) {
  const expandNumbers = config5.numberExpansions
    ? buildNumberExpander(config5.numberExpansions)
    : null;
  return function normalize2(text) {
    let current = text.toLocaleLowerCase(config5.locale);
    current = replaceFromMap(current, config5.charMap);
    if (expandNumbers) {
      current = expandNumbers(current);
    }
    current = replaceFromMap(current, config5.leetMap);
    current = removePunctuation(current);
    current = collapseRepeats(current);
    return trimWhitespace(current);
  };
}
|
|
1795
|
+
// Legacy standalone Turkish normalizer, kept for the exported `normalize`
// helper. Mirrors the tables in the "tr" language pack above.
var TURKISH_CHAR_MAP = {
  \u00E7: "c",
  \u00C7: "c",
  \u011F: "g",
  \u011E: "g",
  \u0131: "i",
  \u0130: "i",
  \u00F6: "o",
  \u00D6: "o",
  \u015F: "s",
  \u015E: "s",
  \u00FC: "u",
  \u00DC: "u"
};
// Leet-speak substitutions used by the standalone Turkish normalizer.
var LEET_MAP = {
  "0": "o",
  "1": "i",
  "2": "i",
  "3": "e",
  "4": "a",
  "5": "s",
  "6": "g",
  "7": "t",
  "8": "b",
  "9": "g",
  "@": "a",
  $: "s",
  "!": "i"
};
// Digit-to-word expansions ([digits, word]) applied only between letters.
var TR_NUMBER_MAP = [
  ["100", "yuz"],
  ["50", "elli"],
  ["10", "on"],
  ["2", "iki"]
];
// Pre-built normalizer instance shared by every call to `normalize`.
var _turkishNormalize = createNormalizer({
  locale: "tr",
  charMap: TURKISH_CHAR_MAP,
  leetMap: LEET_MAP,
  numberExpansions: TR_NUMBER_MAP
});
/**
 * Normalizes text using the Turkish rules (lowercase, diacritic folding,
 * number expansion, leet substitution, punctuation/repeat/whitespace cleanup).
 * @param text - Raw input text.
 * @returns The normalized text.
 */
function normalize(text) {
  return _turkishNormalize(text);
}
|
|
1839
|
+
|
|
1840
|
+
// src/terlik.ts
|
|
1841
|
+
var Terlik = class _Terlik {
|
|
1842
|
+
dictionary;
|
|
1843
|
+
detector;
|
|
1844
|
+
mode;
|
|
1845
|
+
maskStyle;
|
|
1846
|
+
enableFuzzy;
|
|
1847
|
+
fuzzyThreshold;
|
|
1848
|
+
fuzzyAlgorithm;
|
|
1849
|
+
maxLength;
|
|
1850
|
+
replaceMask;
|
|
1851
|
+
/** The language code this instance was created with. */
|
|
1852
|
+
language;
|
|
1853
|
+
/**
|
|
1854
|
+
* Creates a new Terlik instance.
|
|
1855
|
+
* @param options - Configuration options.
|
|
1856
|
+
* @throws {Error} If the specified language is not supported.
|
|
1857
|
+
*/
|
|
1858
|
+
constructor(options) {
|
|
1859
|
+
this.language = options?.language ?? "tr";
|
|
1860
|
+
this.mode = options?.mode ?? "balanced";
|
|
1861
|
+
this.maskStyle = options?.maskStyle ?? "stars";
|
|
1862
|
+
this.enableFuzzy = options?.enableFuzzy ?? false;
|
|
1863
|
+
this.fuzzyAlgorithm = options?.fuzzyAlgorithm ?? "levenshtein";
|
|
1864
|
+
this.replaceMask = options?.replaceMask ?? "[***]";
|
|
1865
|
+
const threshold = options?.fuzzyThreshold ?? 0.8;
|
|
1866
|
+
if (threshold < 0 || threshold > 1) {
|
|
1867
|
+
throw new Error(`fuzzyThreshold must be between 0 and 1, got ${threshold}`);
|
|
1868
|
+
}
|
|
1869
|
+
this.fuzzyThreshold = threshold;
|
|
1870
|
+
const maxLen = options?.maxLength ?? MAX_INPUT_LENGTH;
|
|
1871
|
+
if (maxLen < 1) {
|
|
1872
|
+
throw new Error(`maxLength must be at least 1, got ${maxLen}`);
|
|
1873
|
+
}
|
|
1874
|
+
this.maxLength = maxLen;
|
|
1875
|
+
const langConfig = getLanguageConfig(this.language);
|
|
1876
|
+
const normalizeFn = createNormalizer({
|
|
1877
|
+
locale: langConfig.locale,
|
|
1878
|
+
charMap: langConfig.charMap,
|
|
1879
|
+
leetMap: langConfig.leetMap,
|
|
1880
|
+
numberExpansions: langConfig.numberExpansions
|
|
1881
|
+
});
|
|
1882
|
+
this.dictionary = new Dictionary(
|
|
1883
|
+
langConfig.dictionary,
|
|
1884
|
+
options?.customList,
|
|
1885
|
+
options?.whitelist
|
|
1886
|
+
);
|
|
1887
|
+
this.detector = new Detector(
|
|
1888
|
+
this.dictionary,
|
|
1889
|
+
normalizeFn,
|
|
1890
|
+
langConfig.locale,
|
|
1891
|
+
langConfig.charClasses
|
|
1892
|
+
);
|
|
1893
|
+
if (options?.backgroundWarmup) {
|
|
1894
|
+
setTimeout(() => {
|
|
1895
|
+
this.detector.compile();
|
|
1896
|
+
this.containsProfanity("warmup");
|
|
1897
|
+
}, 0);
|
|
1898
|
+
}
|
|
1899
|
+
}
|
|
1900
|
+
/**
|
|
1901
|
+
* Creates and JIT-warms instances for multiple languages at once.
|
|
1902
|
+
* Useful for server deployments to eliminate cold-start latency.
|
|
1903
|
+
*
|
|
1904
|
+
* @param languages - Language codes to warm up (e.g. `["tr", "en"]`).
|
|
1905
|
+
* @param baseOptions - Shared options applied to all instances.
|
|
1906
|
+
* @returns A map of language code to warmed-up Terlik instance.
|
|
1907
|
+
*
|
|
1908
|
+
* @example
|
|
1909
|
+
* ```ts
|
|
1910
|
+
* const cache = Terlik.warmup(["tr", "en", "es"]);
|
|
1911
|
+
* cache.get("en")!.containsProfanity("fuck"); // true, no cold start
|
|
1912
|
+
* ```
|
|
1913
|
+
*/
|
|
1914
|
+
static warmup(languages, baseOptions) {
|
|
1915
|
+
const map = /* @__PURE__ */ new Map();
|
|
1916
|
+
for (const lang of languages) {
|
|
1917
|
+
const instance = new _Terlik({ ...baseOptions, language: lang });
|
|
1918
|
+
instance.containsProfanity("warmup");
|
|
1919
|
+
map.set(lang, instance);
|
|
1920
|
+
}
|
|
1921
|
+
return map;
|
|
1922
|
+
}
|
|
1923
|
+
/**
|
|
1924
|
+
* Checks whether the text contains profanity.
|
|
1925
|
+
* @param text - The text to check.
|
|
1926
|
+
* @param options - Per-call detection options (overrides instance defaults).
|
|
1927
|
+
* @returns `true` if profanity is detected, `false` otherwise.
|
|
1928
|
+
*/
|
|
1929
|
+
containsProfanity(text, options) {
|
|
1930
|
+
const input = validateInput(text, this.maxLength);
|
|
1931
|
+
if (input.length === 0) return false;
|
|
1932
|
+
const matches = this.detector.detect(input, this.mergeDetectOptions(options));
|
|
1933
|
+
return matches.length > 0;
|
|
1934
|
+
}
|
|
1935
|
+
/**
|
|
1936
|
+
* Returns all profanity matches with details (word, root, index, severity, method).
|
|
1937
|
+
* @param text - The text to analyze.
|
|
1938
|
+
* @param options - Per-call detection options (overrides instance defaults).
|
|
1939
|
+
* @returns Array of match results, sorted by index.
|
|
1940
|
+
*/
|
|
1941
|
+
getMatches(text, options) {
|
|
1942
|
+
const input = validateInput(text, this.maxLength);
|
|
1943
|
+
if (input.length === 0) return [];
|
|
1944
|
+
return this.detector.detect(input, this.mergeDetectOptions(options));
|
|
1945
|
+
}
|
|
1946
|
+
/**
|
|
1947
|
+
* Returns the text with detected profanity masked.
|
|
1948
|
+
* @param text - The text to clean.
|
|
1949
|
+
* @param options - Per-call clean options (overrides instance defaults).
|
|
1950
|
+
* @returns The cleaned text with profanity replaced by mask characters.
|
|
1951
|
+
*/
|
|
1952
|
+
clean(text, options) {
|
|
1953
|
+
const input = validateInput(text, this.maxLength);
|
|
1954
|
+
if (input.length === 0) return input;
|
|
1955
|
+
const matches = this.detector.detect(input, this.mergeDetectOptions(options));
|
|
1956
|
+
const style = options?.maskStyle ?? this.maskStyle;
|
|
1957
|
+
const replaceMask = options?.replaceMask ?? this.replaceMask;
|
|
1958
|
+
return cleanText(input, matches, style, replaceMask);
|
|
1959
|
+
}
|
|
1960
|
+
/**
|
|
1961
|
+
* Adds custom words to the detection dictionary at runtime.
|
|
1962
|
+
* Triggers pattern recompilation.
|
|
1963
|
+
* @param words - Words to add.
|
|
1964
|
+
*/
|
|
1965
|
+
addWords(words) {
|
|
1966
|
+
this.dictionary.addWords(words);
|
|
1967
|
+
this.detector.recompile();
|
|
1968
|
+
}
|
|
1969
|
+
/**
|
|
1970
|
+
* Removes words from the detection dictionary at runtime.
|
|
1971
|
+
* Triggers pattern recompilation.
|
|
1972
|
+
* @param words - Words to remove.
|
|
1973
|
+
*/
|
|
1974
|
+
removeWords(words) {
|
|
1975
|
+
this.dictionary.removeWords(words);
|
|
1976
|
+
this.detector.recompile();
|
|
1977
|
+
}
|
|
1978
|
+
/**
|
|
1979
|
+
* Returns the compiled regex patterns keyed by root word.
|
|
1980
|
+
* Useful for debugging or advanced usage.
|
|
1981
|
+
* @returns Map of root word to compiled RegExp.
|
|
1982
|
+
*/
|
|
1983
|
+
getPatterns() {
|
|
1984
|
+
return this.detector.getPatterns();
|
|
1985
|
+
}
|
|
1986
|
+
mergeDetectOptions(options) {
|
|
1987
|
+
return {
|
|
1988
|
+
mode: options?.mode ?? this.mode,
|
|
1989
|
+
enableFuzzy: options?.enableFuzzy ?? this.enableFuzzy,
|
|
1990
|
+
fuzzyThreshold: options?.fuzzyThreshold ?? this.fuzzyThreshold,
|
|
1991
|
+
fuzzyAlgorithm: options?.fuzzyAlgorithm ?? this.fuzzyAlgorithm
|
|
1992
|
+
};
|
|
1993
|
+
}
|
|
1994
|
+
};
|
|
1995
|
+
export {
|
|
1996
|
+
Terlik,
|
|
1997
|
+
createNormalizer,
|
|
1998
|
+
diceSimilarity,
|
|
1999
|
+
getLanguageConfig,
|
|
2000
|
+
getSupportedLanguages,
|
|
2001
|
+
levenshteinDistance,
|
|
2002
|
+
levenshteinSimilarity,
|
|
2003
|
+
normalize
|
|
2004
|
+
};
|