terlik.js 2.3.0 → 2.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +59 -107
- package/dist/index.d.mts +16 -14
- package/dist/index.d.ts +16 -14
- package/dist/index.js +495 -32
- package/dist/index.mjs +495 -32
- package/package.json +14 -6
package/README.md
CHANGED
|
@@ -1,17 +1,32 @@
|
|
|
1
1
|
# terlik.js
|
|
2
2
|
|
|
3
|
-

|
|
3
|
+

|
|
4
4
|
|
|
5
5
|
[](https://github.com/badursun/terlik.js/actions/workflows/ci.yml)
|
|
6
6
|
[](https://www.npmjs.com/package/terlik.js)
|
|
7
|
+
[](https://www.npmjs.com/package/terlik.js)
|
|
7
8
|
[](https://bundlephobia.com/package/terlik.js)
|
|
9
|
+
[](https://www.typescriptlang.org/)
|
|
10
|
+
[]()
|
|
8
11
|
[](https://opensource.org/licenses/MIT)
|
|
9
12
|
|
|
10
|
-
|
|
13
|
+
Multi-language profanity detection and filtering engine, designed Turkish-first and **extensible to any language**. Not a naive blacklist — a multi-layered normalization and pattern engine that catches what simple string matching misses.
|
|
11
14
|
|
|
12
|
-
|
|
15
|
+
Ships with **Turkish** (flagship, full coverage), **English**, **Spanish**, and **German** built-in. Add any language with a folder and two files, or extend at runtime via `extendDictionary`.
|
|
13
16
|
|
|
14
|
-
|
|
17
|
+
> **Türkçe:** Türkçe öncelikli, her dile genişletilebilir küfür tespit ve filtreleme motoru. Leet speak, karakter tekrarı, ayırıcı karakterler ve Türkçe ek sistemi desteği ile yaratıcı küfür denemelerini yakalar. Sıfır bağımlılık, TypeScript, 35 KB.
|
|
18
|
+
|
|
19
|
+
## Features
|
|
20
|
+
|
|
21
|
+
- **Extensible to any language** — ships with TR/EN/ES/DE, add more via language packs or `extendDictionary`
|
|
22
|
+
- Catches leet speak, separators, char repetition, mixed case, zero-width chars
|
|
23
|
+
- Turkish suffix engine (83 suffixes, ~3,000+ detectable forms from 25 roots)
|
|
24
|
+
- Three detection modes: strict, balanced, loose (with fuzzy matching)
|
|
25
|
+
- Zero dependencies, **35 KB** gzipped
|
|
26
|
+
- ESM + CJS — works in Node.js, Bun, Deno, browsers, Cloudflare Workers, Edge runtimes
|
|
27
|
+
- Lazy compilation: ~1.5ms construction, <1ms per check after warmup
|
|
28
|
+
- ReDoS-safe regex patterns with timeout safety net
|
|
29
|
+
- Full TypeScript support with exported types
|
|
15
30
|
|
|
16
31
|
## Why terlik.js?
|
|
17
32
|
|
|
@@ -113,12 +128,14 @@ input
|
|
|
113
128
|
→ result
|
|
114
129
|
```
|
|
115
130
|
|
|
116
|
-
Each language has its own char map, leet map, char classes, and optional number expansions. The engine is language-agnostic — only the data is language-specific.
|
|
131
|
+
Each language has its own char map, leet map, char classes, and optional number expansions. The engine is language-agnostic — only the data is language-specific. This means **any language can be added** without modifying the core engine.
|
|
117
132
|
|
|
118
133
|
For suffixable roots, the engine appends an optional suffix group (up to 2 chained suffixes). Turkish has 83 suffixes (including question particles and adverbial forms), English has 8, Spanish has 13, German has 8.
|
|
119
134
|
|
|
120
135
|
### Language Packs
|
|
121
136
|
|
|
137
|
+
Community contributions to existing language packs (new words, variants, whitelist entries) and entirely new language packs are welcome! See [CONTRIBUTING.md](./CONTRIBUTING.md) for step-by-step instructions.
|
|
138
|
+
|
|
122
139
|
Each language lives in its own folder under `src/lang/`:
|
|
123
140
|
|
|
124
141
|
```
|
|
@@ -161,15 +178,17 @@ terlik.js ships with a **deliberately narrow dictionary** — the goal is to **m
|
|
|
161
178
|
|
|
162
179
|
### Coverage
|
|
163
180
|
|
|
164
|
-
| Language | Roots | Explicit Variants | Suffixes | Whitelist | Effective Forms |
|
|
165
|
-
|
|
166
|
-
| Turkish | 25 | 88 | 83 | 52 | ~3,000+ |
|
|
167
|
-
| English | 23 | 106 | 8 | 42 | ~700+ |
|
|
168
|
-
| Spanish | 19 | 73 | 13 | 15 | ~500+ |
|
|
169
|
-
| German | 18 | 48 | 8 | 3 | ~300+ |
|
|
181
|
+
| Language | Status | Roots | Explicit Variants | Suffixes | Whitelist | Effective Forms |
|
|
182
|
+
|---|---|---|---|---|---|---|
|
|
183
|
+
| Turkish | Flagship | 25 | 88 | 83 | 52 | ~3,000+ |
|
|
184
|
+
| English | Community | 23 | 106 | 8 | 42 | ~700+ |
|
|
185
|
+
| Spanish | Community | 19 | 73 | 13 | 15 | ~500+ |
|
|
186
|
+
| German | Community | 18 | 48 | 8 | 3 | ~300+ |
|
|
170
187
|
|
|
171
188
|
"Effective forms" = roots × normalization variants × suffix combinations × evasion patterns. A root like `sik` with 83 possible suffixes, leet decoding, separator tolerance, and repeat collapse produces thousands of detectable surface forms.
|
|
172
189
|
|
|
190
|
+
> **Add your language!** The engine is language-agnostic. See [Adding a New Language](#adding-a-new-language) or use [`extendDictionary`](#extenddictionary-option) for runtime extension.
|
|
191
|
+
|
|
173
192
|
### What IS Covered
|
|
174
193
|
|
|
175
194
|
- **Core profanity roots** per language (high-severity sexual, insults, slurs)
|
|
@@ -306,7 +325,7 @@ Reproduce: `pnpm bench:accuracy` — outputs per-category breakdown, failure lis
|
|
|
306
325
|
|
|
307
326
|
```ts
|
|
308
327
|
const terlik = new Terlik({
|
|
309
|
-
language: "tr", // "tr" | "en" | "es" | "de" (default: "tr")
|
|
328
|
+
language: "tr", // built-in: "tr" | "en" | "es" | "de" (default: "tr")
|
|
310
329
|
mode: "balanced", // "strict" | "balanced" | "loose"
|
|
311
330
|
maskStyle: "stars", // "stars" | "partial" | "replace"
|
|
312
331
|
replaceMask: "[***]", // mask text for "replace" style
|
|
@@ -317,6 +336,7 @@ const terlik = new Terlik({
|
|
|
317
336
|
fuzzyAlgorithm: "levenshtein", // "levenshtein" | "dice"
|
|
318
337
|
maxLength: 10000, // truncate input beyond this
|
|
319
338
|
backgroundWarmup: false, // compile patterns in background via setTimeout
|
|
339
|
+
extendDictionary: undefined, // DictionaryData object to merge with built-in dictionary
|
|
320
340
|
});
|
|
321
341
|
```
|
|
322
342
|
|
|
@@ -379,6 +399,30 @@ const cache = Terlik.warmup(["tr", "en", "es", "de"]);
|
|
|
379
399
|
cache.get("en")!.containsProfanity("fuck"); // true — no cold start
|
|
380
400
|
```
|
|
381
401
|
|
|
402
|
+
### `extendDictionary` Option
|
|
403
|
+
|
|
404
|
+
Merge an external dictionary with the built-in one. Useful for teams managing custom word lists without modifying the core package:
|
|
405
|
+
|
|
406
|
+
```ts
|
|
407
|
+
const terlik = new Terlik({
|
|
408
|
+
extendDictionary: {
|
|
409
|
+
version: 1,
|
|
410
|
+
suffixes: ["ci", "cu"],
|
|
411
|
+
entries: [
|
|
412
|
+
{ root: "customword", variants: ["cust0mword"], severity: "high", category: "general", suffixable: true },
|
|
413
|
+
],
|
|
414
|
+
whitelist: ["safeterm"],
|
|
415
|
+
},
|
|
416
|
+
});
|
|
417
|
+
|
|
418
|
+
terlik.containsProfanity("customword"); // true
|
|
419
|
+
terlik.containsProfanity("customwordci"); // true (suffix match)
|
|
420
|
+
terlik.containsProfanity("safeterm"); // false (whitelisted)
|
|
421
|
+
terlik.containsProfanity("siktir"); // true (built-in still works)
|
|
422
|
+
```
|
|
423
|
+
|
|
424
|
+
The extension dictionary must follow the same schema as the built-in dictionaries. Duplicate roots are skipped; suffixes and whitelist entries are merged. The pattern cache is disabled for extended instances.
|
|
425
|
+
|
|
382
426
|
### `terlik.language: string`
|
|
383
427
|
|
|
384
428
|
Read-only property. Returns the language code of the instance.
|
|
@@ -412,7 +456,7 @@ deNormalize("Scheiße"); // "scheisse"
|
|
|
412
456
|
|
|
413
457
|
## Testing
|
|
414
458
|
|
|
415
|
-
|
|
459
|
+
874 tests covering all built-in languages, 25 Turkish root words, suffix detection, lazy compilation, multi-language isolation, normalization, fuzzy matching, cleaning, integration, ReDoS hardening, attack surface coverage, external dictionary merging, and edge cases:
|
|
416
460
|
|
|
417
461
|
```bash
|
|
418
462
|
pnpm test # run once
|
|
@@ -427,7 +471,7 @@ An interactive browser-based test environment is included. Chat interface on the
|
|
|
427
471
|
pnpm dev:live # http://localhost:2026
|
|
428
472
|
```
|
|
429
473
|
|
|
430
|
-
See [`
|
|
474
|
+
See [`tools/README.md`](./tools/README.md) for details.
|
|
431
475
|
|
|
432
476
|
### Integration Guide
|
|
433
477
|
|
|
@@ -451,99 +495,7 @@ See [CONTRIBUTING.md](./CONTRIBUTING.md) for contribution guidelines.
|
|
|
451
495
|
|
|
452
496
|
## Changelog
|
|
453
497
|
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
**Replaces `\p{L}`/`\p{N}` Unicode property escapes with explicit Latin ranges, eliminating V8 JIT bottleneck.**
|
|
457
|
-
|
|
458
|
-
- **40x faster cold start** — First `containsProfanity()` call: 16,494ms → 404ms.
|
|
459
|
-
- **356x faster multi-language warmup** — 4-language warmup: 19,234ms → 54ms.
|
|
460
|
-
- **13x less memory** — Heap usage: 492MB → 38MB.
|
|
461
|
-
- **Static pattern cache** — Same-language instances share compiled patterns via `Detector.patternCache`.
|
|
462
|
-
- **Background warmup** — Dev server starts instantly, warms up in background.
|
|
463
|
-
|
|
464
|
-
| Change | File |
|
|
465
|
-
|---|---|
|
|
466
|
-
| Replace `\p{L}\p{N}` with `[a-zA-Z0-9À-ɏ]` | `src/patterns.ts` |
|
|
467
|
-
| Static pattern cache + explicit range in getSurroundingWord | `src/detector.ts` |
|
|
468
|
-
| Explicit range in number expander + punctuation removal | `src/normalizer.ts` |
|
|
469
|
-
| Pass cacheKey to Detector | `src/terlik.ts` |
|
|
470
|
-
| Background warmup, lazy instance cache | `live_test_server/server.ts` |
|
|
471
|
-
| NODE_OPTIONS heap safety net | `.github/workflows/ci.yml` |
|
|
472
|
-
|
|
473
|
-
### 2026-02-28 (v2.2.1) — CI Fix: Timeout Race Condition + İ Platform Compatibility
|
|
474
|
-
|
|
475
|
-
**Fixes detection failures on slow runners and cross-platform İ (U+0130) handling.**
|
|
476
|
-
|
|
477
|
-
- **Timeout race condition fix** — `REGEX_TIMEOUT_MS` check moved from _before_ match processing to _after_. Previously, V8 JIT compilation on first `exec()` call (triggered by lazy compilation) could exceed 250ms, causing the timeout to discard a valid match before it was recorded. Now the current match is always processed; the timeout only prevents scanning for additional matches.
|
|
478
|
-
- **İ (U+0130) cross-platform fix** — First regex pass now runs on `text.toLocaleLowerCase(locale)` instead of raw text. Turkish İ→i mapping is performed explicitly before regex matching, avoiding inconsistent V8/ICU case-folding behavior across platforms (Ubuntu vs macOS). The `mapNormalizedToOriginal()` mapper recovers original-cased words for result output.
|
|
479
|
-
|
|
480
|
-
| Change | File |
|
|
481
|
-
|---|---|
|
|
482
|
-
| Timeout check moved after match processing | `src/detector.ts` (`runPatterns`) |
|
|
483
|
-
| Locale-lower first pass for İ safety | `src/detector.ts` (`detectPattern`) |
|
|
484
|
-
|
|
485
|
-
### 2026-02-28 (v2.2) — Lazy Compilation + Linguistic Patch
|
|
486
|
-
|
|
487
|
-
**Zero-cost construction. Background warmup. Turkish agglutination hardening.**
|
|
488
|
-
|
|
489
|
-
- **Lazy compilation** — Pattern compilation deferred from constructor to first `detect()` call. `new Terlik()` drops from ~225ms to **~1.5ms**. Strict-mode users never pay regex cost (hash lookup only).
|
|
490
|
-
- **`backgroundWarmup` option** — `new Terlik({ backgroundWarmup: true })` schedules compilation + JIT warmup via `setTimeout(fn, 0)`. Idempotent: if `detect()` is called before the timer fires, it compiles synchronously and the timer becomes a no-op.
|
|
491
|
-
- **`detector.compile()` public method** — Allows manual precompilation for advanced use cases.
|
|
492
|
-
- **Turkish suffix expansion** — Added question particles (`misin`, `misiniz`, `musun`, `musunuz`, `miyim`, `miyiz`) and adverbial forms (`cesine`, `casina`) to suffix engine (now 83 total). All suffixable entries (orospu, piç, yarrak, ibne, etc.) now catch question and adverbial inflections.
|
|
493
|
-
- **Deep agglutination variants** — Added explicit variants for `siktiğimin`, `sikermisiniz`, `sikermisin`, `siktirmişcesine`. These forms require 3+ suffix chains or non-standard morpheme boundaries (ğ→g bridge) that the suffix engine can't generalize without false positives.
|
|
494
|
-
- **`MAX_PATTERN_LENGTH` 6000 → 10000** — Accommodates the larger suffix group without fallback to non-suffix mode.
|
|
495
|
-
- **Test count** — 619 → 631. New `tests/lazy-compilation.test.ts` covers construction timing, transparent lazy compile, strict-mode optimization, backgroundWarmup with fake timers, and idempotent early-detect.
|
|
496
|
-
|
|
497
|
-
| Change | File |
|
|
498
|
-
|---|---|
|
|
499
|
-
| `backgroundWarmup` option | `src/types.ts` |
|
|
500
|
-
| Lazy `_patterns`, `ensureCompiled()`, `compile()` | `src/detector.ts` |
|
|
501
|
-
| backgroundWarmup setTimeout scheduling | `src/terlik.ts` |
|
|
502
|
-
| Suffix + variant expansion, MAX_PATTERN_LENGTH | `src/patterns.ts`, `src/lang/tr/dictionary.json` |
|
|
503
|
-
| Lazy compilation tests (new) | `tests/lazy-compilation.test.ts` |
|
|
504
|
-
|
|
505
|
-
### 2026-02-28 (v2.1) — ReDoS Security Hardening
|
|
506
|
-
|
|
507
|
-
**Added Regex Denial-of-Service protection.**
|
|
508
|
-
|
|
509
|
-
Identified vulnerability: overlap between `charClasses` and `separator` (`@`, `$`, `!`, `|`, `+`, `#`, `€`, `¢`, `©` could be matched by both char class and separator) enabled polynomial O(n^2) backtracking via adversarial input.
|
|
510
|
-
|
|
511
|
-
- **Bounded separator** — `[^\p{L}\p{N}]*` (unbounded) replaced with `[^\p{L}\p{N}]{0,3}` (max 3 chars). Real-world evasions (`s.i.k.t.i.r`, `s_i_k`) use 1 separator char. This reduces backtracking from O(n^2) to O(1) per boundary.
|
|
512
|
-
- **Regex timeout safety net** — Added 250ms timeout (`REGEX_TIMEOUT_MS`) to `runPatterns()` and `detectFuzzy()` loops. Never triggers on normal input (<1ms), but provides a hard cap on adversarial input.
|
|
513
|
-
- **charClasses cleanup** — Removed separator-overlapping symbols from all 4 language configs (TR, EN, ES, DE). These symbols are already defined in `leetMap` and converted during the normalizer pass — removing them from pattern matching causes no false negatives.
|
|
514
|
-
- **ReDoS test suite** — `tests/redos.test.ts`: 71 tests covering adversarial timing, attack surface (separator abuse, leet bypass, char repetition, Unicode tricks, whitelist integrity, boundary attacks, multi-match, input edge cases, suffix hardening).
|
|
515
|
-
- **MAX_PATTERN_LENGTH** — 5000 → 6000 (later raised to 10000 in v2.2). The `{0,3}` separator adds ~3 chars per boundary; raised the limit so large suffix patterns (e.g. `orospu`) don't fall back to non-suffix mode.
|
|
516
|
-
- **Test count** — 548 → 619.
|
|
517
|
-
|
|
518
|
-
| Change | File |
|
|
519
|
-
|---|---|
|
|
520
|
-
| Separator `*` → `{0,3}`, timeout constant | `src/patterns.ts` |
|
|
521
|
-
| Timeout loop guard | `src/detector.ts` |
|
|
522
|
-
| charClasses cleanup | `src/lang/{tr,en,es,de}/config.ts` |
|
|
523
|
-
| ReDoS + attack surface test suite (new) | `tests/redos.test.ts` |
|
|
524
|
-
|
|
525
|
-
### 2026-02-28 (v2)
|
|
526
|
-
|
|
527
|
-
**Multi-Language Support**
|
|
528
|
-
|
|
529
|
-
- **4 built-in languages** — Turkish (tr), English (en), Spanish (es), German (de). Each language is a self-contained folder (`src/lang/xx/`) with `config.ts` and `dictionary.json`.
|
|
530
|
-
- **Folder-based language packs** — Adding a new language requires creating one folder with two files and one import line in the registry.
|
|
531
|
-
- **`Terlik.warmup()`** — Static method to create and JIT-warm multiple language instances at once for server deployments.
|
|
532
|
-
- **`language` option** — `new Terlik({ language: "en" })`. Default remains `"tr"` (backward compatible).
|
|
533
|
-
- **Language-agnostic engine** — Normalizer, pattern compiler, detector, and cleaner are now fully parametric. Language-specific data (charMap, leetMap, charClasses, numberExpansions) comes from config files.
|
|
534
|
-
- **New exports** — `createNormalizer`, `getLanguageConfig`, `getSupportedLanguages`, `LanguageConfig` type.
|
|
535
|
-
- **Test coverage** — 346 → 418 tests. Added language-specific tests, cross-language isolation tests, and registry tests.
|
|
536
|
-
|
|
537
|
-
### 2026-02-28
|
|
538
|
-
|
|
539
|
-
**Suffix Engine + JSON Dictionary Migration**
|
|
540
|
-
|
|
541
|
-
- **JSON dictionary** — Migrated dictionary from `tr.ts` to community-friendly `tr.json` format. Added runtime schema validation (`validateDictionary`). Each entry now includes `category` and `suffixable` fields.
|
|
542
|
-
- **Suffix engine** — Defined Turkish grammatical suffixes (later expanded to 83 in v2.2). Suffixable roots (`orospu`, `salak`, `aptal`, `kahpe`, etc.) automatically catch inflected forms like `orospuluk`, `salaksin`, `aptallarin`, `kahpeler`. Short roots (3-char: `sik`, `bok`, `göt`, `döl`) use explicit variants instead to prevent false positives.
|
|
543
|
-
- **Critical bug fix: `\W` separator** — JavaScript's `\W` treats Turkish characters (`ı`, `ş`, `ğ`, `ö`, `ü`, `ç`) as non-word characters. The pattern engine separator `[\W_]*` was changed to `[^\p{L}\p{N}]*` (Unicode-aware). This fixed false positives on innocent words like `sıkma`, `sıkıntı`, `sıkıştı`.
|
|
544
|
-
- **Live test server warmup fix** — Fixed cache key mismatch and added JIT warmup. First request latency reduced from 3318ms to 37ms.
|
|
545
|
-
- **Test coverage** — 101 → 346 tests. All 25 root words are comprehensively tested.
|
|
546
|
-
- **Expanded whitelist** — Added `ama`, `ami`, `amen`, `amir`, `amil`, `dolmen`.
|
|
498
|
+
See [CHANGELOG.md](./CHANGELOG.md) for the full version history.
|
|
547
499
|
|
|
548
500
|
## License
|
|
549
501
|
|
package/dist/index.d.mts
CHANGED
|
@@ -1,3 +1,17 @@
|
|
|
1
|
+
/** Raw dictionary data structure as loaded from JSON. */
|
|
2
|
+
interface DictionaryData {
|
|
3
|
+
version: number;
|
|
4
|
+
suffixes: string[];
|
|
5
|
+
entries: Array<{
|
|
6
|
+
root: string;
|
|
7
|
+
variants: string[];
|
|
8
|
+
severity: string;
|
|
9
|
+
category: string;
|
|
10
|
+
suffixable: boolean;
|
|
11
|
+
}>;
|
|
12
|
+
whitelist: string[];
|
|
13
|
+
}
|
|
14
|
+
|
|
1
15
|
/** Profanity severity level. */
|
|
2
16
|
type Severity = "high" | "medium" | "low";
|
|
3
17
|
/** Detection mode controlling the balance between precision and recall. */
|
|
@@ -45,6 +59,8 @@ interface TerlikOptions {
|
|
|
45
59
|
replaceMask?: string;
|
|
46
60
|
/** Compile regex patterns and run JIT warmup in the background. Default: false. Not recommended in serverless environments. */
|
|
47
61
|
backgroundWarmup?: boolean;
|
|
62
|
+
/** External dictionary data to merge with the built-in language dictionary. */
|
|
63
|
+
extendDictionary?: DictionaryData;
|
|
48
64
|
}
|
|
49
65
|
/** Per-call detection options that override instance defaults. */
|
|
50
66
|
interface DetectOptions {
|
|
@@ -226,20 +242,6 @@ declare function levenshteinSimilarity(a: string, b: string): number;
|
|
|
226
242
|
*/
|
|
227
243
|
declare function diceSimilarity(a: string, b: string): number;
|
|
228
244
|
|
|
229
|
-
/** Raw dictionary data structure as loaded from JSON. */
|
|
230
|
-
interface DictionaryData {
|
|
231
|
-
version: number;
|
|
232
|
-
suffixes: string[];
|
|
233
|
-
entries: Array<{
|
|
234
|
-
root: string;
|
|
235
|
-
variants: string[];
|
|
236
|
-
severity: string;
|
|
237
|
-
category: string;
|
|
238
|
-
suffixable: boolean;
|
|
239
|
-
}>;
|
|
240
|
-
whitelist: string[];
|
|
241
|
-
}
|
|
242
|
-
|
|
243
245
|
interface LanguageConfig {
|
|
244
246
|
/** BCP-47 locale tag for toLocaleLowerCase (e.g. "tr", "en", "es", "de") */
|
|
245
247
|
locale: string;
|
package/dist/index.d.ts
CHANGED
|
@@ -1,3 +1,17 @@
|
|
|
1
|
+
/** Raw dictionary data structure as loaded from JSON. */
|
|
2
|
+
interface DictionaryData {
|
|
3
|
+
version: number;
|
|
4
|
+
suffixes: string[];
|
|
5
|
+
entries: Array<{
|
|
6
|
+
root: string;
|
|
7
|
+
variants: string[];
|
|
8
|
+
severity: string;
|
|
9
|
+
category: string;
|
|
10
|
+
suffixable: boolean;
|
|
11
|
+
}>;
|
|
12
|
+
whitelist: string[];
|
|
13
|
+
}
|
|
14
|
+
|
|
1
15
|
/** Profanity severity level. */
|
|
2
16
|
type Severity = "high" | "medium" | "low";
|
|
3
17
|
/** Detection mode controlling the balance between precision and recall. */
|
|
@@ -45,6 +59,8 @@ interface TerlikOptions {
|
|
|
45
59
|
replaceMask?: string;
|
|
46
60
|
/** Compile regex patterns and run JIT warmup in the background. Default: false. Not recommended in serverless environments. */
|
|
47
61
|
backgroundWarmup?: boolean;
|
|
62
|
+
/** External dictionary data to merge with the built-in language dictionary. */
|
|
63
|
+
extendDictionary?: DictionaryData;
|
|
48
64
|
}
|
|
49
65
|
/** Per-call detection options that override instance defaults. */
|
|
50
66
|
interface DetectOptions {
|
|
@@ -226,20 +242,6 @@ declare function levenshteinSimilarity(a: string, b: string): number;
|
|
|
226
242
|
*/
|
|
227
243
|
declare function diceSimilarity(a: string, b: string): number;
|
|
228
244
|
|
|
229
|
-
/** Raw dictionary data structure as loaded from JSON. */
|
|
230
|
-
interface DictionaryData {
|
|
231
|
-
version: number;
|
|
232
|
-
suffixes: string[];
|
|
233
|
-
entries: Array<{
|
|
234
|
-
root: string;
|
|
235
|
-
variants: string[];
|
|
236
|
-
severity: string;
|
|
237
|
-
category: string;
|
|
238
|
-
suffixable: boolean;
|
|
239
|
-
}>;
|
|
240
|
-
whitelist: string[];
|
|
241
|
-
}
|
|
242
|
-
|
|
243
245
|
interface LanguageConfig {
|
|
244
246
|
/** BCP-47 locale tag for toLocaleLowerCase (e.g. "tr", "en", "es", "de") */
|
|
245
247
|
locale: string;
|