terlik.js 2.2.1 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -0
- package/dist/index.js +35 -15
- package/dist/index.mjs +35 -15
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -451,6 +451,25 @@ See [CONTRIBUTING.md](./CONTRIBUTING.md) for contribution guidelines.
|
|
|
451
451
|
|
|
452
452
|
## Changelog
|
|
453
453
|
|
|
454
|
+
### 2026-02-28 (v2.3.0) — 40x Faster Cold Start: V8 JIT Regex Optimization
|
|
455
|
+
|
|
456
|
+
**Replaces `\p{L}`/`\p{N}` Unicode property escapes with explicit Latin ranges, eliminating a V8 JIT bottleneck.**
|
|
457
|
+
|
|
458
|
+
- **40x faster cold start** — First `containsProfanity()` call: 16,494ms → 404ms.
|
|
459
|
+
- **356x faster multi-language warmup** — 4-language warmup: 19,234ms → 54ms.
|
|
460
|
+
- **13x less memory** — Heap usage: 492MB → 38MB.
|
|
461
|
+
- **Static pattern cache** — Same-language instances created without a custom word list or whitelist share compiled patterns via `Detector.patternCache`.
|
|
462
|
+
- **Background warmup** — The dev server starts instantly and warms up in the background.
|
|
463
|
+
|
|
464
|
+
| Change | File |
|
|
465
|
+
|---|---|
|
|
466
|
+
| Replace `\p{L}\p{N}` with `[a-zA-Z0-9À-ɏ]` (U+00C0–U+024F) | `src/patterns.ts` |
|
|
467
|
+
| Static pattern cache + explicit range in getSurroundingWord | `src/detector.ts` |
|
|
468
|
+
| Explicit range in number expander + punctuation removal | `src/normalizer.ts` |
|
|
469
|
+
| Pass cacheKey to Detector | `src/terlik.ts` |
|
|
470
|
+
| Background warmup, lazy instance cache | `live_test_server/server.ts` |
|
|
471
|
+
| NODE_OPTIONS heap safety net | `.github/workflows/ci.yml` |
|
|
472
|
+
|
|
454
473
|
### 2026-02-28 (v2.2.1) — CI Fix: Timeout Race Condition + İ Platform Compatibility
|
|
455
474
|
|
|
456
475
|
**Fixes detection failures on slow runners and cross-platform İ (U+0130) handling.**
|
package/dist/index.js
CHANGED
|
@@ -147,7 +147,10 @@ var Dictionary = class {
|
|
|
147
147
|
};
|
|
148
148
|
|
|
149
149
|
// src/patterns.ts
|
|
150
|
-
var
|
|
150
|
+
var WORD_CHAR = "a-zA-Z0-9\xC0-\u024F";
|
|
151
|
+
var SEPARATOR = `[^${WORD_CHAR}]{0,3}`;
|
|
152
|
+
var WORD_BOUNDARY_BEHIND = `(?<![${WORD_CHAR}])`;
|
|
153
|
+
var WORD_BOUNDARY_AHEAD = `(?![${WORD_CHAR}])`;
|
|
151
154
|
var MAX_PATTERN_LENGTH = 1e4;
|
|
152
155
|
var MAX_SUFFIX_CHAIN = 2;
|
|
153
156
|
var REGEX_TIMEOUT_MS = 250;
|
|
@@ -185,15 +188,15 @@ function compilePatterns(entries, suffixes, charClasses, normalizeFn) {
|
|
|
185
188
|
const useSuffix = entry.suffixable && suffixGroup.length > 0;
|
|
186
189
|
let pattern;
|
|
187
190
|
if (useSuffix) {
|
|
188
|
-
pattern =
|
|
191
|
+
pattern = `${WORD_BOUNDARY_BEHIND}(?:${combined})${suffixGroup}{0,${MAX_SUFFIX_CHAIN}}${WORD_BOUNDARY_AHEAD}`;
|
|
189
192
|
} else {
|
|
190
|
-
pattern =
|
|
193
|
+
pattern = `${WORD_BOUNDARY_BEHIND}(?:${combined})${WORD_BOUNDARY_AHEAD}`;
|
|
191
194
|
}
|
|
192
195
|
if (pattern.length > MAX_PATTERN_LENGTH && useSuffix) {
|
|
193
|
-
pattern =
|
|
196
|
+
pattern = `${WORD_BOUNDARY_BEHIND}(?:${combined})${WORD_BOUNDARY_AHEAD}`;
|
|
194
197
|
}
|
|
195
198
|
try {
|
|
196
|
-
const regex = new RegExp(pattern, "
|
|
199
|
+
const regex = new RegExp(pattern, "gi");
|
|
197
200
|
patterns.push({
|
|
198
201
|
root: entry.root,
|
|
199
202
|
severity: entry.severity,
|
|
@@ -203,8 +206,8 @@ function compilePatterns(entries, suffixes, charClasses, normalizeFn) {
|
|
|
203
206
|
} catch (err) {
|
|
204
207
|
if (useSuffix) {
|
|
205
208
|
try {
|
|
206
|
-
const fallbackPattern =
|
|
207
|
-
const regex = new RegExp(fallbackPattern, "
|
|
209
|
+
const fallbackPattern = `${WORD_BOUNDARY_BEHIND}(?:${combined})${WORD_BOUNDARY_AHEAD}`;
|
|
210
|
+
const regex = new RegExp(fallbackPattern, "gi");
|
|
208
211
|
patterns.push({
|
|
209
212
|
root: entry.root,
|
|
210
213
|
severity: entry.severity,
|
|
@@ -278,31 +281,45 @@ function getFuzzyMatcher(algorithm) {
|
|
|
278
281
|
}
|
|
279
282
|
|
|
280
283
|
// src/detector.ts
|
|
281
|
-
var Detector = class {
|
|
284
|
+
var Detector = class _Detector {
|
|
285
|
+
/** Static cache: shares compiled patterns across instances with identical dictionaries. */
|
|
286
|
+
static patternCache = /* @__PURE__ */ new Map();
|
|
282
287
|
dictionary;
|
|
283
288
|
_patterns = null;
|
|
289
|
+
cacheKey;
|
|
284
290
|
normalizedWordSet;
|
|
285
291
|
normalizedWordToRoot;
|
|
286
292
|
normalizeFn;
|
|
287
293
|
locale;
|
|
288
294
|
charClasses;
|
|
289
|
-
constructor(dictionary, normalizeFn, locale, charClasses) {
|
|
295
|
+
constructor(dictionary, normalizeFn, locale, charClasses, cacheKey) {
|
|
290
296
|
this.dictionary = dictionary;
|
|
291
297
|
this.normalizeFn = normalizeFn;
|
|
292
298
|
this.locale = locale;
|
|
293
299
|
this.charClasses = charClasses;
|
|
300
|
+
this.cacheKey = cacheKey ?? null;
|
|
294
301
|
this.normalizedWordSet = /* @__PURE__ */ new Set();
|
|
295
302
|
this.normalizedWordToRoot = /* @__PURE__ */ new Map();
|
|
296
303
|
this.buildNormalizedLookup();
|
|
297
304
|
}
|
|
298
305
|
ensureCompiled() {
|
|
299
306
|
if (this._patterns === null) {
|
|
307
|
+
if (this.cacheKey) {
|
|
308
|
+
const cached = _Detector.patternCache.get(this.cacheKey);
|
|
309
|
+
if (cached) {
|
|
310
|
+
this._patterns = cached;
|
|
311
|
+
return this._patterns;
|
|
312
|
+
}
|
|
313
|
+
}
|
|
300
314
|
this._patterns = compilePatterns(
|
|
301
315
|
this.dictionary.getEntries(),
|
|
302
316
|
this.dictionary.getSuffixes(),
|
|
303
317
|
this.charClasses,
|
|
304
318
|
this.normalizeFn
|
|
305
319
|
);
|
|
320
|
+
if (this.cacheKey) {
|
|
321
|
+
_Detector.patternCache.set(this.cacheKey, this._patterns);
|
|
322
|
+
}
|
|
306
323
|
}
|
|
307
324
|
return this._patterns;
|
|
308
325
|
}
|
|
@@ -310,6 +327,7 @@ var Detector = class {
|
|
|
310
327
|
this.ensureCompiled();
|
|
311
328
|
}
|
|
312
329
|
recompile() {
|
|
330
|
+
this.cacheKey = null;
|
|
313
331
|
this._patterns = compilePatterns(
|
|
314
332
|
this.dictionary.getEntries(),
|
|
315
333
|
this.dictionary.getSuffixes(),
|
|
@@ -499,8 +517,8 @@ var Detector = class {
|
|
|
499
517
|
getSurroundingWord(text, index, length) {
|
|
500
518
|
let start = index;
|
|
501
519
|
let end = index + length;
|
|
502
|
-
while (start > 0 &&
|
|
503
|
-
while (end < text.length &&
|
|
520
|
+
while (start > 0 && /[a-zA-ZÀ-ɏ]/.test(text[start - 1])) start--;
|
|
521
|
+
while (end < text.length && /[a-zA-ZÀ-ɏ]/.test(text[end])) end++;
|
|
504
522
|
return text.slice(start, end);
|
|
505
523
|
}
|
|
506
524
|
deduplicateResults(results) {
|
|
@@ -1793,15 +1811,15 @@ function buildNumberExpander(expansions) {
|
|
|
1793
1811
|
const regex = new RegExp(
|
|
1794
1812
|
expansions.map(([num]) => {
|
|
1795
1813
|
const escaped = num.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
1796
|
-
return `(
|
|
1814
|
+
return `(?<=[a-zA-Z\xC0-\u024F])${escaped}(?=[a-zA-Z\xC0-\u024F])`;
|
|
1797
1815
|
}).join("|"),
|
|
1798
|
-
"
|
|
1816
|
+
"g"
|
|
1799
1817
|
);
|
|
1800
1818
|
const lookup = Object.fromEntries(expansions);
|
|
1801
1819
|
return (text) => text.replace(regex, (match) => lookup[match] ?? match);
|
|
1802
1820
|
}
|
|
1803
1821
|
function removePunctuation(text) {
|
|
1804
|
-
return text.replace(/(
|
|
1822
|
+
return text.replace(/(?<=[a-zA-ZÀ-ɏ])[.\-_*,;:!?]+(?=[a-zA-ZÀ-ɏ])/g, "");
|
|
1805
1823
|
}
|
|
1806
1824
|
function collapseRepeats(text) {
|
|
1807
1825
|
return text.replace(/(.)\1{2,}/g, "$1");
|
|
@@ -1917,11 +1935,13 @@ var Terlik = class _Terlik {
|
|
|
1917
1935
|
options?.customList,
|
|
1918
1936
|
options?.whitelist
|
|
1919
1937
|
);
|
|
1938
|
+
const hasCustomDict = !!(options?.customList?.length || options?.whitelist?.length);
|
|
1920
1939
|
this.detector = new Detector(
|
|
1921
1940
|
this.dictionary,
|
|
1922
1941
|
normalizeFn,
|
|
1923
1942
|
langConfig.locale,
|
|
1924
|
-
langConfig.charClasses
|
|
1943
|
+
langConfig.charClasses,
|
|
1944
|
+
hasCustomDict ? null : this.language
|
|
1925
1945
|
);
|
|
1926
1946
|
if (options?.backgroundWarmup) {
|
|
1927
1947
|
setTimeout(() => {
|
package/dist/index.mjs
CHANGED
|
@@ -114,7 +114,10 @@ var Dictionary = class {
|
|
|
114
114
|
};
|
|
115
115
|
|
|
116
116
|
// src/patterns.ts
|
|
117
|
-
var
|
|
117
|
+
var WORD_CHAR = "a-zA-Z0-9\xC0-\u024F";
|
|
118
|
+
var SEPARATOR = `[^${WORD_CHAR}]{0,3}`;
|
|
119
|
+
var WORD_BOUNDARY_BEHIND = `(?<![${WORD_CHAR}])`;
|
|
120
|
+
var WORD_BOUNDARY_AHEAD = `(?![${WORD_CHAR}])`;
|
|
118
121
|
var MAX_PATTERN_LENGTH = 1e4;
|
|
119
122
|
var MAX_SUFFIX_CHAIN = 2;
|
|
120
123
|
var REGEX_TIMEOUT_MS = 250;
|
|
@@ -152,15 +155,15 @@ function compilePatterns(entries, suffixes, charClasses, normalizeFn) {
|
|
|
152
155
|
const useSuffix = entry.suffixable && suffixGroup.length > 0;
|
|
153
156
|
let pattern;
|
|
154
157
|
if (useSuffix) {
|
|
155
|
-
pattern =
|
|
158
|
+
pattern = `${WORD_BOUNDARY_BEHIND}(?:${combined})${suffixGroup}{0,${MAX_SUFFIX_CHAIN}}${WORD_BOUNDARY_AHEAD}`;
|
|
156
159
|
} else {
|
|
157
|
-
pattern =
|
|
160
|
+
pattern = `${WORD_BOUNDARY_BEHIND}(?:${combined})${WORD_BOUNDARY_AHEAD}`;
|
|
158
161
|
}
|
|
159
162
|
if (pattern.length > MAX_PATTERN_LENGTH && useSuffix) {
|
|
160
|
-
pattern =
|
|
163
|
+
pattern = `${WORD_BOUNDARY_BEHIND}(?:${combined})${WORD_BOUNDARY_AHEAD}`;
|
|
161
164
|
}
|
|
162
165
|
try {
|
|
163
|
-
const regex = new RegExp(pattern, "
|
|
166
|
+
const regex = new RegExp(pattern, "gi");
|
|
164
167
|
patterns.push({
|
|
165
168
|
root: entry.root,
|
|
166
169
|
severity: entry.severity,
|
|
@@ -170,8 +173,8 @@ function compilePatterns(entries, suffixes, charClasses, normalizeFn) {
|
|
|
170
173
|
} catch (err) {
|
|
171
174
|
if (useSuffix) {
|
|
172
175
|
try {
|
|
173
|
-
const fallbackPattern =
|
|
174
|
-
const regex = new RegExp(fallbackPattern, "
|
|
176
|
+
const fallbackPattern = `${WORD_BOUNDARY_BEHIND}(?:${combined})${WORD_BOUNDARY_AHEAD}`;
|
|
177
|
+
const regex = new RegExp(fallbackPattern, "gi");
|
|
175
178
|
patterns.push({
|
|
176
179
|
root: entry.root,
|
|
177
180
|
severity: entry.severity,
|
|
@@ -245,31 +248,45 @@ function getFuzzyMatcher(algorithm) {
|
|
|
245
248
|
}
|
|
246
249
|
|
|
247
250
|
// src/detector.ts
|
|
248
|
-
var Detector = class {
|
|
251
|
+
var Detector = class _Detector {
|
|
252
|
+
/** Static cache: shares compiled patterns across instances with identical dictionaries. */
|
|
253
|
+
static patternCache = /* @__PURE__ */ new Map();
|
|
249
254
|
dictionary;
|
|
250
255
|
_patterns = null;
|
|
256
|
+
cacheKey;
|
|
251
257
|
normalizedWordSet;
|
|
252
258
|
normalizedWordToRoot;
|
|
253
259
|
normalizeFn;
|
|
254
260
|
locale;
|
|
255
261
|
charClasses;
|
|
256
|
-
constructor(dictionary, normalizeFn, locale, charClasses) {
|
|
262
|
+
constructor(dictionary, normalizeFn, locale, charClasses, cacheKey) {
|
|
257
263
|
this.dictionary = dictionary;
|
|
258
264
|
this.normalizeFn = normalizeFn;
|
|
259
265
|
this.locale = locale;
|
|
260
266
|
this.charClasses = charClasses;
|
|
267
|
+
this.cacheKey = cacheKey ?? null;
|
|
261
268
|
this.normalizedWordSet = /* @__PURE__ */ new Set();
|
|
262
269
|
this.normalizedWordToRoot = /* @__PURE__ */ new Map();
|
|
263
270
|
this.buildNormalizedLookup();
|
|
264
271
|
}
|
|
265
272
|
ensureCompiled() {
|
|
266
273
|
if (this._patterns === null) {
|
|
274
|
+
if (this.cacheKey) {
|
|
275
|
+
const cached = _Detector.patternCache.get(this.cacheKey);
|
|
276
|
+
if (cached) {
|
|
277
|
+
this._patterns = cached;
|
|
278
|
+
return this._patterns;
|
|
279
|
+
}
|
|
280
|
+
}
|
|
267
281
|
this._patterns = compilePatterns(
|
|
268
282
|
this.dictionary.getEntries(),
|
|
269
283
|
this.dictionary.getSuffixes(),
|
|
270
284
|
this.charClasses,
|
|
271
285
|
this.normalizeFn
|
|
272
286
|
);
|
|
287
|
+
if (this.cacheKey) {
|
|
288
|
+
_Detector.patternCache.set(this.cacheKey, this._patterns);
|
|
289
|
+
}
|
|
273
290
|
}
|
|
274
291
|
return this._patterns;
|
|
275
292
|
}
|
|
@@ -277,6 +294,7 @@ var Detector = class {
|
|
|
277
294
|
this.ensureCompiled();
|
|
278
295
|
}
|
|
279
296
|
recompile() {
|
|
297
|
+
this.cacheKey = null;
|
|
280
298
|
this._patterns = compilePatterns(
|
|
281
299
|
this.dictionary.getEntries(),
|
|
282
300
|
this.dictionary.getSuffixes(),
|
|
@@ -466,8 +484,8 @@ var Detector = class {
|
|
|
466
484
|
getSurroundingWord(text, index, length) {
|
|
467
485
|
let start = index;
|
|
468
486
|
let end = index + length;
|
|
469
|
-
while (start > 0 &&
|
|
470
|
-
while (end < text.length &&
|
|
487
|
+
while (start > 0 && /[a-zA-ZÀ-ɏ]/.test(text[start - 1])) start--;
|
|
488
|
+
while (end < text.length && /[a-zA-ZÀ-ɏ]/.test(text[end])) end++;
|
|
471
489
|
return text.slice(start, end);
|
|
472
490
|
}
|
|
473
491
|
deduplicateResults(results) {
|
|
@@ -1760,15 +1778,15 @@ function buildNumberExpander(expansions) {
|
|
|
1760
1778
|
const regex = new RegExp(
|
|
1761
1779
|
expansions.map(([num]) => {
|
|
1762
1780
|
const escaped = num.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
1763
|
-
return `(
|
|
1781
|
+
return `(?<=[a-zA-Z\xC0-\u024F])${escaped}(?=[a-zA-Z\xC0-\u024F])`;
|
|
1764
1782
|
}).join("|"),
|
|
1765
|
-
"
|
|
1783
|
+
"g"
|
|
1766
1784
|
);
|
|
1767
1785
|
const lookup = Object.fromEntries(expansions);
|
|
1768
1786
|
return (text) => text.replace(regex, (match) => lookup[match] ?? match);
|
|
1769
1787
|
}
|
|
1770
1788
|
function removePunctuation(text) {
|
|
1771
|
-
return text.replace(/(
|
|
1789
|
+
return text.replace(/(?<=[a-zA-ZÀ-ɏ])[.\-_*,;:!?]+(?=[a-zA-ZÀ-ɏ])/g, "");
|
|
1772
1790
|
}
|
|
1773
1791
|
function collapseRepeats(text) {
|
|
1774
1792
|
return text.replace(/(.)\1{2,}/g, "$1");
|
|
@@ -1884,11 +1902,13 @@ var Terlik = class _Terlik {
|
|
|
1884
1902
|
options?.customList,
|
|
1885
1903
|
options?.whitelist
|
|
1886
1904
|
);
|
|
1905
|
+
const hasCustomDict = !!(options?.customList?.length || options?.whitelist?.length);
|
|
1887
1906
|
this.detector = new Detector(
|
|
1888
1907
|
this.dictionary,
|
|
1889
1908
|
normalizeFn,
|
|
1890
1909
|
langConfig.locale,
|
|
1891
|
-
langConfig.charClasses
|
|
1910
|
+
langConfig.charClasses,
|
|
1911
|
+
hasCustomDict ? null : this.language
|
|
1892
1912
|
);
|
|
1893
1913
|
if (options?.backgroundWarmup) {
|
|
1894
1914
|
setTimeout(() => {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "terlik.js",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.3.0",
|
|
4
4
|
"description": "Ultra-fast, zero-dependency multi-language profanity detection engine for Turkish, English, Spanish, and German with lazy compilation, deep agglutination support, and ReDoS-safe regex patterns",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"module": "./dist/index.mjs",
|