terlik.js 2.2.0 → 2.3.0

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as published in their respective public registries.
package/README.md CHANGED
@@ -451,6 +451,37 @@ See [CONTRIBUTING.md](./CONTRIBUTING.md) for contribution guidelines.
451
451
 
452
452
  ## Changelog
453
453
 
454
+ ### 2026-02-28 (v2.3.0) — 40x Faster Cold Start: V8 JIT Regex Optimization
455
+
456
+ **Replaces `\p{L}`/`\p{N}` Unicode property escapes with explicit Latin ranges, eliminating V8 JIT bottleneck.**
457
+
458
+ - **40x faster cold start** — First `containsProfanity()` call: 16,494ms → 404ms.
459
+ - **356x faster multi-language warmup** — 4-language warmup: 19,234ms → 54ms.
460
+ - **13x less memory** — Heap usage: 492MB → 38MB.
461
+ - **Static pattern cache** — Same-language instances share compiled patterns via `Detector.patternCache`.
462
+ - **Background warmup** — Dev server starts instantly, warms up in background.
463
+
464
+ | Change | File |
465
+ |---|---|
466
+ | Replace `\p{L}\p{N}` with `[a-zA-Z0-9À-ɏ]` | `src/patterns.ts` |
467
+ | Static pattern cache + explicit range in getSurroundingWord | `src/detector.ts` |
468
+ | Explicit range in number expander + punctuation removal | `src/normalizer.ts` |
469
+ | Pass cacheKey to Detector | `src/terlik.ts` |
470
+ | Background warmup, lazy instance cache | `live_test_server/server.ts` |
471
+ | NODE_OPTIONS heap safety net | `.github/workflows/ci.yml` |
472
+
473
+ ### 2026-02-28 (v2.2.1) — CI Fix: Timeout Race Condition + İ Platform Compatibility
474
+
475
+ **Fixes detection failures on slow runners and cross-platform İ (U+0130) handling.**
476
+
477
+ - **Timeout race condition fix** — `REGEX_TIMEOUT_MS` check moved from _before_ match processing to _after_. Previously, V8 JIT compilation on first `exec()` call (triggered by lazy compilation) could exceed 250ms, causing the timeout to discard a valid match before it was recorded. Now the current match is always processed; the timeout only prevents scanning for additional matches.
478
+ - **İ (U+0130) cross-platform fix** — First regex pass now runs on `text.toLocaleLowerCase(locale)` instead of raw text. Turkish İ→i mapping is performed explicitly before regex matching, avoiding inconsistent V8/ICU case-folding behavior across platforms (Ubuntu vs macOS). The `mapNormalizedToOriginal()` mapper recovers original-cased words for result output.
479
+
480
+ | Change | File |
481
+ |---|---|
482
+ | Timeout check moved after match processing | `src/detector.ts` (`runPatterns`) |
483
+ | Locale-lower first pass for İ safety | `src/detector.ts` (`detectPattern`) |
484
+
454
485
  ### 2026-02-28 (v2.2) — Lazy Compilation + Linguistic Patch
455
486
 
456
487
  **Zero-cost construction. Background warmup. Turkish agglutination hardening.**
package/dist/index.js CHANGED
@@ -147,7 +147,10 @@ var Dictionary = class {
147
147
  };
148
148
 
149
149
  // src/patterns.ts
150
- var SEPARATOR = "[^\\p{L}\\p{N}]{0,3}";
150
+ var WORD_CHAR = "a-zA-Z0-9\xC0-\u024F";
151
+ var SEPARATOR = `[^${WORD_CHAR}]{0,3}`;
152
+ var WORD_BOUNDARY_BEHIND = `(?<![${WORD_CHAR}])`;
153
+ var WORD_BOUNDARY_AHEAD = `(?![${WORD_CHAR}])`;
151
154
  var MAX_PATTERN_LENGTH = 1e4;
152
155
  var MAX_SUFFIX_CHAIN = 2;
153
156
  var REGEX_TIMEOUT_MS = 250;
@@ -185,15 +188,15 @@ function compilePatterns(entries, suffixes, charClasses, normalizeFn) {
185
188
  const useSuffix = entry.suffixable && suffixGroup.length > 0;
186
189
  let pattern;
187
190
  if (useSuffix) {
188
- pattern = `(?<![\\p{L}\\p{N}])(?:${combined})${suffixGroup}{0,${MAX_SUFFIX_CHAIN}}(?![\\p{L}\\p{N}])`;
191
+ pattern = `${WORD_BOUNDARY_BEHIND}(?:${combined})${suffixGroup}{0,${MAX_SUFFIX_CHAIN}}${WORD_BOUNDARY_AHEAD}`;
189
192
  } else {
190
- pattern = `(?<![\\p{L}\\p{N}])(?:${combined})(?![\\p{L}\\p{N}])`;
193
+ pattern = `${WORD_BOUNDARY_BEHIND}(?:${combined})${WORD_BOUNDARY_AHEAD}`;
191
194
  }
192
195
  if (pattern.length > MAX_PATTERN_LENGTH && useSuffix) {
193
- pattern = `(?<![\\p{L}\\p{N}])(?:${combined})(?![\\p{L}\\p{N}])`;
196
+ pattern = `${WORD_BOUNDARY_BEHIND}(?:${combined})${WORD_BOUNDARY_AHEAD}`;
194
197
  }
195
198
  try {
196
- const regex = new RegExp(pattern, "giu");
199
+ const regex = new RegExp(pattern, "gi");
197
200
  patterns.push({
198
201
  root: entry.root,
199
202
  severity: entry.severity,
@@ -203,8 +206,8 @@ function compilePatterns(entries, suffixes, charClasses, normalizeFn) {
203
206
  } catch (err) {
204
207
  if (useSuffix) {
205
208
  try {
206
- const fallbackPattern = `(?<![\\p{L}\\p{N}])(?:${combined})(?![\\p{L}\\p{N}])`;
207
- const regex = new RegExp(fallbackPattern, "giu");
209
+ const fallbackPattern = `${WORD_BOUNDARY_BEHIND}(?:${combined})${WORD_BOUNDARY_AHEAD}`;
210
+ const regex = new RegExp(fallbackPattern, "gi");
208
211
  patterns.push({
209
212
  root: entry.root,
210
213
  severity: entry.severity,
@@ -278,31 +281,45 @@ function getFuzzyMatcher(algorithm) {
278
281
  }
279
282
 
280
283
  // src/detector.ts
281
- var Detector = class {
284
+ var Detector = class _Detector {
285
+ /** Static cache: shares compiled patterns across instances with identical dictionaries. */
286
+ static patternCache = /* @__PURE__ */ new Map();
282
287
  dictionary;
283
288
  _patterns = null;
289
+ cacheKey;
284
290
  normalizedWordSet;
285
291
  normalizedWordToRoot;
286
292
  normalizeFn;
287
293
  locale;
288
294
  charClasses;
289
- constructor(dictionary, normalizeFn, locale, charClasses) {
295
+ constructor(dictionary, normalizeFn, locale, charClasses, cacheKey) {
290
296
  this.dictionary = dictionary;
291
297
  this.normalizeFn = normalizeFn;
292
298
  this.locale = locale;
293
299
  this.charClasses = charClasses;
300
+ this.cacheKey = cacheKey ?? null;
294
301
  this.normalizedWordSet = /* @__PURE__ */ new Set();
295
302
  this.normalizedWordToRoot = /* @__PURE__ */ new Map();
296
303
  this.buildNormalizedLookup();
297
304
  }
298
305
  ensureCompiled() {
299
306
  if (this._patterns === null) {
307
+ if (this.cacheKey) {
308
+ const cached = _Detector.patternCache.get(this.cacheKey);
309
+ if (cached) {
310
+ this._patterns = cached;
311
+ return this._patterns;
312
+ }
313
+ }
300
314
  this._patterns = compilePatterns(
301
315
  this.dictionary.getEntries(),
302
316
  this.dictionary.getSuffixes(),
303
317
  this.charClasses,
304
318
  this.normalizeFn
305
319
  );
320
+ if (this.cacheKey) {
321
+ _Detector.patternCache.set(this.cacheKey, this._patterns);
322
+ }
306
323
  }
307
324
  return this._patterns;
308
325
  }
@@ -310,6 +327,7 @@ var Detector = class {
310
327
  this.ensureCompiled();
311
328
  }
312
329
  recompile() {
330
+ this.cacheKey = null;
313
331
  this._patterns = compilePatterns(
314
332
  this.dictionary.getEntries(),
315
333
  this.dictionary.getSuffixes(),
@@ -383,9 +401,9 @@ var Detector = class {
383
401
  }
384
402
  }
385
403
  detectPattern(text, whitelist, results) {
386
- this.runPatterns(text, text, whitelist, results, false);
387
- const normalizedText = this.normalizeFn(text);
388
404
  const lowerText = text.toLocaleLowerCase(this.locale);
405
+ this.runPatterns(lowerText, text, whitelist, results, lowerText !== text);
406
+ const normalizedText = this.normalizeFn(text);
389
407
  if (normalizedText !== lowerText && normalizedText.length > 0) {
390
408
  this.runPatterns(normalizedText, text, whitelist, results, true);
391
409
  }
@@ -398,7 +416,6 @@ var Detector = class {
398
416
  pattern.regex.lastIndex = 0;
399
417
  let match;
400
418
  while ((match = pattern.regex.exec(searchText)) !== null) {
401
- if (Date.now() - patternStart > REGEX_TIMEOUT_MS) break;
402
419
  const matchedText = match[0];
403
420
  const matchIndex = match.index;
404
421
  const normalizedMatch = this.normalizeFn(matchedText);
@@ -433,6 +450,7 @@ var Detector = class {
433
450
  if (matchedText.length === 0) {
434
451
  pattern.regex.lastIndex++;
435
452
  }
453
+ if (Date.now() - patternStart > REGEX_TIMEOUT_MS) break;
436
454
  }
437
455
  }
438
456
  }
@@ -499,8 +517,8 @@ var Detector = class {
499
517
  getSurroundingWord(text, index, length) {
500
518
  let start = index;
501
519
  let end = index + length;
502
- while (start > 0 && /\p{L}/u.test(text[start - 1])) start--;
503
- while (end < text.length && /\p{L}/u.test(text[end])) end++;
520
+ while (start > 0 && /[a-zA-ZÀ-ɏ]/.test(text[start - 1])) start--;
521
+ while (end < text.length && /[a-zA-ZÀ-ɏ]/.test(text[end])) end++;
504
522
  return text.slice(start, end);
505
523
  }
506
524
  deduplicateResults(results) {
@@ -1793,15 +1811,15 @@ function buildNumberExpander(expansions) {
1793
1811
  const regex = new RegExp(
1794
1812
  expansions.map(([num]) => {
1795
1813
  const escaped = num.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
1796
- return `(?<=\\p{L})${escaped}(?=\\p{L})`;
1814
+ return `(?<=[a-zA-Z\xC0-\u024F])${escaped}(?=[a-zA-Z\xC0-\u024F])`;
1797
1815
  }).join("|"),
1798
- "gu"
1816
+ "g"
1799
1817
  );
1800
1818
  const lookup = Object.fromEntries(expansions);
1801
1819
  return (text) => text.replace(regex, (match) => lookup[match] ?? match);
1802
1820
  }
1803
1821
  function removePunctuation(text) {
1804
- return text.replace(/(?<=\p{L})[.\-_*,;:!?]+(?=\p{L})/gu, "");
1822
+ return text.replace(/(?<=[a-zA-ZÀ-ɏ])[.\-_*,;:!?]+(?=[a-zA-ZÀ-ɏ])/g, "");
1805
1823
  }
1806
1824
  function collapseRepeats(text) {
1807
1825
  return text.replace(/(.)\1{2,}/g, "$1");
@@ -1917,11 +1935,13 @@ var Terlik = class _Terlik {
1917
1935
  options?.customList,
1918
1936
  options?.whitelist
1919
1937
  );
1938
+ const hasCustomDict = !!(options?.customList?.length || options?.whitelist?.length);
1920
1939
  this.detector = new Detector(
1921
1940
  this.dictionary,
1922
1941
  normalizeFn,
1923
1942
  langConfig.locale,
1924
- langConfig.charClasses
1943
+ langConfig.charClasses,
1944
+ hasCustomDict ? null : this.language
1925
1945
  );
1926
1946
  if (options?.backgroundWarmup) {
1927
1947
  setTimeout(() => {
package/dist/index.mjs CHANGED
@@ -114,7 +114,10 @@ var Dictionary = class {
114
114
  };
115
115
 
116
116
  // src/patterns.ts
117
- var SEPARATOR = "[^\\p{L}\\p{N}]{0,3}";
117
+ var WORD_CHAR = "a-zA-Z0-9\xC0-\u024F";
118
+ var SEPARATOR = `[^${WORD_CHAR}]{0,3}`;
119
+ var WORD_BOUNDARY_BEHIND = `(?<![${WORD_CHAR}])`;
120
+ var WORD_BOUNDARY_AHEAD = `(?![${WORD_CHAR}])`;
118
121
  var MAX_PATTERN_LENGTH = 1e4;
119
122
  var MAX_SUFFIX_CHAIN = 2;
120
123
  var REGEX_TIMEOUT_MS = 250;
@@ -152,15 +155,15 @@ function compilePatterns(entries, suffixes, charClasses, normalizeFn) {
152
155
  const useSuffix = entry.suffixable && suffixGroup.length > 0;
153
156
  let pattern;
154
157
  if (useSuffix) {
155
- pattern = `(?<![\\p{L}\\p{N}])(?:${combined})${suffixGroup}{0,${MAX_SUFFIX_CHAIN}}(?![\\p{L}\\p{N}])`;
158
+ pattern = `${WORD_BOUNDARY_BEHIND}(?:${combined})${suffixGroup}{0,${MAX_SUFFIX_CHAIN}}${WORD_BOUNDARY_AHEAD}`;
156
159
  } else {
157
- pattern = `(?<![\\p{L}\\p{N}])(?:${combined})(?![\\p{L}\\p{N}])`;
160
+ pattern = `${WORD_BOUNDARY_BEHIND}(?:${combined})${WORD_BOUNDARY_AHEAD}`;
158
161
  }
159
162
  if (pattern.length > MAX_PATTERN_LENGTH && useSuffix) {
160
- pattern = `(?<![\\p{L}\\p{N}])(?:${combined})(?![\\p{L}\\p{N}])`;
163
+ pattern = `${WORD_BOUNDARY_BEHIND}(?:${combined})${WORD_BOUNDARY_AHEAD}`;
161
164
  }
162
165
  try {
163
- const regex = new RegExp(pattern, "giu");
166
+ const regex = new RegExp(pattern, "gi");
164
167
  patterns.push({
165
168
  root: entry.root,
166
169
  severity: entry.severity,
@@ -170,8 +173,8 @@ function compilePatterns(entries, suffixes, charClasses, normalizeFn) {
170
173
  } catch (err) {
171
174
  if (useSuffix) {
172
175
  try {
173
- const fallbackPattern = `(?<![\\p{L}\\p{N}])(?:${combined})(?![\\p{L}\\p{N}])`;
174
- const regex = new RegExp(fallbackPattern, "giu");
176
+ const fallbackPattern = `${WORD_BOUNDARY_BEHIND}(?:${combined})${WORD_BOUNDARY_AHEAD}`;
177
+ const regex = new RegExp(fallbackPattern, "gi");
175
178
  patterns.push({
176
179
  root: entry.root,
177
180
  severity: entry.severity,
@@ -245,31 +248,45 @@ function getFuzzyMatcher(algorithm) {
245
248
  }
246
249
 
247
250
  // src/detector.ts
248
- var Detector = class {
251
+ var Detector = class _Detector {
252
+ /** Static cache: shares compiled patterns across instances with identical dictionaries. */
253
+ static patternCache = /* @__PURE__ */ new Map();
249
254
  dictionary;
250
255
  _patterns = null;
256
+ cacheKey;
251
257
  normalizedWordSet;
252
258
  normalizedWordToRoot;
253
259
  normalizeFn;
254
260
  locale;
255
261
  charClasses;
256
- constructor(dictionary, normalizeFn, locale, charClasses) {
262
+ constructor(dictionary, normalizeFn, locale, charClasses, cacheKey) {
257
263
  this.dictionary = dictionary;
258
264
  this.normalizeFn = normalizeFn;
259
265
  this.locale = locale;
260
266
  this.charClasses = charClasses;
267
+ this.cacheKey = cacheKey ?? null;
261
268
  this.normalizedWordSet = /* @__PURE__ */ new Set();
262
269
  this.normalizedWordToRoot = /* @__PURE__ */ new Map();
263
270
  this.buildNormalizedLookup();
264
271
  }
265
272
  ensureCompiled() {
266
273
  if (this._patterns === null) {
274
+ if (this.cacheKey) {
275
+ const cached = _Detector.patternCache.get(this.cacheKey);
276
+ if (cached) {
277
+ this._patterns = cached;
278
+ return this._patterns;
279
+ }
280
+ }
267
281
  this._patterns = compilePatterns(
268
282
  this.dictionary.getEntries(),
269
283
  this.dictionary.getSuffixes(),
270
284
  this.charClasses,
271
285
  this.normalizeFn
272
286
  );
287
+ if (this.cacheKey) {
288
+ _Detector.patternCache.set(this.cacheKey, this._patterns);
289
+ }
273
290
  }
274
291
  return this._patterns;
275
292
  }
@@ -277,6 +294,7 @@ var Detector = class {
277
294
  this.ensureCompiled();
278
295
  }
279
296
  recompile() {
297
+ this.cacheKey = null;
280
298
  this._patterns = compilePatterns(
281
299
  this.dictionary.getEntries(),
282
300
  this.dictionary.getSuffixes(),
@@ -350,9 +368,9 @@ var Detector = class {
350
368
  }
351
369
  }
352
370
  detectPattern(text, whitelist, results) {
353
- this.runPatterns(text, text, whitelist, results, false);
354
- const normalizedText = this.normalizeFn(text);
355
371
  const lowerText = text.toLocaleLowerCase(this.locale);
372
+ this.runPatterns(lowerText, text, whitelist, results, lowerText !== text);
373
+ const normalizedText = this.normalizeFn(text);
356
374
  if (normalizedText !== lowerText && normalizedText.length > 0) {
357
375
  this.runPatterns(normalizedText, text, whitelist, results, true);
358
376
  }
@@ -365,7 +383,6 @@ var Detector = class {
365
383
  pattern.regex.lastIndex = 0;
366
384
  let match;
367
385
  while ((match = pattern.regex.exec(searchText)) !== null) {
368
- if (Date.now() - patternStart > REGEX_TIMEOUT_MS) break;
369
386
  const matchedText = match[0];
370
387
  const matchIndex = match.index;
371
388
  const normalizedMatch = this.normalizeFn(matchedText);
@@ -400,6 +417,7 @@ var Detector = class {
400
417
  if (matchedText.length === 0) {
401
418
  pattern.regex.lastIndex++;
402
419
  }
420
+ if (Date.now() - patternStart > REGEX_TIMEOUT_MS) break;
403
421
  }
404
422
  }
405
423
  }
@@ -466,8 +484,8 @@ var Detector = class {
466
484
  getSurroundingWord(text, index, length) {
467
485
  let start = index;
468
486
  let end = index + length;
469
- while (start > 0 && /\p{L}/u.test(text[start - 1])) start--;
470
- while (end < text.length && /\p{L}/u.test(text[end])) end++;
487
+ while (start > 0 && /[a-zA-ZÀ-ɏ]/.test(text[start - 1])) start--;
488
+ while (end < text.length && /[a-zA-ZÀ-ɏ]/.test(text[end])) end++;
471
489
  return text.slice(start, end);
472
490
  }
473
491
  deduplicateResults(results) {
@@ -1760,15 +1778,15 @@ function buildNumberExpander(expansions) {
1760
1778
  const regex = new RegExp(
1761
1779
  expansions.map(([num]) => {
1762
1780
  const escaped = num.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
1763
- return `(?<=\\p{L})${escaped}(?=\\p{L})`;
1781
+ return `(?<=[a-zA-Z\xC0-\u024F])${escaped}(?=[a-zA-Z\xC0-\u024F])`;
1764
1782
  }).join("|"),
1765
- "gu"
1783
+ "g"
1766
1784
  );
1767
1785
  const lookup = Object.fromEntries(expansions);
1768
1786
  return (text) => text.replace(regex, (match) => lookup[match] ?? match);
1769
1787
  }
1770
1788
  function removePunctuation(text) {
1771
- return text.replace(/(?<=\p{L})[.\-_*,;:!?]+(?=\p{L})/gu, "");
1789
+ return text.replace(/(?<=[a-zA-ZÀ-ɏ])[.\-_*,;:!?]+(?=[a-zA-ZÀ-ɏ])/g, "");
1772
1790
  }
1773
1791
  function collapseRepeats(text) {
1774
1792
  return text.replace(/(.)\1{2,}/g, "$1");
@@ -1884,11 +1902,13 @@ var Terlik = class _Terlik {
1884
1902
  options?.customList,
1885
1903
  options?.whitelist
1886
1904
  );
1905
+ const hasCustomDict = !!(options?.customList?.length || options?.whitelist?.length);
1887
1906
  this.detector = new Detector(
1888
1907
  this.dictionary,
1889
1908
  normalizeFn,
1890
1909
  langConfig.locale,
1891
- langConfig.charClasses
1910
+ langConfig.charClasses,
1911
+ hasCustomDict ? null : this.language
1892
1912
  );
1893
1913
  if (options?.backgroundWarmup) {
1894
1914
  setTimeout(() => {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "terlik.js",
3
- "version": "2.2.0",
3
+ "version": "2.3.0",
4
4
  "description": "Ultra-fast, zero-dependency multi-language profanity detection engine for Turkish, English, Spanish, and German with lazy compilation, deep agglutination support, and ReDoS-safe regex patterns",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.mjs",