terlik.js 2.2.0 → 2.3.0

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as published in their respective public registries.
package/README.md CHANGED
@@ -451,6 +451,37 @@ See [CONTRIBUTING.md](./CONTRIBUTING.md) for contribution guidelines.
451
451
 
452
452
  ## Changelog
453
453
 
454
+ ### 2026-02-28 (v2.3.0) — 40x Faster Cold Start: V8 JIT Regex Optimization
455
+
456
+ **Replaces `\p{L}`/`\p{N}` Unicode property escapes with explicit Latin ranges, eliminating V8 JIT bottleneck.**
457
+
458
+ - **40x faster cold start** — First `containsProfanity()` call: 16,494ms → 404ms.
459
+ - **356x faster multi-language warmup** — 4-language warmup: 19,234ms → 54ms.
460
+ - **13x less memory** — Heap usage: 492MB → 38MB.
461
+ - **Static pattern cache** — Same-language instances share compiled patterns via `Detector.patternCache`.
462
+ - **Background warmup** — Dev server starts instantly, warms up in background.
463
+
464
+ | Change | File |
465
+ |---|---|
466
+ | Replace `\p{L}\p{N}` with `[a-zA-Z0-9À-ɏ]` | `src/patterns.ts` |
467
+ | Static pattern cache + explicit range in getSurroundingWord | `src/detector.ts` |
468
+ | Explicit range in number expander + punctuation removal | `src/normalizer.ts` |
469
+ | Pass cacheKey to Detector | `src/terlik.ts` |
470
+ | Background warmup, lazy instance cache | `live_test_server/server.ts` |
471
+ | NODE_OPTIONS heap safety net | `.github/workflows/ci.yml` |
472
+
473
+ ### 2026-02-28 (v2.2.1) — CI Fix: Timeout Race Condition + İ Platform Compatibility
474
+
475
+ **Fixes detection failures on slow runners and cross-platform İ (U+0130) handling.**
476
+
477
+ - **Timeout race condition fix** — `REGEX_TIMEOUT_MS` check moved from _before_ match processing to _after_. Previously, V8 JIT compilation on first `exec()` call (triggered by lazy compilation) could exceed 250ms, causing the timeout to discard a valid match before it was recorded. Now the current match is always processed; the timeout only prevents scanning for additional matches.
478
+ - **İ (U+0130) cross-platform fix** — First regex pass now runs on `text.toLocaleLowerCase(locale)` instead of raw text. Turkish İ→i mapping is performed explicitly before regex matching, avoiding inconsistent V8/ICU case-folding behavior across platforms (Ubuntu vs macOS). The `mapNormalizedToOriginal()` mapper recovers original-cased words for result output.
479
+
480
+ | Change | File |
481
+ |---|---|
482
+ | Timeout check moved after match processing | `src/detector.ts` (`runPatterns`) |
483
+ | Locale-lower first pass for İ safety | `src/detector.ts` (`detectPattern`) |
484
+
454
485
  ### 2026-02-28 (v2.2) — Lazy Compilation + Linguistic Patch
455
486
 
456
487
  **Zero-cost construction. Background warmup. Turkish agglutination hardening.**
package/dist/index.js CHANGED
@@ -147,7 +147,10 @@ var Dictionary = class {
147
147
  };
148
148
 
149
149
  // src/patterns.ts
150
- var SEPARATOR = "[^\\p{L}\\p{N}]{0,3}";
150
+ var WORD_CHAR = "a-zA-Z0-9\xC0-\u024F";
151
+ var SEPARATOR = `[^${WORD_CHAR}]{0,3}`;
152
+ var WORD_BOUNDARY_BEHIND = `(?<![${WORD_CHAR}])`;
153
+ var WORD_BOUNDARY_AHEAD = `(?![${WORD_CHAR}])`;
151
154
  var MAX_PATTERN_LENGTH = 1e4;
152
155
  var MAX_SUFFIX_CHAIN = 2;
153
156
  var REGEX_TIMEOUT_MS = 250;
@@ -185,15 +188,15 @@ function compilePatterns(entries, suffixes, charClasses, normalizeFn) {
185
188
  const useSuffix = entry.suffixable && suffixGroup.length > 0;
186
189
  let pattern;
187
190
  if (useSuffix) {
188
- pattern = `(?<![\\p{L}\\p{N}])(?:${combined})${suffixGroup}{0,${MAX_SUFFIX_CHAIN}}(?![\\p{L}\\p{N}])`;
191
+ pattern = `${WORD_BOUNDARY_BEHIND}(?:${combined})${suffixGroup}{0,${MAX_SUFFIX_CHAIN}}${WORD_BOUNDARY_AHEAD}`;
189
192
  } else {
190
- pattern = `(?<![\\p{L}\\p{N}])(?:${combined})(?![\\p{L}\\p{N}])`;
193
+ pattern = `${WORD_BOUNDARY_BEHIND}(?:${combined})${WORD_BOUNDARY_AHEAD}`;
191
194
  }
192
195
  if (pattern.length > MAX_PATTERN_LENGTH && useSuffix) {
193
- pattern = `(?<![\\p{L}\\p{N}])(?:${combined})(?![\\p{L}\\p{N}])`;
196
+ pattern = `${WORD_BOUNDARY_BEHIND}(?:${combined})${WORD_BOUNDARY_AHEAD}`;
194
197
  }
195
198
  try {
196
- const regex = new RegExp(pattern, "giu");
199
+ const regex = new RegExp(pattern, "gi");
197
200
  patterns.push({
198
201
  root: entry.root,
199
202
  severity: entry.severity,
@@ -203,8 +206,8 @@ function compilePatterns(entries, suffixes, charClasses, normalizeFn) {
203
206
  } catch (err) {
204
207
  if (useSuffix) {
205
208
  try {
206
- const fallbackPattern = `(?<![\\p{L}\\p{N}])(?:${combined})(?![\\p{L}\\p{N}])`;
207
- const regex = new RegExp(fallbackPattern, "giu");
209
+ const fallbackPattern = `${WORD_BOUNDARY_BEHIND}(?:${combined})${WORD_BOUNDARY_AHEAD}`;
210
+ const regex = new RegExp(fallbackPattern, "gi");
208
211
  patterns.push({
209
212
  root: entry.root,
210
213
  severity: entry.severity,
@@ -278,31 +281,45 @@ function getFuzzyMatcher(algorithm) {
278
281
  }
279
282
 
280
283
  // src/detector.ts
281
- var Detector = class {
284
+ var Detector = class _Detector {
285
+ /** Static cache: shares compiled patterns across instances with identical dictionaries. */
286
+ static patternCache = /* @__PURE__ */ new Map();
282
287
  dictionary;
283
288
  _patterns = null;
289
+ cacheKey;
284
290
  normalizedWordSet;
285
291
  normalizedWordToRoot;
286
292
  normalizeFn;
287
293
  locale;
288
294
  charClasses;
289
- constructor(dictionary, normalizeFn, locale, charClasses) {
295
+ constructor(dictionary, normalizeFn, locale, charClasses, cacheKey) {
290
296
  this.dictionary = dictionary;
291
297
  this.normalizeFn = normalizeFn;
292
298
  this.locale = locale;
293
299
  this.charClasses = charClasses;
300
+ this.cacheKey = cacheKey ?? null;
294
301
  this.normalizedWordSet = /* @__PURE__ */ new Set();
295
302
  this.normalizedWordToRoot = /* @__PURE__ */ new Map();
296
303
  this.buildNormalizedLookup();
297
304
  }
298
305
  ensureCompiled() {
299
306
  if (this._patterns === null) {
307
+ if (this.cacheKey) {
308
+ const cached = _Detector.patternCache.get(this.cacheKey);
309
+ if (cached) {
310
+ this._patterns = cached;
311
+ return this._patterns;
312
+ }
313
+ }
300
314
  this._patterns = compilePatterns(
301
315
  this.dictionary.getEntries(),
302
316
  this.dictionary.getSuffixes(),
303
317
  this.charClasses,
304
318
  this.normalizeFn
305
319
  );
320
+ if (this.cacheKey) {
321
+ _Detector.patternCache.set(this.cacheKey, this._patterns);
322
+ }
306
323
  }
307
324
  return this._patterns;
308
325
  }
@@ -310,6 +327,7 @@ var Detector = class {
310
327
  this.ensureCompiled();
311
328
  }
312
329
  recompile() {
330
+ this.cacheKey = null;
313
331
  this._patterns = compilePatterns(
314
332
  this.dictionary.getEntries(),
315
333
  this.dictionary.getSuffixes(),
@@ -383,9 +401,9 @@ var Detector = class {
383
401
  }
384
402
  }
385
403
  detectPattern(text, whitelist, results) {
386
- this.runPatterns(text, text, whitelist, results, false);
387
- const normalizedText = this.normalizeFn(text);
388
404
  const lowerText = text.toLocaleLowerCase(this.locale);
405
+ this.runPatterns(lowerText, text, whitelist, results, lowerText !== text);
406
+ const normalizedText = this.normalizeFn(text);
389
407
  if (normalizedText !== lowerText && normalizedText.length > 0) {
390
408
  this.runPatterns(normalizedText, text, whitelist, results, true);
391
409
  }
@@ -398,7 +416,6 @@ var Detector = class {
398
416
  pattern.regex.lastIndex = 0;
399
417
  let match;
400
418
  while ((match = pattern.regex.exec(searchText)) !== null) {
401
- if (Date.now() - patternStart > REGEX_TIMEOUT_MS) break;
402
419
  const matchedText = match[0];
403
420
  const matchIndex = match.index;
404
421
  const normalizedMatch = this.normalizeFn(matchedText);
@@ -433,6 +450,7 @@ var Detector = class {
433
450
  if (matchedText.length === 0) {
434
451
  pattern.regex.lastIndex++;
435
452
  }
453
+ if (Date.now() - patternStart > REGEX_TIMEOUT_MS) break;
436
454
  }
437
455
  }
438
456
  }
@@ -499,8 +517,8 @@ var Detector = class {
499
517
  getSurroundingWord(text, index, length) {
500
518
  let start = index;
501
519
  let end = index + length;
502
- while (start > 0 && /\p{L}/u.test(text[start - 1])) start--;
503
- while (end < text.length && /\p{L}/u.test(text[end])) end++;
520
+ while (start > 0 && /[a-zA-ZÀ-ɏ]/.test(text[start - 1])) start--;
521
+ while (end < text.length && /[a-zA-ZÀ-ɏ]/.test(text[end])) end++;
504
522
  return text.slice(start, end);
505
523
  }
506
524
  deduplicateResults(results) {
@@ -1793,15 +1811,15 @@ function buildNumberExpander(expansions) {
1793
1811
  const regex = new RegExp(
1794
1812
  expansions.map(([num]) => {
1795
1813
  const escaped = num.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
1796
- return `(?<=\\p{L})${escaped}(?=\\p{L})`;
1814
+ return `(?<=[a-zA-Z\xC0-\u024F])${escaped}(?=[a-zA-Z\xC0-\u024F])`;
1797
1815
  }).join("|"),
1798
- "gu"
1816
+ "g"
1799
1817
  );
1800
1818
  const lookup = Object.fromEntries(expansions);
1801
1819
  return (text) => text.replace(regex, (match) => lookup[match] ?? match);
1802
1820
  }
1803
1821
  function removePunctuation(text) {
1804
- return text.replace(/(?<=\p{L})[.\-_*,;:!?]+(?=\p{L})/gu, "");
1822
+ return text.replace(/(?<=[a-zA-ZÀ-ɏ])[.\-_*,;:!?]+(?=[a-zA-ZÀ-ɏ])/g, "");
1805
1823
  }
1806
1824
  function collapseRepeats(text) {
1807
1825
  return text.replace(/(.)\1{2,}/g, "$1");
@@ -1917,11 +1935,13 @@ var Terlik = class _Terlik {
1917
1935
  options?.customList,
1918
1936
  options?.whitelist
1919
1937
  );
1938
+ const hasCustomDict = !!(options?.customList?.length || options?.whitelist?.length);
1920
1939
  this.detector = new Detector(
1921
1940
  this.dictionary,
1922
1941
  normalizeFn,
1923
1942
  langConfig.locale,
1924
- langConfig.charClasses
1943
+ langConfig.charClasses,
1944
+ hasCustomDict ? null : this.language
1925
1945
  );
1926
1946
  if (options?.backgroundWarmup) {
1927
1947
  setTimeout(() => {
package/dist/index.mjs CHANGED
@@ -114,7 +114,10 @@ var Dictionary = class {
114
114
  };
115
115
 
116
116
  // src/patterns.ts
117
- var SEPARATOR = "[^\\p{L}\\p{N}]{0,3}";
117
+ var WORD_CHAR = "a-zA-Z0-9\xC0-\u024F";
118
+ var SEPARATOR = `[^${WORD_CHAR}]{0,3}`;
119
+ var WORD_BOUNDARY_BEHIND = `(?<![${WORD_CHAR}])`;
120
+ var WORD_BOUNDARY_AHEAD = `(?![${WORD_CHAR}])`;
118
121
  var MAX_PATTERN_LENGTH = 1e4;
119
122
  var MAX_SUFFIX_CHAIN = 2;
120
123
  var REGEX_TIMEOUT_MS = 250;
@@ -152,15 +155,15 @@ function compilePatterns(entries, suffixes, charClasses, normalizeFn) {
152
155
  const useSuffix = entry.suffixable && suffixGroup.length > 0;
153
156
  let pattern;
154
157
  if (useSuffix) {
155
- pattern = `(?<![\\p{L}\\p{N}])(?:${combined})${suffixGroup}{0,${MAX_SUFFIX_CHAIN}}(?![\\p{L}\\p{N}])`;
158
+ pattern = `${WORD_BOUNDARY_BEHIND}(?:${combined})${suffixGroup}{0,${MAX_SUFFIX_CHAIN}}${WORD_BOUNDARY_AHEAD}`;
156
159
  } else {
157
- pattern = `(?<![\\p{L}\\p{N}])(?:${combined})(?![\\p{L}\\p{N}])`;
160
+ pattern = `${WORD_BOUNDARY_BEHIND}(?:${combined})${WORD_BOUNDARY_AHEAD}`;
158
161
  }
159
162
  if (pattern.length > MAX_PATTERN_LENGTH && useSuffix) {
160
- pattern = `(?<![\\p{L}\\p{N}])(?:${combined})(?![\\p{L}\\p{N}])`;
163
+ pattern = `${WORD_BOUNDARY_BEHIND}(?:${combined})${WORD_BOUNDARY_AHEAD}`;
161
164
  }
162
165
  try {
163
- const regex = new RegExp(pattern, "giu");
166
+ const regex = new RegExp(pattern, "gi");
164
167
  patterns.push({
165
168
  root: entry.root,
166
169
  severity: entry.severity,
@@ -170,8 +173,8 @@ function compilePatterns(entries, suffixes, charClasses, normalizeFn) {
170
173
  } catch (err) {
171
174
  if (useSuffix) {
172
175
  try {
173
- const fallbackPattern = `(?<![\\p{L}\\p{N}])(?:${combined})(?![\\p{L}\\p{N}])`;
174
- const regex = new RegExp(fallbackPattern, "giu");
176
+ const fallbackPattern = `${WORD_BOUNDARY_BEHIND}(?:${combined})${WORD_BOUNDARY_AHEAD}`;
177
+ const regex = new RegExp(fallbackPattern, "gi");
175
178
  patterns.push({
176
179
  root: entry.root,
177
180
  severity: entry.severity,
@@ -245,31 +248,45 @@ function getFuzzyMatcher(algorithm) {
245
248
  }
246
249
 
247
250
  // src/detector.ts
248
- var Detector = class {
251
+ var Detector = class _Detector {
252
+ /** Static cache: shares compiled patterns across instances with identical dictionaries. */
253
+ static patternCache = /* @__PURE__ */ new Map();
249
254
  dictionary;
250
255
  _patterns = null;
256
+ cacheKey;
251
257
  normalizedWordSet;
252
258
  normalizedWordToRoot;
253
259
  normalizeFn;
254
260
  locale;
255
261
  charClasses;
256
- constructor(dictionary, normalizeFn, locale, charClasses) {
262
+ constructor(dictionary, normalizeFn, locale, charClasses, cacheKey) {
257
263
  this.dictionary = dictionary;
258
264
  this.normalizeFn = normalizeFn;
259
265
  this.locale = locale;
260
266
  this.charClasses = charClasses;
267
+ this.cacheKey = cacheKey ?? null;
261
268
  this.normalizedWordSet = /* @__PURE__ */ new Set();
262
269
  this.normalizedWordToRoot = /* @__PURE__ */ new Map();
263
270
  this.buildNormalizedLookup();
264
271
  }
265
272
  ensureCompiled() {
266
273
  if (this._patterns === null) {
274
+ if (this.cacheKey) {
275
+ const cached = _Detector.patternCache.get(this.cacheKey);
276
+ if (cached) {
277
+ this._patterns = cached;
278
+ return this._patterns;
279
+ }
280
+ }
267
281
  this._patterns = compilePatterns(
268
282
  this.dictionary.getEntries(),
269
283
  this.dictionary.getSuffixes(),
270
284
  this.charClasses,
271
285
  this.normalizeFn
272
286
  );
287
+ if (this.cacheKey) {
288
+ _Detector.patternCache.set(this.cacheKey, this._patterns);
289
+ }
273
290
  }
274
291
  return this._patterns;
275
292
  }
@@ -277,6 +294,7 @@ var Detector = class {
277
294
  this.ensureCompiled();
278
295
  }
279
296
  recompile() {
297
+ this.cacheKey = null;
280
298
  this._patterns = compilePatterns(
281
299
  this.dictionary.getEntries(),
282
300
  this.dictionary.getSuffixes(),
@@ -350,9 +368,9 @@ var Detector = class {
350
368
  }
351
369
  }
352
370
  detectPattern(text, whitelist, results) {
353
- this.runPatterns(text, text, whitelist, results, false);
354
- const normalizedText = this.normalizeFn(text);
355
371
  const lowerText = text.toLocaleLowerCase(this.locale);
372
+ this.runPatterns(lowerText, text, whitelist, results, lowerText !== text);
373
+ const normalizedText = this.normalizeFn(text);
356
374
  if (normalizedText !== lowerText && normalizedText.length > 0) {
357
375
  this.runPatterns(normalizedText, text, whitelist, results, true);
358
376
  }
@@ -365,7 +383,6 @@ var Detector = class {
365
383
  pattern.regex.lastIndex = 0;
366
384
  let match;
367
385
  while ((match = pattern.regex.exec(searchText)) !== null) {
368
- if (Date.now() - patternStart > REGEX_TIMEOUT_MS) break;
369
386
  const matchedText = match[0];
370
387
  const matchIndex = match.index;
371
388
  const normalizedMatch = this.normalizeFn(matchedText);
@@ -400,6 +417,7 @@ var Detector = class {
400
417
  if (matchedText.length === 0) {
401
418
  pattern.regex.lastIndex++;
402
419
  }
420
+ if (Date.now() - patternStart > REGEX_TIMEOUT_MS) break;
403
421
  }
404
422
  }
405
423
  }
@@ -466,8 +484,8 @@ var Detector = class {
466
484
  getSurroundingWord(text, index, length) {
467
485
  let start = index;
468
486
  let end = index + length;
469
- while (start > 0 && /\p{L}/u.test(text[start - 1])) start--;
470
- while (end < text.length && /\p{L}/u.test(text[end])) end++;
487
+ while (start > 0 && /[a-zA-ZÀ-ɏ]/.test(text[start - 1])) start--;
488
+ while (end < text.length && /[a-zA-ZÀ-ɏ]/.test(text[end])) end++;
471
489
  return text.slice(start, end);
472
490
  }
473
491
  deduplicateResults(results) {
@@ -1760,15 +1778,15 @@ function buildNumberExpander(expansions) {
1760
1778
  const regex = new RegExp(
1761
1779
  expansions.map(([num]) => {
1762
1780
  const escaped = num.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
1763
- return `(?<=\\p{L})${escaped}(?=\\p{L})`;
1781
+ return `(?<=[a-zA-Z\xC0-\u024F])${escaped}(?=[a-zA-Z\xC0-\u024F])`;
1764
1782
  }).join("|"),
1765
- "gu"
1783
+ "g"
1766
1784
  );
1767
1785
  const lookup = Object.fromEntries(expansions);
1768
1786
  return (text) => text.replace(regex, (match) => lookup[match] ?? match);
1769
1787
  }
1770
1788
  function removePunctuation(text) {
1771
- return text.replace(/(?<=\p{L})[.\-_*,;:!?]+(?=\p{L})/gu, "");
1789
+ return text.replace(/(?<=[a-zA-ZÀ-ɏ])[.\-_*,;:!?]+(?=[a-zA-ZÀ-ɏ])/g, "");
1772
1790
  }
1773
1791
  function collapseRepeats(text) {
1774
1792
  return text.replace(/(.)\1{2,}/g, "$1");
@@ -1884,11 +1902,13 @@ var Terlik = class _Terlik {
1884
1902
  options?.customList,
1885
1903
  options?.whitelist
1886
1904
  );
1905
+ const hasCustomDict = !!(options?.customList?.length || options?.whitelist?.length);
1887
1906
  this.detector = new Detector(
1888
1907
  this.dictionary,
1889
1908
  normalizeFn,
1890
1909
  langConfig.locale,
1891
- langConfig.charClasses
1910
+ langConfig.charClasses,
1911
+ hasCustomDict ? null : this.language
1892
1912
  );
1893
1913
  if (options?.backgroundWarmup) {
1894
1914
  setTimeout(() => {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "terlik.js",
3
- "version": "2.2.0",
3
+ "version": "2.3.0",
4
4
  "description": "Ultra-fast, zero-dependency multi-language profanity detection engine for Turkish, English, Spanish, and German with lazy compilation, deep agglutination support, and ReDoS-safe regex patterns",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.mjs",