text-sanctifier 1.0.15 → 1.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  function g(a={}){const b=!!a.purgeInvisibleChars,c=!!a.purgeEmojis,d=!!a.nukeControls,e=!!a.keyboardOnlyFilter,k=!!a.normalizeNewlines,f=!!a.trimSpacesAroundNewlines,l=!!a.collapseNewLines,m=!!a.preserveParagraphs,p=!!a.collapseSpaces,q=!!a.finalTrim;return w=>h(w,b,c,d,e,k,f,l,m,p,q)}g.strict=a=>h(a,!0,!0,!0,!1,!0,!0,!0,!1,!0,!0);g.loose=a=>h(a,!1,!1,!1,!1,!0,!0,!0,!0,!0,!0);g.keyboardOnlyEmoji=a=>h(a,!1,!1,!1,!0,!0,!0,!1,!1,!1,!0);g.keyboardOnly=a=>h(a,!0,!0,!0,!0,!0,!0,!0,!1,!0,!0);
2
2
  function h(a,b=!1,c=!1,d=!1,e=!1,k=!1,f=!1,l=!1,m=!1,p=!1,q=!1){if("string"!==typeof a)throw new TypeError("sanctifyText expects a string input.");b&&(a=a.replace(n,""));c&&(a=a.replace(r,""));d&&(a=a.replace(t,""));e&&(a=u(a,c));k&&(a=a.replace(v,"\n"));f&&(a=a.replace(x,"$1"));l&&(b=a,a=m?b.replace(y,"\n\n"):b.replace(z,"\n"));p&&(a=a.replace(A," "));return q?a.trim():a}var n=/[\u00A0\u2000-\u200D\u202F\u2060\u3000\uFEFF\u200E\u200F\u202A-\u202E]+/g,B=/[^\x20-\x7E\r\n]+/gu;
3
3
  function u(a,b=!1){a=C(a);return b?a.replace(B,""):a.replace(B,c=>c.match(r)?c:"")}var D=/[\u2018\u2019\u201A\u201B\u2032\u2035]/g,E=/[\u201C\u201D\u201E\u201F\u2033\u2036\u00AB\u00BB]/g,F=/[\u2012\u2013\u2014\u2015\u2212]/g,G=/\u2026/g,H=/[\u2022\u00B7]/g,I=/[\uFF01-\uFF5E]/g;function C(a){return a.replace(D,"'").replace(E,'"').replace(F,"-").replace(G,"...").replace(H,"*").replace(I,b=>String.fromCharCode(b.charCodeAt(0)-65248))}var r;
4
- try{r=RegExp("(?:\\p{Extended_Pictographic}(?:\\uFE0F|\\uFE0E)?(?:\\u200D(?:\\p{Extended_Pictographic}|\\w)+)*)","gu")}catch{r=/[\u{1F300}-\u{1FAFF}]/gu}var v=/\r\n|\r|\n/g,x=/[ \t]*(\n+)[ \t]*/g,z=/\n{2,}/g,y=/\n{3,}/g,A=/ {2,}/g,t=/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F\u0080-\u009F\u200E\u200F\u202A-\u202E]+/g;
4
+ try{r=RegExp("(?:\\p{Extended_Pictographic}(?:\\uFE0F|\\uFE0E)?(?:\\u200D(?:\\p{Extended_Pictographic}|\\w)+)*)","gu")}catch{r=/[\u{2700}-\u{27BF}\u{1F300}-\u{1FAFF}\u{1F3FB}-\u{1F3FF}\u200D\uFE0F\u{1F1E6}-\u{1F1FF}]/gu}var v=/\r\n|\r|\n/g,x=/[ \t]*(\n+)[ \t]*/g,z=/\n{2,}/g,y=/\n{3,}/g,A=/ {2,}/g,t=/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F\u0080-\u009F\u200E\u200F\u202A-\u202E]+/g;
5
5
  function J(a){if("string"!==typeof a)throw new TypeError("inspectText expects a string input.");const b=[],c={hasControlChars:!1,hasInvisibleChars:!1,hasMixedNewlines:!1,newlineStyle:null,hasEmojis:!1,hasNonKeyboardChars:!1,summary:b},d=(f,l,m)=>{f&&(c[l]=!0,b.push(m))};d(t.test(a),"hasControlChars","Control characters detected.");d(n.test(a),"hasInvisibleChars","Invisible Unicode characters detected.");d(r.test(a),"hasEmojis","Emojis detected.");const {j:e,types:k}=K(a);c.hasMixedNewlines=e;c.newlineStyle=
6
6
  e?"Mixed":k[0]||null;c.newlineStyle&&b.push(e?"Mixed newline styles detected.":`Consistent newline style: ${c.newlineStyle}`);a=C(a).replace(B,f=>f.match(r)?"":"\u2612");d(/[\u2612]/.test(a),"hasNonKeyboardChars","Non-keyboard characters detected.");return c}
7
7
  function K(a){if("string"!==typeof a)throw new TypeError("getNewlineStats expects a string input.");var b=a.replace(/\r\n/g,"");a={h:(a.match(/\r\n/g)||[]).length,g:(b.match(/\r/g)||[]).length,i:(b.match(/\n/g)||[]).length};b=[];0<a.h&&b.push("CRLF");0<a.g&&b.push("CR");0<a.i&&b.push("LF");return{...a,types:b,j:1<b.length}}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "text-sanctifier",
3
- "version": "1.0.15",
3
+ "version": "1.0.16",
4
4
  "type": "module",
5
5
  "description": "A brutal text normalizer and invisible trash scrubber for modern web projects.",
6
6
  "main": "./src/index.js",
@@ -190,7 +190,7 @@ export function sanctifyText(
190
190
  if (purgeInvisibleChars) cleaned = purgeInvisibleTrash(cleaned);
191
191
 
192
192
  // Remove emojis
193
- if (purgeEmojis) cleaned = purgeEmojisCharacters(cleaned);
193
+ if (purgeEmojis) cleaned = purgeEmojiCharacters(cleaned);
194
194
 
195
195
  // Nuke control characters (excluding whitespace)
196
196
  if (nukeControls) cleaned = purgeControlCharacters(cleaned);
@@ -297,7 +297,7 @@ export let EMOJI_REGEX;
297
297
 
298
298
  /**
299
299
  * Try Unicode property escape regex (preferred).
300
- * Fallback to basic emoji range if unsupported.
300
+ * Fallback to basic emoji ranges if unsupported.
301
301
  */
302
302
  try {
303
303
  EMOJI_REGEX = new RegExp(
@@ -305,11 +305,16 @@ try {
305
305
  'gu'
306
306
  );
307
307
  } catch {
308
- // Fallback: less precise but safe
309
- EMOJI_REGEX = /[\u{1F300}-\u{1FAFF}]/gu;
308
+ // Fallback: wide-range emoji component match (flags, tones, symbols)
309
+ // * Covers:
310
+ // * - Emoji base chars (1F300–1FAFF)
311
+ // * - Dingbats (2700–27BF) like ❌, ✅, ✔️
312
+ // * - Skin tones (1F3FB–1F3FF)
313
+ // * - ZWJ (200D), variation selectors (FE0F)
314
+ // * - Regional indicators (1F1E6–1F1FF)
315
+ EMOJI_REGEX = /[\u{2700}-\u{27BF}\u{1F300}-\u{1FAFF}\u{1F3FB}-\u{1F3FF}\u200D\uFE0F\u{1F1E6}-\u{1F1FF}]/gu;
310
316
  }
311
317
 
312
-
313
318
  /**
314
319
  * Removes all emoji characters using Unicode property escapes.
315
320
  * Supports modern environments (Unicode v13+) with fallback.
@@ -317,7 +322,7 @@ try {
317
322
  * @param {string} text
318
323
  * @returns {string}
319
324
  */
320
- function purgeEmojisCharacters(text) {
325
+ function purgeEmojiCharacters(text) {
321
326
  return text.replace(EMOJI_REGEX, '');
322
327
  }
323
328