text-sanctifier 1.0.15 → 1.0.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/text-sanctifier.min.js +1 -1
- package/package.json +1 -1
- package/src/sanctifyText.js +11 -6
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
 * Builds a reusable sanitizer from an options object.
 *
 * Every recognised option is read and coerced to a boolean once, when the
 * factory is called; the returned function forwards its text argument and
 * those ten flags, in fixed positional order, to `h`.
 *
 * @param {Object} [a={}] Options: purgeInvisibleChars, purgeEmojis,
 *   nukeControls, keyboardOnlyFilter, normalizeNewlines,
 *   trimSpacesAroundNewlines, collapseNewLines, preserveParagraphs,
 *   collapseSpaces, finalTrim. Missing options count as false.
 * @returns {function(string): string} Sanitizer bound to the chosen flags.
 */
function g(a = {}) {
  // Order matters: it mirrors the positional parameters of `h`.
  const optionOrder = [
    "purgeInvisibleChars",
    "purgeEmojis",
    "nukeControls",
    "keyboardOnlyFilter",
    "normalizeNewlines",
    "trimSpacesAroundNewlines",
    "collapseNewLines",
    "preserveParagraphs",
    "collapseSpaces",
    "finalTrim",
  ];
  const flags = optionOrder.map((optionName) => Boolean(a[optionName]));
  return (text) => h(text, ...flags);
}

// Presets. Argument order after the text: purgeInvisibleChars, purgeEmojis,
// nukeControls, keyboardOnlyFilter, normalizeNewlines,
// trimSpacesAroundNewlines, collapseNewLines, preserveParagraphs,
// collapseSpaces, finalTrim.

/** Everything on except the keyboard-only filter and paragraph preservation. */
g.strict = (text) =>
  h(text, true, true, true, false, true, true, true, false, true, true);

/** Whitespace and newline tidy-up only, keeping paragraph breaks. */
g.loose = (text) =>
  h(text, false, false, false, false, true, true, true, true, true, true);

/** Keyboard-only filter with emojis left in; newlines normalised and trimmed. */
g.keyboardOnlyEmoji = (text) =>
  h(text, false, false, false, true, true, true, false, false, false, true);

/** Every step enabled except paragraph preservation. */
g.keyboardOnly = (text) =>
  h(text, true, true, true, true, true, true, true, false, true, true);
|
|
2
2
|
/**
 * Core sanitizer: applies the enabled clean-up steps to `text` in a fixed order.
 *
 * Steps, in order: strip invisible characters, strip emojis, strip control
 * characters, keyboard-only filter, normalise newlines to "\n", trim
 * spaces/tabs around newlines, collapse newline runs, collapse space runs,
 * final trim.
 *
 * @param {string} text Input to clean.
 * @param {boolean} [purgeInvisibleChars=false] Remove matches of `n`.
 * @param {boolean} [purgeEmojis=false] Remove matches of `r`; also passed to the keyboard-only filter.
 * @param {boolean} [nukeControls=false] Remove matches of `t`.
 * @param {boolean} [keyboardOnlyFilter=false] Run the text through `u`.
 * @param {boolean} [normalizeNewlines=false] Replace matches of `v` with "\n".
 * @param {boolean} [trimSpacesAroundNewlines=false] Replace matches of `x` with their captured newline run.
 * @param {boolean} [collapseNewLines=false] Collapse newline runs (see preserveParagraphs).
 * @param {boolean} [preserveParagraphs=false] When collapsing, replace matches of `y` with "\n\n" instead of matches of `z` with "\n".
 * @param {boolean} [collapseSpaces=false] Replace matches of `A` with a single space.
 * @param {boolean} [finalTrim=false] Trim the result.
 * @returns {string} The cleaned text.
 * @throws {TypeError} If `text` is not a string.
 */
function h(
  text,
  purgeInvisibleChars = false,
  purgeEmojis = false,
  nukeControls = false,
  keyboardOnlyFilter = false,
  normalizeNewlines = false,
  trimSpacesAroundNewlines = false,
  collapseNewLines = false,
  preserveParagraphs = false,
  collapseSpaces = false,
  finalTrim = false
) {
  if (typeof text !== "string") {
    throw new TypeError("sanctifyText expects a string input.");
  }

  let cleaned = text;

  if (purgeInvisibleChars) {
    cleaned = cleaned.replace(n, "");
  }
  if (purgeEmojis) {
    cleaned = cleaned.replace(r, "");
  }
  if (nukeControls) {
    cleaned = cleaned.replace(t, "");
  }
  if (keyboardOnlyFilter) {
    cleaned = u(cleaned, purgeEmojis);
  }
  if (normalizeNewlines) {
    cleaned = cleaned.replace(v, "\n");
  }
  if (trimSpacesAroundNewlines) {
    cleaned = cleaned.replace(x, "$1");
  }
  if (collapseNewLines) {
    cleaned = preserveParagraphs
      ? cleaned.replace(y, "\n\n")
      : cleaned.replace(z, "\n");
  }
  if (collapseSpaces) {
    cleaned = cleaned.replace(A, " ");
  }

  return finalTrim ? cleaned.trim() : cleaned;
}

// Invisible characters: NBSP, U+2000–U+200D, narrow NBSP, word joiner,
// ideographic space, BOM, and the bidi marks/embeddings.
var n = /[\u00A0\u2000-\u200D\u202F\u2060\u3000\uFEFF\u200E\u200F\u202A-\u202E]+/g;

// Runs of anything that is not printable ASCII (0x20–0x7E), CR or LF.
var B = /[^\x20-\x7E\r\n]+/gu;
|
|
3
3
|
/**
 * Keyboard-only filter.
 *
 * The text is first passed through `C` (typographic look-alikes -> ASCII).
 * Then every run matched by `B` is either removed outright (when
 * `stripEmojis` is true) or kept only if `r` matches somewhere in the run.
 *
 * @param {string} text Input text.
 * @param {boolean} [stripEmojis=false] Remove emoji runs as well.
 * @returns {string} Filtered text.
 */
function u(text, stripEmojis = false) {
  const normalized = C(text);
  if (stripEmojis) {
    return normalized.replace(B, "");
  }
  // A run is kept whole as soon as it contains at least one emoji match.
  return normalized.replace(B, (run) => (run.match(r) ? run : ""));
}

// Typographic characters that C() rewrites to plain ASCII.
var D = /[\u2018\u2019\u201A\u201B\u2032\u2035]/g; // curly single quotes, primes -> '
var E = /[\u201C\u201D\u201E\u201F\u2033\u2036\u00AB\u00BB]/g; // curly double quotes, double primes, guillemets -> "
var F = /[\u2012\u2013\u2014\u2015\u2212]/g; // figure/en/em dashes, horizontal bar, minus sign -> -
var G = /\u2026/g; // horizontal ellipsis -> ...
var H = /[\u2022\u00B7]/g; // bullet, middle dot -> *
var I = /[\uFF01-\uFF5E]/g; // fullwidth forms U+FF01–U+FF5E -> code point minus 0xFEE0

/**
 * Replaces typographic punctuation and fullwidth forms with ASCII equivalents.
 *
 * @param {string} text Input text.
 * @returns {string} Text with the substitutions applied.
 */
function C(text) {
  const asciiSwaps = [
    [D, "'"],
    [E, '"'],
    [F, "-"],
    [G, "..."],
    [H, "*"],
  ];
  let result = text;
  for (const [pattern, replacement] of asciiSwaps) {
    result = result.replace(pattern, replacement);
  }
  // 0xFEE0 (65248) is the offset subtracted from each fullwidth code unit.
  return result.replace(I, (ch) => String.fromCharCode(ch.charCodeAt(0) - 0xfee0));
}

// Emoji matcher; assigned later in the bundle.
var r;
|
|
4
|
-
// Emoji matcher `r`: built from a Unicode-property-escape pattern; if the RegExp constructor throws, the catch branch falls back to the single range U+1F300–U+1FAFF.
// Remaining module-level patterns: v = any newline style (CRLF, CR or LF); x = spaces/tabs around a run of newlines (the run is captured as $1);
// z = two or more newlines; y = three or more newlines; A = two or more spaces;
// t = C0 controls except tab, LF and CR, plus DEL, C1 controls (U+0080–U+009F) and the bidi marks/embeddings U+200E, U+200F, U+202A–U+202E.
try{r=RegExp("(?:\\p{Extended_Pictographic}(?:\\uFE0F|\\uFE0E)?(?:\\u200D(?:\\p{Extended_Pictographic}|\\w)+)*)","gu")}catch{r=/[\u{1F300}-\u{1FAFF}]/gu}var v=/\r\n|\r|\n/g,x=/[ \t]*(\n+)[ \t]*/g,z=/\n{2,}/g,y=/\n{3,}/g,A=/ {2,}/g,t=/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F\u0080-\u009F\u200E\u200F\u202A-\u202E]+/g;
|
|
4
|
+
// Emoji matcher `r` (the binding itself is declared earlier in this bundle).
//
// Preferred form, using Unicode property escapes:
//   - a pictograph, optionally followed by a variation selector
//     (U+FE0E/U+FE0F) or a skin-tone modifier (U+1F3FB–U+1F3FF);
//   - any number of ZWJ-joined pictographs, each with the same optional suffix;
//   - or a lone regional indicator (U+1F1E6–U+1F1FF) or a lone skin-tone
//     modifier, so flags and stray modifiers are matched just as the fallback
//     range below matches them.
// Only pictographs may follow a ZWJ: the previous pattern also accepted \w
// there and therefore swallowed ordinary ASCII text that came after a ZWJ.
try {
  r = RegExp(
    "(?:" +
      "\\p{Extended_Pictographic}[\\uFE0E\\uFE0F\\u{1F3FB}-\\u{1F3FF}]?" +
      "(?:\\u200D\\p{Extended_Pictographic}[\\uFE0E\\uFE0F\\u{1F3FB}-\\u{1F3FF}]?)*" +
      "|[\\u{1F1E6}-\\u{1F1FF}]" +
      "|[\\u{1F3FB}-\\u{1F3FF}]" +
      ")",
    "gu"
  );
} catch {
  // The constructor rejected the property-escape pattern: fall back to
  // explicit ranges (dingbats, U+1F300–U+1FAFF, skin tones, ZWJ, VS16,
  // regional indicators).
  r = /[\u{2700}-\u{27BF}\u{1F300}-\u{1FAFF}\u{1F3FB}-\u{1F3FF}\u200D\uFE0F\u{1F1E6}-\u{1F1FF}]/gu;
}

// Any newline style: CRLF, lone CR or lone LF.
var v = /\r\n|\r|\n/g;
// Spaces/tabs surrounding a run of newlines; the run itself is captured as $1.
var x = /[ \t]*(\n+)[ \t]*/g;
// Two or more consecutive newlines.
var z = /\n{2,}/g;
// Three or more consecutive newlines.
var y = /\n{3,}/g;
// Two or more consecutive spaces.
var A = / {2,}/g;
// C0 controls except tab, LF and CR, plus DEL, C1 controls and the bidi
// marks/embeddings U+200E, U+200F, U+202A–U+202E.
var t = /[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F\u0080-\u009F\u200E\u200F\u202A-\u202E]+/g;
|
|
5
5
|
/**
 * Inspects a string and reports which kinds of "dirty" content it contains.
 *
 * @param {string} a Text to inspect.
 * @returns {{hasControlChars: boolean, hasInvisibleChars: boolean,
 *   hasMixedNewlines: boolean, newlineStyle: (string|null), hasEmojis: boolean,
 *   hasNonKeyboardChars: boolean, summary: string[]}} Findings; `summary`
 *   holds one human-readable message per finding, in detection order.
 * @throws {TypeError} If `a` is not a string.
 */
function J(a) {
  if (typeof a !== "string") {
    throw new TypeError("inspectText expects a string input.");
  }

  const summary = [];
  const report = {
    hasControlChars: false,
    hasInvisibleChars: false,
    hasMixedNewlines: false,
    newlineStyle: null,
    hasEmojis: false,
    hasNonKeyboardChars: false,
    summary,
  };

  const flag = (detected, key, message) => {
    if (detected) {
      report[key] = true;
      summary.push(message);
    }
  };

  // t, n and r are shared regexes with the `g` flag. RegExp.prototype.test
  // resumes from their persisted lastIndex, so a second inspection of the same
  // text could start past the match and report false. String.prototype.search
  // always scans from index 0 and leaves lastIndex untouched.
  const contains = (pattern) => a.search(pattern) !== -1;

  flag(contains(t), "hasControlChars", "Control characters detected.");
  flag(contains(n), "hasInvisibleChars", "Invisible Unicode characters detected.");
  flag(contains(r), "hasEmojis", "Emojis detected.");

  // K reports which newline styles occur (`types`) and whether more than one does (`j`).
  const { j: isMixed, types } = K(a);
  report.hasMixedNewlines = isMixed;
  report.newlineStyle = isMixed ? "Mixed" : types[0] || null;
  if (report.newlineStyle) {
    summary.push(
      isMixed
        ? "Mixed newline styles detected."
        : `Consistent newline style: ${report.newlineStyle}`
    );
  }

  // After ASCII normalisation, each remaining non-keyboard run is dropped when
  // it contains an emoji and otherwise replaced by the marker U+2612; any
  // marker left in the result means a non-keyboard character was present.
  const marked = C(a).replace(B, (run) => (run.match(r) ? "" : "\u2612"));
  flag(/[\u2612]/.test(marked), "hasNonKeyboardChars", "Non-keyboard characters detected.");

  return report;
}
|
|
7
7
|
/**
 * Counts the newline styles used in a string.
 *
 * CRLF pairs are counted first and removed, so the lone-CR and lone-LF counts
 * never include characters that belong to a CRLF pair.
 *
 * @param {string} a Text to analyse.
 * @returns {{h: number, g: number, i: number, types: string[], j: boolean}}
 *   `h` = CRLF count, `g` = lone CR count, `i` = lone LF count, `types` = the
 *   styles present (in the order "CRLF", "CR", "LF"), `j` = true when more
 *   than one style is present.
 * @throws {TypeError} If `a` is not a string.
 */
function K(a) {
  if (typeof a !== "string") {
    throw new TypeError("getNewlineStats expects a string input.");
  }

  // Splitting on a literal yields (occurrences + 1) pieces.
  const occurrences = (haystack, needle) => haystack.split(needle).length - 1;

  const withoutCrlf = a.split("\r\n").join("");
  const counts = {
    h: occurrences(a, "\r\n"),
    g: occurrences(withoutCrlf, "\r"),
    i: occurrences(withoutCrlf, "\n"),
  };

  const types = [
    ["CRLF", counts.h],
    ["CR", counts.g],
    ["LF", counts.i],
  ]
    .filter(([, count]) => count > 0)
    .map(([label]) => label);

  return { ...counts, types, j: types.length > 1 };
}
|
package/package.json
CHANGED
package/src/sanctifyText.js
CHANGED
|
@@ -190,7 +190,7 @@ export function sanctifyText(
|
|
|
190
190
|
if (purgeInvisibleChars) cleaned = purgeInvisibleTrash(cleaned);
|
|
191
191
|
|
|
192
192
|
// Remove emojis
|
|
193
|
-
if (purgeEmojis) cleaned =
|
|
193
|
+
if (purgeEmojis) cleaned = purgeEmojiCharacters(cleaned);
|
|
194
194
|
|
|
195
195
|
// Nuke control characters (excluding whitespace)
|
|
196
196
|
if (nukeControls) cleaned = purgeControlCharacters(cleaned);
|
|
@@ -297,7 +297,7 @@ export let EMOJI_REGEX;
|
|
|
297
297
|
|
|
298
298
|
/**
|
|
299
299
|
* Try Unicode property escape regex (preferred).
|
|
300
|
-
* Fallback to basic emoji
|
|
300
|
+
* Fallback to basic emoji ranges if unsupported.
|
|
301
301
|
*/
|
|
302
302
|
try {
|
|
303
303
|
EMOJI_REGEX = new RegExp(
|
|
@@ -305,11 +305,16 @@ try {
|
|
|
305
305
|
'gu'
|
|
306
306
|
);
|
|
307
307
|
} catch {
|
|
308
|
-
// Fallback:
|
|
309
|
-
|
|
308
|
+
// Fallback: wide-range emoji component match (flags, tones, symbols)
|
|
309
|
+
// * Covers:
|
|
310
|
+
// * - Emoji base chars (1F300–1FAFF)
|
|
311
|
+
// * - Dingbats (2700–27BF) like ❌, ✅, ✔️
|
|
312
|
+
// * - Skin tones (1F3FB–1F3FF)
|
|
313
|
+
// * - ZWJ (200D), variation selector-16 (FE0F)
|
|
314
|
+
// * - Regional indicators (1F1E6–1F1FF)
|
|
315
|
+
EMOJI_REGEX = /[\u{2700}-\u{27BF}\u{1F300}-\u{1FAFF}\u{1F3FB}-\u{1F3FF}\u200D\uFE0F\u{1F1E6}-\u{1F1FF}]/gu;
|
|
310
316
|
}
|
|
311
317
|
|
|
312
|
-
|
|
313
318
|
/**
|
|
314
319
|
* Removes all emoji characters using Unicode property escapes.
|
|
315
320
|
* Supports modern environments (Unicode v13+) with fallback.
|
|
@@ -317,7 +322,7 @@ try {
|
|
|
317
322
|
* @param {string} text
|
|
318
323
|
* @returns {string}
|
|
319
324
|
*/
|
|
320
|
-
function
|
|
325
|
+
function purgeEmojiCharacters(text) {
  // Every EMOJI_REGEX match is replaced with the empty string; the rest of
  // the text is returned unchanged.
  const withoutEmojis = text.replace(EMOJI_REGEX, '');
  return withoutEmojis;
}
|
|
323
328
|
|