text-sanctifier 1.0.14 → 1.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -7,8 +7,8 @@
7
7
 
8
8
  Brutal text normalizer and invisible trash scrubber for modern web projects.
9
9
 
10
- * Minified: (2.69 KB)
11
- * Gzipped (GCC): (1.27 KB)
10
+ * Minified: (3.09 KB)
11
+ * Gzipped (GCC): (1.36 KB)
12
12
 
13
13
  ## Features
14
14
 
@@ -109,6 +109,8 @@ const report = inspectText(input);
109
109
 
110
110
  Use `inspectText` to preflight text content before rendering, storing, or linting. It's a diagnostic tool to help inform sanitization needs.
111
111
 
112
+ Pass the report to `getRecommendedSanctifierOptions(report)` to auto-generate config flags for `summonSanctifier()`.
113
+
112
114
  ---
113
115
 
114
116
  ## API
@@ -1,7 +1,8 @@
1
1
  function g(a={}){const b=!!a.purgeInvisibleChars,c=!!a.purgeEmojis,d=!!a.nukeControls,e=!!a.keyboardOnlyFilter,k=!!a.normalizeNewlines,f=!!a.trimSpacesAroundNewlines,l=!!a.collapseNewLines,m=!!a.preserveParagraphs,p=!!a.collapseSpaces,q=!!a.finalTrim;return w=>h(w,b,c,d,e,k,f,l,m,p,q)}g.strict=a=>h(a,!0,!0,!0,!1,!0,!0,!0,!1,!0,!0);g.loose=a=>h(a,!1,!1,!1,!1,!0,!0,!0,!0,!0,!0);g.keyboardOnlyEmoji=a=>h(a,!1,!1,!1,!0,!0,!0,!1,!1,!1,!0);g.keyboardOnly=a=>h(a,!0,!0,!0,!0,!0,!0,!0,!1,!0,!0);
2
2
  function h(a,b=!1,c=!1,d=!1,e=!1,k=!1,f=!1,l=!1,m=!1,p=!1,q=!1){if("string"!==typeof a)throw new TypeError("sanctifyText expects a string input.");b&&(a=a.replace(n,""));c&&(a=a.replace(r,""));d&&(a=a.replace(t,""));e&&(a=u(a,c));k&&(a=a.replace(v,"\n"));f&&(a=a.replace(x,"$1"));l&&(b=a,a=m?b.replace(y,"\n\n"):b.replace(z,"\n"));p&&(a=a.replace(A," "));return q?a.trim():a}var n=/[\u00A0\u2000-\u200D\u202F\u2060\u3000\uFEFF\u200E\u200F\u202A-\u202E]+/g,B=/[^\x20-\x7E\r\n]+/gu;
3
3
  function u(a,b=!1){a=C(a);return b?a.replace(B,""):a.replace(B,c=>c.match(r)?c:"")}var D=/[\u2018\u2019\u201A\u201B\u2032\u2035]/g,E=/[\u201C\u201D\u201E\u201F\u2033\u2036\u00AB\u00BB]/g,F=/[\u2012\u2013\u2014\u2015\u2212]/g,G=/\u2026/g,H=/[\u2022\u00B7]/g,I=/[\uFF01-\uFF5E]/g;function C(a){return a.replace(D,"'").replace(E,'"').replace(F,"-").replace(G,"...").replace(H,"*").replace(I,b=>String.fromCharCode(b.charCodeAt(0)-65248))}var r;
4
- try{r=RegExp("(?:\\p{Extended_Pictographic}(?:\\uFE0F|\\uFE0E)?(?:\\u200D(?:\\p{Extended_Pictographic}|\\w)+)*)","gu")}catch{r=/[\u{1F300}-\u{1FAFF}]/gu}var v=/\r\n|\r|\n/g,x=/[ \t]*(\n+)[ \t]*/g,z=/\n{2,}/g,y=/\n{3,}/g,A=/ {2,}/g,t=/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F\u0080-\u009F\u200E\u200F\u202A-\u202E]+/g;
5
- function J(a){if("string"!==typeof a)throw new TypeError("inspectText expects a string input.");const b=[],c={o:!1,u:!1,j:!1,g:null,s:!1,v:!1,summary:b},d=(f,l,m)=>{f&&(c[l]=!0,b.push(m))};d(t.test(a),"hasControlChars","Control characters detected.");d(n.test(a),"hasInvisibleChars","Invisible Unicode characters detected.");d(r.test(a),"hasEmojis","Emojis detected.");const {m:e,types:k}=K(a);c.j=e;c.g=e?"Mixed":k[0]||null;c.g&&b.push(e?"Mixed newline styles detected.":`Consistent newline style: ${c.g}`);
6
- a=C(a).replace(B,f=>f.match(r)?"":"\u2612");d(/[\u2612]/.test(a),"hasNonKeyboardChars","Non-keyboard characters detected.");return c}function K(a){if("string"!==typeof a)throw new TypeError("getNewlineStats expects a string input.");var b=a.replace(/\r\n/g,"");a={i:(a.match(/\r\n/g)||[]).length,h:(b.match(/\r/g)||[]).length,l:(b.match(/\n/g)||[]).length};b=[];0<a.i&&b.push("CRLF");0<a.h&&b.push("CR");0<a.l&&b.push("LF");return{...a,types:b,m:1<b.length}}
7
- export { g as summonSanctifier, J as inspectText };
4
+ try{r=RegExp("(?:\\p{Extended_Pictographic}(?:\\uFE0F|\\uFE0E)?(?:\\u200D(?:\\p{Extended_Pictographic}|\\w)+)*)","gu")}catch{r=/[\u{2700}-\u{27BF}\u{1F300}-\u{1FAFF}\u{1F3FB}-\u{1F3FF}\u200D\uFE0F\u{1F1E6}-\u{1F1FF}]/gu}var v=/\r\n|\r|\n/g,x=/[ \t]*(\n+)[ \t]*/g,z=/\n{2,}/g,y=/\n{3,}/g,A=/ {2,}/g,t=/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F\u0080-\u009F\u200E\u200F\u202A-\u202E]+/g;
5
+ function J(a){if("string"!==typeof a)throw new TypeError("inspectText expects a string input.");const b=[],c={hasControlChars:!1,hasInvisibleChars:!1,hasMixedNewlines:!1,newlineStyle:null,hasEmojis:!1,hasNonKeyboardChars:!1,summary:b},d=(f,l,m)=>{f&&(c[l]=!0,b.push(m))};d(t.test(a),"hasControlChars","Control characters detected.");d(n.test(a),"hasInvisibleChars","Invisible Unicode characters detected.");d(r.test(a),"hasEmojis","Emojis detected.");const {j:e,types:k}=K(a);c.hasMixedNewlines=e;c.newlineStyle=
6
+ e?"Mixed":k[0]||null;c.newlineStyle&&b.push(e?"Mixed newline styles detected.":`Consistent newline style: ${c.newlineStyle}`);a=C(a).replace(B,f=>f.match(r)?"":"\u2612");d(/[\u2612]/.test(a),"hasNonKeyboardChars","Non-keyboard characters detected.");return c}
7
+ function K(a){if("string"!==typeof a)throw new TypeError("getNewlineStats expects a string input.");var b=a.replace(/\r\n/g,"");a={h:(a.match(/\r\n/g)||[]).length,g:(b.match(/\r/g)||[]).length,i:(b.match(/\n/g)||[]).length};b=[];0<a.h&&b.push("CRLF");0<a.g&&b.push("CR");0<a.i&&b.push("LF");return{...a,types:b,j:1<b.length}}
8
+ function L(a){return{purgeInvisibleChars:a.hasInvisibleChars,purgeEmojis:a.hasEmojis,nukeControls:a.hasControlChars,keyboardOnlyFilter:a.hasNonKeyboardChars,normalizeNewlines:a.hasMixedNewlines||"CRLF"===a.newlineStyle||"CR"===a.newlineStyle}}export { g as summonSanctifier, J as inspectText, L as getRecommendedSanctifierOptions };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "text-sanctifier",
3
- "version": "1.0.14",
3
+ "version": "1.0.16",
4
4
  "type": "module",
5
5
  "description": "A brutal text normalizer and invisible trash scrubber for modern web projects.",
6
6
  "main": "./src/index.js",
package/src/index.d.ts CHANGED
@@ -104,3 +104,13 @@ export interface UnicodeTrashReport {
104
104
  * invisible characters, newline styles, emojis, and more.
105
105
  */
106
106
  export function inspectText(text: string): UnicodeTrashReport;
107
+
108
+
109
+ /**
110
+ * Creates a recommended set of `summonSanctifier` options based on the findings
111
+ * of `inspectText()`. This maps only what can be inferred automatically —
112
+ * user-preference settings like whitespace collapsing are left unset.
113
+ */
114
+ export function getRecommendedSanctifierOptions(
115
+ report: UnicodeTrashReport
116
+ ): SanctifyOptions;
package/src/index.js CHANGED
@@ -1,8 +1,8 @@
1
1
  // src/index.js
2
2
 
3
3
 
4
- import { inspectText } from './inspectText.js';
5
- export { inspectText };
4
+ import { inspectText, getRecommendedSanctifierOptions } from './inspectText.js';
5
+ export { inspectText, getRecommendedSanctifierOptions };
6
6
 
7
7
  import { summonSanctifier } from './sanctifyText.js';
8
8
  export { summonSanctifier };
@@ -1,4 +1,5 @@
1
1
 
2
+ // ./src/inspectText.js
2
3
 
3
4
 
4
5
  import {
@@ -7,9 +8,10 @@ import {
7
8
  EMOJI_REGEX,
8
9
  ASCII_KEYBOARD_SAFE_REGEX,
9
10
  normalizeTypographicJank
10
- } from './sanctifyText.js';
11
+ } from './sanctifyText.js';
12
+
13
+
11
14
 
12
-
13
15
  /**
14
16
  * Detects textual "trash" or anomalies in a given string.
15
17
  * @param {string} text
@@ -24,53 +26,53 @@ import {
24
26
  * }}
25
27
  */
26
28
  export function inspectText(text) {
27
- if (typeof text !== 'string') {
28
- throw new TypeError('inspectText expects a string input.');
29
- }
30
-
31
- const summary = [];
32
- const report = {
33
- hasControlChars: false,
34
- hasInvisibleChars: false,
35
- hasMixedNewlines: false,
36
- newlineStyle: null,
37
- hasEmojis: false,
38
- hasNonKeyboardChars: false,
39
- summary
40
- };
41
-
42
- const flag = (condition, key, message) => {
43
- if (condition) {
44
- report[key] = true;
45
- summary.push(message);
46
- }
47
- };
48
-
49
- // === Pattern Checks ===
50
- flag(CONTROL_CHARS_REGEX.test(text), 'hasControlChars', 'Control characters detected.');
51
- flag(INVISIBLE_TRASH_REGEX.test(text), 'hasInvisibleChars', 'Invisible Unicode characters detected.');
52
- flag(EMOJI_REGEX.test(text), 'hasEmojis', 'Emojis detected.');
53
-
54
- // === Newline Analysis ===
55
- const { mixed, types } = getNewlineStats(text);
56
- report.hasMixedNewlines = mixed;
57
- report.newlineStyle = mixed ? 'Mixed' : types[0] || null;
58
-
59
- if (report.newlineStyle) {
60
- summary.push(
61
- mixed
62
- ? 'Mixed newline styles detected.'
63
- : `Consistent newline style: ${report.newlineStyle}`
64
- );
65
- }
66
-
67
- // === Non-keyboard characters (excluding emojis) ===
68
- const filtered = normalizeTypographicJank(text).replace(ASCII_KEYBOARD_SAFE_REGEX, m =>
69
- m.match(EMOJI_REGEX) ? '' : '☒'
70
- );
71
- flag(/[☒]/.test(filtered), 'hasNonKeyboardChars', 'Non-keyboard characters detected.');
72
-
73
- return report;
29
+ if (typeof text !== 'string') {
30
+ throw new TypeError('inspectText expects a string input.');
31
+ }
32
+
33
+ const summary = [];
34
+ const report = {
35
+ hasControlChars: false,
36
+ hasInvisibleChars: false,
37
+ hasMixedNewlines: false,
38
+ newlineStyle: null,
39
+ hasEmojis: false,
40
+ hasNonKeyboardChars: false,
41
+ summary
42
+ };
43
+
44
+ const flag = (condition, key, message) => {
45
+ if (condition) {
46
+ report[key] = true;
47
+ summary.push(message);
48
+ }
49
+ };
50
+
51
+ // === Pattern Checks ===
52
+ flag(CONTROL_CHARS_REGEX.test(text), 'hasControlChars', 'Control characters detected.');
53
+ flag(INVISIBLE_TRASH_REGEX.test(text), 'hasInvisibleChars', 'Invisible Unicode characters detected.');
54
+ flag(EMOJI_REGEX.test(text), 'hasEmojis', 'Emojis detected.');
55
+
56
+ // === Newline Analysis ===
57
+ const { mixed, types } = getNewlineStats(text);
58
+ report.hasMixedNewlines = mixed;
59
+ report.newlineStyle = mixed ? 'Mixed' : types[0] || null;
60
+
61
+ if (report.newlineStyle) {
62
+ summary.push(
63
+ mixed
64
+ ? 'Mixed newline styles detected.'
65
+ : `Consistent newline style: ${report.newlineStyle}`
66
+ );
67
+ }
68
+
69
+ // === Non-keyboard characters (excluding emojis) ===
70
+ const filtered = normalizeTypographicJank(text).replace(ASCII_KEYBOARD_SAFE_REGEX, m =>
71
+ m.match(EMOJI_REGEX) ? '' : '☒'
72
+ );
73
+ flag(/[☒]/.test(filtered), 'hasNonKeyboardChars', 'Non-keyboard characters detected.');
74
+
75
+ return report;
74
76
  }
75
77
 
76
78
 
@@ -86,30 +88,52 @@ export function inspectText(text) {
86
88
  * }}
87
89
  */
88
90
  export function getNewlineStats(text) {
89
- if (typeof text !== 'string') {
90
- throw new TypeError('getNewlineStats expects a string input.');
91
- }
92
-
93
- const crlfMatches = text.match(/\r\n/g) || [];
94
- const textWithoutCRLF = text.replace(/\r\n/g, '');
95
-
96
- const crMatches = textWithoutCRLF.match(/\r/g) || [];
97
- const lfMatches = textWithoutCRLF.match(/\n/g) || [];
98
-
99
- const count = {
100
- crlf: crlfMatches.length,
101
- cr: crMatches.length,
102
- lf: lfMatches.length
103
- };
104
-
105
- const types = [];
106
- if (count.crlf > 0) types.push('CRLF');
107
- if (count.cr > 0) types.push('CR');
108
- if (count.lf > 0) types.push('LF');
109
-
110
- return {
111
- ...count,
112
- types,
113
- mixed: types.length > 1
114
- };
91
+ if (typeof text !== 'string') {
92
+ throw new TypeError('getNewlineStats expects a string input.');
93
+ }
94
+
95
+ const crlfMatches = text.match(/\r\n/g) || [];
96
+ const textWithoutCRLF = text.replace(/\r\n/g, '');
97
+
98
+ const crMatches = textWithoutCRLF.match(/\r/g) || [];
99
+ const lfMatches = textWithoutCRLF.match(/\n/g) || [];
100
+
101
+ const count = {
102
+ crlf: crlfMatches.length,
103
+ cr: crMatches.length,
104
+ lf: lfMatches.length
105
+ };
106
+
107
+ const types = [];
108
+ if (count.crlf > 0) types.push('CRLF');
109
+ if (count.cr > 0) types.push('CR');
110
+ if (count.lf > 0) types.push('LF');
111
+
112
+ return {
113
+ ...count,
114
+ types,
115
+ mixed: types.length > 1
116
+ };
117
+ }
118
+
119
+
120
+
121
/**
 * Builds a recommended `summonSanctifier` options object from the findings of
 * an `inspectText()` report.
 *
 * Only flags that can be inferred from the report are set. Each one is a strict
 * boolean, so a partial report yields `false` rather than `undefined`.
 * Newline normalization is recommended when styles are mixed or when the single
 * detected style is CRLF or CR (i.e. anything other than plain LF).
 *
 * @param {!UnicodeTrashReport} report Result of `inspectText()`.
 * @return {!SanctifyOptions}
 * @throws {TypeError} If `report` is not a non-null object.
 */
export function getRecommendedSanctifierOptions(report) {
  // Same fail-fast style as inspectText/getNewlineStats: reject bad input with
  // a descriptive TypeError instead of an engine-generated property-access error.
  if (report === null || typeof report !== 'object') {
    throw new TypeError('getRecommendedSanctifierOptions expects an inspectText report object.');
  }

  const {
    hasInvisibleChars,
    hasEmojis,
    hasControlChars,
    hasNonKeyboardChars,
    hasMixedNewlines,
    newlineStyle,
  } = report;

  // Preference-driven options (trimSpacesAroundNewlines, collapseNewLines,
  // preserveParagraphs, collapseSpaces, finalTrim) are intentionally left unset:
  // they cannot be inferred from a report and remain the caller's choice.
  return {
    purgeInvisibleChars: Boolean(hasInvisibleChars),
    purgeEmojis: Boolean(hasEmojis),
    nukeControls: Boolean(hasControlChars),
    keyboardOnlyFilter: Boolean(hasNonKeyboardChars),
    normalizeNewlines: Boolean(hasMixedNewlines) || newlineStyle === 'CRLF' || newlineStyle === 'CR',
  };
}
@@ -190,7 +190,7 @@ export function sanctifyText(
190
190
  if (purgeInvisibleChars) cleaned = purgeInvisibleTrash(cleaned);
191
191
 
192
192
  // Remove emojis
193
- if (purgeEmojis) cleaned = purgeEmojisCharacters(cleaned);
193
+ if (purgeEmojis) cleaned = purgeEmojiCharacters(cleaned);
194
194
 
195
195
  // Nuke control characters (excluding whitespace)
196
196
  if (nukeControls) cleaned = purgeControlCharacters(cleaned);
@@ -297,7 +297,7 @@ export let EMOJI_REGEX;
297
297
 
298
298
  /**
299
299
  * Try Unicode property escape regex (preferred).
300
- * Fallback to basic emoji range if unsupported.
300
+ * Fallback to basic emoji ranges if unsupported.
301
301
  */
302
302
  try {
303
303
  EMOJI_REGEX = new RegExp(
@@ -305,11 +305,16 @@ try {
305
305
  'gu'
306
306
  );
307
307
  } catch {
308
- // Fallback: less precise but safe
309
- EMOJI_REGEX = /[\u{1F300}-\u{1FAFF}]/gu;
308
+ // Fallback: wide-range emoji component match (flags, tones, symbols)
309
+ // * Covers:
310
+ // * - Emoji base chars (1F300–1FAFF)
311
+ // * - Dingbats (2700–27BF) like ❌, ✅, ✨
312
+ // * - Skin tones (1F3FB–1F3FF)
313
+ // * - ZWJ (200D), variation selectors (FE0F)
314
+ // * - Regional indicators (1F1E6–1F1FF)
315
+ EMOJI_REGEX = /[\u{2700}-\u{27BF}\u{1F300}-\u{1FAFF}\u{1F3FB}-\u{1F3FF}\u200D\uFE0F\u{1F1E6}-\u{1F1FF}]/gu;
310
316
  }
311
317
 
312
-
313
318
  /**
314
319
  * Removes all emoji characters using Unicode property escapes.
315
320
  * Supports modern environments (Unicode v13+) with fallback.
@@ -317,7 +322,7 @@ try {
317
322
  * @param {string} text
318
323
  * @returns {string}
319
324
  */
320
- function purgeEmojisCharacters(text) {
325
function purgeEmojiCharacters(text) {
  // Every EMOJI_REGEX match is replaced with the empty string; the rest of the
  // text is returned unchanged.
  const emojiFree = text.replace(EMOJI_REGEX, '');
  return emojiFree;
}
323
328