text-sanctifier 1.0.13 → 1.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -5,22 +5,21 @@
5
5
  [![downloads](https://img.shields.io/npm/dw/text-sanctifier)](https://www.npmjs.com/package/text-sanctifier)
6
6
  [![GitHub stars](https://img.shields.io/github/stars/iWhatty/text-sanctifier?style=social)](https://github.com/iWhatty/text-sanctifier)
7
7
 
8
-
9
8
  Brutal text normalizer and invisible trash scrubber for modern web projects.
10
9
 
11
- * Minified: (2.45 KB)
12
- * Gzipped (GCC) : (1.18 KB)
10
+ * Minified: (3.09 KB)
11
+ * Gzipped (GCC): (1.36 KB)
13
12
 
14
13
  ## Features
15
14
 
16
15
  * Purges zero-width Unicode garbage
17
- * Normalizes line endings
16
+ * Normalizes line endings (CRLF, CR, LF) → LF
18
17
  * Collapses unwanted spaces and paragraphs
19
18
  * Nukes control characters (if enabled)
20
- * Configurable via options or presets
21
- * Includes strict and loose sanitization modes
22
- * **NEW:** Keyboard-only filtering retains only printable ASCII + emojis
23
- * **NEW:** Smart normalization of typographic junk (smart quotes, em dashes, full-width punctuation)
19
+ * Smart normalization of typographic junk (quotes, dashes, bullets, full-width punctuation)
20
+ * Keyboard-only filtering (retain printable ASCII + emoji, or printable ASCII only)
21
+ * Configurable via fine-grained flags or ready-made presets
22
+ * Includes strict, loose, and keyboard-only modes
24
23
 
25
24
  ## Install
26
25
 
@@ -30,98 +29,65 @@ npm install text-sanctifier
30
29
 
31
30
  ## 📦 Package & Build Info
32
31
 
33
- * **Source (`src/`)**: ES2020+ ESM modules with JSDoc. Designed for modern bundlers and full tree-shaking.
34
- * **Browser Bundle (`dist/`)**: Pre-minified ES2020+ module (`text-sanctifier.min.js`, 0.70 KB minified / 0.43 KB gzipped) for direct `<script type="module">` usage.
35
- * **Module Format**: Native ESM (ECMAScript Modules).
36
- * **Bundler Compatibility**: Optimized for Vite, Rollup, Webpack 5+, ESBuild, and Parcel.
37
- * **Transpilation**: The (`src/`) allows you to downlevel in your build process (e.g., targeting `es2015`).
38
- * **No Transpilers Included**: No built-in shims, polyfills, or transpilation; you control environment compatibility.
39
- * **Tree-shaking Friendly**: Fully optimized with `sideEffects: false` for dead code elimination.
40
- * **Publishing Philosophy**:
32
+ * **Source (`src/`)**: ES2020+ ESM modules with JSDoc
33
+ * **Browser Build (`dist/`)**: Minified ESM bundle for `<script type="module">`
34
+ * **Tree-shaking Friendly**: Fully optimized with `sideEffects: false`
35
+ * **Zero Transpilation**: No built-in polyfills or runtime overhead
36
+ * **Bundler Ready**: Works great with Vite, Rollup, Webpack, Parcel, etc.
41
37
 
42
- * Source-first design for flexibility, debuggability, and modern bundling pipelines.
43
- * Minified bundle included separately for raw browser consumption without a build step.
38
+ ---
44
39
 
45
- ## Quick Usage
40
+ ## 🔧 Quick Usage
46
41
 
47
- ### Basic (via `summonSanctifier`)
42
+ ### Custom Config
48
43
 
49
- ```javascript
44
+ ```js
50
45
  import { summonSanctifier } from 'text-sanctifier';
51
46
 
52
- const customSanitizer = summonSanctifier({
53
- preserveParagraphs: true,
54
- collapseSpaces: true,
55
- nukeControls: true,
47
+ const clean = summonSanctifier({
48
+ purgeInvisibleChars: true,
56
49
  purgeEmojis: true,
50
+ collapseSpaces: true,
51
+ collapseNewLines: true,
52
+ preserveParagraphs: true,
53
+ finalTrim: true,
57
54
  });
58
55
 
59
- const cleaned = customSanitizer(rawText);
56
+ const output = clean(rawText);
60
57
  ```
61
58
 
62
- ### Strict Mode (aggressive cleanup)
63
-
64
- ```javascript
65
- import { summonSanctifier } from 'text-sanctifier';
59
+ ### Strict Preset
66
60
 
67
- const strictSanitizer = summonSanctifier.strict;
68
- const cleanText = strictSanitizer(rawText);
61
+ ```js
62
+ const output = summonSanctifier.strict(rawText);
69
63
  ```
70
64
 
71
- ### Loose Mode (preserve paragraphs)
72
-
73
- ```javascript
74
- import { summonSanctifier } from 'text-sanctifier';
65
+ ### Loose Preset
75
66
 
76
- const looseSanitizer = summonSanctifier.loose;
77
- const cleanBodyText = looseSanitizer(rawInput);
67
+ ```js
68
+ const output = summonSanctifier.loose(rawText);
78
69
  ```
79
70
 
80
- ### Keyboard-only Mode (keep only printable ASCII)
71
+ ### Keyboard-Only (No Emojis)
81
72
 
82
- ```javascript
83
- const keyboardOnly = summonSanctifier.keyboardOnly;
84
- const asciiOnlyText = keyboardOnly(userInput);
73
+ ```js
74
+ const output = summonSanctifier.keyboardOnly(userInput);
85
75
  ```
86
76
 
87
- ### Keyboard-only with Emoji Support
77
+ ### Keyboard-Only (With Emojis)
88
78
 
89
- ```javascript
90
- const keyboardWithEmoji = summonSanctifier.keyboardOnlyEmoji;
91
- const cleanAndFun = keyboardWithEmoji(commentBox);
79
+ ```js
80
+ const output = summonSanctifier.keyboardOnlyEmoji(commentText);
92
81
  ```
93
82
 
94
- ## API
95
-
96
- #### `summonSanctifier(options?: SanctifyOptions): (text: string) => string`
97
-
98
- Creates a sanitizer with options pre-bound.
99
-
100
- #### `summonSanctifier.strict: (text: string) => string`
101
-
102
- Strict sanitizer preset (collapse spaces, collapse all newlines, nuke controls, purge Emojis).
103
-
104
- #### `summonSanctifier.loose: (text: string) => string`
105
-
106
- Loose sanitizer preset (preserve paragraph breaks, collapse spaces, skip nuking controls, preserve Emojis).
107
-
108
- #### `summonSanctifier.keyboardOnly: (text: string) => string`
109
-
110
- Removes everything except printable ASCII. Emojis are removed. Spaces are collapsed.
111
-
112
- #### `summonSanctifier.keyboardOnlyEmoji: (text: string) => string`
113
-
114
- Keeps printable ASCII and emoji characters. Typographic normalization included.
115
-
116
83
  ---
117
84
 
85
+ ## 🔍 Unicode Trash Detection
118
86
 
119
- ### Unicode Trash Detection
120
-
121
- ```javascript
87
+ ```js
122
88
  import { inspectText } from 'text-sanctifier';
123
89
 
124
- const report = inspectText(rawInput);
90
+ const report = inspectText(input);
125
91
 
126
92
  /*
127
93
  {
@@ -141,8 +107,37 @@ const report = inspectText(rawInput);
141
107
  */
142
108
  ```
143
109
 
144
- Use this to preflight inputs and flag unwanted characters (like control codes, zero-width spaces, or mixed newline styles) before sanitization or storage.
110
+ Use `inspectText` to preflight text content before rendering, storing, or linting. It's a diagnostic tool to help inform sanitization needs.
111
+
112
+ Pass the report to `getRecommendedSanctifierOptions(report)` to auto-generate config flags for `summonSanctifier()`.
113
+
114
+ ---
115
+
116
+ ## API
117
+
118
+ ### `summonSanctifier(options?: SanctifyOptions): (text: string) => string`
119
+
120
+ Creates a reusable sanitizer from an option object.
121
+
122
+ ### `summonSanctifier.strict`
123
+
124
+ Aggressively purges emojis, invisible characters, and control characters, and collapses extra spaces and newlines.
125
+
126
+ ### `summonSanctifier.loose`
127
+
128
+ Gently normalizes spacing and newlines while preserving emojis and paragraphs.
129
+
130
+ ### `summonSanctifier.keyboardOnly`
131
+
132
+ Restricts to printable ASCII only (removes emojis).
133
+
134
+ ### `summonSanctifier.keyboardOnlyEmoji`
135
+
136
+ Restricts to keyboard-safe ASCII + emojis. Preserves fun, removes weird.
137
+
138
+ ### `inspectText(text: string): UnicodeTrashReport`
145
139
 
140
+ Returns a structural report of control codes, invisible chars, newline styles, and more.
146
141
 
147
142
  ---
148
143
 
@@ -1,7 +1,8 @@
1
- function f(a={}){const b=!!a.preserveParagraphs,c=!!a.collapseSpaces,d=!!a.nukeControls,e=!!a.purgeEmojis,h=!!a.keyboardOnlyFilter;return k=>g(k,b,c,d,e,h)}f.strict=a=>g(a,!1,!0,!0,!0);f.loose=a=>g(a,!0,!0);f.keyboardOnlyEmoji=a=>g(a,!1,!1,!0,!1,!0);f.keyboardOnly=a=>g(a,!1,!0,!0,!0,!0);
2
- function g(a,b=!1,c=!1,d=!1,e=!1,h=!1){if("string"!==typeof a)throw new TypeError("sanctifyText expects a string input.");a=a.replace(l,"");e&&(a=a.replace(m,""));d&&(a=a.replace(n,""));h&&(a=p(a,e));a=a.replace(q,"\n");d=a=a.replace(r,"$1");a=b?d.replace(t,"\n\n"):d.replace(u,"\n");c&&(a=a.replace(v," "));return a.trim()}var l=/[\u00A0\u2000-\u200D\u202F\u2060\u3000\uFEFF\u200E\u200F\u202A-\u202E]+/g,w=/[^\x20-\x7E\r\n]+/gu;
3
- function p(a,b=!1){a=x(a);return b?a.replace(w,""):a.replace(w,c=>c.match(m)?c:"")}var y=/[\u2018\u2019\u201A\u201B\u2032\u2035]/g,z=/[\u201C\u201D\u201E\u201F\u2033\u2036\u00AB\u00BB]/g,A=/[\u2012\u2013\u2014\u2015\u2212]/g,B=/\u2026/g,C=/[\u2022\u00B7]/g,D=/[\uFF01-\uFF5E]/g;function x(a){return a.replace(y,"'").replace(z,'"').replace(A,"-").replace(B,"...").replace(C,"*").replace(D,b=>String.fromCharCode(b.charCodeAt(0)-65248))}var m;
4
- try{m=RegExp("(?:\\p{Extended_Pictographic}(?:\\uFE0F|\\uFE0E)?(?:\\u200D(?:\\p{Extended_Pictographic}|\\w)+)*)","gu")}catch{m=/[\u{1F300}-\u{1FAFF}]/gu}var q=/\r\n|\r|\n/g,r=/[ \t]*(\n+)[ \t]*/g,u=/\n{2,}/g,t=/\n{3,}/g,v=/ {2,}/g,n=/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F\u0080-\u009F\u200E\u200F\u202A-\u202E]+/g;
5
- function E(a){if("string"!==typeof a)throw new TypeError("inspectText expects a string input.");const b=[],c={o:!1,u:!1,j:!1,g:null,s:!1,v:!1,summary:b},d=(k,F,G)=>{k&&(c[F]=!0,b.push(G))};d(n.test(a),"hasControlChars","Control characters detected.");d(l.test(a),"hasInvisibleChars","Invisible Unicode characters detected.");d(m.test(a),"hasEmojis","Emojis detected.");const {m:e,types:h}=H(a);c.j=e;c.g=e?"Mixed":h[0]||null;c.g&&b.push(e?"Mixed newline styles detected.":`Consistent newline style: ${c.g}`);
6
- a=x(a).replace(w,k=>k.match(m)?"":"\u2612");d(/[\u2612]/.test(a),"hasNonKeyboardChars","Non-keyboard characters detected.");return c}function H(a){if("string"!==typeof a)throw new TypeError("getNewlineStats expects a string input.");var b=a.replace(/\r\n/g,"");a={i:(a.match(/\r\n/g)||[]).length,h:(b.match(/\r/g)||[]).length,l:(b.match(/\n/g)||[]).length};b=[];0<a.i&&b.push("CRLF");0<a.h&&b.push("CR");0<a.l&&b.push("LF");return{...a,types:b,m:1<b.length}}
7
- export { f as summonSanctifier, E as inspectText };
1
+ function g(a={}){const b=!!a.purgeInvisibleChars,c=!!a.purgeEmojis,d=!!a.nukeControls,e=!!a.keyboardOnlyFilter,k=!!a.normalizeNewlines,f=!!a.trimSpacesAroundNewlines,l=!!a.collapseNewLines,m=!!a.preserveParagraphs,p=!!a.collapseSpaces,q=!!a.finalTrim;return w=>h(w,b,c,d,e,k,f,l,m,p,q)}g.strict=a=>h(a,!0,!0,!0,!1,!0,!0,!0,!1,!0,!0);g.loose=a=>h(a,!1,!1,!1,!1,!0,!0,!0,!0,!0,!0);g.keyboardOnlyEmoji=a=>h(a,!1,!1,!1,!0,!0,!0,!1,!1,!1,!0);g.keyboardOnly=a=>h(a,!0,!0,!0,!0,!0,!0,!0,!1,!0,!0);
2
+ function h(a,b=!1,c=!1,d=!1,e=!1,k=!1,f=!1,l=!1,m=!1,p=!1,q=!1){if("string"!==typeof a)throw new TypeError("sanctifyText expects a string input.");b&&(a=a.replace(n,""));c&&(a=a.replace(r,""));d&&(a=a.replace(t,""));e&&(a=u(a,c));k&&(a=a.replace(v,"\n"));f&&(a=a.replace(x,"$1"));l&&(b=a,a=m?b.replace(y,"\n\n"):b.replace(z,"\n"));p&&(a=a.replace(A," "));return q?a.trim():a}var n=/[\u00A0\u2000-\u200D\u202F\u2060\u3000\uFEFF\u200E\u200F\u202A-\u202E]+/g,B=/[^\x20-\x7E\r\n]+/gu;
3
+ function u(a,b=!1){a=C(a);return b?a.replace(B,""):a.replace(B,c=>c.match(r)?c:"")}var D=/[\u2018\u2019\u201A\u201B\u2032\u2035]/g,E=/[\u201C\u201D\u201E\u201F\u2033\u2036\u00AB\u00BB]/g,F=/[\u2012\u2013\u2014\u2015\u2212]/g,G=/\u2026/g,H=/[\u2022\u00B7]/g,I=/[\uFF01-\uFF5E]/g;function C(a){return a.replace(D,"'").replace(E,'"').replace(F,"-").replace(G,"...").replace(H,"*").replace(I,b=>String.fromCharCode(b.charCodeAt(0)-65248))}var r;
4
+ try{r=RegExp("(?:\\p{Extended_Pictographic}(?:\\uFE0F|\\uFE0E)?(?:\\u200D(?:\\p{Extended_Pictographic}|\\w)+)*)","gu")}catch{r=/[\u{1F300}-\u{1FAFF}]/gu}var v=/\r\n|\r|\n/g,x=/[ \t]*(\n+)[ \t]*/g,z=/\n{2,}/g,y=/\n{3,}/g,A=/ {2,}/g,t=/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F\u0080-\u009F\u200E\u200F\u202A-\u202E]+/g;
5
+ function J(a){if("string"!==typeof a)throw new TypeError("inspectText expects a string input.");const b=[],c={hasControlChars:!1,hasInvisibleChars:!1,hasMixedNewlines:!1,newlineStyle:null,hasEmojis:!1,hasNonKeyboardChars:!1,summary:b},d=(f,l,m)=>{f&&(c[l]=!0,b.push(m))};d(t.test(a),"hasControlChars","Control characters detected.");d(n.test(a),"hasInvisibleChars","Invisible Unicode characters detected.");d(r.test(a),"hasEmojis","Emojis detected.");const {j:e,types:k}=K(a);c.hasMixedNewlines=e;c.newlineStyle=
6
+ e?"Mixed":k[0]||null;c.newlineStyle&&b.push(e?"Mixed newline styles detected.":`Consistent newline style: ${c.newlineStyle}`);a=C(a).replace(B,f=>f.match(r)?"":"\u2612");d(/[\u2612]/.test(a),"hasNonKeyboardChars","Non-keyboard characters detected.");return c}
7
+ function K(a){if("string"!==typeof a)throw new TypeError("getNewlineStats expects a string input.");var b=a.replace(/\r\n/g,"");a={h:(a.match(/\r\n/g)||[]).length,g:(b.match(/\r/g)||[]).length,i:(b.match(/\n/g)||[]).length};b=[];0<a.h&&b.push("CRLF");0<a.g&&b.push("CR");0<a.i&&b.push("LF");return{...a,types:b,j:1<b.length}}
8
+ function L(a){return{purgeInvisibleChars:a.hasInvisibleChars,purgeEmojis:a.hasEmojis,nukeControls:a.hasControlChars,keyboardOnlyFilter:a.hasNonKeyboardChars,normalizeNewlines:a.hasMixedNewlines||"CRLF"===a.newlineStyle||"CR"===a.newlineStyle}}export { g as summonSanctifier, J as inspectText, L as getRecommendedSanctifierOptions };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "text-sanctifier",
3
- "version": "1.0.13",
3
+ "version": "1.0.15",
4
4
  "type": "module",
5
5
  "description": "A brutal text normalizer and invisible trash scrubber for modern web projects.",
6
6
  "main": "./src/index.js",
package/src/index.d.ts CHANGED
@@ -1,20 +1,35 @@
1
1
  // src/index.d.ts
2
2
 
3
3
  export interface SanctifyOptions {
4
- /** Preserve paragraph breaks by collapsing 3+ newlines into 2 */
5
- preserveParagraphs?: boolean;
4
+ /** Remove ZWSP, NBSP, bidi, and other invisible Unicode trash */
5
+ purgeInvisibleChars?: boolean;
6
6
 
7
- /** Collapse multiple spaces into a single space */
8
- collapseSpaces?: boolean;
7
+ /** Remove emoji characters */
8
+ purgeEmojis?: boolean;
9
9
 
10
10
  /** Nuke hidden control characters (excluding whitespace like \n and \t) */
11
11
  nukeControls?: boolean;
12
12
 
13
- /** Remove emoji characters */
14
- purgeEmojis?: boolean;
15
-
16
13
  /** Restrict to printable ASCII (+ emoji if `purgeEmojis` is false) */
17
14
  keyboardOnlyFilter?: boolean;
15
+
16
+ /** Normalize all newline sequences to LF (`\n`) */
17
+ normalizeNewlines?: boolean;
18
+
19
+ /** Remove tabs and spaces before/after newline characters */
20
+ trimSpacesAroundNewlines?: boolean;
21
+
22
+ /** Collapse multiple consecutive newlines */
23
+ collapseNewLines?: boolean;
24
+
25
+ /** When collapsing newlines, preserve paragraph breaks as double `\n\n` */
26
+ preserveParagraphs?: boolean;
27
+
28
+ /** Collapse multiple spaces into a single space */
29
+ collapseSpaces?: boolean;
30
+
31
+ /** Trim leading and trailing whitespace from final result */
32
+ finalTrim?: boolean;
18
33
  }
19
34
 
20
35
  /** Preconfigured sanitizer function */
@@ -56,11 +71,16 @@ export namespace summonSanctifier {
56
71
  */
57
72
  export function sanctifyText(
58
73
  text: string,
74
+ purgeInvisibleChars?: boolean,
75
+ purgeEmojis?: boolean,
76
+ nukeControls?: boolean,
77
+ keyboardOnlyFilter?: boolean,
78
+ normalizeNewlines?: boolean,
79
+ trimSpacesAroundNewlines?: boolean,
80
+ collapseNewLines?: boolean,
59
81
  preserveParagraphs?: boolean,
60
82
  collapseSpaces?: boolean,
61
- nukeControls?: boolean,
62
- purgeEmojis?: boolean,
63
- keyboardOnlyFilter?: boolean
83
+ finalTrim?: boolean,
64
84
  ): string;
65
85
 
66
86
  /** Style of newline characters detected in a string */
@@ -84,3 +104,13 @@ export interface UnicodeTrashReport {
84
104
  * invisible characters, newline styles, emojis, and more.
85
105
  */
86
106
  export function inspectText(text: string): UnicodeTrashReport;
107
+
108
+
109
+ /**
110
+ * Creates a recommended set of `summonSanctifier` options based on the findings
111
+ * of `inspectText()`. This maps only what can be inferred automatically —
112
+ * user-preference settings like whitespace collapsing are left unset.
113
+ */
114
+ export function getRecommendedSanctifierOptions(
115
+ report: UnicodeTrashReport
116
+ ): SanctifyOptions;
package/src/index.js CHANGED
@@ -1,8 +1,8 @@
1
1
  // src/index.js
2
2
 
3
3
 
4
- import { inspectText } from './inspectText.js';
5
- export { inspectText };
4
+ import { inspectText, getRecommendedSanctifierOptions } from './inspectText.js';
5
+ export { inspectText, getRecommendedSanctifierOptions };
6
6
 
7
7
  import { summonSanctifier } from './sanctifyText.js';
8
8
  export { summonSanctifier };
@@ -1,4 +1,5 @@
1
1
 
2
+ // ./src/inspectText.js
2
3
 
3
4
 
4
5
  import {
@@ -7,9 +8,10 @@ import {
7
8
  EMOJI_REGEX,
8
9
  ASCII_KEYBOARD_SAFE_REGEX,
9
10
  normalizeTypographicJank
10
- } from './sanctifyText.js';
11
+ } from './sanctifyText.js';
12
+
13
+
11
14
 
12
-
13
15
  /**
14
16
  * Detects textual "trash" or anomalies in a given string.
15
17
  * @param {string} text
@@ -24,53 +26,53 @@ import {
24
26
  * }}
25
27
  */
26
28
  export function inspectText(text) {
27
- if (typeof text !== 'string') {
28
- throw new TypeError('inspectText expects a string input.');
29
- }
30
-
31
- const summary = [];
32
- const report = {
33
- hasControlChars: false,
34
- hasInvisibleChars: false,
35
- hasMixedNewlines: false,
36
- newlineStyle: null,
37
- hasEmojis: false,
38
- hasNonKeyboardChars: false,
39
- summary
40
- };
41
-
42
- const flag = (condition, key, message) => {
43
- if (condition) {
44
- report[key] = true;
45
- summary.push(message);
46
- }
47
- };
48
-
49
- // === Pattern Checks ===
50
- flag(CONTROL_CHARS_REGEX.test(text), 'hasControlChars', 'Control characters detected.');
51
- flag(INVISIBLE_TRASH_REGEX.test(text), 'hasInvisibleChars', 'Invisible Unicode characters detected.');
52
- flag(EMOJI_REGEX.test(text), 'hasEmojis', 'Emojis detected.');
53
-
54
- // === Newline Analysis ===
55
- const { mixed, types } = getNewlineStats(text);
56
- report.hasMixedNewlines = mixed;
57
- report.newlineStyle = mixed ? 'Mixed' : types[0] || null;
58
-
59
- if (report.newlineStyle) {
60
- summary.push(
61
- mixed
62
- ? 'Mixed newline styles detected.'
63
- : `Consistent newline style: ${report.newlineStyle}`
64
- );
65
- }
66
-
67
- // === Non-keyboard characters (excluding emojis) ===
68
- const filtered = normalizeTypographicJank(text).replace(ASCII_KEYBOARD_SAFE_REGEX, m =>
69
- m.match(EMOJI_REGEX) ? '' : '☒'
70
- );
71
- flag(/[☒]/.test(filtered), 'hasNonKeyboardChars', 'Non-keyboard characters detected.');
72
-
73
- return report;
29
+ if (typeof text !== 'string') {
30
+ throw new TypeError('inspectText expects a string input.');
31
+ }
32
+
33
+ const summary = [];
34
+ const report = {
35
+ hasControlChars: false,
36
+ hasInvisibleChars: false,
37
+ hasMixedNewlines: false,
38
+ newlineStyle: null,
39
+ hasEmojis: false,
40
+ hasNonKeyboardChars: false,
41
+ summary
42
+ };
43
+
44
+ const flag = (condition, key, message) => {
45
+ if (condition) {
46
+ report[key] = true;
47
+ summary.push(message);
48
+ }
49
+ };
50
+
51
+ // === Pattern Checks ===
52
+ flag(CONTROL_CHARS_REGEX.test(text), 'hasControlChars', 'Control characters detected.');
53
+ flag(INVISIBLE_TRASH_REGEX.test(text), 'hasInvisibleChars', 'Invisible Unicode characters detected.');
54
+ flag(EMOJI_REGEX.test(text), 'hasEmojis', 'Emojis detected.');
55
+
56
+ // === Newline Analysis ===
57
+ const { mixed, types } = getNewlineStats(text);
58
+ report.hasMixedNewlines = mixed;
59
+ report.newlineStyle = mixed ? 'Mixed' : types[0] || null;
60
+
61
+ if (report.newlineStyle) {
62
+ summary.push(
63
+ mixed
64
+ ? 'Mixed newline styles detected.'
65
+ : `Consistent newline style: ${report.newlineStyle}`
66
+ );
67
+ }
68
+
69
+ // === Non-keyboard characters (excluding emojis) ===
70
+ const filtered = normalizeTypographicJank(text).replace(ASCII_KEYBOARD_SAFE_REGEX, m =>
71
+ m.match(EMOJI_REGEX) ? '' : '☒'
72
+ );
73
+ flag(/[☒]/.test(filtered), 'hasNonKeyboardChars', 'Non-keyboard characters detected.');
74
+
75
+ return report;
74
76
  }
75
77
 
76
78
 
@@ -86,30 +88,52 @@ export function inspectText(text) {
86
88
  * }}
87
89
  */
88
90
  export function getNewlineStats(text) {
89
- if (typeof text !== 'string') {
90
- throw new TypeError('getNewlineStats expects a string input.');
91
- }
92
-
93
- const crlfMatches = text.match(/\r\n/g) || [];
94
- const textWithoutCRLF = text.replace(/\r\n/g, '');
95
-
96
- const crMatches = textWithoutCRLF.match(/\r/g) || [];
97
- const lfMatches = textWithoutCRLF.match(/\n/g) || [];
98
-
99
- const count = {
100
- crlf: crlfMatches.length,
101
- cr: crMatches.length,
102
- lf: lfMatches.length
103
- };
104
-
105
- const types = [];
106
- if (count.crlf > 0) types.push('CRLF');
107
- if (count.cr > 0) types.push('CR');
108
- if (count.lf > 0) types.push('LF');
109
-
110
- return {
111
- ...count,
112
- types,
113
- mixed: types.length > 1
114
- };
91
+ if (typeof text !== 'string') {
92
+ throw new TypeError('getNewlineStats expects a string input.');
93
+ }
94
+
95
+ const crlfMatches = text.match(/\r\n/g) || [];
96
+ const textWithoutCRLF = text.replace(/\r\n/g, '');
97
+
98
+ const crMatches = textWithoutCRLF.match(/\r/g) || [];
99
+ const lfMatches = textWithoutCRLF.match(/\n/g) || [];
100
+
101
+ const count = {
102
+ crlf: crlfMatches.length,
103
+ cr: crMatches.length,
104
+ lf: lfMatches.length
105
+ };
106
+
107
+ const types = [];
108
+ if (count.crlf > 0) types.push('CRLF');
109
+ if (count.cr > 0) types.push('CR');
110
+ if (count.lf > 0) types.push('LF');
111
+
112
+ return {
113
+ ...count,
114
+ types,
115
+ mixed: types.length > 1
116
+ };
117
+ }
118
+
119
+
120
+
121
+ /**
122
+ * Creates recommended options for summonSanctifier based on an inspectText report.
123
+ * @param {!UnicodeTrashReport} report
124
+ * @return {!SanctifyOptions}
125
+ */
126
+ export function getRecommendedSanctifierOptions(report) {
127
+ return {
128
+ purgeInvisibleChars: report.hasInvisibleChars,
129
+ purgeEmojis: report.hasEmojis,
130
+ nukeControls: report.hasControlChars,
131
+ keyboardOnlyFilter: report.hasNonKeyboardChars,
132
+ normalizeNewlines: report.hasMixedNewlines || report.newlineStyle === 'CRLF' || report.newlineStyle === 'CR',
133
+ // trimSpacesAroundNewlines: true,
134
+ // collapseNewLines: false,
135
+ // preserveParagraphs: true,
136
+ // collapseSpaces: true,
137
+ // finalTrim: true,
138
+ };
115
139
  }
@@ -3,77 +3,149 @@
3
3
 
4
4
  /**
5
5
  * @typedef {Object} SanctifyOptions
6
- * @property {boolean} [preserveParagraphs=false]
7
- * @property {boolean} [collapseSpaces=false]
8
- * @property {boolean} [nukeControls=false]
9
- * @property {boolean} [purgeEmojis=false]
10
- * @property {boolean} [keyboardOnlyFilter=false]
6
+ * @property {boolean} [purgeInvisibleChars]
7
+ * @property {boolean} [purgeEmojis]
8
+ * @property {boolean} [nukeControls]
9
+ * @property {boolean} [keyboardOnlyFilter]
10
+ * @property {boolean} [normalizeNewlines]
11
+ * @property {boolean} [trimSpacesAroundNewlines]
12
+ * @property {boolean} [collapseNewLines]
13
+ * @property {boolean} [preserveParagraphs]
14
+ * @property {boolean} [collapseSpaces]
15
+ * @property {boolean} [finalTrim]
11
16
  */
12
17
 
13
18
 
14
19
  /**
15
20
  * Summons a customized sanctifier function with pre-bound booleans.
16
- *
17
- * @param {Object} [o={}]
18
- * @param {boolean} [o.preserveParagraphs=false]
19
- * @param {boolean} [o.collapseSpaces=false]
20
- * @param {boolean} [o.nukeControls=false]
21
- * @param {boolean} [o.purgeEmojis=false]
22
- * @param {boolean} [o.keyboardOnlyFilter=false]
21
+ *
22
+ * Accepts full flag names and returns a text-cleaning function.
23
+ *
24
+ * @param {Object} [defaultOptions={}]
25
+ * @param {boolean} [defaultOptions.purgeInvisibleChars]
26
+ * @param {boolean} [defaultOptions.purgeEmojis]
27
+ * @param {boolean} [defaultOptions.nukeControls]
28
+ * @param {boolean} [defaultOptions.keyboardOnlyFilter]
29
+ * @param {boolean} [defaultOptions.normalizeNewlines]
30
+ * @param {boolean} [defaultOptions.trimSpacesAroundNewlines]
31
+ * @param {boolean} [defaultOptions.collapseNewLines]
32
+ * @param {boolean} [defaultOptions.preserveParagraphs]
33
+ * @param {boolean} [defaultOptions.collapseSpaces]
34
+ * @param {boolean} [defaultOptions.finalTrim]
23
35
  * @returns {(text: string) => string}
24
36
  */
25
37
  export function summonSanctifier(defaultOptions = {}) {
26
- const p = !!defaultOptions.preserveParagraphs;
27
- const c = !!defaultOptions.collapseSpaces;
28
- const n = !!defaultOptions.nukeControls;
29
- const e = !!defaultOptions.purgeEmojis;
30
- const k = !!defaultOptions.keyboardOnlyFilter;
31
-
32
- return text => sanctifyText(text, p, c, n, e, k);
38
+ const purgeInvisibleChars = !!defaultOptions.purgeInvisibleChars;
39
+ const purgeEmojis = !!defaultOptions.purgeEmojis;
40
+ const nukeControls = !!defaultOptions.nukeControls;
41
+ const keyboardOnlyFilter = !!defaultOptions.keyboardOnlyFilter;
42
+ const normalizeNewlines = !!defaultOptions.normalizeNewlines;
43
+ const trimSpacesAroundNewlines = !!defaultOptions.trimSpacesAroundNewlines;
44
+ const collapseNewLines = !!defaultOptions.collapseNewLines;
45
+ const preserveParagraphs = !!defaultOptions.preserveParagraphs;
46
+ const collapseSpaces = !!defaultOptions.collapseSpaces;
47
+ const finalTrim = !!defaultOptions.finalTrim;
48
+
49
+ return text => sanctifyText(
50
+ text,
51
+ purgeInvisibleChars,
52
+ purgeEmojis,
53
+ nukeControls,
54
+ keyboardOnlyFilter,
55
+ normalizeNewlines,
56
+ trimSpacesAroundNewlines,
57
+ collapseNewLines,
58
+ preserveParagraphs,
59
+ collapseSpaces,
60
+ finalTrim
61
+ );
33
62
  }
34
63
 
35
-
36
64
  // --- Added Presets ---
37
65
 
38
66
  /**
39
67
  * Strict sanitizer:
40
- * - Collapse spaces
68
+ * - Purge emojis
41
69
  * - Collapse all newlines
70
+ * - Collapse spaces
42
71
  * - Nuke control characters
43
72
  */
44
- summonSanctifier.strict = text => sanctifyText(text, false, true, true, true);
73
+ summonSanctifier.strict = text => sanctifyText(
74
+ text,
75
+ true, // purgeInvisibleChars
76
+ true, // purgeEmojis
77
+ true, // nukeControls
78
+ false, // keyboardOnlyFilter
79
+ true, // normalizeNewlines
80
+ true, // trimSpacesAroundNewlines
81
+ true, // collapseNewLines
82
+ false, // preserveParagraphs
83
+ true, // collapseSpaces
84
+ true // finalTrim
85
+ );
45
86
 
46
87
 
47
88
  /**
48
89
  * Loose sanitizer:
49
90
  * - Collapse spaces
50
91
  * - Preserve paragraphs
51
- * - Skip nuking control characters
92
+ * - Normalize newlines
52
93
  */
53
- summonSanctifier.loose = text => sanctifyText(text, true, true);
94
+ summonSanctifier.loose = text => sanctifyText(
95
+ text,
96
+ false, // purgeInvisibleChars
97
+ false, // purgeEmojis
98
+ false, // nukeControls
99
+ false, // keyboardOnlyFilter
100
+ true, // normalizeNewlines
101
+ true, // trimSpacesAroundNewlines
102
+ true, // collapseNewLines
103
+ true, // preserveParagraphs
104
+ true, // collapseSpaces
105
+ true // finalTrim
106
+ );
54
107
 
55
108
 
56
109
  /**
57
110
  * Keyboard-only (with emojis):
58
- * - Keeps emojis and printable ASCII.
59
- * - Normalizes typographic trash (quotes, dashes, etc.)
60
- * - Strips non-standard characters.
61
- * - Keeps spacing soft (spaces not collapsed).
111
+ * - Keeps emojis and printable ASCII
112
+ * - Strips non-standard characters
113
+ * - Normalizes typographic trash
62
114
  */
63
- summonSanctifier.keyboardOnlyEmoji = text =>
64
- sanctifyText(text, false, false, true, false, true);
115
+ summonSanctifier.keyboardOnlyEmoji = text => sanctifyText(
116
+ text,
117
+ false, // purgeInvisibleChars
118
+ false, // purgeEmojis
119
+ false, // nukeControls
120
+ true, // keyboardOnlyFilter
121
+ true, // normalizeNewlines
122
+ true, // trimSpacesAroundNewlines
123
+ false, // collapseNewLines
124
+ false, // preserveParagraphs
125
+ false, // collapseSpaces
126
+ true // finalTrim
127
+ );
65
128
 
66
129
 
67
130
  /**
68
- * Keyboard-only (strict):
69
- * - No emojis.
70
- * - Collapses whitespace.
71
- * - Keeps only printable ASCII.
72
- */
73
- summonSanctifier.keyboardOnly = text =>
74
- sanctifyText(text, false, true, true, true, true);
75
-
76
-
131
+ * Keyboard-only (strict):
132
+ * - Removes emojis
133
+ * - Collapses all whitespace
134
+ * - Restricts to printable ASCII only
135
+ */
136
+ summonSanctifier.keyboardOnly = text => sanctifyText(
137
+ text,
138
+ true, // purgeInvisibleChars
139
+ true, // purgeEmojis
140
+ true, // nukeControls
141
+ true, // keyboardOnlyFilter
142
+ true, // normalizeNewlines
143
+ true, // trimSpacesAroundNewlines
144
+ true, // collapseNewLines
145
+ false, // preserveParagraphs
146
+ true, // collapseSpaces
147
+ true // finalTrim
148
+ );
77
149
 
78
150
 
79
151
  /**
@@ -82,70 +154,64 @@ summonSanctifier.keyboardOnly = text =>
82
154
  * Brutal text normalizer and invisible trash scrubber,
83
155
  * configurable to kill whatever ghosts you want dead.
84
156
  *
85
- * Usage:
86
- *
87
- * import { sanctifyText } from './utils/sanctifyText';
88
- *
89
- * const cleaned = sanctifyText(rawText, FLAG_COLLAPSE_SPACES | FLAG_NUKE_CONTROLS);
90
- *
91
157
  * @param {string | null | undefined} text
92
- * @param {boolean} [preserveParagraphs=false] - Preserve paragraph breaks (2 newlines) instead of collapsing all.
93
- * @param {boolean} [collapseSpaces=false] - Collapse multiple spaces into a single space.
94
- * @param {boolean} [nukeControls=false] - Remove hidden control characters (except whitespace).
95
- * @param {boolean} [purgeEmojis=false] - Remove emoji characters from the text.
96
- * @param {boolean} [keyboardOnlyFilter=false] - Keep only printable ASCII and emoji characters.
158
+ * @param {boolean} [purgeInvisibleChars=false] - Remove ZWSP, NBSP, bidi, etc.
159
+ * @param {boolean} [purgeEmojis=false] - Remove emoji characters entirely.
160
+ * @param {boolean} [nukeControls=false] - Remove non-whitespace control characters.
161
+ * @param {boolean} [keyboardOnlyFilter=false] - Keep only printable ASCII, plus emojis unless `purgeEmojis` is true.
162
+ * @param {boolean} [normalizeNewlines=false] - Convert all newlines to `\n`.
163
+ * @param {boolean} [trimSpacesAroundNewlines=false] - Remove spaces/tabs around newlines.
164
+ * @param {boolean} [collapseNewLines=false] - Collapse `\n` runs (optionally preserve paragraphs).
165
+ * @param {boolean} [preserveParagraphs=false] - Preserve paragraph breaks when collapsing newlines.
166
+ * @param {boolean} [collapseSpaces=false] - Collapse multiple spaces into one.
167
+ * @param {boolean} [finalTrim=false] - `.trim()` the final output (head/tail).
97
168
  * @returns {string}
98
169
  */
99
170
  export function sanctifyText(
100
171
  text,
172
+ purgeInvisibleChars = false,
173
+ purgeEmojis = false,
174
+ nukeControls = false,
175
+ keyboardOnlyFilter = false,
176
+ normalizeNewlines = false,
177
+ trimSpacesAroundNewlines = false,
178
+ collapseNewLines = false,
101
179
  preserveParagraphs = false,
102
180
  collapseSpaces = false,
103
- nukeControls = false,
104
- purgeEmojis = false,
105
- keyboardOnlyFilter = false
181
+ finalTrim = false,
106
182
  ) {
107
-
108
183
  if (typeof text !== 'string') {
109
184
  throw new TypeError('sanctifyText expects a string input.');
110
185
  }
111
186
 
112
187
  let cleaned = text;
113
188
 
114
- // Purge invisible Unicode trash (zero-width, non-breaking, bidi junk, etc.)
115
- cleaned = purgeInvisibleTrash(cleaned);
189
+ // Purge invisible Unicode trash (zero-width, non-breaking, bidi junk, etc.)
190
+ if (purgeInvisibleChars) cleaned = purgeInvisibleTrash(cleaned);
116
191
 
117
- // Optionally, remove emojis
118
- if (purgeEmojis) {
119
- cleaned = purgeEmojisCharacters(cleaned);
120
- }
192
+ // Remove emojis
193
+ if (purgeEmojis) cleaned = purgeEmojisCharacters(cleaned);
121
194
 
122
- // Optionally, nuke control characters (excluding whitespace)
123
- if (nukeControls) {
124
- cleaned = purgeControlCharacters(cleaned);
125
- }
126
-
127
-
128
- if (keyboardOnlyFilter) {
129
- cleaned = purgeNonKeyboardChars(cleaned, purgeEmojis);
130
- }
195
+ // Nuke control characters (excluding whitespace)
196
+ if (nukeControls) cleaned = purgeControlCharacters(cleaned);
131
197
 
198
+ // Keep only ASCII/emojis
199
+ if (keyboardOnlyFilter) cleaned = purgeNonKeyboardChars(cleaned, purgeEmojis);
132
200
 
133
201
  // Normalize line endings to Unix style (\n)
134
- cleaned = normalizeNewlines(cleaned);
202
+ if (normalizeNewlines) cleaned = normalizeNewlineChars(cleaned);
135
203
 
136
204
  // Remove spaces/tabs around newlines
137
- cleaned = trimSpacesAroundNewlines(cleaned);
205
+ if (trimSpacesAroundNewlines) cleaned = trimSpacesAroundNewlineChars(cleaned);
138
206
 
139
207
  // Collapse excessive newlines, Optionally preserve Paragraphs
140
- cleaned = collapseParagraphs(cleaned, preserveParagraphs);
208
+ if (collapseNewLines) cleaned = collapseMultipleNewLines(cleaned, preserveParagraphs);
141
209
 
142
- // Optionally, Collapse multiple spaces into a single space
143
- if (collapseSpaces) {
144
- cleaned = collapseExtraSpaces(cleaned);
145
- }
210
+ // Collapse multiple spaces into a single space
211
+ if (collapseSpaces) cleaned = collapseExtraSpaces(cleaned);
146
212
 
147
- // Final trim
148
- return cleaned.trim();
213
+ // Final trim, return Sanctified Text
214
+ return finalTrim ? cleaned.trim() : cleaned;
149
215
  }
150
216
 
151
217
 
@@ -268,7 +334,7 @@ function purgeEmojisCharacters(text) {
268
334
  * @returns {string}
269
335
  */
270
336
  const NORMALIZE_NEWLINES_REGEX = /\r\n|\r|\n/g;
271
- function normalizeNewlines(text, normalized = '\n') {
337
+ function normalizeNewlineChars(text, normalized = '\n') {
272
338
  return text.replace(NORMALIZE_NEWLINES_REGEX, normalized);
273
339
  }
274
340
 
@@ -284,7 +350,7 @@ function normalizeNewlines(text, normalized = '\n') {
284
350
  * @returns {string}
285
351
  */
286
352
  const TRIM_SPACES_AROUND_NEWLINES_REGEX = /[ \t]*(\n+)[ \t]*/g;
287
- function trimSpacesAroundNewlines(text) {
353
+ function trimSpacesAroundNewlineChars(text) {
288
354
  return text.replace(TRIM_SPACES_AROUND_NEWLINES_REGEX, '$1');
289
355
  }
290
356
 
@@ -307,7 +373,7 @@ function trimSpacesAroundNewlines(text) {
307
373
  const MULTIPLE_NEWLINES_REGEX = /\n{2,}/g;
308
374
  const TRIPLE_NEWLINES_REGEX = /\n{3,}/g;
309
375
 
310
- function collapseParagraphs(text, preserveParagraphs) {
376
+ function collapseMultipleNewLines(text, preserveParagraphs) {
311
377
  return preserveParagraphs
312
378
  ? text.replace(TRIPLE_NEWLINES_REGEX, '\n\n')
313
379
  : text.replace(MULTIPLE_NEWLINES_REGEX, '\n');