text-sanctifier 1.0.13 → 1.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +66 -73
- package/dist/text-sanctifier.min.js +7 -7
- package/package.json +1 -1
- package/src/index.d.ts +30 -10
- package/src/sanctifyText.js +147 -81
package/README.md
CHANGED
|
@@ -5,22 +5,21 @@
|
|
|
5
5
|
[](https://www.npmjs.com/package/text-sanctifier)
|
|
6
6
|
[](https://github.com/iWhatty/text-sanctifier)
|
|
7
7
|
|
|
8
|
-
|
|
9
8
|
Brutal text normalizer and invisible trash scrubber for modern web projects.
|
|
10
9
|
|
|
11
|
-
* Minified: (2.
|
|
12
|
-
* Gzipped (GCC)
|
|
10
|
+
* Minified: (2.69 KB)
|
|
11
|
+
* Gzipped (GCC): (1.27 KB)
|
|
13
12
|
|
|
14
13
|
## Features
|
|
15
14
|
|
|
16
15
|
* Purges zero-width Unicode garbage
|
|
17
|
-
* Normalizes line endings
|
|
16
|
+
* Normalizes line endings (CRLF, CR, LF) → LF
|
|
18
17
|
* Collapses unwanted spaces and paragraphs
|
|
19
18
|
* Nukes control characters (if enabled)
|
|
20
|
-
*
|
|
21
|
-
*
|
|
22
|
-
*
|
|
23
|
-
*
|
|
19
|
+
* Smart normalization of typographic junk (quotes, dashes, bullets, full-width punctuation)
|
|
20
|
+
* Keyboard-only filtering (keep printable ASCII plus emoji, or restrict to printable ASCII only)
|
|
21
|
+
* Configurable via fine-grained flags or ready-made presets
|
|
22
|
+
* Includes strict, loose, and keyboard-only modes
|
|
24
23
|
|
|
25
24
|
## Install
|
|
26
25
|
|
|
@@ -30,98 +29,65 @@ npm install text-sanctifier
|
|
|
30
29
|
|
|
31
30
|
## 📦 Package & Build Info
|
|
32
31
|
|
|
33
|
-
* **Source (`src/`)**: ES2020+ ESM modules with JSDoc
|
|
34
|
-
* **Browser
|
|
35
|
-
* **
|
|
36
|
-
* **
|
|
37
|
-
* **
|
|
38
|
-
* **No Transpilers Included**: No built-in shims, polyfills, or transpilation; you control environment compatibility.
|
|
39
|
-
* **Tree-shaking Friendly**: Fully optimized with `sideEffects: false` for dead code elimination.
|
|
40
|
-
* **Publishing Philosophy**:
|
|
32
|
+
* **Source (`src/`)**: ES2020+ ESM modules with JSDoc
|
|
33
|
+
* **Browser Build (`dist/`)**: Minified ESM bundle for `<script type="module">`
|
|
34
|
+
* **Tree-shaking Friendly**: Fully optimized with `sideEffects: false`
|
|
35
|
+
* **Zero Transpilation**: No built-in polyfills or runtime overhead
|
|
36
|
+
* **Bundler Ready**: Works great with Vite, Rollup, Webpack, Parcel, etc.
|
|
41
37
|
|
|
42
|
-
|
|
43
|
-
* Minified bundle included separately for raw browser consumption without a build step.
|
|
38
|
+
---
|
|
44
39
|
|
|
45
|
-
## Quick Usage
|
|
40
|
+
## 🔧 Quick Usage
|
|
46
41
|
|
|
47
|
-
###
|
|
42
|
+
### Custom Config
|
|
48
43
|
|
|
49
|
-
```
|
|
44
|
+
```js
|
|
50
45
|
import { summonSanctifier } from 'text-sanctifier';
|
|
51
46
|
|
|
52
|
-
const
|
|
53
|
-
|
|
54
|
-
collapseSpaces: true,
|
|
55
|
-
nukeControls: true,
|
|
47
|
+
const clean = summonSanctifier({
|
|
48
|
+
purgeInvisibleChars: true,
|
|
56
49
|
purgeEmojis: true,
|
|
50
|
+
collapseSpaces: true,
|
|
51
|
+
collapseNewLines: true,
|
|
52
|
+
preserveParagraphs: true,
|
|
53
|
+
finalTrim: true,
|
|
57
54
|
});
|
|
58
55
|
|
|
59
|
-
const
|
|
56
|
+
const output = clean(rawText);
|
|
60
57
|
```
|
|
61
58
|
|
|
62
|
-
### Strict
|
|
63
|
-
|
|
64
|
-
```javascript
|
|
65
|
-
import { summonSanctifier } from 'text-sanctifier';
|
|
59
|
+
### Strict Preset
|
|
66
60
|
|
|
67
|
-
|
|
68
|
-
const
|
|
61
|
+
```js
|
|
62
|
+
const output = summonSanctifier.strict(rawText);
|
|
69
63
|
```
|
|
70
64
|
|
|
71
|
-
### Loose
|
|
72
|
-
|
|
73
|
-
```javascript
|
|
74
|
-
import { summonSanctifier } from 'text-sanctifier';
|
|
65
|
+
### Loose Preset
|
|
75
66
|
|
|
76
|
-
|
|
77
|
-
const
|
|
67
|
+
```js
|
|
68
|
+
const output = summonSanctifier.loose(rawText);
|
|
78
69
|
```
|
|
79
70
|
|
|
80
|
-
### Keyboard-
|
|
71
|
+
### Keyboard-Only (No Emojis)
|
|
81
72
|
|
|
82
|
-
```
|
|
83
|
-
const
|
|
84
|
-
const asciiOnlyText = keyboardOnly(userInput);
|
|
73
|
+
```js
|
|
74
|
+
const output = summonSanctifier.keyboardOnly(userInput);
|
|
85
75
|
```
|
|
86
76
|
|
|
87
|
-
### Keyboard-
|
|
77
|
+
### Keyboard-Only (With Emojis)
|
|
88
78
|
|
|
89
|
-
```
|
|
90
|
-
const
|
|
91
|
-
const cleanAndFun = keyboardWithEmoji(commentBox);
|
|
79
|
+
```js
|
|
80
|
+
const output = summonSanctifier.keyboardOnlyEmoji(commentText);
|
|
92
81
|
```
|
|
93
82
|
|
|
94
|
-
## API
|
|
95
|
-
|
|
96
|
-
#### `summonSanctifier(options?: SanctifyOptions): (text: string) => string`
|
|
97
|
-
|
|
98
|
-
Creates a sanitizer with options pre-bound.
|
|
99
|
-
|
|
100
|
-
#### `summonSanctifier.strict: (text: string) => string`
|
|
101
|
-
|
|
102
|
-
Strict sanitizer preset (collapse spaces, collapse all newlines, nuke controls, purge Emojis).
|
|
103
|
-
|
|
104
|
-
#### `summonSanctifier.loose: (text: string) => string`
|
|
105
|
-
|
|
106
|
-
Loose sanitizer preset (preserve paragraph breaks, collapse spaces, skip nuking controls, preserve Emojis).
|
|
107
|
-
|
|
108
|
-
#### `summonSanctifier.keyboardOnly: (text: string) => string`
|
|
109
|
-
|
|
110
|
-
Removes everything except printable ASCII. Emojis are removed. Spaces are collapsed.
|
|
111
|
-
|
|
112
|
-
#### `summonSanctifier.keyboardOnlyEmoji: (text: string) => string`
|
|
113
|
-
|
|
114
|
-
Keeps printable ASCII and emoji characters. Typographic normalization included.
|
|
115
|
-
|
|
116
83
|
---
|
|
117
84
|
|
|
85
|
+
## 🔍 Unicode Trash Detection
|
|
118
86
|
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
```javascript
|
|
87
|
+
```js
|
|
122
88
|
import { inspectText } from 'text-sanctifier';
|
|
123
89
|
|
|
124
|
-
const report = inspectText(
|
|
90
|
+
const report = inspectText(input);
|
|
125
91
|
|
|
126
92
|
/*
|
|
127
93
|
{
|
|
@@ -141,8 +107,35 @@ const report = inspectText(rawInput);
|
|
|
141
107
|
*/
|
|
142
108
|
```
|
|
143
109
|
|
|
144
|
-
Use
|
|
110
|
+
Use `inspectText` to preflight text content before rendering, storing, or linting. It's a diagnostic tool to help inform sanitization needs.
|
|
111
|
+
|
|
112
|
+
---
|
|
113
|
+
|
|
114
|
+
## API
|
|
115
|
+
|
|
116
|
+
### `summonSanctifier(options?: SanctifyOptions): (text: string) => string`
|
|
117
|
+
|
|
118
|
+
Creates a reusable sanitizer from an options object. Every flag defaults to `false`, so enable each step you need.
|
|
119
|
+
|
|
120
|
+
### `summonSanctifier.strict`
|
|
121
|
+
|
|
122
|
+
Aggressively purges invisible characters, emojis, and control characters, and collapses extra spaces and repeated newlines.
|
|
123
|
+
|
|
124
|
+
### `summonSanctifier.loose`
|
|
125
|
+
|
|
126
|
+
Gently normalizes spacing and newlines while preserving emojis and paragraphs.
|
|
127
|
+
|
|
128
|
+
### `summonSanctifier.keyboardOnly`
|
|
129
|
+
|
|
130
|
+
Restricts to printable ASCII only (removes emojis).
|
|
131
|
+
|
|
132
|
+
### `summonSanctifier.keyboardOnlyEmoji`
|
|
133
|
+
|
|
134
|
+
Restricts to keyboard-safe ASCII + emojis. Preserves fun, removes weird.
|
|
135
|
+
|
|
136
|
+
### `inspectText(text: string): UnicodeTrashReport`
|
|
145
137
|
|
|
138
|
+
Returns a structural report of control codes, invisible chars, newline styles, and more.
|
|
146
139
|
|
|
147
140
|
---
|
|
148
141
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
function
|
|
2
|
-
function
|
|
3
|
-
function
|
|
4
|
-
try{
|
|
5
|
-
function
|
|
6
|
-
a=
|
|
7
|
-
export {
|
|
1
|
+
function g(a={}){const b=!!a.purgeInvisibleChars,c=!!a.purgeEmojis,d=!!a.nukeControls,e=!!a.keyboardOnlyFilter,k=!!a.normalizeNewlines,f=!!a.trimSpacesAroundNewlines,l=!!a.collapseNewLines,m=!!a.preserveParagraphs,p=!!a.collapseSpaces,q=!!a.finalTrim;return w=>h(w,b,c,d,e,k,f,l,m,p,q)}g.strict=a=>h(a,!0,!0,!0,!1,!0,!0,!0,!1,!0,!0);g.loose=a=>h(a,!1,!1,!1,!1,!0,!0,!0,!0,!0,!0);g.keyboardOnlyEmoji=a=>h(a,!1,!1,!1,!0,!0,!0,!1,!1,!1,!0);g.keyboardOnly=a=>h(a,!0,!0,!0,!0,!0,!0,!0,!1,!0,!0);
|
|
2
|
+
function h(a,b=!1,c=!1,d=!1,e=!1,k=!1,f=!1,l=!1,m=!1,p=!1,q=!1){if("string"!==typeof a)throw new TypeError("sanctifyText expects a string input.");b&&(a=a.replace(n,""));c&&(a=a.replace(r,""));d&&(a=a.replace(t,""));e&&(a=u(a,c));k&&(a=a.replace(v,"\n"));f&&(a=a.replace(x,"$1"));l&&(b=a,a=m?b.replace(y,"\n\n"):b.replace(z,"\n"));p&&(a=a.replace(A," "));return q?a.trim():a}var n=/[\u00A0\u2000-\u200D\u202F\u2060\u3000\uFEFF\u200E\u200F\u202A-\u202E]+/g,B=/[^\x20-\x7E\r\n]+/gu;
|
|
3
|
+
function u(a,b=!1){a=C(a);return b?a.replace(B,""):a.replace(B,c=>c.match(r)?c:"")}var D=/[\u2018\u2019\u201A\u201B\u2032\u2035]/g,E=/[\u201C\u201D\u201E\u201F\u2033\u2036\u00AB\u00BB]/g,F=/[\u2012\u2013\u2014\u2015\u2212]/g,G=/\u2026/g,H=/[\u2022\u00B7]/g,I=/[\uFF01-\uFF5E]/g;function C(a){return a.replace(D,"'").replace(E,'"').replace(F,"-").replace(G,"...").replace(H,"*").replace(I,b=>String.fromCharCode(b.charCodeAt(0)-65248))}var r;
|
|
4
|
+
try{r=RegExp("(?:\\p{Extended_Pictographic}(?:\\uFE0F|\\uFE0E)?(?:\\u200D(?:\\p{Extended_Pictographic}|\\w)+)*)","gu")}catch{r=/[\u{1F300}-\u{1FAFF}]/gu}var v=/\r\n|\r|\n/g,x=/[ \t]*(\n+)[ \t]*/g,z=/\n{2,}/g,y=/\n{3,}/g,A=/ {2,}/g,t=/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F\u0080-\u009F\u200E\u200F\u202A-\u202E]+/g;
|
|
5
|
+
function J(a){if("string"!==typeof a)throw new TypeError("inspectText expects a string input.");const b=[],c={o:!1,u:!1,j:!1,g:null,s:!1,v:!1,summary:b},d=(f,l,m)=>{f&&(c[l]=!0,b.push(m))};d(t.test(a),"hasControlChars","Control characters detected.");d(n.test(a),"hasInvisibleChars","Invisible Unicode characters detected.");d(r.test(a),"hasEmojis","Emojis detected.");const {m:e,types:k}=K(a);c.j=e;c.g=e?"Mixed":k[0]||null;c.g&&b.push(e?"Mixed newline styles detected.":`Consistent newline style: ${c.g}`);
|
|
6
|
+
a=C(a).replace(B,f=>f.match(r)?"":"\u2612");d(/[\u2612]/.test(a),"hasNonKeyboardChars","Non-keyboard characters detected.");return c}function K(a){if("string"!==typeof a)throw new TypeError("getNewlineStats expects a string input.");var b=a.replace(/\r\n/g,"");a={i:(a.match(/\r\n/g)||[]).length,h:(b.match(/\r/g)||[]).length,l:(b.match(/\n/g)||[]).length};b=[];0<a.i&&b.push("CRLF");0<a.h&&b.push("CR");0<a.l&&b.push("LF");return{...a,types:b,m:1<b.length}}
|
|
7
|
+
export { g as summonSanctifier, J as inspectText };
|
package/package.json
CHANGED
package/src/index.d.ts
CHANGED
|
@@ -1,20 +1,35 @@
|
|
|
1
1
|
// src/index.d.ts
|
|
2
2
|
|
|
3
3
|
export interface SanctifyOptions {
|
|
4
|
-
/**
|
|
5
|
-
|
|
4
|
+
/** Remove ZWSP, NBSP, bidi, and other invisible Unicode trash */
|
|
5
|
+
purgeInvisibleChars?: boolean;
|
|
6
6
|
|
|
7
|
-
/**
|
|
8
|
-
|
|
7
|
+
/** Remove emoji characters */
|
|
8
|
+
purgeEmojis?: boolean;
|
|
9
9
|
|
|
10
10
|
/** Nuke hidden control characters (excluding whitespace like \n and \t) */
|
|
11
11
|
nukeControls?: boolean;
|
|
12
12
|
|
|
13
|
-
/** Remove emoji characters */
|
|
14
|
-
purgeEmojis?: boolean;
|
|
15
|
-
|
|
16
13
|
/** Restrict to printable ASCII (+ emoji if `purgeEmojis` is false) */
|
|
17
14
|
keyboardOnlyFilter?: boolean;
|
|
15
|
+
|
|
16
|
+
/** Normalize all newline sequences to LF (`\n`) */
|
|
17
|
+
normalizeNewlines?: boolean;
|
|
18
|
+
|
|
19
|
+
/** Remove tabs and spaces before/after newline characters */
|
|
20
|
+
trimSpacesAroundNewlines?: boolean;
|
|
21
|
+
|
|
22
|
+
/** Collapse multiple consecutive newlines */
|
|
23
|
+
collapseNewLines?: boolean;
|
|
24
|
+
|
|
25
|
+
/** When collapsing newlines, preserve paragraph breaks as double `\n\n` */
|
|
26
|
+
preserveParagraphs?: boolean;
|
|
27
|
+
|
|
28
|
+
/** Collapse multiple spaces into a single space */
|
|
29
|
+
collapseSpaces?: boolean;
|
|
30
|
+
|
|
31
|
+
/** Trim leading and trailing whitespace from final result */
|
|
32
|
+
finalTrim?: boolean;
|
|
18
33
|
}
|
|
19
34
|
|
|
20
35
|
/** Preconfigured sanitizer function */
|
|
@@ -56,11 +71,16 @@ export namespace summonSanctifier {
|
|
|
56
71
|
*/
|
|
57
72
|
export function sanctifyText(
|
|
58
73
|
text: string,
|
|
74
|
+
purgeInvisibleChars?: boolean,
|
|
75
|
+
purgeEmojis?: boolean,
|
|
76
|
+
nukeControls?: boolean,
|
|
77
|
+
keyboardOnlyFilter?: boolean,
|
|
78
|
+
normalizeNewlines?: boolean,
|
|
79
|
+
trimSpacesAroundNewlines?: boolean,
|
|
80
|
+
collapseNewLines?: boolean,
|
|
59
81
|
preserveParagraphs?: boolean,
|
|
60
82
|
collapseSpaces?: boolean,
|
|
61
|
-
|
|
62
|
-
purgeEmojis?: boolean,
|
|
63
|
-
keyboardOnlyFilter?: boolean
|
|
83
|
+
finalTrim?: boolean,
|
|
64
84
|
): string;
|
|
65
85
|
|
|
66
86
|
/** Style of newline characters detected in a string */
|
package/src/sanctifyText.js
CHANGED
|
@@ -3,77 +3,149 @@
|
|
|
3
3
|
|
|
4
4
|
/**
|
|
5
5
|
* @typedef {Object} SanctifyOptions
|
|
6
|
-
* @property {boolean} [
|
|
7
|
-
* @property {boolean} [
|
|
8
|
-
* @property {boolean} [nukeControls
|
|
9
|
-
*
|
|
10
|
-
* @property {boolean} [
|
|
6
|
+
* @property {boolean} [purgeInvisibleChars]
|
|
7
|
+
* @property {boolean} [purgeEmojis]
|
|
8
|
+
* @property {boolean} [nukeControls]
|
|
9
|
+
* @property {boolean} [keyboardOnlyFilter]
|
|
10
|
+
* @property {boolean} [normalizeNewlines]
|
|
11
|
+
* @property {boolean} [trimSpacesAroundNewlines]
|
|
12
|
+
* @property {boolean} [collapseNewLines]
|
|
13
|
+
* @property {boolean} [preserveParagraphs]
|
|
14
|
+
* @property {boolean} [collapseSpaces]
|
|
15
|
+
* @property {boolean} [finalTrim]
|
|
11
16
|
*/
|
|
12
17
|
|
|
13
18
|
|
|
14
19
|
/**
|
|
15
20
|
* Summons a customized sanctifier function with pre-bound booleans.
|
|
16
|
-
*
|
|
17
|
-
*
|
|
18
|
-
*
|
|
19
|
-
* @param {
|
|
20
|
-
* @param {boolean} [
|
|
21
|
-
* @param {boolean} [
|
|
22
|
-
* @param {boolean} [
|
|
21
|
+
*
|
|
22
|
+
* Accepts an options object of named boolean flags (omitted flags are treated as false) and returns a text-cleaning function.
|
|
23
|
+
*
|
|
24
|
+
* @param {Object} [defaultOptions={}]
|
|
25
|
+
* @param {boolean} [defaultOptions.purgeInvisibleChars]
|
|
26
|
+
* @param {boolean} [defaultOptions.purgeEmojis]
|
|
27
|
+
* @param {boolean} [defaultOptions.nukeControls]
|
|
28
|
+
* @param {boolean} [defaultOptions.keyboardOnlyFilter]
|
|
29
|
+
* @param {boolean} [defaultOptions.normalizeNewlines]
|
|
30
|
+
* @param {boolean} [defaultOptions.trimSpacesAroundNewlines]
|
|
31
|
+
* @param {boolean} [defaultOptions.collapseNewLines]
|
|
32
|
+
* @param {boolean} [defaultOptions.preserveParagraphs]
|
|
33
|
+
* @param {boolean} [defaultOptions.collapseSpaces]
|
|
34
|
+
* @param {boolean} [defaultOptions.finalTrim]
|
|
23
35
|
* @returns {(text: string) => string}
|
|
24
36
|
*/
|
|
25
37
|
export function summonSanctifier(defaultOptions = {}) {
|
|
26
|
-
const
|
|
27
|
-
const
|
|
28
|
-
const
|
|
29
|
-
const
|
|
30
|
-
const
|
|
31
|
-
|
|
32
|
-
|
|
38
|
+
const purgeInvisibleChars = !!defaultOptions.purgeInvisibleChars;
|
|
39
|
+
const purgeEmojis = !!defaultOptions.purgeEmojis;
|
|
40
|
+
const nukeControls = !!defaultOptions.nukeControls;
|
|
41
|
+
const keyboardOnlyFilter = !!defaultOptions.keyboardOnlyFilter;
|
|
42
|
+
const normalizeNewlines = !!defaultOptions.normalizeNewlines;
|
|
43
|
+
const trimSpacesAroundNewlines = !!defaultOptions.trimSpacesAroundNewlines;
|
|
44
|
+
const collapseNewLines = !!defaultOptions.collapseNewLines;
|
|
45
|
+
const preserveParagraphs = !!defaultOptions.preserveParagraphs;
|
|
46
|
+
const collapseSpaces = !!defaultOptions.collapseSpaces;
|
|
47
|
+
const finalTrim = !!defaultOptions.finalTrim;
|
|
48
|
+
|
|
49
|
+
return text => sanctifyText(
|
|
50
|
+
text,
|
|
51
|
+
purgeInvisibleChars,
|
|
52
|
+
purgeEmojis,
|
|
53
|
+
nukeControls,
|
|
54
|
+
keyboardOnlyFilter,
|
|
55
|
+
normalizeNewlines,
|
|
56
|
+
trimSpacesAroundNewlines,
|
|
57
|
+
collapseNewLines,
|
|
58
|
+
preserveParagraphs,
|
|
59
|
+
collapseSpaces,
|
|
60
|
+
finalTrim
|
|
61
|
+
);
|
|
33
62
|
}
|
|
34
63
|
|
|
35
|
-
|
|
36
64
|
// --- Added Presets ---
|
|
37
65
|
|
|
38
66
|
/**
|
|
39
67
|
* Strict sanitizer:
|
|
40
|
-
* -
|
|
68
|
+
* - Purge emojis
|
|
41
69
|
* - Collapse all newlines
|
|
70
|
+
* - Collapse spaces
|
|
42
71
|
* - Nuke control characters
|
|
43
72
|
*/
|
|
44
|
-
summonSanctifier.strict = text => sanctifyText(
|
|
73
|
+
summonSanctifier.strict = text => sanctifyText(
|
|
74
|
+
text,
|
|
75
|
+
true, // purgeInvisibleChars
|
|
76
|
+
true, // purgeEmojis
|
|
77
|
+
true, // nukeControls
|
|
78
|
+
false, // keyboardOnlyFilter
|
|
79
|
+
true, // normalizeNewlines
|
|
80
|
+
true, // trimSpacesAroundNewlines
|
|
81
|
+
true, // collapseNewLines
|
|
82
|
+
false, // preserveParagraphs
|
|
83
|
+
true, // collapseSpaces
|
|
84
|
+
true // finalTrim
|
|
85
|
+
);
|
|
45
86
|
|
|
46
87
|
|
|
47
88
|
/**
|
|
48
89
|
* Loose sanitizer:
|
|
49
90
|
* - Collapse spaces
|
|
50
91
|
* - Preserve paragraphs
|
|
51
|
-
* -
|
|
92
|
+
* - Normalize newlines
|
|
52
93
|
*/
|
|
53
|
-
summonSanctifier.loose = text => sanctifyText(
|
|
94
|
+
summonSanctifier.loose = text => sanctifyText(
|
|
95
|
+
text,
|
|
96
|
+
false, // purgeInvisibleChars
|
|
97
|
+
false, // purgeEmojis
|
|
98
|
+
false, // nukeControls
|
|
99
|
+
false, // keyboardOnlyFilter
|
|
100
|
+
true, // normalizeNewlines
|
|
101
|
+
true, // trimSpacesAroundNewlines
|
|
102
|
+
true, // collapseNewLines
|
|
103
|
+
true, // preserveParagraphs
|
|
104
|
+
true, // collapseSpaces
|
|
105
|
+
true // finalTrim
|
|
106
|
+
);
|
|
54
107
|
|
|
55
108
|
|
|
56
109
|
/**
|
|
57
110
|
* Keyboard-only (with emojis):
|
|
58
|
-
* - Keeps emojis and printable ASCII
|
|
59
|
-
* -
|
|
60
|
-
* -
|
|
61
|
-
* - Keeps spacing soft (spaces not collapsed).
|
|
111
|
+
* - Keeps emojis and printable ASCII
|
|
112
|
+
* - Strips non-standard characters
|
|
113
|
+
* - Normalizes typographic trash
|
|
62
114
|
*/
|
|
63
|
-
summonSanctifier.keyboardOnlyEmoji = text =>
|
|
64
|
-
|
|
115
|
+
summonSanctifier.keyboardOnlyEmoji = text => sanctifyText(
|
|
116
|
+
text,
|
|
117
|
+
false, // purgeInvisibleChars
|
|
118
|
+
false, // purgeEmojis
|
|
119
|
+
false, // nukeControls
|
|
120
|
+
true, // keyboardOnlyFilter
|
|
121
|
+
true, // normalizeNewlines
|
|
122
|
+
true, // trimSpacesAroundNewlines
|
|
123
|
+
false, // collapseNewLines
|
|
124
|
+
false, // preserveParagraphs
|
|
125
|
+
false, // collapseSpaces
|
|
126
|
+
true // finalTrim
|
|
127
|
+
);
|
|
65
128
|
|
|
66
129
|
|
|
67
130
|
/**
|
|
68
|
-
* Keyboard-only (strict):
|
|
69
|
-
* -
|
|
70
|
-
* - Collapses whitespace
|
|
71
|
-
* -
|
|
72
|
-
*/
|
|
73
|
-
summonSanctifier.keyboardOnly = text =>
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
131
|
+
* Keyboard-only (strict):
|
|
132
|
+
* - Removes emojis
|
|
133
|
+
* - Collapses all whitespace
|
|
134
|
+
* - Restricts to printable ASCII only
|
|
135
|
+
*/
|
|
136
|
+
summonSanctifier.keyboardOnly = text => sanctifyText(
|
|
137
|
+
text,
|
|
138
|
+
true, // purgeInvisibleChars
|
|
139
|
+
true, // purgeEmojis
|
|
140
|
+
true, // nukeControls
|
|
141
|
+
true, // keyboardOnlyFilter
|
|
142
|
+
true, // normalizeNewlines
|
|
143
|
+
true, // trimSpacesAroundNewlines
|
|
144
|
+
true, // collapseNewLines
|
|
145
|
+
false, // preserveParagraphs
|
|
146
|
+
true, // collapseSpaces
|
|
147
|
+
true // finalTrim
|
|
148
|
+
);
|
|
77
149
|
|
|
78
150
|
|
|
79
151
|
/**
|
|
@@ -82,70 +154,64 @@ summonSanctifier.keyboardOnly = text =>
|
|
|
82
154
|
* Brutal text normalizer and invisible trash scrubber,
|
|
83
155
|
* configurable to kill whatever ghosts you want dead.
|
|
84
156
|
*
|
|
85
|
-
* Usage:
|
|
86
|
-
*
|
|
87
|
-
* import { sanctifyText } from './utils/sanctifyText';
|
|
88
|
-
*
|
|
89
|
-
* const cleaned = sanctifyText(rawText, FLAG_COLLAPSE_SPACES | FLAG_NUKE_CONTROLS);
|
|
90
|
-
*
|
|
91
157
|
* @param {string} text - Text to clean; any non-string value (including null/undefined) throws a TypeError.
|
|
92
|
-
* @param {boolean} [
|
|
93
|
-
* @param {boolean} [
|
|
94
|
-
* @param {boolean} [nukeControls=false] - Remove
|
|
95
|
-
* @param {boolean} [
|
|
96
|
-
* @param {boolean} [
|
|
158
|
+
* @param {boolean} [purgeInvisibleChars=false] - Remove ZWSP, NBSP, bidi, etc.
|
|
159
|
+
* @param {boolean} [purgeEmojis=false] - Remove emoji characters entirely.
|
|
160
|
+
* @param {boolean} [nukeControls=false] - Remove non-whitespace control characters.
|
|
161
|
+
* @param {boolean} [keyboardOnlyFilter=false] - Keep only printable ASCII, plus emojis unless `purgeEmojis` is set.
|
|
162
|
+
* @param {boolean} [normalizeNewlines=false] - Convert all newlines to `\n`.
|
|
163
|
+
* @param {boolean} [trimSpacesAroundNewlines=false] - Remove spaces/tabs around newlines.
|
|
164
|
+
* @param {boolean} [collapseNewLines=false] - Collapse `\n` runs (optionally preserve paragraphs).
|
|
165
|
+
* @param {boolean} [preserveParagraphs=false] - Preserve paragraph breaks when collapsing newlines.
|
|
166
|
+
* @param {boolean} [collapseSpaces=false] - Collapse multiple spaces into one.
|
|
167
|
+
* @param {boolean} [finalTrim=false] - `.trim()` the final output (head/tail).
|
|
97
168
|
* @returns {string}
|
|
98
169
|
*/
|
|
99
170
|
export function sanctifyText(
|
|
100
171
|
text,
|
|
172
|
+
purgeInvisibleChars = false,
|
|
173
|
+
purgeEmojis = false,
|
|
174
|
+
nukeControls = false,
|
|
175
|
+
keyboardOnlyFilter = false,
|
|
176
|
+
normalizeNewlines = false,
|
|
177
|
+
trimSpacesAroundNewlines = false,
|
|
178
|
+
collapseNewLines = false,
|
|
101
179
|
preserveParagraphs = false,
|
|
102
180
|
collapseSpaces = false,
|
|
103
|
-
|
|
104
|
-
purgeEmojis = false,
|
|
105
|
-
keyboardOnlyFilter = false
|
|
181
|
+
finalTrim = false,
|
|
106
182
|
) {
|
|
107
|
-
|
|
108
183
|
if (typeof text !== 'string') {
|
|
109
184
|
throw new TypeError('sanctifyText expects a string input.');
|
|
110
185
|
}
|
|
111
186
|
|
|
112
187
|
let cleaned = text;
|
|
113
188
|
|
|
114
|
-
//
|
|
115
|
-
cleaned = purgeInvisibleTrash(cleaned);
|
|
189
|
+
// Purge invisible Unicode trash (zero-width, non-breaking, bidi junk, etc.)
|
|
190
|
+
if (purgeInvisibleChars) cleaned = purgeInvisibleTrash(cleaned);
|
|
116
191
|
|
|
117
|
-
//
|
|
118
|
-
if (purgeEmojis)
|
|
119
|
-
cleaned = purgeEmojisCharacters(cleaned);
|
|
120
|
-
}
|
|
192
|
+
// Remove emojis
|
|
193
|
+
if (purgeEmojis) cleaned = purgeEmojisCharacters(cleaned);
|
|
121
194
|
|
|
122
|
-
//
|
|
123
|
-
if (nukeControls)
|
|
124
|
-
cleaned = purgeControlCharacters(cleaned);
|
|
125
|
-
}
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
if (keyboardOnlyFilter) {
|
|
129
|
-
cleaned = purgeNonKeyboardChars(cleaned, purgeEmojis);
|
|
130
|
-
}
|
|
195
|
+
// Nuke control characters (excluding whitespace)
|
|
196
|
+
if (nukeControls) cleaned = purgeControlCharacters(cleaned);
|
|
131
197
|
|
|
198
|
+
// Keep only ASCII/emojis
|
|
199
|
+
if (keyboardOnlyFilter) cleaned = purgeNonKeyboardChars(cleaned, purgeEmojis);
|
|
132
200
|
|
|
133
201
|
// Normalize line endings to Unix style (\n)
|
|
134
|
-
cleaned =
|
|
202
|
+
if (normalizeNewlines) cleaned = normalizeNewlineChars(cleaned);
|
|
135
203
|
|
|
136
204
|
// Remove spaces/tabs around newlines
|
|
137
|
-
cleaned =
|
|
205
|
+
if (trimSpacesAroundNewlines) cleaned = trimSpacesAroundNewlineChars(cleaned);
|
|
138
206
|
|
|
139
207
|
// Collapse excessive newlines, optionally preserving paragraph breaks
|
|
140
|
-
cleaned =
|
|
208
|
+
if (collapseNewLines) cleaned = collapseMultipleNewLines(cleaned, preserveParagraphs);
|
|
141
209
|
|
|
142
|
-
//
|
|
143
|
-
if (collapseSpaces)
|
|
144
|
-
cleaned = collapseExtraSpaces(cleaned);
|
|
145
|
-
}
|
|
210
|
+
// Collapse multiple spaces into a single space
|
|
211
|
+
if (collapseSpaces) cleaned = collapseExtraSpaces(cleaned);
|
|
146
212
|
|
|
147
|
-
// Final trim
|
|
148
|
-
return cleaned.trim();
|
|
213
|
+
// Trim leading/trailing whitespace when finalTrim is set, then return the sanctified text
|
|
214
|
+
return finalTrim ? cleaned.trim() : cleaned;
|
|
149
215
|
}
|
|
150
216
|
|
|
151
217
|
|
|
@@ -268,7 +334,7 @@ function purgeEmojisCharacters(text) {
|
|
|
268
334
|
* @returns {string}
|
|
269
335
|
*/
|
|
270
336
|
const NORMALIZE_NEWLINES_REGEX = /\r\n|\r|\n/g;
|
|
271
|
-
function
|
|
337
|
+
function normalizeNewlineChars(text, normalized = '\n') {
|
|
272
338
|
return text.replace(NORMALIZE_NEWLINES_REGEX, normalized);
|
|
273
339
|
}
|
|
274
340
|
|
|
@@ -284,7 +350,7 @@ function normalizeNewlines(text, normalized = '\n') {
|
|
|
284
350
|
* @returns {string}
|
|
285
351
|
*/
|
|
286
352
|
const TRIM_SPACES_AROUND_NEWLINES_REGEX = /[ \t]*(\n+)[ \t]*/g;
|
|
287
|
-
function
|
|
353
|
+
function trimSpacesAroundNewlineChars(text) {
|
|
288
354
|
return text.replace(TRIM_SPACES_AROUND_NEWLINES_REGEX, '$1');
|
|
289
355
|
}
|
|
290
356
|
|
|
@@ -307,7 +373,7 @@ function trimSpacesAroundNewlines(text) {
|
|
|
307
373
|
const MULTIPLE_NEWLINES_REGEX = /\n{2,}/g;
|
|
308
374
|
const TRIPLE_NEWLINES_REGEX = /\n{3,}/g;
|
|
309
375
|
|
|
310
|
-
function
|
|
376
|
+
function collapseMultipleNewLines(text, preserveParagraphs) {
|
|
311
377
|
return preserveParagraphs
|
|
312
378
|
? text.replace(TRIPLE_NEWLINES_REGEX, '\n\n')
|
|
313
379
|
: text.replace(MULTIPLE_NEWLINES_REGEX, '\n');
|