aslopcleaner 1.0.5 → 1.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +326 -79
- package/dist/cli.mjs +1 -1
- package/dist/index.mjs +1 -1
- package/dist/{scanner-ligLyI2c.mjs → scanner-DAmuBcau.mjs} +35 -40
- package/package.json +3 -3
package/README.md
CHANGED
|
@@ -14,92 +14,339 @@ High-performance CLI to normalize common LLM/AI Unicode punctuation and symbols
|
|
|
14
14
|
|
|
15
15
|
## Default replacements
|
|
16
16
|
|
|
17
|
-
### Dashes /
|
|
18
|
-
|
|
19
|
-
- `—` => `-`
|
|
20
|
-
- `–` => `-`
|
|
21
|
-
- `‒` => `-`
|
|
22
|
-
- `―` => `--`
|
|
23
|
-
- `‐` => `-`
|
|
24
|
-
-
|
|
25
|
-
-
|
|
26
|
-
-
|
|
27
|
-
-
|
|
28
|
-
-
|
|
29
|
-
-
|
|
30
|
-
-
|
|
31
|
-
-
|
|
32
|
-
-
|
|
33
|
-
-
|
|
34
|
-
-
|
|
35
|
-
-
|
|
36
|
-
-
|
|
37
|
-
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
-
|
|
42
|
-
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
-
|
|
47
|
-
-
|
|
48
|
-
-
|
|
49
|
-
-
|
|
50
|
-
-
|
|
51
|
-
-
|
|
52
|
-
-
|
|
53
|
-
-
|
|
54
|
-
-
|
|
55
|
-
-
|
|
56
|
-
-
|
|
57
|
-
-
|
|
58
|
-
-
|
|
59
|
-
-
|
|
60
|
-
-
|
|
61
|
-
-
|
|
62
|
-
-
|
|
63
|
-
-
|
|
17
|
+
### Dashes / hyphens
|
|
18
|
+
|
|
19
|
+
- `—` => `-` (em dash)
|
|
20
|
+
- `–` => `-` (en dash)
|
|
21
|
+
- `‒` => `-` (figure dash)
|
|
22
|
+
- `―` => `--` (horizontal bar)
|
|
23
|
+
- `‐` => `-` (hyphen)
|
|
24
|
+
- `⁃` => `-` (hyphen bullet)
|
|
25
|
+
- `﹘` => `-` (small em dash)
|
|
26
|
+
- `﹣` => ` - ` (small hyphen-minus)
|
|
27
|
+
- `－` => `-` (fullwidth hyphen-minus)
|
|
28
|
+
- `−` => `-` (minus sign)
|
|
29
|
+
- `⎯` => `--` (horizontal line extension)
|
|
30
|
+
- `⏤` => `--` (straight horizontal bar)
|
|
31
|
+
- `─` => `-` (box drawings light horizontal)
|
|
32
|
+
- `━` => `-` (box drawings heavy horizontal)
|
|
33
|
+
- `╴` => `-` (box drawings light left)
|
|
34
|
+
- `╶` => `-` (box drawings light right)
|
|
35
|
+
- `᠆` => `-` (mongolian todo soft hyphen)
|
|
36
|
+
- `֊` => `-` (hebrew maqaf)
|
|
37
|
+
- `゠` => `-` (katakana-hiragana double hyphen)
|
|
38
|
+
|
|
39
|
+
### Arrows
|
|
40
|
+
|
|
41
|
+
- `→` => `=>` (right arrow)
|
|
42
|
+
- `⟶` => `=>` (long right arrow)
|
|
43
|
+
- `➜` => `=>` (heavy right arrow)
|
|
44
|
+
- `➔` => `=>` (black right arrow)
|
|
45
|
+
- `➝` => `=>` (drafting right arrow)
|
|
46
|
+
- `➡` => `=>` (black rightwards arrow)
|
|
47
|
+
- `⇢` => `=>` (rightwards dashed arrow)
|
|
48
|
+
- `⇨` => `=>` (rightwards white arrow)
|
|
49
|
+
- `⇒` => `=>` (double right arrow)
|
|
50
|
+
- `⟹` => `=>` (long double right arrow)
|
|
51
|
+
- `⇛` => `=>` (rightwards triple dash arrow)
|
|
52
|
+
- `←` => `<=` (left arrow)
|
|
53
|
+
- `⟵` => `<=` (long left arrow)
|
|
54
|
+
- `⇐` => `<=` (double left arrow)
|
|
55
|
+
- `⟸` => `<=` (long double left arrow)
|
|
56
|
+
- `⬅` => `<=` (black leftwards arrow)
|
|
57
|
+
- `↔` => `<->` (left right arrow)
|
|
58
|
+
- `⇄` => `<->` (right arrow over left arrow)
|
|
59
|
+
- `⇆` => `<->` (left arrow over right arrow)
|
|
60
|
+
- `⇔` => `<=>` (left right double arrow)
|
|
61
|
+
- `⟷` => `<->` (long left right arrow)
|
|
62
|
+
- `⟺` => `<=>` (long left right double arrow)
|
|
63
|
+
- `↑` => `^` (up arrow)
|
|
64
|
+
- `⇑` => `^` (double up arrow)
|
|
65
|
+
- `↓` => `v` (down arrow)
|
|
66
|
+
- `⇓` => `v` (double down arrow)
|
|
67
|
+
- `↕` => `^v` (up down arrow)
|
|
68
|
+
- `⇕` => `^v` (up down double arrow)
|
|
69
|
+
|
|
70
|
+
### Bullets / markers
|
|
71
|
+
|
|
72
|
+
- `✔` => `[OK]` (heavy check mark)
|
|
73
|
+
- `✅` => `[OK]` (check mark button)
|
|
74
|
+
- `☑` => `[OK]` (ballot box with check)
|
|
75
|
+
- `✓` => `[OK]` (check mark)
|
|
76
|
+
- `🗸` => `[ ]` (light check mark)
|
|
77
|
+
- `✗` => `[X]` (crossed out)
|
|
78
|
+
- `✘` => `[X]` (crossed out bold)
|
|
79
|
+
- `✕` => `x` (multiplication x)
|
|
80
|
+
- `✖` => `x` (heavy multiplication x)
|
|
81
|
+
- `•` => `-` (bullet)
|
|
82
|
+
- `‣` => `->` (triangular bullet)
|
|
83
|
+
- `◦` => `°` (white bullet)
|
|
84
|
+
- `▪` => `[ ]` (small square bullet)
|
|
85
|
+
- `▫` => `[ ]` (small white square)
|
|
86
|
+
- `■` => `[ ]` (black square)
|
|
87
|
+
- `□` => `[ ]` (white square)
|
|
88
|
+
- `▸` => `->` (black right-pointing small triangle)
|
|
89
|
+
- `▹` => `->` (white right-pointing small triangle)
|
|
90
|
+
- `►` => `->` (black right-pointing pointer)
|
|
91
|
+
- `▻` => `->` (white right-pointing pointer)
|
|
92
|
+
- `●` => `-` (black circle bullet)
|
|
93
|
+
- `○` => `-` (white circle bullet)
|
|
94
|
+
- `◉` => `-` (fisheye)
|
|
95
|
+
- `◎` => `-` (bullseye)
|
|
96
|
+
- `◯` => `-` (large circle)
|
|
97
|
+
- `·` => `-` (middle dot)
|
|
98
|
+
- `・` => `-` (katakana middle dot)
|
|
99
|
+
- `∙` => `*` (bullet operator)
|
|
100
|
+
- `⋅` => `*` (dot operator)
|
|
101
|
+
- `‧` => `-` (hyphenation point)
|
|
102
|
+
- `※` => `-` (reference mark)
|
|
103
|
+
- `⁂` => `***` (asterism)
|
|
104
|
+
- `❖` => `<>` (black diamond minus white x)
|
|
105
|
+
- `◆` => `<>` (black diamond)
|
|
106
|
+
- `◇` => `<>` (white diamond)
|
|
107
|
+
- `◈` => `<>` (white diamond containing black small diamond)
|
|
108
|
+
- `❥` => `{>` (rotated heavy black heart bullet)
|
|
109
|
+
|
|
110
|
+
### Quotes / apostrophes / primes
|
|
111
|
+
|
|
112
|
+
- `“` => `"` (left double quote)
|
|
113
|
+
- `”` => `"` (right double quote)
|
|
114
|
+
- `„` => `"` (low double quote)
|
|
115
|
+
- `‟` => `"` (double high-reversed-9 quote)
|
|
116
|
+
- `«` => `"` (left guillemet)
|
|
117
|
+
- `»` => `"` (right guillemet)
|
|
118
|
+
- `‹` => `<` (left single guillemet)
|
|
119
|
+
- `›` => `>` (right single guillemet)
|
|
120
|
+
- `‘` => `'` (left single quote)
|
|
121
|
+
- `’` => `'` (right single quote / apostrophe)
|
|
122
|
+
- `‚` => `'` (low single quote)
|
|
123
|
+
- `‛` => `'` (single high-reversed-9 quote)
|
|
124
|
+
- `❛` => `'` (heavy left single quote ornament)
|
|
125
|
+
- `❜` => `'` (heavy right single quote ornament)
|
|
126
|
+
- `❝` => `"` (heavy left double quote ornament)
|
|
127
|
+
- `❞` => `"` (heavy right double quote ornament)
|
|
128
|
+
- `〝` => `"` (reversed double prime quote)
|
|
129
|
+
- `〞` => `"` (double prime quote)
|
|
130
|
+
- `＂` => `"` (fullwidth quotation mark)
|
|
131
|
+
- `＇` => `'` (fullwidth apostrophe)
|
|
132
|
+
- `′` => `'` (prime)
|
|
133
|
+
- `‵` => `` ` `` (reversed prime)
|
|
134
|
+
- `ʹ` => `'` (modifier letter prime)
|
|
135
|
+
- `ʻ` => `'` (modifier letter turned comma)
|
|
136
|
+
- `ʼ` => `'` (modifier letter apostrophe)
|
|
137
|
+
- `ʽ` => `'` (modifier letter reversed comma)
|
|
138
|
+
- `ʾ` => `'` (modifier letter right half ring)
|
|
139
|
+
- `ʿ` => `'` (modifier letter left half ring)
|
|
140
|
+
- `ˈ` => `'` (modifier letter vertical line)
|
|
141
|
+
- `ˊ` => `'` (modifier letter acute accent)
|
|
142
|
+
- `ˋ` => `` ` `` (modifier letter grave accent)
|
|
143
|
+
- `˴` => `` ` `` (modifier letter middle grave accent)
|
|
144
|
+
- `´` => `'` (acute accent)
|
|
145
|
+
- `″` => `""` (double prime)
|
|
146
|
+
- `‶` => `""` (reversed double prime)
|
|
147
|
+
- `ʺ` => `""` (modifier letter double prime)
|
|
148
|
+
- `‴` => `"""` (triple prime)
|
|
149
|
+
- `⁗` => `""""` (quadruple prime)
|
|
150
|
+
|
|
151
|
+
### Ellipsis / dots
|
|
152
|
+
|
|
153
|
+
- `…` => `...` (ellipsis)
|
|
154
|
+
- `‥` => `..` (two dot leader)
|
|
155
|
+
- `⋯` => `...` (midline horizontal ellipsis)
|
|
156
|
+
- `︙` => `:` (presentation form for vertical horizontal ellipsis)
|
|
157
|
+
- `⋮` => `:` (vertical ellipsis)
|
|
158
|
+
- `⋰` => `...` (up right diagonal ellipsis)
|
|
159
|
+
- `⋱` => `:` (down right diagonal ellipsis)
|
|
160
|
+
|
|
161
|
+
### Math / relations
|
|
162
|
+
|
|
163
|
+
- `≤` => `<=` (less-than-or-equal)
|
|
164
|
+
- `≦` => `<=` (less-than over equal)
|
|
165
|
+
- `⩽` => `<=` (slanted equal to or less-than)
|
|
166
|
+
- `≥` => `>=` (greater-than-or-equal)
|
|
167
|
+
- `≧` => `>=` (greater-than over equal)
|
|
168
|
+
- `⩾` => `>=` (slanted equal to greater-than)
|
|
169
|
+
- `≠` => `!=` (not-equal)
|
|
170
|
+
- `≉` => `!=` (not almost equal)
|
|
171
|
+
- `≈` => `~=` (almost equal)
|
|
172
|
+
- `≃` => `~=` (asymptotically equal)
|
|
173
|
+
- `≅` => `~=` (approximately equal)
|
|
174
|
+
- `∼` => `~` (tilde operator)
|
|
175
|
+
- `∽` => `~` (reversed tilde)
|
|
176
|
+
- `∿` => `~` (sine wave)
|
|
177
|
+
- `˜` => `~` (small tilde)
|
|
178
|
+
- `〜` => `~` (wave dash)
|
|
179
|
+
- `～` => `~` (fullwidth tilde)
|
|
180
|
+
- `≪` => `<<` (much less-than)
|
|
181
|
+
- `≫` => `>>` (much greater-than)
|
|
182
|
+
- `⋘` => `<<` (very much less-than)
|
|
183
|
+
- `⋙` => `>>` (very much greater-than)
|
|
184
|
+
- `¬` => `!` (not sign)
|
|
185
|
+
- `±` => `+/-` (plus-minus)
|
|
186
|
+
- `∓` => `-/+` (minus-or-plus)
|
|
187
|
+
- `×` => `x` (times sign)
|
|
188
|
+
- `÷` => `/` (division sign)
|
|
189
|
+
- `⁄` => `/` (fraction slash)
|
|
190
|
+
- `∕` => `/` (division slash)
|
|
191
|
+
- `∣` => `|` (divides)
|
|
192
|
+
- `∥` => `||` (parallel to)
|
|
193
|
+
- `¦` => `|` (broken bar)
|
|
194
|
+
- `‖` => `||` (double vertical line)
|
|
195
|
+
- `∧` => `^` (logical and)
|
|
196
|
+
- `∨` => `v` (logical or)
|
|
197
|
+
- `⊕` => `+` (circled plus)
|
|
198
|
+
- `⊗` => `*` (circled times)
|
|
199
|
+
- `⊙` => `.` (circled dot)
|
|
200
|
+
- `√` => `sqrt` (square root)
|
|
201
|
+
- `∛` => `cuberoot` (cube root)
|
|
202
|
+
- `∞` => `inf` (infinity)
|
|
203
|
+
- `∈` => `in` (element of)
|
|
204
|
+
- `∉` => `not in` (not element of)
|
|
205
|
+
- `∅` => `{}` (empty set)
|
|
206
|
+
- `∩` => `cap` (intersection)
|
|
207
|
+
- `∪` => `cup` (union)
|
|
208
|
+
- `⊂` => `<` (subset of)
|
|
209
|
+
- `⊃` => `>` (superset of)
|
|
210
|
+
- `⊆` => `<=` (subset of or equal)
|
|
211
|
+
- `⊇` => `>=` (superset of or equal)
|
|
212
|
+
- `∴` => `therefore` (therefore)
|
|
213
|
+
- `∵` => `because` (because)
|
|
214
|
+
|
|
215
|
+
### Fractions
|
|
216
|
+
|
|
217
|
+
- `¼` => `1/4` (one quarter)
|
|
218
|
+
- `½` => `1/2` (one half)
|
|
219
|
+
- `¾` => `3/4` (three quarters)
|
|
220
|
+
- `⅐` => `1/7` (one seventh)
|
|
221
|
+
- `⅑` => `1/9` (one ninth)
|
|
222
|
+
- `⅒` => `1/10` (one tenth)
|
|
223
|
+
- `⅓` => `1/3` (one third)
|
|
224
|
+
- `⅔` => `2/3` (two thirds)
|
|
225
|
+
- `⅕` => `1/5` (one fifth)
|
|
226
|
+
- `⅖` => `2/5` (two fifths)
|
|
227
|
+
- `⅗` => `3/5` (three fifths)
|
|
228
|
+
- `⅘` => `4/5` (four fifths)
|
|
229
|
+
- `⅙` => `1/6` (one sixth)
|
|
230
|
+
- `⅚` => `5/6` (five sixths)
|
|
231
|
+
- `⅛` => `1/8` (one eighth)
|
|
232
|
+
- `⅜` => `3/8` (three eighths)
|
|
233
|
+
- `⅝` => `5/8` (five eighths)
|
|
234
|
+
- `⅞` => `7/8` (seven eighths)
|
|
235
|
+
|
|
236
|
+
### Symbols
|
|
237
|
+
|
|
238
|
+
- `©` => `(C)` (copyright sign)
|
|
239
|
+
- `®` => `(R)` (registered sign)
|
|
240
|
+
- `℗` => `(P)` (sound recording copyright)
|
|
241
|
+
- `™` => `TM` (trade mark sign)
|
|
242
|
+
- `℠` => `SM` (service mark)
|
|
243
|
+
- `°` => ` deg` (degree sign)
|
|
244
|
+
- `℃` => ` degC` (degree celsius)
|
|
245
|
+
- `℉` => ` degF` (degree fahrenheit)
|
|
246
|
+
- `№` => `No.` (numero sign)
|
|
247
|
+
- `ª` => `a` (feminine ordinal indicator)
|
|
248
|
+
- `º` => `o` (masculine ordinal indicator)
|
|
249
|
+
- `‰` => `permille` (per mille sign)
|
|
250
|
+
- `‱` => `permyriad` (per ten thousand sign)
|
|
251
|
+
|
|
252
|
+
### Spaces / invisibles / separators
|
|
253
|
+
|
|
254
|
+
- NBSP, ogham space mark, en/em quad/space, thin space, hair space, etc. => regular space
|
|
255
|
+
- Soft hyphen, zero-width space/joiner/non-joiner, word joiner, BOM, etc. => removed
|
|
256
|
+
- Line separator, paragraph separator, vertical tab, form feed => newline(s)
|
|
257
|
+
|
|
258
|
+
### Slashes / bars
|
|
259
|
+
|
|
260
|
+
- `／` => `/` (fullwidth solidus)
|
|
261
|
+
- `⧸` => `/` (big solidus)
|
|
262
|
+
- `╱` => `/` (box drawings diagonal)
|
|
263
|
+
- `⟋` => `/` (mathematical rising diagonal)
|
|
264
|
+
- `＼` => `\` (fullwidth reverse solidus)
|
|
265
|
+
- `∖` => `\` (set minus)
|
|
266
|
+
- `⧵` => `\` (reverse solidus operator)
|
|
267
|
+
- `⟍` => `\` (mathematical falling diagonal)
|
|
268
|
+
- `╲` => `\` (box drawings diagonal)
|
|
269
|
+
- `｜` => `|` (fullwidth vertical line)
|
|
270
|
+
- `ǀ` => `|` (latin letter dental click)
|
|
271
|
+
- `︱` => `|` (presentation form for vertical em dash)
|
|
272
|
+
- `│` => `|` (box drawings light vertical)
|
|
273
|
+
- `┃` => `|` (box drawings heavy vertical)
|
|
274
|
+
- `┆` => `|` (box drawings light triple dash vertical)
|
|
275
|
+
- `┊` => `|` (box drawings light quadruple dash vertical)
|
|
276
|
+
- `╎` => `|` (box drawings light double dash vertical)
|
|
277
|
+
- `╏` => `|` (box drawings heavy double dash vertical)
|
|
278
|
+
|
|
279
|
+
### Punctuation
|
|
280
|
+
|
|
281
|
+
- `：` => `:` (fullwidth colon)
|
|
282
|
+
- `∶` => `:` (ratio)
|
|
283
|
+
- `ː` => `:` (modifier letter triangular colon)
|
|
284
|
+
- `꞉` => `:` (modifier letter colon)
|
|
285
|
+
- `；` => `;` (fullwidth semicolon)
|
|
286
|
+
- `，` => `,` (fullwidth comma)
|
|
287
|
+
- `、` => `,` (ideographic comma)
|
|
288
|
+
- `﹐` => `,` (small comma)
|
|
289
|
+
- `﹑` => `,` (small ideographic comma)
|
|
290
|
+
- `．` => `.` (fullwidth full stop)
|
|
291
|
+
- `。` => `.` (ideographic full stop)
|
|
292
|
+
- `｡` => `.` (halfwidth ideographic full stop)
|
|
293
|
+
- `！` => `!` (fullwidth exclamation mark)
|
|
294
|
+
- `‼` => `!!` (double exclamation mark)
|
|
295
|
+
- `⁉` => `?!` (exclamation question mark)
|
|
296
|
+
- `？` => `?` (fullwidth question mark)
|
|
297
|
+
- `⁇` => `??` (double question mark)
|
|
298
|
+
- `⁈` => `!?` (question exclamation mark)
|
|
299
|
+
|
|
300
|
+
### Brackets
|
|
301
|
+
|
|
302
|
+
- `（` / `）` => `(` / `)` (fullwidth parentheses)
|
|
303
|
+
- `［` / `］` => `[` / `]` (fullwidth square brackets)
|
|
304
|
+
- `｛` / `｝` => `{` / `}` (fullwidth curly brackets)
|
|
305
|
+
- `〈` / `〉` => `<` / `>` (angle brackets)
|
|
306
|
+
- `《` / `》` => `<<` / `>>` (double angle brackets)
|
|
307
|
+
- `⟨` / `⟩` => `<` / `>` (mathematical angle brackets)
|
|
308
|
+
- `「` / `」` => `[` / `]` (corner brackets)
|
|
309
|
+
- `『` / `』` => `[` / `]` (white corner brackets)
|
|
310
|
+
- `【` / `】` => `[` / `]` (black lenticular brackets)
|
|
311
|
+
- `〔` / `〕` => `[` / `]` (tortoise shell brackets)
|
|
312
|
+
- `〖` / `〗` => `[` / `]` (white lenticular brackets)
|
|
313
|
+
- `〘` / `〙` => `[` / `]` (white tortoise shell brackets)
|
|
314
|
+
- `〚` / `〛` => `[` / `]` (white square brackets)
|
|
315
|
+
|
|
316
|
+
### Misc ASCII lookalikes
|
|
317
|
+
|
|
318
|
+
- `＆` => `&` (fullwidth ampersand)
|
|
319
|
+
- `＊` => `*` (fullwidth asterisk)
|
|
320
|
+
- `＿` => `_` (fullwidth low line)
|
|
321
|
+
- `‗` => `_` (double low line)
|
|
322
|
+
- `＾` => `^` (fullwidth circumflex accent)
|
|
323
|
+
- `‸` => `^` (caret insertion point)
|
|
324
|
+
- `％` => `%` (fullwidth percent)
|
|
325
|
+
- `＋` => `+` (fullwidth plus)
|
|
326
|
+
- `＝` => `=` (fullwidth equals)
|
|
327
|
+
- `═` => `=` (box drawings double horizontal)
|
|
328
|
+
- `╬` => `+` (box drawings double horizontal and vertical)
|
|
329
|
+
- `╪` => `+` (box drawings double vertical and horizontal)
|
|
330
|
+
- `╫` => `+` (box drawings vertical double and horizontal)
|
|
331
|
+
- `╩` => `+` (box drawings double up and horizontal)
|
|
332
|
+
- `╨` => `+` (box drawings double down and horizontal)
|
|
333
|
+
- `╂` => `+` (box drawings light vertical and horizontal)
|
|
334
|
+
- `╋` => `+` (box drawings heavy vertical and horizontal)
|
|
64
335
|
|
|
65
336
|
## Run it
|
|
66
337
|
|
|
67
|
-
### NPM
|
|
68
|
-
|
|
69
|
-
```bash
|
|
70
|
-
npx aslopcleaner
|
|
71
|
-
```
|
|
72
|
-
|
|
73
|
-
### Bun
|
|
74
|
-
|
|
75
|
-
```bash
|
|
76
|
-
bunx aslopcleaner
|
|
77
|
-
```
|
|
78
|
-
|
|
79
|
-
### PNPM
|
|
80
|
-
|
|
81
338
|
```bash
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
```bash
|
|
88
|
-
yarn dlx aslopcleaner
|
|
339
|
+
bunx aslopcleaner # Bun
|
|
340
|
+
npx aslopcleaner # npm
|
|
341
|
+
pnpx aslopcleaner # pnpm
|
|
342
|
+
yarn dlx aslopcleaner # Yarn
|
|
89
343
|
```
|
|
90
344
|
|
|
91
345
|
### Local testing
|
|
92
346
|
|
|
93
|
-
#### Node
|
|
94
|
-
|
|
95
|
-
```bash
|
|
96
|
-
node dist/cli.mjs
|
|
97
|
-
```
|
|
98
|
-
|
|
99
|
-
### Bun
|
|
100
|
-
|
|
101
347
|
```bash
|
|
102
|
-
|
|
348
|
+
node dist/cli.mjs # Node
|
|
349
|
+
bun run dist/cli.mjs # Bun
|
|
103
350
|
```
|
|
104
351
|
|
|
105
352
|
## Library usage
|
|
@@ -108,9 +355,9 @@ You can also import `aslopcleaner` as a library to integrate Unicode normalizati
|
|
|
108
355
|
|
|
109
356
|
```bash
|
|
110
357
|
bun add aslopcleaner
|
|
111
|
-
pnpm
|
|
358
|
+
pnpm add aslopcleaner
|
|
112
359
|
yarn add aslopcleaner
|
|
113
|
-
npm
|
|
360
|
+
npm add aslopcleaner
|
|
114
361
|
```
|
|
115
362
|
|
|
116
363
|
```ts
|
package/dist/cli.mjs
CHANGED
|
@@ -3,7 +3,7 @@ import path from 'node:path';
|
|
|
3
3
|
import process from 'node:process';
|
|
4
4
|
import { readFile, writeFile } from 'node:fs/promises';
|
|
5
5
|
import * as readline from 'node:readline/promises';
|
|
6
|
-
import { R as REPLACEMENT_RULES, s as scanDirectory, d as shouldSkipSensitivePath, b as applyOccurrences, c as countByMatch, a as REPLACEMENT_RULE_MAP } from './scanner-ligLyI2c.mjs';
|
|
6
|
+
import { R as REPLACEMENT_RULES, s as scanDirectory, d as shouldSkipSensitivePath, b as applyOccurrences, c as countByMatch, a as REPLACEMENT_RULE_MAP } from './scanner-DAmuBcau.mjs';
|
|
7
7
|
import 'fast-glob';
|
|
8
8
|
|
|
9
9
|
function parseArgs(argv) {
|
package/dist/index.mjs
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
export { F as FAST_GLOB_IGNORE_PATTERNS, M as MAX_FILE_SIZE_BYTES, R as REPLACEMENT_RULES, a as REPLACEMENT_RULE_MAP, b as applyOccurrences, c as countByMatch, f as findOccurrences, i as isProbablyBinary, n as normalizeGlobPath, s as scanDirectory, d as shouldSkipSensitivePath } from './scanner-ligLyI2c.mjs';
|
|
2
|
+
export { F as FAST_GLOB_IGNORE_PATTERNS, M as MAX_FILE_SIZE_BYTES, R as REPLACEMENT_RULES, a as REPLACEMENT_RULE_MAP, b as applyOccurrences, c as countByMatch, f as findOccurrences, i as isProbablyBinary, n as normalizeGlobPath, s as scanDirectory, d as shouldSkipSensitivePath } from './scanner-DAmuBcau.mjs';
|
|
3
3
|
import 'fast-glob';
|
|
4
4
|
import 'node:path';
|
|
5
5
|
import 'node:fs/promises';
|
|
@@ -173,10 +173,10 @@ const REPLACEMENT_RULES = [
|
|
|
173
173
|
{ match: "\u2010", replacement: "-", description: "hyphen" },
|
|
174
174
|
{ match: "\u2043", replacement: "-", description: "hyphen bullet" },
|
|
175
175
|
{ match: "\uFE58", replacement: "-", description: "small em dash" },
|
|
176
|
-
{ match: "\uFE63", replacement: "-", description: "small hyphen-minus" },
|
|
176
|
+
{ match: "\uFE63", replacement: " - ", description: "small hyphen-minus" },
|
|
177
177
|
{ match: "\uFF0D", replacement: "-", description: "fullwidth hyphen-minus" },
|
|
178
178
|
{ match: "\u2212", replacement: "-", description: "minus sign" },
|
|
179
|
-
{ match: "\u23AF", replacement: "
|
|
179
|
+
{ match: "\u23AF", replacement: "--", description: "horizontal line extension" },
|
|
180
180
|
{ match: "\u23E4", replacement: "--", description: "straight horizontal bar" },
|
|
181
181
|
{
|
|
182
182
|
match: "\u2500",
|
|
@@ -237,40 +237,40 @@ const REPLACEMENT_RULES = [
|
|
|
237
237
|
description: "long left right double arrow"
|
|
238
238
|
},
|
|
239
239
|
{ match: "\u2191", replacement: "^", description: "up arrow" },
|
|
240
|
-
{ match: "\u21D1", replacement: "
|
|
240
|
+
{ match: "\u21D1", replacement: "^", description: "double up arrow" },
|
|
241
241
|
{ match: "\u2193", replacement: "v", description: "down arrow" },
|
|
242
|
-
{ match: "\u21D3", replacement: "
|
|
242
|
+
{ match: "\u21D3", replacement: "v", description: "double down arrow" },
|
|
243
243
|
{ match: "\u2195", replacement: "^v", description: "up down arrow" },
|
|
244
|
-
{ match: "\u21D5", replacement: "
|
|
244
|
+
{ match: "\u21D5", replacement: "^v", description: "up down double arrow" },
|
|
245
245
|
// Bullets / markers
|
|
246
|
-
{ match: "\u2714", replacement: "
|
|
247
|
-
{ match: "\u2705", replacement: "
|
|
248
|
-
{ match: "\u2611", replacement: "
|
|
249
|
-
{ match: "\u2713", replacement: "
|
|
250
|
-
{ match: "\u{1F5F8}", replacement: "
|
|
251
|
-
{ match: "\u2717", replacement: "
|
|
252
|
-
{ match: "\u2718", replacement: "
|
|
246
|
+
{ match: "\u2714", replacement: "[OK]", description: "heavy check mark" },
|
|
247
|
+
{ match: "\u2705", replacement: "[OK]", description: "check mark button" },
|
|
248
|
+
{ match: "\u2611", replacement: "[OK]", description: "ballot box with check" },
|
|
249
|
+
{ match: "\u2713", replacement: "[OK]", description: "check mark" },
|
|
250
|
+
{ match: "\u{1F5F8}", replacement: "[ ]", description: "light check mark" },
|
|
251
|
+
{ match: "\u2717", replacement: "[X]", description: "crossed out" },
|
|
252
|
+
{ match: "\u2718", replacement: "[X]", description: "crossed out bold" },
|
|
253
253
|
{ match: "\u2715", replacement: "x", description: "multiplication x" },
|
|
254
254
|
{ match: "\u2716", replacement: "x", description: "heavy multiplication x" },
|
|
255
255
|
{ match: "\u2022", replacement: "-", description: "bullet" },
|
|
256
|
-
{ match: "\u2023", replacement: "
|
|
257
|
-
{ match: "\u25E6", replacement: "
|
|
258
|
-
{ match: "\u25AA", replacement: "
|
|
259
|
-
{ match: "\u25AB", replacement: "
|
|
260
|
-
{ match: "\u25A0", replacement: "
|
|
261
|
-
{ match: "\u25A1", replacement: "
|
|
256
|
+
{ match: "\u2023", replacement: "->", description: "triangular bullet" },
|
|
257
|
+
{ match: "\u25E6", replacement: "\xB0", description: "white bullet" },
|
|
258
|
+
{ match: "\u25AA", replacement: "[ ]", description: "small square bullet" },
|
|
259
|
+
{ match: "\u25AB", replacement: "[ ]", description: "small white square" },
|
|
260
|
+
{ match: "\u25A0", replacement: "[ ]", description: "black square" },
|
|
261
|
+
{ match: "\u25A1", replacement: "[ ]", description: "white square" },
|
|
262
262
|
{
|
|
263
263
|
match: "\u25B8",
|
|
264
|
-
replacement: "
|
|
264
|
+
replacement: "->",
|
|
265
265
|
description: "black right-pointing small triangle"
|
|
266
266
|
},
|
|
267
267
|
{
|
|
268
268
|
match: "\u25B9",
|
|
269
|
-
replacement: "
|
|
269
|
+
replacement: "->",
|
|
270
270
|
description: "white right-pointing small triangle"
|
|
271
271
|
},
|
|
272
|
-
{ match: "\u25BA", replacement: "
|
|
273
|
-
{ match: "\u25BB", replacement: "
|
|
272
|
+
{ match: "\u25BA", replacement: "->", description: "black right-pointing pointer" },
|
|
273
|
+
{ match: "\u25BB", replacement: "->", description: "white right-pointing pointer" },
|
|
274
274
|
{ match: "\u25CF", replacement: "-", description: "black circle bullet" },
|
|
275
275
|
{ match: "\u25CB", replacement: "-", description: "white circle bullet" },
|
|
276
276
|
{ match: "\u25C9", replacement: "-", description: "fisheye" },
|
|
@@ -283,17 +283,17 @@ const REPLACEMENT_RULES = [
|
|
|
283
283
|
{ match: "\u2027", replacement: "-", description: "hyphenation point" },
|
|
284
284
|
{ match: "\u203B", replacement: "-", description: "reference mark" },
|
|
285
285
|
{ match: "\u2042", replacement: "***", description: "asterism" },
|
|
286
|
-
{ match: "\u2756", replacement: "
|
|
287
|
-
{ match: "\u25C6", replacement: "
|
|
288
|
-
{ match: "\u25C7", replacement: "
|
|
286
|
+
{ match: "\u2756", replacement: "<>", description: "black diamond minus white x" },
|
|
287
|
+
{ match: "\u25C6", replacement: "<>", description: "black diamond" },
|
|
288
|
+
{ match: "\u25C7", replacement: "<>", description: "white diamond" },
|
|
289
289
|
{
|
|
290
290
|
match: "\u25C8",
|
|
291
|
-
replacement: "
|
|
291
|
+
replacement: "<>",
|
|
292
292
|
description: "white diamond containing black small diamond"
|
|
293
293
|
},
|
|
294
294
|
{
|
|
295
295
|
match: "\u2765",
|
|
296
|
-
replacement: "
|
|
296
|
+
replacement: "{>",
|
|
297
297
|
description: "rotated heavy black heart bullet"
|
|
298
298
|
},
|
|
299
299
|
// Quotes / apostrophes / primes
|
|
@@ -303,8 +303,8 @@ const REPLACEMENT_RULES = [
|
|
|
303
303
|
{ match: "\u201F", replacement: '"', description: "double high-reversed-9 quote" },
|
|
304
304
|
{ match: "\xAB", replacement: '"', description: "left guillemet" },
|
|
305
305
|
{ match: "\xBB", replacement: '"', description: "right guillemet" },
|
|
306
|
-
{ match: "\u2039", replacement: "
|
|
307
|
-
{ match: "\u203A", replacement: "
|
|
306
|
+
{ match: "\u2039", replacement: "<", description: "left single guillemet" },
|
|
307
|
+
{ match: "\u203A", replacement: ">", description: "right single guillemet" },
|
|
308
308
|
{ match: "\u2018", replacement: "'", description: "left single quote" },
|
|
309
309
|
{
|
|
310
310
|
match: "\u2019",
|
|
@@ -389,14 +389,14 @@ const REPLACEMENT_RULES = [
|
|
|
389
389
|
},
|
|
390
390
|
{
|
|
391
391
|
match: "\uFE19",
|
|
392
|
-
replacement: "
|
|
392
|
+
replacement: ":",
|
|
393
393
|
description: "presentation form for vertical horizontal ellipsis"
|
|
394
394
|
},
|
|
395
395
|
{ match: "\u22EE", replacement: ":", description: "vertical ellipsis" },
|
|
396
396
|
{ match: "\u22F0", replacement: "...", description: "up right diagonal ellipsis" },
|
|
397
397
|
{
|
|
398
398
|
match: "\u22F1",
|
|
399
|
-
replacement: "
|
|
399
|
+
replacement: ":",
|
|
400
400
|
description: "down right diagonal ellipsis"
|
|
401
401
|
},
|
|
402
402
|
// Math / relations
|
|
@@ -432,7 +432,7 @@ const REPLACEMENT_RULES = [
|
|
|
432
432
|
{ match: "\xAC", replacement: "!", description: "not sign" },
|
|
433
433
|
{ match: "\xB1", replacement: "+/-", description: "plus-minus" },
|
|
434
434
|
{ match: "\u2213", replacement: "-/+", description: "minus-or-plus" },
|
|
435
|
-
{ match: "\xD7", replacement: "
|
|
435
|
+
{ match: "\xD7", replacement: "x", description: "times sign" },
|
|
436
436
|
{ match: "\xF7", replacement: "/", description: "division sign" },
|
|
437
437
|
{ match: "\u2044", replacement: "/", description: "fraction slash" },
|
|
438
438
|
{ match: "\u2215", replacement: "/", description: "division slash" },
|
|
@@ -515,9 +515,9 @@ const REPLACEMENT_RULES = [
|
|
|
515
515
|
description: "vulgar fraction seven eighths"
|
|
516
516
|
},
|
|
517
517
|
// Symbols
|
|
518
|
-
{ match: "\xA9", replacement: "(
|
|
519
|
-
{ match: "\xAE", replacement: "(
|
|
520
|
-
{ match: "\u2117", replacement: "(
|
|
518
|
+
{ match: "\xA9", replacement: "(C)", description: "copyright sign" },
|
|
519
|
+
{ match: "\xAE", replacement: "(R)", description: "registered sign" },
|
|
520
|
+
{ match: "\u2117", replacement: "(P)", description: "sound recording copyright" },
|
|
521
521
|
{ match: "\u2122", replacement: "TM", description: "trade mark sign" },
|
|
522
522
|
{ match: "\u2120", replacement: "SM", description: "service mark" },
|
|
523
523
|
{ match: "\xB0", replacement: " deg", description: "degree sign" },
|
|
@@ -595,11 +595,6 @@ const REPLACEMENT_RULES = [
|
|
|
595
595
|
replacement: "",
|
|
596
596
|
description: "mongolian vowel separator"
|
|
597
597
|
},
|
|
598
|
-
{
|
|
599
|
-
match: "\r\n",
|
|
600
|
-
replacement: "\n",
|
|
601
|
-
description: "carriage return + line feed"
|
|
602
|
-
},
|
|
603
598
|
{ match: "\u2028", replacement: "\n", description: "line separator" },
|
|
604
599
|
{ match: "\u2029", replacement: "\n\n", description: "paragraph separator" },
|
|
605
600
|
{ match: "\v", replacement: "\n", description: "vertical tab" },
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "aslopcleaner",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.7",
|
|
4
4
|
"description": "High-performance CLI to replace common LLM/AI Unicode punctuation and symbols with ASCII equivalents.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -51,9 +51,9 @@
|
|
|
51
51
|
},
|
|
52
52
|
"devDependencies": {
|
|
53
53
|
"@types/node": "^25.5.0",
|
|
54
|
-
"@vitest/coverage-v8": "^
|
|
54
|
+
"@vitest/coverage-v8": "^4.1.2",
|
|
55
55
|
"pkgroll": "^2.11.3",
|
|
56
56
|
"tsx": "^4.19.4",
|
|
57
|
-
"vitest": "^
|
|
57
|
+
"vitest": "^4.1.2"
|
|
58
58
|
}
|
|
59
59
|
}
|