aslopcleaner 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2019 - 2026 Aron Homberg
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,154 @@
1
+ # aslopcleaner
2
+
3
+ High-performance CLI to normalize common LLM/AI Unicode punctuation and symbols into plain ASCII.
4
+
5
+ ## What it does
6
+
7
+ - Recursively scans the current directory.
8
+ - Uses `fast-glob` to skip expensive third-party/build/cache directories early.
9
+ - Never opens `.env*`, SSH keys, certificate/key material, or password database files.
10
+ - Skips files larger than **125 KiB**.
11
+ - Uses a jump-sampled binary heuristic before reading as UTF-8.
12
+ - Prompts once per file in interactive mode.
13
+ - Replaces everything automatically in non-interactive mode with `-y`.
14
+
15
+ ## Default replacements
16
+
17
+ ### Dashes / bullets / arrows
18
+
19
+ - `—` => `-`
20
+ - `–` => `-`
21
+ - `‒` => `-`
22
+ - `―` => `--`
23
+ - `‐` => `-`
24
+ - `‑` => `-`
25
+ - `−` => `-`
26
+ - `→` => `=>`
27
+ - `⇒` => `=>`
28
+ - `⟶` => `=>`
29
+ - `➜` => `=>`
30
+ - `➔` => `=>`
31
+ - `➝` => `=>`
32
+ - `✔` => `-`
33
+ - `✅` => `-`
34
+ - `☑` => `-`
35
+ - `✓` => `-`
36
+ - `•` => `-`
37
+ - `‣` => `-`
38
+ - `◦` => `-`
39
+ - `▪` => `-`
40
+ - `·` => `-`
41
+ - `●` => `-`
42
+ - `○` => `-`
43
+
44
+ ### Quotes / punctuation / spacing
45
+
46
+ - `“` => `"`
47
+ - `”` => `"`
48
+ - `„` => `"`
49
+ - `‟` => `"`
50
+ - `«` => `"`
51
+ - `»` => `"`
52
+ - `‹` => `'`
53
+ - `›` => `'`
54
+ - `‘` => `'`
55
+ - `’` => `'`
56
+ - `‚` => `'`
57
+ - `‛` => `'`
58
+ - `…` => `...`
59
+ - `≤` => `<=`
60
+ - `≥` => `>=`
61
+ - `≠` => `!=`
62
+ - NBSP / narrow NBSP / figure space => regular space
63
+ - zero-width space / non-joiner / joiner / word joiner / BOM => removed
64
+
65
+ ## Run it
66
+
67
+ ### NPM
68
+
69
+ ```bash
70
+ npx aslopcleaner
71
+ ```
72
+
73
+ ### Bun
74
+
75
+ ```bash
76
+ bunx aslopcleaner
77
+ ```
78
+
79
+ ### PNPM
80
+
81
+ ```bash
82
+ pnpm dlx aslopcleaner
83
+ ```
84
+
85
+ ### Yarn
86
+
87
+ ```bash
88
+ yarn dlx aslopcleaner
89
+ ```
90
+
91
+ ### Local testing
92
+
93
+ #### Node
94
+
95
+ ```bash
96
+ node dist/cli.mjs
97
+ ```
98
+
99
+ #### Bun
100
+
101
+ ```bash
102
+ bun run dist/cli.mjs
103
+ ```
104
+
105
+ ## Library usage
106
+
107
+ You can also import `aslopcleaner` as a library to integrate Unicode normalization into your own tools:
108
+
109
+ ```bash
110
+ bun add aslopcleaner
111
+ pnpm install aslopcleaner
112
+ yarn add aslopcleaner
113
+ npm install aslopcleaner
114
+ ```
115
+
116
+ ```ts
117
+ import {
118
+ findOccurrences,
119
+ applyOccurrences,
120
+ countByMatch,
121
+ scanDirectory,
122
+ isProbablyBinary,
123
+ shouldSkipSensitivePath,
124
+ REPLACEMENT_RULES,
125
+ REPLACEMENT_RULE_MAP,
126
+ } from "aslopcleaner";
127
+
128
+ // Scan a directory for files containing Unicode slop
129
+ const { matchesByFile, scannedFiles } = await scanDirectory(process.cwd());
130
+
131
+ // Find occurrences in a string
132
+ const content = '"Hello" → world…';
133
+ const matches = findOccurrences(content, REPLACEMENT_RULES);
134
+
135
+ // Apply replacements
136
+ const cleaned = applyOccurrences(content, matches, REPLACEMENT_RULE_MAP);
137
+ // => '"Hello" => world...'
138
+
139
+ // Count occurrences per symbol
140
+ const counts = countByMatch(matches);
141
+ // => Map { '→' => 1, '…' => 1 }
142
+
143
+ // Check if a file is binary (skip before processing)
144
+ const binary = await isProbablyBinary("image.png"); // true
145
+
146
+ // Check if a path is sensitive (e.g. .env, SSH keys)
147
+ shouldSkipSensitivePath(".env.production"); // true
148
+ ```
149
+
150
+ ### Exported types
151
+
152
+ ```ts
153
+ import type { ReplacementRule, MatchOccurrence, ScanResult } from "aslopcleaner";
154
+ ```
package/dist/cli.mjs ADDED
@@ -0,0 +1,118 @@
1
+ #!/usr/bin/env node
2
+ import path from 'node:path';
3
+ import process from 'node:process';
4
+ import { readFile, writeFile } from 'node:fs/promises';
5
+ import * as readline from 'node:readline/promises';
6
+ import { R as REPLACEMENT_RULES, s as scanDirectory, d as shouldSkipSensitivePath, b as applyOccurrences, c as countByMatch, a as REPLACEMENT_RULE_MAP } from './scanner-ChvwTQMG.mjs';
7
+ import 'fast-glob';
8
+
9
/**
 * Translates raw command-line arguments into the two switches the CLI knows.
 *
 * Arguments other than the recognised flags are ignored.
 *
 * @param {Iterable<string>} argv - Arguments after the runtime and script path.
 * @returns {{ yes: boolean, help: boolean }} `yes` for `-y`/`--yes`, `help` for `-h`/`--help`.
 */
function parseArgs(argv) {
  const supplied = new Set(argv);
  const hasAny = (...aliases) => aliases.some((alias) => supplied.has(alias));
  const yes = hasAny("-y", "--yes");
  const help = hasAny("-h", "--help");
  return { yes, help };
}
16
/**
 * Writes the usage summary for the CLI to standard output.
 */
function printHelp() {
  // The text ends with a newline, so console.log emits one trailing blank line.
  const helpText = `aslopcleaner

Usage:
aslopcleaner [-y]

Options:
-y, --yes Replace every detected occurrence without prompting
-h, --help Show help
`;
  console.log(helpText);
}
27
/**
 * Builds the one-line per-symbol summary shown for a file.
 *
 * Symbols are ordered by descending count, ties broken by `localeCompare`.
 * A symbol with no entry in the replacement map is shown with "?".
 *
 * @param {Array<{ match: string }>} matches - Occurrences found in one file.
 * @returns {string} Comma-separated `"symbol"=>"replacement" xN` items.
 */
function formatCounts(matches) {
  const ranked = Array.from(countByMatch(matches));
  ranked.sort(([symbolA, countA], [symbolB, countB]) => {
    return countB - countA || symbolA.localeCompare(symbolB);
  });
  const labels = [];
  for (const [symbol, count] of ranked) {
    const ascii = REPLACEMENT_RULE_MAP.get(symbol) ?? "?";
    labels.push(`${JSON.stringify(symbol)}=>${JSON.stringify(ascii)} x${count}`);
  }
  return labels.join(", ");
}
35
/**
 * Shows the findings for one file and asks whether to overwrite it.
 *
 * The question is repeated until the trimmed, lower-cased answer is exactly
 * "y" or "n".
 *
 * @param {{ question(prompt: string): Promise<string> }} rl - Prompt interface.
 * @param {string} filePath - Path printed above the summary.
 * @param {Array<{ match: string }>} matches - Occurrences found in the file.
 * @returns {Promise<boolean>} `true` for "y", `false` for "n".
 */
async function promptReplace(rl, filePath, matches) {
  console.log(`\n${filePath}`);
  console.log(` ${matches.length} occurrence(s): ${formatCounts(matches)}`);
  for (;;) {
    const typed = await rl.question(" Replace and overwrite? [y/n] ");
    const reply = typed.trim().toLowerCase();
    if (reply === "y" || reply === "n") {
      return reply === "y";
    }
  }
}
49
/**
 * Applies the recorded occurrences to one file and writes it back if changed.
 *
 * @param {string} root - Directory that `relativePath` is relative to.
 * @param {string} relativePath - File path relative to `root`.
 * @param {Array<{ index: number, match: string }>} matches - Occurrences to apply.
 * @returns {Promise<boolean>} `true` when the file was rewritten; `false` when
 *   the path is sensitive or the content did not change.
 */
async function replaceFile(root, relativePath, matches) {
  // Second line of defence: sensitive files are refused before being opened.
  if (shouldSkipSensitivePath(relativePath)) {
    return false;
  }
  const target = path.join(root, relativePath);
  const before = await readFile(target, "utf8");
  const after = applyOccurrences(before, matches, REPLACEMENT_RULE_MAP);
  const changed = after !== before;
  if (changed) {
    await writeFile(target, after, "utf8");
  }
  return changed;
}
62
/**
 * CLI entry point: scans the working directory, optionally asks per file,
 * rewrites approved files, and prints a summary.
 *
 * Without `-y` both stdin and stdout must be TTYs; otherwise an error is
 * printed and the exit code is set to 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const { yes, help } = parseArgs(process.argv.slice(2));
  if (help) {
    printHelp();
    return;
  }

  const interactiveCapable = process.stdin.isTTY && process.stdout.isTTY;
  if (!yes && !interactiveCapable) {
    console.error("Interactive mode requires a TTY. Use -y for non-interactive replacement.");
    process.exitCode = 1;
    return;
  }

  const cwd = process.cwd();
  console.log(`Scanning ${cwd}`);
  console.log(`Loaded ${REPLACEMENT_RULES.length} ASCII normalization rule(s).`);

  const scan = await scanDirectory(cwd);
  // Stable, path-ordered processing regardless of scan completion order.
  const entries = Array.from(scan.matchesByFile);
  entries.sort(([pathA], [pathB]) => pathA.localeCompare(pathB));
  if (entries.length === 0) {
    console.log("No matching Unicode characters found.");
    return;
  }

  // The prompt interface is only created when the user has to be asked.
  const rl = yes
    ? null
    : readline.createInterface({ input: process.stdin, output: process.stdout });
  let updatedFiles = 0;
  let replacedOccurrences = 0;
  try {
    for (const [filePath, matches] of entries) {
      const approved = yes || (rl !== null && await promptReplace(rl, filePath, matches));
      if (!approved) {
        continue;
      }
      const changed = await replaceFile(cwd, filePath, matches);
      if (changed) {
        updatedFiles += 1;
        replacedOccurrences += matches.length;
        console.log(` updated ${filePath}`);
      }
    }
  } finally {
    await rl?.close();
  }

  const summary = [
    "\nDone.",
    ` files with matches: ${entries.length}`,
    ` files updated: ${updatedFiles}`,
    ` occurrences replaced: ${replacedOccurrences}`,
    ` files scanned after glob filtering: ${scan.scannedFiles}`,
    ` skipped by sensitive path rules: ${scan.skippedBySensitivePattern}`,
    ` skipped by size (>125 KiB): ${scan.skippedBySize}`,
    ` skipped as binary: ${scan.skippedByBinary}`
  ];
  for (const line of summary) {
    console.log(line);
  }
}
114
// Run the CLI; any rejection is printed and turned into exit code 1.
main().then(void 0, (failure) => {
  let report;
  if (failure instanceof Error) {
    report = failure.stack ?? failure.message;
  } else {
    report = String(failure);
  }
  console.error(report);
  process.exitCode = 1;
});
package/dist/index.mjs ADDED
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env node
2
+ export { F as FAST_GLOB_IGNORE_PATTERNS, M as MAX_FILE_SIZE_BYTES, R as REPLACEMENT_RULES, a as REPLACEMENT_RULE_MAP, b as applyOccurrences, c as countByMatch, f as findOccurrences, i as isProbablyBinary, n as normalizeGlobPath, s as scanDirectory, d as shouldSkipSensitivePath } from './scanner-ChvwTQMG.mjs';
3
+ import 'fast-glob';
4
+ import 'node:path';
5
+ import 'node:fs/promises';
package/dist/scanner-ChvwTQMG.mjs ADDED
@@ -0,0 +1,335 @@
1
+ import fg from 'fast-glob';
2
+ import path from 'node:path';
3
+ import { open, stat, readFile } from 'node:fs/promises';
4
+
5
// Directory names that are excluded from the scan wherever they appear.
// Each name is expanded into glob ignore patterns further down in this module.
const SKIPPED_DIRECTORIES = [
  // version control and editor folders
  ".git", ".hg", ".svn", ".idea", ".vscode",
  // dependency stores
  "node_modules", "bower_components", "vendor", ".pnpm-store", ".yarn",
  // tool caches
  ".turbo", ".cache", ".parcel-cache", ".vite", ".rollup.cache",
  ".rspack-cache", ".eslintcache", ".stylelintcache",
  // framework output
  ".next", ".nuxt", ".svelte-kit", ".angular", ".output",
  // generic build output
  "dist", "build", "out", "target", "bin", "obj",
  // coverage and test reports
  "coverage", ".nyc_output", "playwright-report", "test-results",
  // Python environments and caches
  "__pycache__", ".venv", "venv", "env", ".conda",
  ".mypy_cache", ".pytest_cache", ".ruff_cache", ".tox", ".eggs",
  // other toolchain folders
  ".gradle", ".mvn", ".settings", ".bundle", "_build", "deps",
  "dist-newstyle", ".stack-work", "storybook-static",
  // scratch space and logs
  "tmp", "temp", "logs", "log"
];
63
// Lower-case file names that must never be opened, compared against the
// lower-cased basename of a path.
// The SSH entries cover every default OpenSSH identity file name, not only
// RSA and Ed25519, so DSA, ECDSA and security-key identities are protected too.
const SENSITIVE_EXACT_BASENAMES = /* @__PURE__ */ new Set([
  "id_rsa",
  "id_dsa",
  "id_ecdsa",
  "id_ecdsa_sk",
  "id_ed25519",
  "id_ed25519_sk",
  "known_hosts",
  "authorized_keys",
  ".npmrc",
  ".pypirc",
  ".netrc"
]);
72
// Lower-case file-name endings treated as certificate/key material or
// password databases; a basename ending in any of them is never opened.
const SENSITIVE_SUFFIXES = [
  ".pem", ".key", ".p12", ".pfx",
  ".crt", ".cer", ".der", ".csr",
  ".p7b", ".p7c", ".jks", ".keystore",
  ".asc", ".gpg", ".kdbx"
];
89
// Files larger than this many bytes (125 KiB) are skipped by the scanner.
const MAX_FILE_SIZE_BYTES = 1024 * 125;

// Two ignore globs per skipped directory: one anchored at the scan root and
// one matching the directory at any depth.
const FAST_GLOB_IGNORE_PATTERNS = [];
for (const directory of SKIPPED_DIRECTORIES) {
  FAST_GLOB_IGNORE_PATTERNS.push(`${directory}/**`, `**/${directory}/**`);
}
94
/**
 * Decides whether a path names a file that must never be opened.
 *
 * Only the basename is inspected, case-insensitively. A file is sensitive when
 * its name starts with ".env", is one of the exact sensitive basenames, or
 * ends with one of the sensitive suffixes.
 *
 * @param {string} filePath - Relative or absolute file path.
 * @returns {boolean} `true` when the file must be skipped.
 */
function shouldSkipSensitivePath(filePath) {
  const baseName = path.basename(filePath).toLowerCase();
  // The documented contract is `.env*`, so use a prefix match. Checking only
  // ".env" and ".env.<x>" let names such as ".envrc" or ".env-local" through.
  if (baseName.startsWith(".env")) {
    return true;
  }
  if (SENSITIVE_EXACT_BASENAMES.has(baseName)) {
    return true;
  }
  return SENSITIVE_SUFFIXES.some((suffix) => baseName.endsWith(suffix));
}
104
/**
 * Rewrites a path so that it uses "/" between segments.
 *
 * @param {string} filePath - Path using the platform separator (`path.sep`).
 * @returns {string} The same path with every platform separator replaced by "/".
 */
function normalizeGlobPath(filePath) {
  return filePath.replaceAll(path.sep, "/");
}
107
+
108
/**
 * Locates every occurrence of every rule's `match` string in `content`.
 *
 * @param {string} content - Text to search.
 * @param {Iterable<{ match: string }>} rules - Rules whose `match` is searched for.
 * @returns {Array<{ index: number, match: string }>} Occurrences ordered by
 *   position, then by match length for equal positions.
 */
function findOccurrences(content, rules) {
  const matches = [];
  for (const { match } of rules) {
    // An empty (or non-string) pattern matches at every position without ever
    // advancing the cursor, so the search loop below would never terminate.
    if (typeof match !== "string" || match.length === 0) {
      continue;
    }
    let index = content.indexOf(match);
    while (index !== -1) {
      matches.push({ index, match });
      // Resume after the hit so occurrences of one rule never overlap.
      index = content.indexOf(match, index + match.length);
    }
  }
  matches.sort((a, b) => a.index - b.index || a.match.length - b.match.length);
  return matches;
}
123
/**
 * Tallies how often each matched string occurs.
 *
 * @param {Iterable<{ match: string }>} matches - Occurrences to count.
 * @returns {Map<string, number>} Count per matched string, in first-seen order.
 */
function countByMatch(matches) {
  const tally = new Map();
  for (const { match: symbol } of matches) {
    const seenSoFar = tally.get(symbol) ?? 0;
    tally.set(symbol, seenSoFar + 1);
  }
  return tally;
}
130
/**
 * Rebuilds `content` with the given occurrences replaced.
 *
 * Occurrences are expected in ascending `index` order, as `findOccurrences`
 * returns them. An occurrence is left untouched when it
 *   - starts before the end of the previously applied one (overlap),
 *   - has no entry in `replacements`, or
 *   - no longer matches the text found at its index (stale occurrence).
 *
 * @param {string} content - Text to transform.
 * @param {Array<{ index: number, match: string }>} matches - Occurrences to apply.
 * @param {Map<string, string>} replacements - Replacement text per matched string.
 * @returns {string} The transformed text; `content` itself when nothing applies.
 */
function applyOccurrences(content, matches, replacements) {
  if (matches.length === 0) {
    return content;
  }
  const pieces = [];
  let cursor = 0;
  for (const { index, match } of matches) {
    if (index < cursor) {
      continue;
    }
    const replacement = replacements.get(match);
    if (replacement === void 0) {
      continue;
    }
    // Occurrences may come from an earlier read of the file. If the text at
    // the recorded position changed since then, replacing by index would
    // overwrite unrelated characters, so such an occurrence is skipped.
    if (!content.startsWith(match, index)) {
      continue;
    }
    pieces.push(content.slice(cursor, index), replacement);
    cursor = index + match.length;
  }
  pieces.push(content.slice(cursor));
  return pieces.join("");
}
151
+
152
// Rule table as [match, replacement, description] tuples, expanded below into
// `{ match, replacement, description }` objects in the same order.
const REPLACEMENT_RULES = [
  // dashes
  ["\u2014", "-", "em dash"],
  ["\u2013", "-", "en dash"],
  ["\u2012", "-", "figure dash"],
  ["\u2015", "--", "horizontal bar"],
  ["\u2010", "-", "hyphen"],
  ["\u2011", "-", "non-breaking hyphen"],
  ["\u2212", "-", "minus sign"],
  // arrows
  ["\u2192", "=>", "right arrow"],
  ["\u21D2", "=>", "double right arrow"],
  ["\u27F6", "=>", "long right arrow"],
  ["\u279C", "=>", "heavy right arrow"],
  ["\u2794", "=>", "black right arrow"],
  ["\u279D", "=>", "drafting right arrow"],
  // check marks
  ["\u2714", "-", "heavy check mark"],
  ["\u2705", "-", "check mark button"],
  ["\u2611", "-", "ballot box with check"],
  ["\u2713", "-", "check mark"],
  // bullets
  ["\u2022", "-", "bullet"],
  ["\u2023", "-", "triangular bullet"],
  ["\u25E6", "-", "white bullet"],
  ["\u25AA", "-", "small square bullet"],
  ["\u00B7", "-", "middle dot bullet"],
  ["\u25CF", "-", "black circle bullet"],
  ["\u25CB", "-", "white circle bullet"],
  // double quotes
  ["\u201C", '"', "left double quote"],
  ["\u201D", '"', "right double quote"],
  ["\u201E", '"', "low double quote"],
  ["\u201F", '"', "double high-reversed-9 quote"],
  ["\u00AB", '"', "left guillemet"],
  ["\u00BB", '"', "right guillemet"],
  // single quotes
  ["\u2039", "'", "left single guillemet"],
  ["\u203A", "'", "right single guillemet"],
  ["\u2018", "'", "left single quote"],
  ["\u2019", "'", "right single quote / apostrophe"],
  ["\u201A", "'", "low single quote"],
  ["\u201B", "'", "single high-reversed-9 quote"],
  // punctuation and comparison operators
  ["\u2026", "...", "ellipsis"],
  ["\u2264", "<=", "less-than-or-equal"],
  ["\u2265", ">=", "greater-than-or-equal"],
  ["\u2260", "!=", "not-equal"],
  // special spaces become a regular space
  ["\u00A0", " ", "no-break space"],
  ["\u202F", " ", "narrow no-break space"],
  ["\u2007", " ", "figure space"],
  // invisible characters are removed
  ["\u200B", "", "zero-width space"],
  ["\u200C", "", "zero-width non-joiner"],
  ["\u200D", "", "zero-width joiner"],
  ["\u2060", "", "word joiner"],
  ["\uFEFF", "", "byte-order mark"]
].map(([match, replacement, description]) => ({ match, replacement, description }));

// Lookup from matched character to its ASCII replacement.
const REPLACEMENT_RULE_MAP = new Map();
for (const { match, replacement } of REPLACEMENT_RULES) {
  REPLACEMENT_RULE_MAP.set(match, replacement);
}
205
+
206
// Bytes read per sample, maximum number of samples per file, and the share of
// suspicious bytes from which a sample counts as binary.
const SAMPLE_SIZE = 1024;
const MAX_SAMPLES = 5;
const SUSPICIOUS_RATIO = 0.15;

/**
 * Chooses the byte offsets at which a file of `size` bytes is sampled.
 *
 * A file that fits in one sample is read from offset 0 only. Larger files get
 * the first and last possible sample positions plus evenly spaced positions in
 * between; duplicates collapse, so fewer than `MAX_SAMPLES` offsets may result.
 *
 * @param {number} size - File size in bytes.
 * @returns {number[]} Distinct offsets in ascending order.
 */
function getSampleOffsets(size) {
  if (size <= SAMPLE_SIZE) {
    return [0];
  }
  const lastOffset = Math.max(0, size - SAMPLE_SIZE);
  const steps = MAX_SAMPLES - 1;
  const interior = Array.from(
    { length: Math.max(0, steps - 1) },
    (_, position) => Math.floor(lastOffset * (position + 1) / steps)
  );
  const distinct = new Set([0, lastOffset, ...interior]);
  return Array.from(distinct).sort((a, b) => a - b);
}
220
/**
 * Rates how binary a sample of bytes looks.
 *
 * A NUL byte makes the sample fully suspicious. Otherwise the score is the
 * share of bytes that are DEL (127) or control characters below 32 other than
 * tab (9), line feed (10), form feed (12) and carriage return (13).
 *
 * @param {Iterable<number>} buffer - Byte values to inspect.
 * @returns {number} Score between 0 and 1; 0 for an empty sample.
 */
function scoreSample(buffer) {
  let flagged = 0;
  let inspected = 0;
  for (const value of buffer) {
    if (value === 0) {
      return 1;
    }
    inspected += 1;
    const isTextControl = value === 9 || value === 10 || value === 12 || value === 13;
    const isControl = value < 32 || value === 127;
    if (isControl && !isTextControl) {
      flagged += 1;
    }
  }
  if (inspected === 0) {
    return 0;
  }
  return flagged / inspected;
}
235
/**
 * Samples a file at a few offsets and reports whether any sample looks binary.
 *
 * @param {string} filePath - Path of the file to open for reading.
 * @returns {Promise<boolean>} `true` as soon as one sample scores at or above
 *   `SUSPICIOUS_RATIO`; `false` when no sample does.
 */
async function isProbablyBinary(filePath) {
  const handle = await open(filePath, "r");
  try {
    const { size } = await handle.stat();
    // One scratch buffer is reused; only the bytes actually read are scored.
    const scratch = Buffer.allocUnsafe(SAMPLE_SIZE);
    for (const offset of getSampleOffsets(size)) {
      const { bytesRead } = await handle.read(scratch, 0, SAMPLE_SIZE, offset);
      if (bytesRead !== 0 && scoreSample(scratch.subarray(0, bytesRead)) >= SUSPICIOUS_RATIO) {
        return true;
      }
    }
    return false;
  } finally {
    await handle.close();
  }
}
256
+
257
// Upper bound on files inspected at the same time during a scan.
const DEFAULT_CONCURRENCY = 64;

/**
 * Inspects one file and reports either why it was skipped or what was found.
 *
 * Checks run from cheapest to most expensive: sensitive name, size limit,
 * binary heuristic, and only then a full UTF-8 read.
 *
 * @param {string} cwd - Directory that `relativePath` is relative to.
 * @param {string} relativePath - File path relative to `cwd`.
 * @returns {Promise<{ kind: string } | { path: string, matches: Array }>}
 *   `{ kind }` with "sensitive", "size", "binary" or "none", or the normalized
 *   path together with its occurrences.
 */
async function processFile(cwd, relativePath) {
  const normalizedPath = normalizeGlobPath(relativePath);
  if (shouldSkipSensitivePath(normalizedPath)) {
    return { kind: "sensitive" };
  }

  const absolutePath = path.join(cwd, relativePath);
  const { size } = await stat(absolutePath);
  if (size > MAX_FILE_SIZE_BYTES) {
    return { kind: "size" };
  }

  const looksBinary = await isProbablyBinary(absolutePath);
  if (looksBinary) {
    return { kind: "binary" };
  }

  const text = await readFile(absolutePath, "utf8");
  const matches = findOccurrences(text, REPLACEMENT_RULES);
  return matches.length === 0
    ? { kind: "none" }
    : { path: normalizedPath, matches };
}
281
/**
 * Walks `cwd` and collects the occurrences found in every eligible file.
 *
 * Files are streamed from fast-glob and inspected concurrently, with at most
 * `DEFAULT_CONCURRENCY` inspections in flight. A file whose inspection throws
 * is counted as scanned but otherwise ignored.
 *
 * @param {string} cwd - Directory to scan.
 * @returns {Promise<{
 *   matchesByFile: Map<string, Array>,
 *   scannedFiles: number,
 *   skippedByGlob: number,
 *   skippedBySensitivePattern: number,
 *   skippedBySize: number,
 *   skippedByBinary: number
 * }>} Findings per file plus counters; `skippedByGlob` is always 0.
 */
async function scanDirectory(cwd) {
  const matchesByFile = new Map();
  let scannedFiles = 0;
  let skippedBySensitivePattern = 0;
  let skippedBySize = 0;
  let skippedByBinary = 0;

  const fileStream = fg.stream("**/*", {
    cwd,
    onlyFiles: true,
    dot: true,
    followSymbolicLinks: false,
    unique: true,
    ignore: Array.from(FAST_GLOB_IGNORE_PATTERNS)
  });

  // Inspects one file and folds the outcome into the counters / result map.
  const inspect = async (relativePath) => {
    scannedFiles += 1;
    try {
      const outcome = await processFile(cwd, relativePath);
      if (!("kind" in outcome)) {
        matchesByFile.set(outcome.path, outcome.matches);
        return;
      }
      switch (outcome.kind) {
        case "sensitive":
          skippedBySensitivePattern += 1;
          break;
        case "size":
          skippedBySize += 1;
          break;
        case "binary":
          skippedByBinary += 1;
          break;
        default:
          break;
      }
    } catch {
      // Best effort: a file that cannot be inspected is left out of the result.
    }
  };

  const pending = new Set();
  for await (const entry of fileStream) {
    const task = inspect(String(entry)).finally(() => {
      pending.delete(task);
    });
    pending.add(task);
    // Back-pressure: wait for one inspection to settle once the pool is full.
    if (pending.size >= DEFAULT_CONCURRENCY) {
      await Promise.race(pending);
    }
  }
  await Promise.all(pending);

  return {
    matchesByFile,
    scannedFiles,
    skippedByGlob: 0,
    skippedBySensitivePattern,
    skippedBySize,
    skippedByBinary
  };
}
334
+
335
+ export { FAST_GLOB_IGNORE_PATTERNS as F, MAX_FILE_SIZE_BYTES as M, REPLACEMENT_RULES as R, REPLACEMENT_RULE_MAP as a, applyOccurrences as b, countByMatch as c, shouldSkipSensitivePath as d, findOccurrences as f, isProbablyBinary as i, normalizeGlobPath as n, scanDirectory as s };
package/package.json ADDED
@@ -0,0 +1,58 @@
1
+ {
2
+ "name": "aslopcleaner",
3
+ "version": "1.0.0",
4
+ "description": "High-performance CLI to replace common LLM/AI Unicode punctuation and symbols with ASCII equivalents.",
5
+ "type": "module",
6
+ "bin": {
7
+ "aslopcleaner": "./dist/cli.mjs"
8
+ },
9
+ "exports": {
10
+ ".": {
11
+ "import": "./dist/index.mjs"
12
+ }
13
+ },
14
+ "repository": {
15
+ "type": "git",
16
+ "url": "git+https://github.com/kyr0/defuss.git"
17
+ },
18
+ "author": "Aron Homberg <info@aron-homberg.de>",
19
+ "homepage": "https://github.com/kyr0/defuss",
20
+ "publishConfig": {
21
+ "access": "public"
22
+ },
23
+ "packageManager": "bun@1.3.9",
24
+ "sideEffects": false,
25
+ "files": [
26
+ "dist",
27
+ "README.md",
28
+ "LICENSE"
29
+ ],
30
+ "scripts": {
31
+ "build": "rm -rf dist && pkgroll",
32
+ "start": "node dist/cli.mjs",
33
+ "dev": "node --enable-source-maps src/cli.ts",
34
+ "test": "vitest run",
35
+ "test:coverage": "vitest run --coverage",
36
+ "run:example": "tsx example/example.ts"
37
+ },
38
+ "keywords": [
39
+ "ascii",
40
+ "bun",
41
+ "node",
42
+ "cli",
43
+ "unicode",
44
+ "sanitizer",
45
+ "llm"
46
+ ],
47
+ "license": "MIT",
48
+ "dependencies": {
49
+ "fast-glob": "^3.3.3"
50
+ },
51
+ "devDependencies": {
52
+ "@types/node": "^25.5.0",
53
+ "@vitest/coverage-v8": "^3.1.1",
54
+ "pkgroll": "^2.11.3",
55
+ "tsx": "^4.19.4",
56
+ "vitest": "^3.1.1"
57
+ }
58
+ }