cleanmonkey 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ """cleanmonkey — one-call text cleanup for invisible characters, smart quotes, and whitespace."""
2
+
3
+ from cleanmonkey.core import MAX_DEPTH, clean, clean_column, clean_dict, inspect
4
+ from cleanmonkey.profiles import PROFILES, Profile
5
+
6
+ __version__ = "0.1.0"
7
+ __all__ = ["MAX_DEPTH", "clean", "clean_column", "clean_dict", "inspect", "Profile", "PROFILES"]
@@ -0,0 +1,5 @@
1
+ """Allow running cleanmonkey as ``python -m cleanmonkey``."""
2
+
3
+ from cleanmonkey.cli import _main_with_broken_pipe_handling
4
+
5
+ _main_with_broken_pipe_handling()
cleanmonkey/cli.py ADDED
@@ -0,0 +1,400 @@
1
+ """CLI entry point for cleanmonkey."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import contextlib
7
+ import json
8
+ import os
9
+ import shutil
10
+ import stat
11
+ import sys
12
+ import tempfile
13
+ from typing import Any, TextIO
14
+
15
+ from cleanmonkey.core import clean, inspect
16
+ from cleanmonkey.profiles import PROFILES
17
+
18
+
19
def _fsync_directory(dir_path: str) -> None:
    """Best-effort fsync of a directory so a preceding rename is durable.

    Parameters
    ----------
    dir_path : str
        Directory whose entry table should be flushed to disk.

    Notes
    -----
    This function never raises.  Platforms and filesystems that cannot
    fsync a directory are skipped silently; any other I/O error is reported
    as a warning on stderr because it may indicate a real durability
    problem.
    """
    import errno

    # Windows cannot open a directory with os.open(): the call fails with
    # PermissionError (EACCES), which is deliberately *not* in the
    # "unsupported" set below.  Without this early exit every successful
    # write on Windows would print a spurious warning.
    if os.name == "nt":
        return

    # Errors that mean "not supported here" rather than a real failure.
    # EACCES/EBADF are NOT included: they may indicate real permission issues
    # on the directory that should surface as warnings.
    unsupported_errnos = {
        errno.ENOTSUP, errno.EOPNOTSUPP, errno.ENOSYS, errno.EINVAL,
    }
    try:
        fd = os.open(dir_path, os.O_RDONLY)
        try:
            os.fsync(fd)
        finally:
            os.close(fd)
    except OSError as exc:
        if exc.errno not in unsupported_errnos:
            print(
                f"cleanmonkey: warning: directory fsync failed for "
                f"{dir_path!r}: {exc}",
                file=sys.stderr,
            )
        # Never fatal — durability is best-effort.
47
+
48
+
49
def _prepare_temp_metadata(tmp_path: str, destination: str) -> None:
    """Give the temp file the metadata the destination should end up with.

    When *destination* already exists its metadata is copied onto the temp
    file (falling back to the permission bits only, with a warning on
    stderr).  When it does not exist, the umask-derived default mode is
    applied so the new file behaves as if created by a normal ``open()``.

    Raises ``OSError`` only from the "new file" ``chmod``; metadata copy
    problems for an existing destination are downgraded to warnings.
    """
    if os.path.exists(destination):
        try:
            shutil.copystat(destination, tmp_path)
        except OSError:
            # copystat failed (e.g. unsupported metadata).  Fall back to
            # preserving at least the file mode so os.replace() doesn't
            # leave mkstemp's 0600.
            try:
                orig_mode = os.stat(destination).st_mode
                os.chmod(tmp_path, stat.S_IMODE(orig_mode))
            except OSError as perm_exc:
                print(
                    f"cleanmonkey: warning: could not preserve "
                    f"permissions for {destination!r}: {perm_exc}",
                    file=sys.stderr,
                )
            else:
                print(
                    f"cleanmonkey: warning: metadata preservation "
                    f"partially failed for {destination!r}; "
                    f"file mode preserved",
                    file=sys.stderr,
                )
        # copystat() also copies the *old* access/modification times onto
        # the freshly written content.  Reset them to "now" so tools that
        # rely on mtime (make, rsync, backups) notice the rewrite.
        with contextlib.suppress(OSError):
            os.utime(tmp_path)
    else:
        # mkstemp creates files with mode 0600; apply the umask-derived
        # default (typically 0644).  Done on the temp file BEFORE the
        # replace so a chmod failure cannot leave a replaced destination.
        umask = os.umask(0)
        os.umask(umask)
        os.chmod(tmp_path, 0o666 & ~umask)


def _open_streams(
    file_path: str | None,
    output_path: str | None,
) -> contextlib.AbstractContextManager[tuple[TextIO, TextIO, str | None]]:
    """Open input/output streams with explicit ownership.

    Parameters
    ----------
    file_path : str or None
        Input path; ``None`` or ``"-"`` borrows ``sys.stdin``.
    output_path : str or None
        Output path; ``None`` or ``"-"`` borrows ``sys.stdout``.

    Returns
    -------
    context manager
        Yields ``(in_stream, out_stream, tmp_path)``.  ``tmp_path`` is the
        temporary file backing ``out_stream`` or ``None`` when writing to
        stdout.

    Notes
    -----
    Files opened here are closed on exit; ``stdin``/``stdout`` are never
    closed.  When *output_path* names a real file, writes go to a temp file
    in the same directory.  If the ``with`` body finishes without raising,
    the temp file is flushed, fsynced and atomically renamed over the
    (symlink-resolved) destination; otherwise it is removed and the
    destination is left untouched.  ``OSError`` raised while finalising is
    propagated after cleanup.
    """

    @contextlib.contextmanager
    def _ctx():
        in_stream: TextIO | None = None
        out_stream: TextIO | None = None
        tmp_path: str | None = None
        # Resolve symlinks so os.replace() writes through to the real
        # destination instead of replacing the symlink itself.
        resolved_output = (
            os.path.realpath(output_path)
            if output_path is not None and output_path != "-"
            else output_path
        )
        success = False
        try:
            if file_path is None or file_path == "-":
                in_stream = sys.stdin
            else:
                in_stream = open(file_path, "r", encoding="utf-8", newline="")

            if resolved_output is None or resolved_output == "-":
                out_stream = sys.stdout
            else:
                # Same directory as the destination so os.replace() stays
                # on one filesystem and is therefore atomic.
                out_dir = os.path.dirname(os.path.abspath(resolved_output))
                fd, tmp_path = tempfile.mkstemp(
                    dir=out_dir, prefix=".cleanmonkey_", suffix=".tmp",
                )
                out_stream = open(fd, "w", encoding="utf-8", newline="")

            yield in_stream, out_stream, tmp_path
            # Only reached when the with-body did not raise.
            success = True
        finally:
            # Only close streams we opened (not stdin/stdout).
            if in_stream is not None and in_stream is not sys.stdin:
                in_stream.close()
            finalize_error: OSError | None = None
            if out_stream is not None and out_stream is not sys.stdout:
                try:
                    # Flush to OS and fsync for durability before the rename.
                    if tmp_path is not None and success:
                        out_stream.flush()
                        os.fsync(out_stream.fileno())
                except OSError as exc:
                    # Flush/fsync failed — treat as unsuccessful so we clean up.
                    success = False
                    finalize_error = exc
                finally:
                    out_stream.close()
            # Promote the temp file on success; remove it on failure.
            if tmp_path is not None:
                if success and resolved_output is not None:
                    try:
                        _prepare_temp_metadata(tmp_path, resolved_output)
                        os.replace(tmp_path, resolved_output)
                        _fsync_directory(
                            os.path.dirname(os.path.abspath(resolved_output))
                        )
                    except OSError:
                        # Promotion failed — drop the temp file, then report.
                        with contextlib.suppress(OSError):
                            os.unlink(tmp_path)
                        raise
                else:
                    with contextlib.suppress(OSError):
                        os.unlink(tmp_path)
            # Re-raise flush/fsync error after cleanup so callers know it failed.
            if finalize_error is not None:
                raise finalize_error

    return _ctx()
172
+
173
+
174
def _non_negative_int(value: str) -> int:
    """Argparse ``type=`` callable that accepts only integers >= 0.

    Parameters
    ----------
    value : str
        Raw command-line token.

    Returns
    -------
    int
        The parsed, non-negative integer.

    Raises
    ------
    argparse.ArgumentTypeError
        If *value* is not an integer literal or is negative.
    """
    try:
        n = int(value)
    except ValueError:
        # ``from None``: the underlying ValueError adds nothing for callers.
        raise argparse.ArgumentTypeError(f"invalid int value: {value!r}") from None
    if n < 0:
        raise argparse.ArgumentTypeError(f"must be non-negative, got {n}")
    return n
183
+
184
+
185
def main(argv: list[str] | None = None) -> None:
    """Parse command-line arguments and run the cleanmonkey CLI.

    Parameters
    ----------
    argv : list of str or None
        Argument vector without the program name; ``None`` lets argparse
        read ``sys.argv``.

    Raises
    ------
    SystemExit
        Raised by argparse for ``--version``, usage errors, and for any
        ``OSError``/``ValueError`` raised while processing (reported via
        ``parser.error``).
    BrokenPipeError
        Propagated unchanged so the entry-point wrapper can exit quietly
        when the downstream consumer closes the pipe.
    """
    parser = argparse.ArgumentParser(
        prog="cleanmonkey",
        description="Clean invisible characters, smart quotes, and whitespace from text.",
    )
    parser.add_argument(
        "file",
        nargs="?",
        default=None,
        help="Input file (default: stdin, use '-' for stdin)",
    )
    parser.add_argument(
        "-o", "--output",
        default=None,
        help="Output file (default: stdout, use '-' for stdout)",
    )
    parser.add_argument(
        "-p", "--profile",
        choices=sorted(PROFILES),
        default="default",
        help="Cleaning profile (default: default)",
    )
    parser.add_argument(
        "--inspect",
        action="store_true",
        dest="inspect_mode",
        help="Inspect mode: report problematic characters instead of cleaning. "
             "Reports character-map replacements only; structural changes like "
             "space collapsing and per-line stripping are not reported.",
    )
    parser.add_argument(
        "--no-smart-quotes", action="store_true", help="Disable smart quote normalization",
    )
    parser.add_argument(
        "--no-dashes", action="store_true", help="Disable dash normalization",
    )
    parser.add_argument(
        "--fullwidth", action="store_true",
        help="Enable fullwidth ASCII letter and digit normalization "
             "(e.g. \uff21\u2192A, \uff10\u21920; fullwidth punctuation is not covered)",
    )
    parser.add_argument(
        "--no-line-endings", action="store_true",
        help="Disable line-ending normalization (also disables CR reporting in inspect mode)",
    )
    parser.add_argument(
        "--no-strip", action="store_true",
        help="Disable stripping of leading/trailing whitespace per line",
    )
    parser.add_argument(
        "--no-collapse-spaces", action="store_true",
        help="Disable collapsing of multiple spaces into one",
    )
    parser.add_argument(
        "--json", action="store_true",
        help="Output inspect results as JSON (implies --inspect)",
    )
    parser.add_argument(
        "--max-positions", type=_non_negative_int, default=None, metavar="N",
        help="Limit position lists in inspect output to at most N entries "
             "(count is always accurate). Useful for large files.",
    )
    parser.add_argument(
        "--stream", action="store_true",
        help="Process input line-by-line instead of loading it all into memory. "
             "Suitable for very large files. Ignored in inspect mode.",
    )
    parser.add_argument(
        "--version", action="version", version=f"%(prog)s {_get_version()}",
    )

    args = parser.parse_args(argv)

    # --json implies --inspect.
    if args.json:
        args.inspect_mode = True

    # Warn if --stream is used with --inspect (inspect needs full text).
    if args.stream and args.inspect_mode:
        print(
            "cleanmonkey: warning: --stream is ignored in inspect mode",
            file=sys.stderr,
        )

    # Refuse to use one file as both input and output.
    if (
        args.file is not None
        and args.file != "-"
        and args.output is not None
        and args.output != "-"
    ):
        try:
            if os.path.samefile(args.file, args.output):
                parser.error("input and output paths refer to the same file; this would cause data loss")
        except FileNotFoundError:
            # Output file doesn't exist yet – that's fine, no collision.
            pass
        except OSError as exc:
            parser.error(
                f"cannot compare input {args.file!r} and output {args.output!r}: {exc}"
            )

    try:
        _run(parser, args)
    except BrokenPipeError:
        # BrokenPipeError is a subclass of OSError.  Without this clause a
        # closed pipe (e.g. ``cleanmonkey big.txt | head``) would be caught
        # below and reported as a usage error with exit status 2, bypassing
        # the quiet SIGPIPE-style exit in the entry-point wrapper.
        raise
    except (OSError, ValueError) as exc:
        parser.error(str(exc))
291
+
292
+
293
def _run(parser: argparse.ArgumentParser, args: argparse.Namespace) -> None:
    """Carry out the action selected on the command line.

    Opens the input/output streams, then either cleans the text (whole-file
    or line-by-line with ``--stream``) or writes an inspect report (JSON or
    plain text).  Undecodable input is reported through ``parser.error``;
    ``OSError`` is left to propagate to the caller.
    """
    # (argparse attribute, clean() keyword, value forced when the flag is set)
    flag_table = (
        ("no_smart_quotes", "smart_quotes", False),
        ("no_dashes", "dashes", False),
        ("fullwidth", "fullwidth", True),
        ("no_line_endings", "line_endings", False),
        ("no_strip", "strip", False),
        ("no_collapse_spaces", "collapse_spaces", False),
    )
    source_label = args.file or '<stdin>'
    decode_failure = f"cannot decode {source_label!r}: file is not valid UTF-8"

    with _open_streams(args.file, args.output) as (in_stream, out_stream, _tmp):
        # Only flags the user actually passed become overrides; everything
        # else is left to the selected profile.
        overrides: dict[str, bool] = {
            keyword: forced
            for attr, keyword, forced in flag_table
            if getattr(args, attr)
        }

        # Streaming clean: one line at a time (never used for inspect).
        if args.stream and not args.inspect_mode:
            try:
                for line in in_stream:
                    out_stream.write(clean(line, profile=args.profile, **overrides))
            except UnicodeDecodeError:
                parser.error(decode_failure)
            return

        try:
            text = in_stream.read()
        except UnicodeDecodeError:
            parser.error(decode_failure)

        # Plain clean mode.
        if not args.inspect_mode:
            cleaned = clean(text, profile=args.profile, **overrides)
            out_stream.write(cleaned)
            return

        # Inspect mode: only these options are forwarded to inspect().
        inspect_kw: dict[str, Any] = {"profile": args.profile}
        if args.fullwidth:
            inspect_kw["fullwidth"] = True
        if args.no_line_endings:
            inspect_kw["line_endings"] = False
        if args.max_positions is not None:
            inspect_kw["max_positions"] = args.max_positions
        findings = inspect(text, **inspect_kw)

        if args.json:
            payload = [
                {
                    "char": info.char,
                    "codepoint": info.codepoint,
                    "name": info.name,
                    "category": info.category,
                    "count": info.count,
                    "positions": info.positions,
                }
                for info in findings
            ]
            json.dump(payload, out_stream, ensure_ascii=False)
            out_stream.write("\n")
        elif not findings:
            print("No problematic characters found.", file=out_stream)
        else:
            for info in findings:
                # With --max-positions inspect() has already truncated the
                # list; otherwise show at most 10 positions for readability.
                if args.max_positions is None:
                    shown = info.positions[:10]
                else:
                    shown = info.positions
                marker = "..." if len(shown) < info.count else ""
                print(
                    f"{info.codepoint} {info.name} (count: {info.count}, "
                    f"positions: {shown}{marker})",
                    file=out_stream,
                )
372
+
373
+
374
def _get_version() -> str:
    """Return the package version string.

    The import is done lazily, inside the function, rather than at module
    import time.
    """
    import cleanmonkey

    return cleanmonkey.__version__
377
+
378
+
379
def _main_with_broken_pipe_handling() -> None:
    """Run :func:`main`, exiting quietly if the output pipe is closed early.

    When a downstream consumer goes away (e.g.
    ``cleanmonkey file.txt | head -n1``) a ``BrokenPipeError`` reaches this
    wrapper.  Both standard streams are then closed — ignoring a further
    ``BrokenPipeError`` from the implicit flush — and the process exits with
    status 141, the conventional ``128 + SIGPIPE`` code.
    """
    try:
        main()
    except BrokenPipeError:
        for stream in (sys.stdout, sys.stderr):
            # close() flushes first, which can hit the same broken pipe.
            with contextlib.suppress(BrokenPipeError):
                stream.close()
        # 128 + 13 (SIGPIPE) = 141
        sys.exit(141)
397
+
398
+
399
+ if __name__ == "__main__":
400
+ _main_with_broken_pipe_handling()
cleanmonkey/core.py ADDED
@@ -0,0 +1,449 @@
1
+ """Core cleaning functions."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from dataclasses import dataclass, replace
7
+ from typing import Any
8
+
9
+ _BOOL_OVERRIDE_NAMES_CLEAN = (
10
+ "smart_quotes", "dashes", "ellipsis", "invisible", "whitespace",
11
+ "control", "fullwidth", "line_endings", "collapse_spaces", "strip",
12
+ )
13
+ _BOOL_OVERRIDE_NAMES_INSPECT = ("fullwidth", "line_endings")
14
+
15
+
16
def _validate_bool_overrides(overrides: dict[str, Any], allowed: tuple[str, ...], func_name: str) -> None:
    """Check that *overrides* holds only known names with bool/None values.

    Parameters
    ----------
    overrides : dict
        Keyword overrides supplied by the caller.
    allowed : tuple of str
        Names that are acceptable as overrides.
    func_name : str
        Public function name used in the error messages.

    Raises
    ------
    TypeError
        If an unknown name is present, or an allowed name maps to a value
        that is neither ``None`` nor a ``bool``.
    """
    unexpected = set(overrides).difference(allowed)
    if unexpected:
        listing = ', '.join(sorted(unexpected))
        raise TypeError(
            f"{func_name}() got unexpected keyword argument(s): {listing}"
        )
    for key in allowed:
        value = overrides.get(key)
        if value is None or isinstance(value, bool):
            continue
        raise TypeError(
            f"{func_name}() override {key!r} must be bool or None, got {type(value).__name__}"
        )
29
+
30
+ from cleanmonkey.maps import (
31
+ CONTROL,
32
+ DASHES,
33
+ ELLIPSIS,
34
+ FULLWIDTH,
35
+ INVISIBLE,
36
+ SMART_QUOTES,
37
+ WHITESPACE,
38
+ )
39
+ from cleanmonkey.profiles import PROFILES, Profile
40
+
41
+
42
def _validate_profile_kwarg(kwargs: dict[str, Any], func_name: str) -> None:
    """Validate an optional ``profile`` entry in *kwargs*.

    Does nothing when ``"profile"`` is absent.

    Raises
    ------
    ValueError
        If the profile is a string that is not a key of ``PROFILES``.
    TypeError
        If the profile is neither a ``str`` nor a ``Profile`` instance.
    """
    if "profile" not in kwargs:
        return
    candidate = kwargs["profile"]
    if isinstance(candidate, str):
        if candidate in PROFILES:
            return
        raise ValueError(
            f"Unknown profile {candidate!r}. Available: {', '.join(sorted(PROFILES))}"
        )
    if not isinstance(candidate, Profile):
        raise TypeError(
            f"{func_name}() profile must be str or Profile, got {type(candidate).__name__}"
        )
55
+
56
+ _MULTI_SPACE = re.compile(r" {2,}")
57
+
58
+ #: Maximum nesting depth for recursive cleaners (clean_dict, clean_column).
59
+ #: Kept well below half of Python's default recursion limit (1000) since each
60
+ #: nesting level consumes multiple Python frames.
61
+ MAX_DEPTH: int = 200
62
+
63
+
64
def _build_table(profile: Profile) -> dict[int, str | int | None]:
    """Build a ``str.translate`` table from the maps enabled in *profile*.

    Maps are merged in a fixed order, so if two enabled maps share a
    character the later one in the list below wins.
    """
    sources = (
        (profile.invisible, INVISIBLE),
        (profile.whitespace, WHITESPACE),
        (profile.control, CONTROL),
        (profile.smart_quotes, SMART_QUOTES),
        (profile.dashes, DASHES),
        (profile.ellipsis, ELLIPSIS),
        (profile.fullwidth, FULLWIDTH),
    )
    combined: dict[str, str] = {}
    for enabled, mapping in sources:
        if enabled:
            combined.update(mapping)
    return str.maketrans(combined)
82
+
83
+
84
# Translation tables keyed by profile name, filled lazily by _get_table().
_TABLE_CACHE: dict[str, dict[int, str | int | None]] = {}


def _get_table(profile: Profile, profile_name: str | None = None) -> dict[int, str | int | None]:
    """Return the translate table for *profile*, cached under *profile_name*.

    With a falsy *profile_name* the table is built fresh and not stored.
    Otherwise a previously cached table for that name is returned, or a new
    one is built from *profile* and remembered.
    """
    if not profile_name:
        return _build_table(profile)
    try:
        return _TABLE_CACHE[profile_name]
    except KeyError:
        built = _build_table(profile)
        _TABLE_CACHE[profile_name] = built
        return built
95
+
96
+
97
# Splits on any line terminator while keeping the terminator itself, so
# per-line stripping also works when "\r\n" / "\r" endings are preserved.
_LINE_BREAK_SPLIT = re.compile(r"(\r\n|\r|\n)")


def clean(
    text: str,
    *,
    profile: str | Profile = "default",
    smart_quotes: bool | None = None,
    dashes: bool | None = None,
    ellipsis: bool | None = None,
    invisible: bool | None = None,
    whitespace: bool | None = None,
    control: bool | None = None,
    fullwidth: bool | None = None,
    line_endings: bool | None = None,
    collapse_spaces: bool | None = None,
    strip: bool | None = None,
) -> str:
    """Clean text with sensible defaults.

    Parameters
    ----------
    text : str
        The text to clean.
    profile : str or Profile
        Named profile or a Profile instance. Default is "default".
    smart_quotes, dashes, ellipsis, invisible, whitespace, control,
    fullwidth, line_endings, collapse_spaces, strip :
        Override individual profile settings. None means use profile value.

    Returns
    -------
    str
        Cleaned text.

    Raises
    ------
    TypeError
        If *text* is not a ``str``, *profile* is neither ``str`` nor
        ``Profile``, or an override is neither ``bool`` nor ``None``.
    ValueError
        If *profile* is a string that names no known profile.

    Notes
    -----
    When ``strip`` is enabled, leading and trailing spaces and tabs are
    removed from **each line individually**, not just the overall string.
    This will destroy meaningful indentation (e.g. Python, YAML, Markdown
    code blocks). Pass ``strip=False`` for indentation-sensitive content.

    Lines are delimited by ``"\\n"``, ``"\\r\\n"`` or a lone ``"\\r"``, so
    stripping also works when line-ending normalization is disabled and the
    original terminators are kept.
    """
    if not isinstance(text, str):
        raise TypeError(f"clean() expects str, got {type(text).__name__}")

    requested = {
        "smart_quotes": smart_quotes,
        "dashes": dashes,
        "ellipsis": ellipsis,
        "invisible": invisible,
        "whitespace": whitespace,
        "control": control,
        "fullwidth": fullwidth,
        "line_endings": line_endings,
        "collapse_spaces": collapse_spaces,
        "strip": strip,
    }

    # Validate override types before any other processing.
    _validate_bool_overrides(requested, _BOOL_OVERRIDE_NAMES_CLEAN, "clean")

    # Resolve profile (validate before early return so invalid profiles always raise).
    if isinstance(profile, str):
        profile_name: str | None = profile
        if profile not in PROFILES:
            raise ValueError(f"Unknown profile {profile!r}. Available: {', '.join(sorted(PROFILES))}")
        base = PROFILES[profile]
    elif isinstance(profile, Profile):
        profile_name = None
        base = profile
    else:
        raise TypeError(f"profile must be str or Profile, got {type(profile).__name__}")

    if not text:
        return text

    # Only explicitly supplied overrides replace profile values.
    overrides = {k: v for k, v in requested.items() if v is not None}
    if overrides:
        p = replace(base, **overrides)
        profile_name = None  # don't cache custom combos
    else:
        p = base

    # Translate characters.
    table = _get_table(p, profile_name)
    result = text.translate(table)

    # Normalize line endings.
    if p.line_endings:
        result = result.replace("\r\n", "\n").replace("\r", "\n")

    # Collapse multiple spaces.
    if p.collapse_spaces:
        result = _MULTI_SPACE.sub(" ", result)

    # Strip each line.  Splitting on "\n" alone would leave trailing blanks
    # in front of a preserved "\r" untouched, so split on every terminator
    # and put the original terminators back unchanged.
    if p.strip:
        pieces = _LINE_BREAK_SPLIT.split(result)
        # Even indices are line contents, odd indices are the terminators.
        result = "".join(
            piece if idx % 2 else piece.strip(" \t")
            for idx, piece in enumerate(pieces)
        )

    return result
205
+
206
+
207
def _clean_value(
    v: Any, *, keys: bool = False, _seen: set[int] | None = None, _depth: int = 0, **kwargs: Any,
) -> Any:
    """Recursively clean a value of any type.

    Strings are passed to ``clean()``, dicts to ``clean_dict()`` and lists
    to ``clean_column()``.  Tuples (including namedtuples), sets and
    frozensets are rebuilt with cleaned members.  Any other value is
    returned unchanged.

    Parameters
    ----------
    v : Any
        Value to clean.
    keys : bool
        Forwarded to the dict cleaner.
    _seen, _depth :
        Internal recursion bookkeeping (ids of containers currently being
        traversed, and the current nesting depth).
    **kwargs :
        Passed to ``clean()``.

    Raises
    ------
    ValueError
        On nesting deeper than ``MAX_DEPTH``, on a circular reference, or
        when cleaning makes two set members equal.
    """
    if isinstance(v, str):
        return clean(v, **kwargs)
    if isinstance(v, dict):
        return clean_dict(v, keys=keys, _seen=_seen, _depth=_depth, **kwargs)
    if isinstance(v, list):
        return clean_column(v, keys=keys, _seen=_seen, _depth=_depth, **kwargs)
    if isinstance(v, (tuple, set, frozenset)):
        if _depth >= MAX_DEPTH:
            raise ValueError(f"Maximum nesting depth ({MAX_DEPTH}) exceeded")
        obj_id = id(v)
        if _seen is None:
            _seen = set()
        if obj_id in _seen:
            raise ValueError("Circular reference detected in input structure")
        _seen.add(obj_id)
        try:
            cleaned_items = [
                _clean_value(item, keys=keys, _seen=_seen, _depth=_depth + 1, **kwargs)
                for item in v
            ]
            if isinstance(v, tuple) and hasattr(type(v), "_make"):
                # namedtuple constructors take one positional argument per
                # field, so type(v)(list) would raise TypeError; _make()
                # is the documented way to build one from an iterable.
                result = type(v)._make(cleaned_items)
            else:
                result = type(v)(cleaned_items)
            if isinstance(v, (set, frozenset)) and len(result) != len(v):
                raise ValueError(
                    "Set member collision: cleaning produced duplicate members"
                )
            return result
        finally:
            # The container is no longer on the current traversal path.
            _seen.discard(obj_id)
    return v
240
+
241
+
242
def clean_column(
    values: list[Any], *, keys: bool = False, _seen: set[int] | None = None,
    _depth: int = 0, **kwargs: Any,
) -> list[Any]:
    """Return a new list with every element cleaned.

    Nested containers are traversed recursively.

    Parameters
    ----------
    values : list
        Items to clean.
    keys : bool
        Forwarded to nested dict cleaning.
    **kwargs :
        Passed to ``clean()`` (``profile`` and the boolean overrides).

    Raises
    ------
    TypeError
        On the outermost call, if *values* is not a list, *keys* is not a
        bool, or an override/profile has the wrong type.
    ValueError
        On an unknown profile name, nesting deeper than ``MAX_DEPTH``, or a
        circular reference.
    """
    if _depth == 0:
        # Arguments are validated once, on the outermost call only, so bad
        # options raise regardless of the shape of the data.
        if not isinstance(values, list):
            raise TypeError(f"clean_column() expects list, got {type(values).__name__}")
        if not isinstance(keys, bool):
            raise TypeError(f"clean_column() keys must be bool, got {type(keys).__name__}")
        _validate_profile_kwarg(kwargs, "clean_column")
        # 'profile' is a legitimate passthrough, not a boolean override.
        flag_kwargs = dict(kwargs)
        flag_kwargs.pop("profile", None)
        _validate_bool_overrides(flag_kwargs, _BOOL_OVERRIDE_NAMES_CLEAN, "clean_column")

    if _depth >= MAX_DEPTH:
        raise ValueError(f"Maximum nesting depth ({MAX_DEPTH}) exceeded")

    active = set() if _seen is None else _seen
    marker = id(values)
    if marker in active:
        raise ValueError("Circular reference detected in input structure")

    active.add(marker)
    try:
        child_depth = _depth + 1
        return [
            _clean_value(item, keys=keys, _seen=active, _depth=child_depth, **kwargs)
            for item in values
        ]
    finally:
        active.discard(marker)
271
+
272
+
273
def clean_dict(
    d: dict[Any, Any], *, keys: bool = False, _seen: set[int] | None = None,
    _depth: int = 0, **kwargs: Any,
) -> dict[Any, Any]:
    """Return a new dict whose values have been cleaned recursively.

    Parameters
    ----------
    d : dict
        Dictionary to clean.
    keys : bool
        If True, also clean dictionary keys (only str keys are cleaned).
    **kwargs :
        Passed to clean().

    Raises
    ------
    TypeError
        On the outermost call, if *d* is not a dict, *keys* is not a bool,
        or an override/profile has the wrong type.
    ValueError
        On an unknown profile name, nesting deeper than ``MAX_DEPTH``, a
        circular reference, or two keys that end up identical.
    """
    if _depth == 0:
        # Arguments are validated once, on the outermost call only, so bad
        # options raise regardless of the shape of the data.
        if not isinstance(d, dict):
            raise TypeError(f"clean_dict() expects dict, got {type(d).__name__}")
        if not isinstance(keys, bool):
            raise TypeError(f"clean_dict() keys must be bool, got {type(keys).__name__}")
        _validate_profile_kwarg(kwargs, "clean_dict")
        # 'profile' is a legitimate passthrough, not a boolean override.
        flag_kwargs = dict(kwargs)
        flag_kwargs.pop("profile", None)
        _validate_bool_overrides(flag_kwargs, _BOOL_OVERRIDE_NAMES_CLEAN, "clean_dict")

    if _depth >= MAX_DEPTH:
        raise ValueError(f"Maximum nesting depth ({MAX_DEPTH}) exceeded")

    active = set() if _seen is None else _seen
    marker = id(d)
    if marker in active:
        raise ValueError("Circular reference detected in input structure")

    active.add(marker)
    try:
        cleaned: dict[Any, Any] = {}
        child_depth = _depth + 1
        for key, value in d.items():
            if keys and isinstance(key, str):
                target_key = clean(key, **kwargs)
            else:
                target_key = key
            # Two distinct keys may become equal once cleaned; refuse to
            # silently drop one of them.
            if target_key in cleaned:
                raise ValueError(
                    f"Key collision: {key!r} normalizes to {target_key!r} which already exists"
                )
            cleaned[target_key] = _clean_value(
                value, keys=keys, _seen=active, _depth=child_depth, **kwargs,
            )
        return cleaned
    finally:
        active.discard(marker)
319
+
320
+
321
+ @dataclass(slots=True)
322
+ class CharInfo:
323
+ """Information about a character found during inspection."""
324
+ char: str
325
+ codepoint: str
326
+ name: str
327
+ category: str
328
+ positions: list[int]
329
+ count: int
330
+
331
+
332
def inspect(
    text: str,
    *,
    profile: str | Profile = "default",
    fullwidth: bool | None = None,
    line_endings: bool | None = None,
    max_positions: int | None = None,
) -> list[CharInfo]:
    """Report the characters in *text* that the profile's maps would replace.

    Parameters
    ----------
    text : str
        The text to inspect.
    profile : str or Profile
        Named profile or a Profile instance. Only character categories
        enabled in the profile are flagged. Default is ``"default"``.
    fullwidth : bool or None
        Override the profile's fullwidth setting.
    line_endings : bool or None
        Override the profile's line_endings setting (controls whether
        carriage returns are flagged).
    max_positions : int or None
        If set, keep at most this many entries in each ``positions`` list.
        ``count`` always reflects the true total.

    Returns
    -------
    list of CharInfo
        One entry per distinct flagged character, ordered by first
        occurrence.

    Raises
    ------
    TypeError
        If *text*, *profile*, *max_positions* or an override has the wrong
        type.
    ValueError
        If *max_positions* is negative or *profile* names no known profile.

    Notes
    -----
    Positions are **character indices** into the Python string, not byte
    offsets. If byte offsets are needed, convert via
    ``len(text[:pos].encode('utf-8'))``.
    """
    if not isinstance(text, str):
        raise TypeError(f"inspect() expects str, got {type(text).__name__}")

    if max_positions is not None:
        # bool is a subclass of int, so reject it explicitly.
        if isinstance(max_positions, bool) or not isinstance(max_positions, int):
            raise TypeError(
                f"inspect() max_positions must be int or None, got {type(max_positions).__name__}"
            )
        if max_positions < 0:
            raise ValueError(
                f"inspect() max_positions must be non-negative, got {max_positions}"
            )

    _validate_bool_overrides(
        {"fullwidth": fullwidth, "line_endings": line_endings},
        _BOOL_OVERRIDE_NAMES_INSPECT, "inspect",
    )

    if isinstance(profile, str):
        if profile not in PROFILES:
            raise ValueError(f"Unknown profile {profile!r}. Available: {', '.join(sorted(PROFILES))}")
        p = PROFILES[profile]
    elif isinstance(profile, Profile):
        p = profile
    else:
        raise TypeError(f"profile must be str or Profile, got {type(profile).__name__}")

    use_fullwidth = p.fullwidth if fullwidth is None else fullwidth
    use_line_endings = p.line_endings if line_endings is None else line_endings

    import unicodedata

    # Characters the resolved settings would change.
    flagged: set[str] = set()
    for enabled, mapping in (
        (p.invisible, INVISIBLE),
        (p.whitespace, WHITESPACE),
        (p.control, CONTROL),
        (p.smart_quotes, SMART_QUOTES),
        (p.dashes, DASHES),
        (p.ellipsis, ELLIPSIS),
        (use_fullwidth, FULLWIDTH),
    ):
        if enabled:
            flagged.update(mapping)
    if use_line_endings:
        flagged.add("\r")

    # Both dicts gain a key the first time a character is seen, so their
    # insertion order is already "sorted by first position".
    kept_positions: dict[str, list[int]] = {}
    totals: dict[str, int] = {}
    for index, ch in enumerate(text):
        if ch not in flagged:
            continue
        if ch in totals:
            totals[ch] += 1
        else:
            totals[ch] = 1
            kept_positions[ch] = []
        bucket = kept_positions[ch]
        # Only store positions up to max_positions to bound memory.
        if max_positions is None or len(bucket) < max_positions:
            bucket.append(index)

    return [
        CharInfo(
            char=ch,
            codepoint=f"U+{ord(ch):04X}",
            name=unicodedata.name(ch, f"UNKNOWN-{ord(ch):04X}"),
            category=unicodedata.category(ch),
            positions=bucket,
            count=totals[ch],
        )
        for ch, bucket in kept_positions.items()
    ]
cleanmonkey/maps.py ADDED
@@ -0,0 +1,106 @@
1
"""Character replacement tables used by cleanmonkey.

Each table maps one source character to the string that replaces it; an
empty replacement string means the character is removed outright.
"""

# Curly, low-9, reversed and angle quotation marks -> ASCII ' or "
SMART_QUOTES: dict[str, str] = {
    # left single, right single, single low-9, single high-reversed-9
    **dict.fromkeys("\u2018\u2019\u201a\u201b", "'"),
    # left double, right double, double low-9, double high-reversed-9
    **dict.fromkeys("\u201c\u201d\u201e\u201f", '"'),
    # single left-pointing / right-pointing angle quotation marks
    **dict.fromkeys("\u2039\u203a", "'"),
    # left-pointing / right-pointing double angle quotation marks
    **dict.fromkeys("\u00ab\u00bb", '"'),
}

# Dash look-alikes -> ASCII hyphen-minus
DASHES: dict[str, str] = dict.fromkeys(
    (
        "\u2010",  # hyphen
        "\u2011",  # non-breaking hyphen
        "\u2012",  # figure dash
        "\u2013",  # en dash
        "\u2014",  # em dash
        "\u2015",  # horizontal bar
        "\u2212",  # minus sign
        "\ufe58",  # small em dash
        "\ufe63",  # small hyphen-minus
        "\uff0d",  # fullwidth hyphen-minus
    ),
    "-",
)

# Single-character ellipsis -> three ASCII full stops
ELLIPSIS: dict[str, str] = {"\u2026": "..."}  # horizontal ellipsis

# Zero-width and otherwise invisible characters -> removed
INVISIBLE: dict[str, str] = dict.fromkeys(
    (
        "\u200b",  # zero-width space
        "\u200c",  # zero-width non-joiner
        "\u200d",  # zero-width joiner
        "\u200e",  # left-to-right mark
        "\u200f",  # right-to-left mark
        "\u2060",  # word joiner
        "\u2061",  # function application
        "\u2062",  # invisible times
        "\u2063",  # invisible separator
        "\u2064",  # invisible plus
        "\ufeff",  # BOM / zero-width no-break space
        "\u00ad",  # soft hyphen
        "\u034f",  # combining grapheme joiner
        "\u061c",  # Arabic letter mark
        "\u180e",  # Mongolian vowel separator
    ),
    "",
)

# Space look-alikes -> ASCII space
WHITESPACE: dict[str, str] = {
    chr(code): " "
    for code in (
        0x00A0,  # non-breaking space
        0x1680,  # ogham space mark
        # U+2000..U+200A: en quad, em quad, en space, em space, three-per-em,
        # four-per-em, six-per-em, figure, punctuation, thin and hair space
        *range(0x2000, 0x200B),
        0x202F,  # narrow no-break space
        0x205F,  # medium mathematical space
        0x3000,  # ideographic space
    )
}

# C0/C1 control characters -> removed.  Tab (0x09), LF (0x0A) and CR (0x0D)
# are deliberately absent: they are handled separately.
CONTROL: dict[str, str] = {
    chr(code): ""
    for code in (
        *range(0x00, 0x09),  # NUL .. BS
        0x0B,  # vertical tab
        0x0C,  # form feed
        *range(0x0E, 0x20),  # SO .. US
        0x7F,  # DEL
        *range(0x80, 0xA0),  # C1 control characters (U+0080-U+009F)
    )
}

# Fullwidth forms sit exactly 0xFEE0 above their ASCII counterparts
# (U+FF10 is fullwidth "0", U+FF21 is fullwidth "A", U+FF41 is fullwidth "a").

# Fullwidth ASCII digits -> normal digits
FULLWIDTH_DIGITS: dict[str, str] = {
    chr(code + 0xFEE0): chr(code) for code in range(0x30, 0x3A)
}

# Fullwidth ASCII letters -> normal letters (A-Z first, then a-z)
FULLWIDTH_LETTERS: dict[str, str] = {
    chr(code + 0xFEE0): chr(code)
    for code in (*range(0x41, 0x5B), *range(0x61, 0x7B))
}

# Combined fullwidth map: digits followed by letters
FULLWIDTH: dict[str, str] = FULLWIDTH_DIGITS | FULLWIDTH_LETTERS
@@ -0,0 +1,57 @@
1
+ """Preset cleaning profiles for common use cases."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+
7
+
8
+ @dataclass(frozen=True, slots=True)
9
+ class Profile:
10
+ """Configuration for which normalizations to apply."""
11
+
12
+ smart_quotes: bool = True
13
+ dashes: bool = True
14
+ ellipsis: bool = True
15
+ invisible: bool = True
16
+ whitespace: bool = True
17
+ control: bool = True
18
+ fullwidth: bool = False
19
+ line_endings: bool = True # normalize \r\n and \r to \n
20
+ collapse_spaces: bool = True # multiple spaces → single space
21
+ strip: bool = True # strip leading/trailing whitespace per line
22
+
23
+
24
# Built-in profiles, looked up by name. Only the switches that differ from
# the Profile defaults are spelled out.
PROFILES: dict[str, Profile] = {
    "default": Profile(),
    # "csv" and "sql" are the defaults plus fullwidth folding.
    "csv": Profile(fullwidth=True),
    "sql": Profile(fullwidth=True),
    # "display" leaves typographic quotes, dashes and ellipsis untouched.
    "display": Profile(smart_quotes=False, dashes=False, ellipsis=False),
    # "minimal" keeps invisible-character handling as the only active step.
    "minimal": Profile(
        invisible=True,
        smart_quotes=False,
        dashes=False,
        ellipsis=False,
        whitespace=False,
        control=False,
        line_endings=False,
        collapse_spaces=False,
        strip=False,
    ),
    # "aggressive" enables every available normalization, including fullwidth.
    # It is intentionally equivalent to the default profile with fullwidth=True,
    # providing a semantic alias for pipelines that want maximum cleaning.
    "aggressive": Profile(fullwidth=True),
}
cleanmonkey/py.typed ADDED
File without changes
@@ -0,0 +1,152 @@
1
+ Metadata-Version: 2.4
2
+ Name: cleanmonkey
3
+ Version: 0.1.0
4
+ Summary: One-call text cleanup: invisible characters, smart quotes, whitespace normalization.
5
+ Author-email: RexBytes <pythonic@rexbytes.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/RexBytes/cleanmonkey
8
+ Project-URL: Repository, https://github.com/RexBytes/cleanmonkey
9
+ Project-URL: Issues, https://github.com/RexBytes/cleanmonkey/issues
10
+ Keywords: text,cleanup,whitespace,unicode,normalize
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Text Processing
20
+ Classifier: Topic :: Text Processing :: Filters
21
+ Classifier: Typing :: Typed
22
+ Requires-Python: >=3.10
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Dynamic: license-file
26
+
27
+ # cleanmonkey
28
+
29
+ One-call text cleanup for invisible characters, smart quotes, and whitespace normalization.
30
+
31
+ ## Install
32
+
33
+ ```bash
34
+ pip install cleanmonkey
35
+ ```
36
+
37
+ ## Quick Start
38
+
39
+ ```python
40
+ from cleanmonkey import clean
41
+
42
+ # Sensible defaults handle the common garbage
43
+ clean("hello\u00a0world\u2019s \u2014 test")
44
+ # → "hello world's - test"
45
+
46
+ # Idempotent — safe to call twice
47
+ clean(clean(text)) == clean(text)
48
+ ```
49
+
50
+ ## What It Cleans (by default)
51
+
52
+ | Category | Examples | Result |
53
+ |---|---|---|
54
+ | Non-breaking spaces | `\u00a0`, `\u2007`, `\u202f` | Regular space |
55
+ | Zero-width chars | `\u200b`, `\u200c`, `\u200d`, `\ufeff` | Removed |
56
+ | Smart quotes | `\u2018` `\u2019` `\u201c` `\u201d` | `'` and `"` |
57
+ | Dashes | `\u2013` (en), `\u2014` (em) | `-` |
58
+ | Ellipsis | `\u2026` | `...` |
59
+ | Control chars | null, form feed, vertical tab | Removed |
60
+ | Line endings | `\r\n`, `\r` | `\n` |
61
+ | Multiple spaces | `"hello    world"` | `"hello world"` |
62
+ | Leading/trailing | `" hello "` | `"hello"` |
63
+
64
+ ## Granular Control
65
+
66
+ Override any default:
67
+
68
+ ```python
69
+ clean(text, smart_quotes=False) # keep curly quotes
70
+ clean(text, dashes=False) # keep em/en dashes
71
+ clean(text, fullwidth=True) # also normalize fullwidth digits/letters
72
+ clean(text, collapse_spaces=False) # keep multiple spaces
73
+ clean(text, strip=False) # keep leading/trailing whitespace
74
+ ```
75
+
76
+ ## Profiles
77
+
78
+ ```python
79
+ clean(text, profile="default") # every normalization except fullwidth (the default)
80
+ clean(text, profile="csv") # default + fullwidth normalization
81
+ clean(text, profile="sql") # default + fullwidth normalization
82
+ clean(text, profile="display") # keep smart quotes & dashes; still clean invisible, control, whitespace, line endings
83
+ clean(text, profile="minimal") # invisible chars only, no collapsing or stripping
84
+ clean(text, profile="aggressive") # everything including fullwidth
85
+ ```
86
+
87
+ ## Batch Helpers
88
+
89
+ ```python
90
+ from cleanmonkey import clean_column, clean_dict
91
+
92
+ # Clean a list (non-strings pass through)
93
+ clean_column(["hello\u00a0world", 42, None])
94
+ # → ["hello world", 42, None]
95
+
96
+ # Recursively clean dict values
97
+ clean_dict({"name": "John\u00a0Doe", "nested": {"val": "test\u200b"}})
98
+ # → {"name": "John Doe", "nested": {"val": "test"}}
99
+
100
+ # Also clean keys
101
+ clean_dict({"key\u00a0name": "val"}, keys=True)
102
+ # → {"key name": "val"}
103
+ ```
104
+
105
+ ## Inspect
106
+
107
+ Find out what's lurking in your text:
108
+
109
+ ```python
110
+ from cleanmonkey import inspect
111
+
112
+ for info in inspect("hello\u00a0world\u200b"):
113
+ print(f"{info.codepoint} {info.name} count={info.count} at {info.positions}")
114
+ # U+00A0 NO-BREAK SPACE count=1 at [5]
115
+ # U+200B ZERO WIDTH SPACE count=1 at [11]
116
+ ```
117
+
118
+ ## CLI
119
+
120
+ ```bash
121
+ # Clean a file
122
+ cleanmonkey input.txt -o output.txt
123
+
124
+ # Pipe through stdin
125
+ cat dirty.csv | cleanmonkey > clean.csv
126
+
127
+ # Use a profile
128
+ cleanmonkey --profile csv input.txt
129
+
130
+ # Inspect mode — report what's in a file
131
+ cleanmonkey --inspect input.txt
132
+
133
+ # Machine-readable JSON inspect output
134
+ cleanmonkey --json input.txt
135
+
136
+ # Selective overrides
137
+ cleanmonkey --no-smart-quotes --fullwidth input.txt
138
+
139
+ # Preserve whitespace structure
140
+ cleanmonkey --no-strip --no-collapse-spaces input.txt
141
+
142
+ # Preserve line endings (CR/CRLF)
143
+ cleanmonkey --no-line-endings input.txt
144
+ ```
145
+
146
+ ## Built for LLMs
147
+
148
+ cleanmonkey is designed to work well as a tool for large language models. Invisible character cleanup is a constant source of silent bugs in LLM-driven data pipelines — non-breaking spaces break splits, zero-width characters corrupt comparisons, and smart quotes fail exact matches. Without cleanmonkey, LLMs end up generating repetitive `.replace()` chains that miss edge cases and waste tokens. A single `clean()` call handles all of it with a structured, idempotent result — no multi-step prompting or character-by-character debugging required. Fewer tokens in, clean data out.
149
+
150
+ ## License
151
+
152
+ MIT
@@ -0,0 +1,13 @@
1
+ cleanmonkey/__init__.py,sha256=gHz9hgjVMCryQXmiycnVzdWzWdDvOCKoSnMSSRlbNN4,353
2
+ cleanmonkey/__main__.py,sha256=jFCbwZHhmw8k8JnQsSE14dtyiZpBDkb8T7mjWBRaAg4,158
3
+ cleanmonkey/cli.py,sha256=wPXbCwCrT1yZx8kMCvauBFT2ioZTczTItLuwr35-w7Y,15788
4
+ cleanmonkey/core.py,sha256=vO03OpJqz7TzjJoEgOYJLQcwlT3gMLM_kHgASw36KYY,15624
5
+ cleanmonkey/maps.py,sha256=AS5kxrtZtcxYLOFES38jG34gFqB96V5fXRNwHtlkU98,3708
6
+ cleanmonkey/profiles.py,sha256=hG6zWAwAFhP0Pxm_vpc4HRKKfk_ipRsDv2AFuJeZ1YU,1534
7
+ cleanmonkey/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ cleanmonkey-0.1.0.dist-info/licenses/LICENSE,sha256=srNahN_Cxejm5SlFsCghF2Mml1gXgqlnuqWlDt7F1ck,1065
9
+ cleanmonkey-0.1.0.dist-info/METADATA,sha256=tHxlu1XJxo_kQLYJal9F2iTwyRCBSHgeYk6VRQD3kYQ,4914
10
+ cleanmonkey-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
11
+ cleanmonkey-0.1.0.dist-info/entry_points.txt,sha256=ePX3uiSQ0P3GDiO4yAci3iILpk-FqD6QCvNkXFjJmyg,80
12
+ cleanmonkey-0.1.0.dist-info/top_level.txt,sha256=q7GGSdV6NFD8-QSz1Vowde7NLAO5MPoaMvpnpmhrQWI,12
13
+ cleanmonkey-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ cleanmonkey = cleanmonkey.cli:_main_with_broken_pipe_handling
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 RexBytes
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ cleanmonkey