shareclean 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
shareclean/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ """ShareClean: sanitize sensitive values before sharing text publicly."""
2
+
3
# Package version; ``version`` is an alias that carries the same string.
__version__ = "0.2.0"
version = __version__
shareclean/__main__.py ADDED
@@ -0,0 +1,5 @@
1
+ """Entry point for `python -m shareclean`."""
2
+
3
from shareclean.cli import main

# Hand the CLI's integer return value to the interpreter as the exit status.
exit_code = main()
raise SystemExit(exit_code)
shareclean/cli.py ADDED
@@ -0,0 +1,308 @@
1
+ """CLI entry point for ShareClean."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import json
7
+ import sys
8
+
9
+ from shareclean import __version__
10
+ from shareclean.config import ConfigError, ShareCleanConfig, load_config
11
+ from shareclean.detectors import DEFAULT_REDACTION_LABEL, get_rules
12
+ from shareclean.io_utils import ShareCleanIOError, read_input, write_output
13
+ from shareclean.redactor import sanitize
14
+ from shareclean.report import format_brief_count, format_json_report, format_text_report
15
+ from shareclean.selectors import (
16
+ SelectorError,
17
+ findings_for_check,
18
+ parse_selector_values,
19
+ )
20
+
21
# Process exit codes returned by main(): success, a --check run that found a
# failing item, a user/configuration/I-O error, and an unexpected failure.
EXIT_OK, EXIT_FINDING, EXIT_USER, EXIT_INTERNAL = 0, 1, 2, 3
25
+
26
+
27
+ def _redaction_label(value: str) -> str:
28
+ if value == "":
29
+ raise argparse.ArgumentTypeError("must not be empty")
30
+ if "\n" in value or "\r" in value:
31
+ raise argparse.ArgumentTypeError("must stay on one line")
32
+ return value
33
+
34
+
35
def _add_config_options(parser: argparse.ArgumentParser) -> None:
    """Register the configuration-related options on ``parser``.

    The same option set is attached to both the main parser and the
    ``config show`` parser. Every option defaults to ``None`` so that an
    option left off the command line can be told apart from an explicit one.
    """
    add = parser.add_argument

    add(
        "--config",
        default=None,
        metavar="FILE",
        help="Load ShareClean config from FILE instead of auto-discovery.",
    )
    add(
        "--profile",
        default=None,
        metavar="NAME",
        help="Use a named ShareClean config profile.",
    )

    # Email detection: an enabling and a disabling switch share one dest.
    add(
        "--redact-email",
        action="store_true",
        dest="redact_email",
        default=None,
        help="Enable email address detection for this run.",
    )
    add(
        "--no-redact-email",
        "--no-email",
        action="store_false",
        dest="redact_email",
        default=None,
        help=(
            "Disable email address detection for this run. "
            "--no-email is deprecated; use --no-redact-email."
        ),
    )

    # Private IP detection: same on/off pairing as above.
    add(
        "--redact-private-ip",
        action="store_true",
        dest="redact_private_ip",
        default=None,
        help="Enable detection and redaction of RFC 1918 private IP addresses.",
    )
    add(
        "--no-redact-private-ip",
        action="store_false",
        dest="redact_private_ip",
        default=None,
        help="Disable detection and redaction of RFC 1918 private IP addresses.",
    )

    label_help = (
        "Replacement text for generic secrets such as passwords, API keys, "
        f"Bearer tokens, and connection string passwords. Default: "
        f"{DEFAULT_REDACTION_LABEL!r}."
    )
    add(
        "--redaction-label",
        type=_redaction_label,
        default=None,
        metavar="TEXT",
        help=label_help,
    )

    # Selector options may be repeated; each occurrence is appended.
    add(
        "--fail-on",
        action="append",
        default=None,
        metavar="SELECTORS",
        help=(
            "In --check mode, fail on selectors such as severity:high, "
            "category:token, or rule:SC003."
        ),
    )
    add(
        "--ignore-for-check",
        action="append",
        default=None,
        metavar="SELECTORS",
        help=(
            "In --check mode, exclude matching findings from the exit decision "
            "without disabling detection or redaction."
        ),
    )
111
+
112
+
113
def _build_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the main ``shareclean`` command.

    Registers the positional input file, the run-mode options
    (``--version``, ``--check``, ``--output``, ``--report``,
    ``--report-format``) and then the shared configuration options.
    """
    summary = (
        "Sanitize sensitive values in logs and text before sharing publicly. "
        "Reads from a file or stdin; writes sanitized text to stdout (or --output)."
    )
    parser = argparse.ArgumentParser(prog="shareclean", description=summary)
    add = parser.add_argument

    add(
        "file",
        nargs="?",
        default=None,
        metavar="FILE",
        help="Input file to sanitize. Reads from stdin if omitted.",
    )
    add(
        "--version",
        action="version",
        version=f"%(prog)s {__version__}",
        help="Print the ShareClean version and exit.",
    )
    add(
        "--check",
        action="store_true",
        default=False,
        help=(
            "Exit 1 if matching findings are detected; do not write sanitized "
            "output. Useful in CI pipelines and Git hooks."
        ),
    )
    add(
        "--output",
        default=None,
        metavar="FILE",
        help="Write sanitized text to FILE instead of stdout.",
    )
    add(
        "--report",
        action="store_true",
        default=False,
        help="Print a full redaction report to stderr after processing.",
    )
    add(
        "--report-format",
        choices=["text", "json"],
        default="text",
        metavar="{text,json}",
        help="Format for --report output: 'text' (default) or 'json'.",
    )

    _add_config_options(parser)
    return parser
164
+
165
+
166
def _build_config_show_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the ``shareclean config show`` command.

    It accepts ``--version`` plus the shared configuration options, and no
    positional arguments.
    """
    show_parser = argparse.ArgumentParser(
        description="Print the effective ShareClean configuration.",
        prog="shareclean config show",
    )
    show_parser.add_argument(
        "--version",
        action="version",
        help="Print the ShareClean version and exit.",
        version=f"%(prog)s {__version__}",
    )
    _add_config_options(show_parser)
    return show_parser
179
+
180
+
181
+ def _cli_config_values(args: argparse.Namespace) -> dict[str, object]:
182
+ return {
183
+ "redact_email": args.redact_email,
184
+ "redact_private_ip": args.redact_private_ip,
185
+ "redaction_label": args.redaction_label,
186
+ "fail_on": args.fail_on,
187
+ "ignore_for_check": args.ignore_for_check,
188
+ }
189
+
190
+
191
def _load_effective_config(args: argparse.Namespace) -> ShareCleanConfig:
    """Load the configuration that applies to this invocation.

    Passes ``--config``, ``--profile`` and the CLI override values from
    ``args`` to ``load_config`` and returns its result.
    """
    overrides = _cli_config_values(args)
    return load_config(
        config_path=args.config,
        cli_profile=args.profile,
        cli_values=overrides,
    )
197
+
198
+
199
+ def _extract_config_show_args(argv: list[str]) -> list[str] | None:
200
+ for index in range(len(argv) - 1):
201
+ if argv[index] == "config" and argv[index + 1] == "show":
202
+ return argv[:index] + argv[index + 2:]
203
+ return None
204
+
205
+
206
+ def _print_check_summary(total: int, failing: int) -> None:
207
+ if failing:
208
+ print(
209
+ f"Found {failing} check-failing sensitive item(s) "
210
+ f"out of {total} total finding(s). No output written.",
211
+ file=sys.stderr,
212
+ )
213
+ elif total:
214
+ print(
215
+ f"No check-failing sensitive items found. "
216
+ f"{total} finding(s) still detected and no output written.",
217
+ file=sys.stderr,
218
+ )
219
+ else:
220
+ print("No sensitive items found. No output written.", file=sys.stderr)
221
+
222
+
223
def _run_config_show(argv: list[str]) -> int:
    """Handle ``shareclean config show``.

    Parses ``argv`` (with the ``config show`` tokens already removed), loads
    the effective configuration and prints it to stdout as indented JSON.

    Returns:
        ``EXIT_OK`` on success, or ``EXIT_USER`` after printing the message
        of a ``ConfigError`` to stderr.
    """
    args = _build_config_show_parser().parse_args(argv)
    try:
        effective = _load_effective_config(args)
    except ConfigError as exc:
        print(str(exc), file=sys.stderr)
        return EXIT_USER
    rendered = json.dumps(effective.to_public_dict(), indent=2)
    print(rendered)
    return EXIT_OK
233
+
234
+
235
def main(argv: list[str] | None = None) -> int:
    """Run ShareClean and return a process exit code.

    Args:
        argv: Command-line arguments without the program name; defaults to
            ``sys.argv[1:]`` when ``None``.

    Returns:
        ``EXIT_OK`` on success (or a ``--check`` run with no failing
        findings), ``EXIT_FINDING`` when ``--check`` finds failing items,
        ``EXIT_USER`` for option, configuration, selector or I/O errors, and
        ``EXIT_INTERNAL`` for any other exception.
    """
    cli_args = list(argv) if argv is not None else list(sys.argv[1:])

    # ``config show`` is dispatched by hand before normal argument parsing.
    show_args = _extract_config_show_args(cli_args)
    if show_args is not None:
        return _run_config_show(show_args)

    args = _build_parser().parse_args(cli_args)

    try:
        selectors_given = (
            args.fail_on is not None or args.ignore_for_check is not None
        )
        if selectors_given and not args.check:
            print(
                "Error: --fail-on and --ignore-for-check require --check.",
                file=sys.stderr,
            )
            return EXIT_USER

        config = _load_effective_config(args)

        try:
            fail_on = parse_selector_values(config.fail_on)
            ignore_for_check = parse_selector_values(config.ignore_for_check)
        except SelectorError as exc:
            print(f"Error: {exc}", file=sys.stderr)
            return EXIT_USER

        try:
            text, input_name = read_input(args.file)
        except ShareCleanIOError as exc:
            print(str(exc), file=sys.stderr)
            return EXIT_USER

        active_rules = get_rules(
            redact_email=config.redact_email,
            redact_private_ip=config.redact_private_ip,
            redaction_label=config.redaction_label,
        )
        result = sanitize(text, active_rules)

        if args.check:
            # Check mode only reports; no sanitized text is written.
            failing = findings_for_check(
                result.findings,
                fail_on=fail_on,
                ignore_for_check=ignore_for_check,
            )
            _print_check_summary(result.replacement_count, len(failing))
            if failing:
                return EXIT_FINDING
            return EXIT_OK

        try:
            write_output(result.cleaned_text, args.output, args.file)
        except ShareCleanIOError as exc:
            print(str(exc), file=sys.stderr)
            return EXIT_USER

        if args.output:
            print(f"Output written to: {args.output}", file=sys.stderr)

        # Diagnostics go to stderr so they never mix with sanitized stdout.
        if not args.report:
            print(format_brief_count(result), file=sys.stderr)
        elif args.report_format == "json":
            print(format_json_report(result, input_name), file=sys.stderr)
        else:
            print(format_text_report(result, input_name), file=sys.stderr)

        return EXIT_OK

    except ConfigError as exc:
        print(str(exc), file=sys.stderr)
        return EXIT_USER
    except Exception as exc:  # noqa: BLE001 - catch-all for internal errors
        print(f"Internal error: {exc}", file=sys.stderr)
        return EXIT_INTERNAL
shareclean/config.py ADDED
@@ -0,0 +1,312 @@
1
+ """Configuration loading for ShareClean."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ from dataclasses import asdict, dataclass, replace
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ from shareclean.detectors import DEFAULT_REDACTION_LABEL
11
+
12
+ try: # pragma: no cover - exercised on Python 3.11+
13
+ import tomllib
14
+ except ModuleNotFoundError: # pragma: no cover - exercised on Python 3.10
15
+ import tomli as tomllib # type: ignore[no-redef]
16
+
17
+
18
# Accepted spellings for boolean environment variables; values are stripped
# and lower-cased before being compared against these sets.
BOOL_TRUE = frozenset(("true", "1", "yes", "on"))
BOOL_FALSE = frozenset(("false", "0", "no", "off"))

# Names of the environment variables read by the configuration loader.
ENV_REDACT_EMAIL = "SHARECLEAN_REDACT_EMAIL"
ENV_REDACT_PRIVATE_IP = "SHARECLEAN_REDACT_PRIVATE_IP"
ENV_REDACTION_LABEL = "SHARECLEAN_REDACTION_LABEL"
ENV_PROFILE = "SHARECLEAN_PROFILE"
ENV_FAIL_ON = "SHARECLEAN_FAIL_ON"
ENV_IGNORE_FOR_CHECK = "SHARECLEAN_IGNORE_FOR_CHECK"

# Keys permitted inside a profile table.
PROFILE_KEYS = frozenset((
    "redact_email",
    "redact_private_ip",
    "redaction_label",
    "fail_on",
    "ignore_for_check",
))
# The root table accepts every profile key plus profile selection/definition.
ROOT_KEYS = PROFILE_KEYS | {"profile", "profiles"}
44
+
45
+
46
class ConfigError(ValueError):
    """A configuration problem whose message is meant to be shown to the user."""
48
+
49
+
50
@dataclass(frozen=True)
class ShareCleanConfig:
    """Immutable ShareClean settings together with their default values."""

    redact_email: bool = True
    redact_private_ip: bool = False
    redaction_label: str = DEFAULT_REDACTION_LABEL
    profile: str = "default"
    # ``None`` means "not set"; with_lists() turns it into an empty list.
    fail_on: list[str] | None = None
    ignore_for_check: list[str] | None = None

    def with_lists(self) -> "ShareCleanConfig":
        """Return a copy whose selector fields are fresh lists, never ``None``."""
        selector_fields = {
            "fail_on": list(self.fail_on or []),
            "ignore_for_check": list(self.ignore_for_check or []),
        }
        return replace(self, **selector_fields)

    def to_public_dict(self) -> dict[str, Any]:
        """Return all fields as a plain dict, with selector fields as lists."""
        return asdict(self.with_lists())
69
+
70
+
71
def _config_error(path: Path, message: str) -> ConfigError:
    """Build (without raising) a ``ConfigError`` whose message names ``path``."""
    text = f"Config error in {path}: {message}"
    return ConfigError(text)
73
+
74
+
75
def _format_toml_error(path: Path, exc: tomllib.TOMLDecodeError) -> ConfigError:
    """Convert a TOML decode error into a ``ConfigError`` (built, not raised).

    When ``exc`` exposes both ``lineno`` and ``colno``, they are appended to
    the path as ``path:line:column``; otherwise only the path is shown.
    """
    line, column = (getattr(exc, name, None) for name in ("lineno", "colno"))
    has_position = line is not None and column is not None
    location = f"{path}:{line}:{column}" if has_position else f"{path}"
    return ConfigError(f"Config error in {location}: {exc}")
81
+
82
+
83
+ def _load_toml(path: Path) -> dict[str, Any]:
84
+ try:
85
+ with path.open("rb") as handle:
86
+ data = tomllib.load(handle)
87
+ except tomllib.TOMLDecodeError as exc:
88
+ raise _format_toml_error(path, exc) from exc
89
+ except OSError as exc:
90
+ raise ConfigError(f"Config error in {path}: cannot read file") from exc
91
+ if not isinstance(data, dict):
92
+ raise _config_error(path, "top-level TOML value must be a table")
93
+ return data
94
+
95
+
96
def _shareclean_table_from_pyproject(path: Path) -> dict[str, Any] | None:
    """Return the ``[tool.shareclean]`` table of the pyproject file at ``path``.

    Returns ``None`` when ``tool`` is missing or not a table, or when it has
    no ``shareclean`` entry.

    Raises:
        ConfigError: if ``tool.shareclean`` exists but is not a table, or if
            the file itself cannot be loaded.
    """
    tool_section = _load_toml(path).get("tool")
    if not isinstance(tool_section, dict):
        return None
    section = tool_section.get("shareclean")
    if section is None or isinstance(section, dict):
        return section
    raise _config_error(path, "[tool.shareclean] must be a table")
107
+
108
+
109
def _find_config(start: Path | None = None) -> tuple[Path, dict[str, Any]] | None:
    """Search upwards from ``start`` for a ShareClean configuration.

    Each directory is checked for ``.shareclean.toml`` and for a
    ``pyproject.toml`` containing a ShareClean table. The walk begins at
    ``start`` (the current working directory when ``None``; the parent
    directory when ``start`` is a file) and stops after the first directory
    that contains ``.git`` or after the filesystem root.

    Returns:
        ``(config_path, table)`` for the first configuration found, or
        ``None`` when the walk ends without finding one.

    Raises:
        ConfigError: if one directory provides both kinds of configuration,
            or a candidate file cannot be loaded.
    """
    origin = (start or Path.cwd()).resolve()
    if origin.is_file():
        origin = origin.parent

    for directory in (origin, *origin.parents):
        dotfile = directory / ".shareclean.toml"
        pyproject = directory / "pyproject.toml"
        dotfile_table = _load_toml(dotfile) if dotfile.exists() else None
        pyproject_table = None
        if pyproject.exists():
            pyproject_table = _shareclean_table_from_pyproject(pyproject)

        candidates = [
            (candidate_path, candidate_table)
            for candidate_path, candidate_table in (
                (dotfile, dotfile_table),
                (pyproject, pyproject_table),
            )
            if candidate_table is not None
        ]
        if len(candidates) == 2:
            raise ConfigError(
                "Config error: both ShareClean config files exist in the same "
                f"directory: {dotfile} and {pyproject}"
            )
        if candidates:
            return candidates[0]

        # A directory containing .git is the last one searched.
        if (directory / ".git").exists():
            break
    return None
140
+
141
+
142
def _load_explicit_config(path_value: str) -> tuple[Path, dict[str, Any]]:
    """Load the configuration file named by ``--config``.

    ``path_value`` has ``~`` expanded and is resolved to an absolute path. A
    file called ``pyproject.toml`` must contain a ``[tool.shareclean]``
    table; any other file is loaded as a whole TOML document.

    Returns:
        ``(resolved_path, table)``.

    Raises:
        ConfigError: if the file does not exist, cannot be loaded, or is a
            ``pyproject.toml`` without a ``[tool.shareclean]`` table.
    """
    resolved = Path(path_value).expanduser().resolve()
    if not resolved.exists():
        raise ConfigError(f"Config error: config file not found: {resolved}")
    if resolved.name != "pyproject.toml":
        return resolved, _load_toml(resolved)

    section = _shareclean_table_from_pyproject(resolved)
    if section is None:
        raise _config_error(resolved, "missing [tool.shareclean] table")
    return resolved, section
152
+
153
+
154
+ def _validate_label(path: Path | None, value: object) -> str:
155
+ if not isinstance(value, str):
156
+ raise _value_error(path, "redaction_label must be a string")
157
+ if value == "":
158
+ raise _value_error(path, "redaction_label must not be empty")
159
+ if "\n" in value or "\r" in value:
160
+ raise _value_error(path, "redaction_label must stay on one line")
161
+ return value
162
+
163
+
164
def _value_error(path: Path | None, message: str) -> ConfigError:
    """Build (without raising) a ``ConfigError`` for an invalid value.

    The message names ``path`` when one is given and uses a generic
    ``Config error:`` prefix otherwise.
    """
    if path is not None:
        return _config_error(path, message)
    return ConfigError(f"Config error: {message}")
168
+
169
+
170
+ def _validate_bool(path: Path | None, key: str, value: object) -> bool:
171
+ if not isinstance(value, bool):
172
+ raise _value_error(path, f"{key} must be true or false")
173
+ return value
174
+
175
+
176
+ def _validate_string(path: Path | None, key: str, value: object) -> str:
177
+ if not isinstance(value, str) or not value:
178
+ raise _value_error(path, f"{key} must be a non-empty string")
179
+ return value
180
+
181
+
182
+ def _validate_selector_list(path: Path | None, key: str, value: object) -> list[str]:
183
+ if not isinstance(value, list) or not all(isinstance(item, str) for item in value):
184
+ raise _value_error(path, f"{key} must be a list of selector strings")
185
+ return list(value)
186
+
187
+
188
def _validate_table(
    path: Path | None,
    table: dict[str, Any],
    *,
    profile_table: bool,
) -> tuple[dict[str, Any], dict[str, dict[str, Any]]]:
    """Validate one configuration table and split it into values and profiles.

    Args:
        path: Source file, used only in error messages (may be ``None``).
        table: The raw table to validate.
        profile_table: ``True`` when ``table`` is a single profile, which
            restricts the allowed keys to ``PROFILE_KEYS``; otherwise
            ``ROOT_KEYS`` applies.

    Returns:
        ``(values, profiles)``: the validated settings of this table, and
        the validated settings of each named profile keyed by profile name
        (always empty for a profile table).

    Raises:
        ConfigError: on unknown keys or values of the wrong type.
    """
    permitted = PROFILE_KEYS if profile_table else ROOT_KEYS
    unexpected = sorted(set(table) - permitted)
    if unexpected:
        raise _value_error(path, f"unknown config key(s): {', '.join(unexpected)}")

    validated: dict[str, Any] = {}
    profile_tables: dict[str, dict[str, Any]] = {}
    for key, raw in table.items():
        if key in ("redact_email", "redact_private_ip"):
            validated[key] = _validate_bool(path, key, raw)
        elif key in ("fail_on", "ignore_for_check"):
            validated[key] = _validate_selector_list(path, key, raw)
        elif key == "redaction_label":
            validated[key] = _validate_label(path, raw)
        elif key == "profile":
            validated[key] = _validate_string(path, key, raw)
        elif key == "profiles":
            if not isinstance(raw, dict):
                raise _value_error(path, "profiles must be a table")
            for profile_name, profile_body in raw.items():
                if not (isinstance(profile_name, str) and profile_name):
                    raise _value_error(path, "profile names must be non-empty strings")
                if not isinstance(profile_body, dict):
                    raise _value_error(path, f"profile {profile_name!r} must be a table")
                # Profiles are validated with the stricter profile key set.
                profile_tables[profile_name] = _validate_table(
                    path,
                    profile_body,
                    profile_table=True,
                )[0]
    return validated, profile_tables
226
+
227
+
228
def _apply_values(config: ShareCleanConfig, values: dict[str, Any]) -> ShareCleanConfig:
    """Return a new config equal to ``config`` with ``values`` laid on top.

    ``config`` itself is not modified; keys in ``values`` replace the fields
    of the same name.
    """
    merged = {**asdict(config), **values}
    return ShareCleanConfig(**merged)
233
+
234
+
235
def _parse_env_bool(name: str, raw: str) -> bool:
    """Interpret the environment variable value ``raw`` as a boolean.

    The value is stripped and lower-cased, then looked up in ``BOOL_TRUE``
    and ``BOOL_FALSE``. ``name`` is only used in the error message.

    Raises:
        ConfigError: if the value matches neither set.
    """
    token = raw.strip().lower()
    for accepted, meaning in ((BOOL_TRUE, True), (BOOL_FALSE, False)):
        if token in accepted:
            return meaning
    raise ConfigError(
        f"Config error: {name} must be one of true, 1, yes, on, false, 0, no, off"
    )
244
+
245
+
246
def _env_values(environ: dict[str, str]) -> tuple[dict[str, Any], str | None]:
    """Extract ShareClean settings from the environment mapping ``environ``.

    Returns:
        ``(values, profile)``: ``values`` holds an entry for each setting
        whose variable is present, and ``profile`` is the value of
        ``SHARECLEAN_PROFILE`` or ``None`` when it is not set. Selector
        variables are stored as one-element lists holding the raw string.

    Raises:
        ConfigError: if a boolean, label or profile variable is invalid.
    """
    values: dict[str, Any] = {}

    boolean_settings = (
        (ENV_REDACT_EMAIL, "redact_email"),
        (ENV_REDACT_PRIVATE_IP, "redact_private_ip"),
    )
    for variable, key in boolean_settings:
        if variable in environ:
            values[key] = _parse_env_bool(variable, environ[variable])

    if ENV_REDACTION_LABEL in environ:
        values["redaction_label"] = _validate_label(None, environ[ENV_REDACTION_LABEL])

    profile = (
        _validate_string(None, ENV_PROFILE, environ[ENV_PROFILE])
        if ENV_PROFILE in environ
        else None
    )

    selector_settings = (
        (ENV_FAIL_ON, "fail_on"),
        (ENV_IGNORE_FOR_CHECK, "ignore_for_check"),
    )
    for variable, key in selector_settings:
        if variable in environ:
            values[key] = [environ[variable]]

    return values, profile
268
+
269
+
270
def load_config(
    *,
    config_path: str | None = None,
    cli_profile: str | None = None,
    cli_values: dict[str, Any] | None = None,
    environ: dict[str, str] | None = None,
    start: Path | None = None,
) -> ShareCleanConfig:
    """Load ShareClean configuration using the documented precedence order.

    Settings are layered from lowest to highest priority: built-in defaults,
    the root config table, the selected profile, environment variables, and
    finally CLI values. The profile name is taken from the config file's
    ``profile`` key, overridden by ``SHARECLEAN_PROFILE``, overridden by
    ``cli_profile``.

    Args:
        config_path: Explicit config file; when ``None`` a config is searched
            for starting at ``start``.
        cli_profile: Profile name given on the command line, if any.
        cli_values: CLI overrides; entries whose value is ``None`` are ignored.
        environ: Environment mapping to read. ``None`` means the process
            environment; an empty mapping means no environment overrides.
        start: Directory (or file) where config discovery begins.

    Returns:
        The effective configuration, with selector fields as lists.

    Raises:
        ConfigError: for unreadable or invalid configuration, invalid
            environment values, or a selected profile that is not defined.
    """
    path: Path | None = None
    table: dict[str, Any] = {}
    profiles: dict[str, dict[str, Any]] = {}

    if config_path is not None:
        path, table = _load_explicit_config(config_path)
    else:
        discovered = _find_config(start)
        if discovered is not None:
            path, table = discovered

    base_values, profiles = _validate_table(path, table, profile_table=False)
    config = _apply_values(ShareCleanConfig(), base_values)

    # Test for None explicitly: an empty mapping passed by the caller is a
    # deliberate "no environment" and must not fall back to os.environ.
    env_source = dict(os.environ) if environ is None else environ
    env_values, env_profile = _env_values(env_source)

    selected_profile = config.profile
    if env_profile is not None:
        selected_profile = env_profile
    if cli_profile is not None:
        selected_profile = cli_profile

    # "default" means the root table alone; any other name must be defined.
    if selected_profile != "default":
        if selected_profile not in profiles:
            raise _value_error(path, f"unknown profile: {selected_profile}")
        config = _apply_values(config, profiles[selected_profile])

    config = replace(config, profile=selected_profile)
    config = _apply_values(config, env_values)
    if cli_values:
        config = _apply_values(
            config,
            {key: value for key, value in cli_values.items() if value is not None},
        )
    return config.with_lists()