shareclean 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- shareclean/__init__.py +4 -0
- shareclean/__main__.py +5 -0
- shareclean/cli.py +308 -0
- shareclean/config.py +312 -0
- shareclean/detectors.py +234 -0
- shareclean/io_utils.py +124 -0
- shareclean/models.py +65 -0
- shareclean/py.typed +1 -0
- shareclean/redactor.py +116 -0
- shareclean/report.py +102 -0
- shareclean/selectors.py +94 -0
- shareclean-0.2.0.dist-info/METADATA +263 -0
- shareclean-0.2.0.dist-info/RECORD +17 -0
- shareclean-0.2.0.dist-info/WHEEL +5 -0
- shareclean-0.2.0.dist-info/entry_points.txt +2 -0
- shareclean-0.2.0.dist-info/licenses/LICENSE +21 -0
- shareclean-0.2.0.dist-info/top_level.txt +1 -0
shareclean/__init__.py
ADDED
shareclean/__main__.py
ADDED
shareclean/cli.py
ADDED
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
"""CLI entry point for ShareClean."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import json
|
|
7
|
+
import sys
|
|
8
|
+
|
|
9
|
+
from shareclean import __version__
|
|
10
|
+
from shareclean.config import ConfigError, ShareCleanConfig, load_config
|
|
11
|
+
from shareclean.detectors import DEFAULT_REDACTION_LABEL, get_rules
|
|
12
|
+
from shareclean.io_utils import ShareCleanIOError, read_input, write_output
|
|
13
|
+
from shareclean.redactor import sanitize
|
|
14
|
+
from shareclean.report import format_brief_count, format_json_report, format_text_report
|
|
15
|
+
from shareclean.selectors import (
|
|
16
|
+
SelectorError,
|
|
17
|
+
findings_for_check,
|
|
18
|
+
parse_selector_values,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
# Process exit codes returned by main(): success, check-mode findings,
# user-facing errors (config, selectors, I/O), and unexpected failures.
EXIT_OK, EXIT_FINDING, EXIT_USER, EXIT_INTERNAL = range(4)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _redaction_label(value: str) -> str:
|
|
28
|
+
if value == "":
|
|
29
|
+
raise argparse.ArgumentTypeError("must not be empty")
|
|
30
|
+
if "\n" in value or "\r" in value:
|
|
31
|
+
raise argparse.ArgumentTypeError("must stay on one line")
|
|
32
|
+
return value
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _add_config_options(parser: argparse.ArgumentParser) -> None:
    """Register the configuration-related options on *parser*.

    The same option set is attached to both the main parser and the
    ``config show`` parser. Every option defaults to ``None`` so that an
    option the user did not pass can be told apart from an explicit value.
    """
    add = parser.add_argument

    # Where the configuration comes from.
    add(
        "--config",
        default=None,
        metavar="FILE",
        help="Load ShareClean config from FILE instead of auto-discovery.",
    )
    add(
        "--profile",
        default=None,
        metavar="NAME",
        help="Use a named ShareClean config profile.",
    )

    # Paired on/off switches writing to one destination each.
    add(
        "--redact-email",
        action="store_true",
        dest="redact_email",
        default=None,
        help="Enable email address detection for this run.",
    )
    no_email_help = (
        "Disable email address detection for this run. "
        "--no-email is deprecated; use --no-redact-email."
    )
    add(
        "--no-redact-email",
        "--no-email",
        action="store_false",
        dest="redact_email",
        default=None,
        help=no_email_help,
    )
    add(
        "--redact-private-ip",
        action="store_true",
        dest="redact_private_ip",
        default=None,
        help="Enable detection and redaction of RFC 1918 private IP addresses.",
    )
    add(
        "--no-redact-private-ip",
        action="store_false",
        dest="redact_private_ip",
        default=None,
        help="Disable detection and redaction of RFC 1918 private IP addresses.",
    )

    label_help = (
        "Replacement text for generic secrets such as passwords, API keys, "
        "Bearer tokens, and connection string passwords. Default: "
        f"{DEFAULT_REDACTION_LABEL!r}."
    )
    add(
        "--redaction-label",
        type=_redaction_label,
        default=None,
        metavar="TEXT",
        help=label_help,
    )

    # Selector options; repeated use accumulates values into a list.
    fail_on_help = (
        "In --check mode, fail on selectors such as severity:high, "
        "category:token, or rule:SC003."
    )
    add(
        "--fail-on",
        action="append",
        default=None,
        metavar="SELECTORS",
        help=fail_on_help,
    )
    ignore_help = (
        "In --check mode, exclude matching findings from the exit decision "
        "without disabling detection or redaction."
    )
    add(
        "--ignore-for-check",
        action="append",
        default=None,
        metavar="SELECTORS",
        help=ignore_help,
    )
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def _build_parser() -> argparse.ArgumentParser:
    """Create the argument parser for the main ``shareclean`` command.

    Defines the positional input file, the run-mode and output options,
    and then appends the shared configuration options.
    """
    summary = (
        "Sanitize sensitive values in logs and text before sharing publicly. "
        "Reads from a file or stdin; writes sanitized text to stdout (or --output)."
    )
    parser = argparse.ArgumentParser(prog="shareclean", description=summary)
    add = parser.add_argument

    add(
        "file",
        nargs="?",
        default=None,
        metavar="FILE",
        help="Input file to sanitize. Reads from stdin if omitted.",
    )
    add(
        "--version",
        action="version",
        version=f"%(prog)s {__version__}",
        help="Print the ShareClean version and exit.",
    )

    check_help = (
        "Exit 1 if matching findings are detected; do not write sanitized "
        "output. Useful in CI pipelines and Git hooks."
    )
    add("--check", action="store_true", default=False, help=check_help)
    add(
        "--output",
        default=None,
        metavar="FILE",
        help="Write sanitized text to FILE instead of stdout.",
    )
    add(
        "--report",
        action="store_true",
        default=False,
        help="Print a full redaction report to stderr after processing.",
    )
    add(
        "--report-format",
        choices=["text", "json"],
        default="text",
        metavar="{text,json}",
        help="Format for --report output: 'text' (default) or 'json'.",
    )

    _add_config_options(parser)
    return parser
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def _build_config_show_parser() -> argparse.ArgumentParser:
    """Create the argument parser for ``shareclean config show``.

    It accepts ``--version`` plus the shared configuration options; there
    is no positional input file.
    """
    show_parser = argparse.ArgumentParser(
        description="Print the effective ShareClean configuration.",
        prog="shareclean config show",
    )
    show_parser.add_argument(
        "--version",
        help="Print the ShareClean version and exit.",
        action="version",
        version=f"%(prog)s {__version__}",
    )
    _add_config_options(show_parser)
    return show_parser
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def _cli_config_values(args: argparse.Namespace) -> dict[str, object]:
|
|
182
|
+
return {
|
|
183
|
+
"redact_email": args.redact_email,
|
|
184
|
+
"redact_private_ip": args.redact_private_ip,
|
|
185
|
+
"redaction_label": args.redaction_label,
|
|
186
|
+
"fail_on": args.fail_on,
|
|
187
|
+
"ignore_for_check": args.ignore_for_check,
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def _load_effective_config(args: argparse.Namespace) -> ShareCleanConfig:
    """Resolve the effective configuration for parsed CLI arguments.

    Forwards ``--config``, ``--profile`` and the CLI override values to
    ``load_config`` and returns its result. Errors raised by
    ``load_config`` propagate to the caller.
    """
    load_kwargs = {
        "config_path": args.config,
        "cli_profile": args.profile,
        "cli_values": _cli_config_values(args),
    }
    return load_config(**load_kwargs)
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def _extract_config_show_args(argv: list[str]) -> list[str] | None:
|
|
200
|
+
for index in range(len(argv) - 1):
|
|
201
|
+
if argv[index] == "config" and argv[index + 1] == "show":
|
|
202
|
+
return argv[:index] + argv[index + 2:]
|
|
203
|
+
return None
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def _print_check_summary(total: int, failing: int) -> None:
|
|
207
|
+
if failing:
|
|
208
|
+
print(
|
|
209
|
+
f"Found {failing} check-failing sensitive item(s) "
|
|
210
|
+
f"out of {total} total finding(s). No output written.",
|
|
211
|
+
file=sys.stderr,
|
|
212
|
+
)
|
|
213
|
+
elif total:
|
|
214
|
+
print(
|
|
215
|
+
f"No check-failing sensitive items found. "
|
|
216
|
+
f"{total} finding(s) still detected and no output written.",
|
|
217
|
+
file=sys.stderr,
|
|
218
|
+
)
|
|
219
|
+
else:
|
|
220
|
+
print("No sensitive items found. No output written.", file=sys.stderr)
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def _run_config_show(argv: list[str]) -> int:
    """Handle ``shareclean config show``.

    Parses *argv* with the ``config show`` parser, resolves the effective
    configuration and prints it to stdout as indented JSON.

    Returns:
        ``EXIT_OK`` on success, or ``EXIT_USER`` after printing the
        message of a ``ConfigError`` to stderr.
    """
    args = _build_config_show_parser().parse_args(argv)
    try:
        effective = _load_effective_config(args)
    except ConfigError as exc:
        print(str(exc), file=sys.stderr)
        return EXIT_USER

    rendered = json.dumps(effective.to_public_dict(), indent=2)
    print(rendered)
    return EXIT_OK
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def main(argv: list[str] | None = None) -> int:
    """Run ShareClean and return a process exit code.

    Args:
        argv: Command-line arguments without the program name. When
            ``None``, ``sys.argv[1:]`` is used.

    Returns:
        ``EXIT_OK`` on success, ``EXIT_FINDING`` when ``--check`` finds
        check-failing items, ``EXIT_USER`` for configuration, selector or
        I/O errors, and ``EXIT_INTERNAL`` for any other exception.
    """
    cli_args = list(sys.argv[1:]) if argv is None else list(argv)

    # "config show" is dispatched before the main parser sees the arguments.
    show_args = _extract_config_show_args(cli_args)
    if show_args is not None:
        return _run_config_show(show_args)

    args = _build_parser().parse_args(cli_args)

    try:
        uses_check_only_option = (
            args.fail_on is not None or args.ignore_for_check is not None
        )
        if uses_check_only_option and not args.check:
            print(
                "Error: --fail-on and --ignore-for-check require --check.",
                file=sys.stderr,
            )
            return EXIT_USER

        config = _load_effective_config(args)

        try:
            fail_on = parse_selector_values(config.fail_on)
            ignore_for_check = parse_selector_values(config.ignore_for_check)
        except SelectorError as exc:
            print(f"Error: {exc}", file=sys.stderr)
            return EXIT_USER

        try:
            text, input_name = read_input(args.file)
        except ShareCleanIOError as exc:
            print(str(exc), file=sys.stderr)
            return EXIT_USER

        active_rules = get_rules(
            redact_email=config.redact_email,
            redact_private_ip=config.redact_private_ip,
            redaction_label=config.redaction_label,
        )
        result = sanitize(text, active_rules)

        if args.check:
            # Check mode only reports; sanitized text is never written.
            failing = findings_for_check(
                result.findings,
                fail_on=fail_on,
                ignore_for_check=ignore_for_check,
            )
            _print_check_summary(result.replacement_count, len(failing))
            if failing:
                return EXIT_FINDING
            return EXIT_OK

        try:
            write_output(result.cleaned_text, args.output, args.file)
        except ShareCleanIOError as exc:
            print(str(exc), file=sys.stderr)
            return EXIT_USER

        if args.output:
            print(f"Output written to: {args.output}", file=sys.stderr)

        # Exactly one summary goes to stderr: the full report in the
        # requested format, or a brief count when --report is absent.
        if not args.report:
            summary = format_brief_count(result)
        elif args.report_format == "json":
            summary = format_json_report(result, input_name)
        else:
            summary = format_text_report(result, input_name)
        print(summary, file=sys.stderr)

        return EXIT_OK

    except ConfigError as exc:
        print(str(exc), file=sys.stderr)
        return EXIT_USER
    except Exception as exc:  # noqa: BLE001 - catch-all for internal errors
        print(f"Internal error: {exc}", file=sys.stderr)
        return EXIT_INTERNAL
|
shareclean/config.py
ADDED
|
@@ -0,0 +1,312 @@
|
|
|
1
|
+
"""Configuration loading for ShareClean."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from dataclasses import asdict, dataclass, replace
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from shareclean.detectors import DEFAULT_REDACTION_LABEL
|
|
11
|
+
|
|
12
|
+
try: # pragma: no cover - exercised on Python 3.11+
|
|
13
|
+
import tomllib
|
|
14
|
+
except ModuleNotFoundError: # pragma: no cover - exercised on Python 3.10
|
|
15
|
+
import tomli as tomllib # type: ignore[no-redef]
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# Spellings accepted for boolean environment variables. Raw values are
# stripped and lower-cased before being compared against these sets.
BOOL_TRUE = frozenset(("true", "1", "yes", "on"))
BOOL_FALSE = frozenset(("false", "0", "no", "off"))

# Names of the environment variables read by _env_values().
ENV_PROFILE = "SHARECLEAN_PROFILE"
ENV_REDACT_EMAIL = "SHARECLEAN_REDACT_EMAIL"
ENV_REDACT_PRIVATE_IP = "SHARECLEAN_REDACT_PRIVATE_IP"
ENV_REDACTION_LABEL = "SHARECLEAN_REDACTION_LABEL"
ENV_FAIL_ON = "SHARECLEAN_FAIL_ON"
ENV_IGNORE_FOR_CHECK = "SHARECLEAN_IGNORE_FOR_CHECK"

# Keys permitted inside an individual profile table.
PROFILE_KEYS = frozenset(
    (
        "redact_email",
        "redact_private_ip",
        "redaction_label",
        "fail_on",
        "ignore_for_check",
    )
)
# The root table accepts every profile key plus the profile selector and
# the table that holds the named profiles.
ROOT_KEYS = PROFILE_KEYS | {"profile", "profiles"}
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class ConfigError(ValueError):
    """A configuration problem whose message is meant to be shown to the user."""
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@dataclass(frozen=True)
class ShareCleanConfig:
    """Immutable set of ShareClean settings; the defaults are the built-in baseline."""

    # Toggle for email address detection.
    redact_email: bool = True
    # Toggle for private IP address detection.
    redact_private_ip: bool = False
    # Replacement text passed on to the detection rules.
    redaction_label: str = DEFAULT_REDACTION_LABEL
    # Name of the selected profile; "default" means no named profile.
    profile: str = "default"
    # Selector strings for check mode; None until populated.
    fail_on: list[str] | None = None
    ignore_for_check: list[str] | None = None

    def with_lists(self) -> "ShareCleanConfig":
        """Return a copy whose selector fields are fresh lists (``None`` becomes ``[]``)."""
        selector_fields = {
            name: list(getattr(self, name) or [])
            for name in ("fail_on", "ignore_for_check")
        }
        return replace(self, **selector_fields)

    def to_public_dict(self) -> dict[str, Any]:
        """Return the settings as a plain dict, with selector fields as lists."""
        return asdict(self.with_lists())
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _config_error(path: Path, message: str) -> ConfigError:
    """Build (without raising) a ``ConfigError`` whose text names *path*."""
    text = f"Config error in {path}: {message}"
    return ConfigError(text)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _format_toml_error(path: Path, exc: tomllib.TOMLDecodeError) -> ConfigError:
    """Convert a TOML decode error into a ``ConfigError`` for *path*.

    When *exc* exposes both ``lineno`` and ``colno``, the location is
    rendered as ``path:line:column``; otherwise only the path is shown.
    The error is returned, not raised.
    """
    lineno = getattr(exc, "lineno", None)
    colno = getattr(exc, "colno", None)
    if lineno is None or colno is None:
        location = f"{path}"
    else:
        location = f"{path}:{lineno}:{colno}"
    return ConfigError(f"Config error in {location}: {exc}")
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _load_toml(path: Path) -> dict[str, Any]:
    """Read and parse the TOML file at *path*.

    Returns:
        The parsed top-level table.

    Raises:
        ConfigError: if the file cannot be read, is not valid TOML, or
            does not parse to a table. The underlying exception is
            chained as the cause for read and decode failures.
    """
    try:
        with path.open("rb") as stream:
            parsed = tomllib.load(stream)
    except tomllib.TOMLDecodeError as exc:
        raise _format_toml_error(path, exc) from exc
    except OSError as exc:
        raise ConfigError(f"Config error in {path}: cannot read file") from exc

    if isinstance(parsed, dict):
        return parsed
    raise _config_error(path, "top-level TOML value must be a table")
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _shareclean_table_from_pyproject(path: Path) -> dict[str, Any] | None:
    """Return the ``[tool.shareclean]`` table of a pyproject file, if any.

    Returns ``None`` when the file has no ``tool`` table or no
    ``shareclean`` entry under it.

    Raises:
        ConfigError: if the file cannot be loaded, or if
            ``tool.shareclean`` exists but is not a table.
    """
    tool_section = _load_toml(path).get("tool")
    if not isinstance(tool_section, dict):
        return None

    section = tool_section.get("shareclean")
    if section is None or isinstance(section, dict):
        return section
    raise _config_error(path, "[tool.shareclean] must be a table")
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _find_config(start: Path | None = None) -> tuple[Path, dict[str, Any]] | None:
    """Search upwards from *start* for a ShareClean configuration.

    Each directory is checked for ``.shareclean.toml`` and for a
    ``pyproject.toml`` carrying a ``[tool.shareclean]`` table. The walk
    starts at *start* (the current working directory when omitted; the
    parent directory when *start* is a file) and stops after the first
    directory containing ``.git`` or at the filesystem root.

    Returns:
        ``(config_file_path, table)`` for the first match, else ``None``.

    Raises:
        ConfigError: if one directory provides both config sources, or if
            a candidate file fails to load.
    """
    origin = (start or Path.cwd()).resolve()
    if origin.is_file():
        origin = origin.parent

    for directory in (origin, *origin.parents):
        dotfile = directory / ".shareclean.toml"
        pyproject = directory / "pyproject.toml"

        dotfile_table = _load_toml(dotfile) if dotfile.exists() else None
        pyproject_table = None
        if pyproject.exists():
            pyproject_table = _shareclean_table_from_pyproject(pyproject)

        candidates = [
            (candidate_path, candidate_table)
            for candidate_path, candidate_table in (
                (dotfile, dotfile_table),
                (pyproject, pyproject_table),
            )
            if candidate_table is not None
        ]
        if len(candidates) == 2:
            raise ConfigError(
                "Config error: both ShareClean config files exist in the same "
                f"directory: {dotfile} and {pyproject}"
            )
        if candidates:
            return candidates[0]

        # A directory containing .git ends the search.
        if (directory / ".git").exists():
            break

    return None
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _load_explicit_config(path_value: str) -> tuple[Path, dict[str, Any]]:
    """Load the config file named explicitly by the user.

    *path_value* is user-expanded and resolved. A file named
    ``pyproject.toml`` must contain a ``[tool.shareclean]`` table; any
    other file is used as a ShareClean TOML file in its entirety.

    Returns:
        ``(resolved_path, table)``.

    Raises:
        ConfigError: if the file does not exist, cannot be loaded, or is
            a pyproject file without a ``[tool.shareclean]`` table.
    """
    path = Path(path_value).expanduser().resolve()
    if not path.exists():
        raise ConfigError(f"Config error: config file not found: {path}")

    if path.name != "pyproject.toml":
        return path, _load_toml(path)

    table = _shareclean_table_from_pyproject(path)
    if table is None:
        raise _config_error(path, "missing [tool.shareclean] table")
    return path, table
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _validate_label(path: Path | None, value: object) -> str:
|
|
155
|
+
if not isinstance(value, str):
|
|
156
|
+
raise _value_error(path, "redaction_label must be a string")
|
|
157
|
+
if value == "":
|
|
158
|
+
raise _value_error(path, "redaction_label must not be empty")
|
|
159
|
+
if "\n" in value or "\r" in value:
|
|
160
|
+
raise _value_error(path, "redaction_label must stay on one line")
|
|
161
|
+
return value
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def _value_error(path: Path | None, message: str) -> ConfigError:
    """Build (without raising) a ``ConfigError`` for an invalid value.

    The message names *path* when one is given; otherwise a generic
    ``Config error:`` prefix is used.
    """
    if path is not None:
        return _config_error(path, message)
    return ConfigError(f"Config error: {message}")
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def _validate_bool(path: Path | None, key: str, value: object) -> bool:
|
|
171
|
+
if not isinstance(value, bool):
|
|
172
|
+
raise _value_error(path, f"{key} must be true or false")
|
|
173
|
+
return value
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def _validate_string(path: Path | None, key: str, value: object) -> str:
|
|
177
|
+
if not isinstance(value, str) or not value:
|
|
178
|
+
raise _value_error(path, f"{key} must be a non-empty string")
|
|
179
|
+
return value
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def _validate_selector_list(path: Path | None, key: str, value: object) -> list[str]:
|
|
183
|
+
if not isinstance(value, list) or not all(isinstance(item, str) for item in value):
|
|
184
|
+
raise _value_error(path, f"{key} must be a list of selector strings")
|
|
185
|
+
return list(value)
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def _validate_table(
    path: Path | None,
    table: dict[str, Any],
    *,
    profile_table: bool,
) -> tuple[dict[str, Any], dict[str, dict[str, Any]]]:
    """Validate a config table and split it into values and profiles.

    Args:
        path: Config file the table came from, or ``None``; used only in
            error messages.
        table: Raw table to validate.
        profile_table: ``True`` when *table* is a single profile, which
            restricts the allowed keys to ``PROFILE_KEYS``; otherwise
            ``ROOT_KEYS`` applies.

    Returns:
        ``(values, profiles)``: the validated settings of this table, and
        a mapping of profile name to that profile's validated settings.

    Raises:
        ConfigError: on unknown keys or on any value of the wrong type.
    """
    permitted = PROFILE_KEYS if profile_table else ROOT_KEYS
    unexpected = sorted(set(table) - permitted)
    if unexpected:
        raise _value_error(path, f"unknown config key(s): {', '.join(unexpected)}")

    validated: dict[str, Any] = {}
    parsed_profiles: dict[str, dict[str, Any]] = {}

    for name, raw in table.items():
        if name in ("redact_email", "redact_private_ip"):
            validated[name] = _validate_bool(path, name, raw)
        elif name == "redaction_label":
            validated[name] = _validate_label(path, raw)
        elif name == "profile":
            validated[name] = _validate_string(path, name, raw)
        elif name in ("fail_on", "ignore_for_check"):
            validated[name] = _validate_selector_list(path, name, raw)
        elif name == "profiles":
            if not isinstance(raw, dict):
                raise _value_error(path, "profiles must be a table")
            for profile_name, profile_values in raw.items():
                if not isinstance(profile_name, str) or not profile_name:
                    raise _value_error(path, "profile names must be non-empty strings")
                if not isinstance(profile_values, dict):
                    raise _value_error(path, f"profile {profile_name!r} must be a table")
                # Each profile is validated against the narrower key set;
                # its own (always empty) profiles mapping is discarded.
                parsed_profiles[profile_name] = _validate_table(
                    path,
                    profile_values,
                    profile_table=True,
                )[0]

    return validated, parsed_profiles
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def _apply_values(config: ShareCleanConfig, values: dict[str, Any]) -> ShareCleanConfig:
    """Return a new config equal to *config* with *values* layered on top.

    *config* itself is not modified. Keys of *values* are passed to the
    ``ShareCleanConfig`` constructor as keyword arguments.
    """
    merged = {**asdict(config), **values}
    return ShareCleanConfig(**merged)
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def _parse_env_bool(name: str, raw: str) -> bool:
    """Interpret the environment variable text *raw* as a boolean.

    Matching ignores case and surrounding whitespace.

    Args:
        name: Environment variable name, used only in the error message.
        raw: The variable's text value.

    Raises:
        ConfigError: if *raw* is not one of the accepted spellings.
    """
    token = raw.strip().lower()
    for accepted, meaning in ((BOOL_TRUE, True), (BOOL_FALSE, False)):
        if token in accepted:
            return meaning
    raise ConfigError(
        f"Config error: {name} must be one of true, 1, yes, on, false, 0, no, off"
    )
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def _env_values(environ: dict[str, str]) -> tuple[dict[str, Any], str | None]:
    """Extract ShareClean overrides from an environment mapping.

    Only variables present in *environ* contribute. The two selector
    variables are stored as single-element lists holding the raw text.

    Returns:
        ``(values, profile)``: config overrides keyed by field name, and
        the profile named by ``SHARECLEAN_PROFILE`` (``None`` if unset).

    Raises:
        ConfigError: if a boolean, label or profile variable is invalid.
    """
    overrides: dict[str, Any] = {}

    boolean_vars = (
        (ENV_REDACT_EMAIL, "redact_email"),
        (ENV_REDACT_PRIVATE_IP, "redact_private_ip"),
    )
    for env_name, field_name in boolean_vars:
        if env_name in environ:
            overrides[field_name] = _parse_env_bool(env_name, environ[env_name])

    if ENV_REDACTION_LABEL in environ:
        overrides["redaction_label"] = _validate_label(None, environ[ENV_REDACTION_LABEL])

    selected = None
    if ENV_PROFILE in environ:
        selected = _validate_string(None, ENV_PROFILE, environ[ENV_PROFILE])

    selector_vars = (
        (ENV_FAIL_ON, "fail_on"),
        (ENV_IGNORE_FOR_CHECK, "ignore_for_check"),
    )
    for env_name, field_name in selector_vars:
        if env_name in environ:
            overrides[field_name] = [environ[env_name]]

    return overrides, selected
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def load_config(
    *,
    config_path: str | None = None,
    cli_profile: str | None = None,
    cli_values: dict[str, Any] | None = None,
    environ: dict[str, str] | None = None,
    start: Path | None = None,
) -> ShareCleanConfig:
    """Load ShareClean configuration using the documented precedence order.

    Settings are layered from lowest to highest priority: built-in
    defaults, the root table of the config file, the selected profile,
    environment variables, and finally CLI values. The profile is chosen
    from the file's ``profile`` key, overridden by the profile
    environment variable, overridden in turn by *cli_profile*.

    Args:
        config_path: Explicit config file. When ``None``, a config file
            is discovered by searching upwards from *start*.
        cli_profile: Profile name given on the command line, if any.
        cli_values: CLI overrides keyed by field name; entries whose
            value is ``None`` are ignored.
        environ: Environment mapping to read. ``None`` means the process
            environment; an empty mapping means "no variables set".
        start: Directory (or file) where auto-discovery begins.

    Returns:
        The effective configuration, with selector fields as lists.

    Raises:
        ConfigError: for unreadable or invalid config files, invalid
            environment values, or an unknown profile name.
    """
    path: Path | None = None
    table: dict[str, Any] = {}
    profiles: dict[str, dict[str, Any]] = {}

    if config_path is not None:
        path, table = _load_explicit_config(config_path)
    else:
        discovered = _find_config(start)
        if discovered is not None:
            path, table = discovered

    base_values, profiles = _validate_table(path, table, profile_table=False)
    config = _apply_values(ShareCleanConfig(), base_values)

    # Test against None rather than truthiness: an explicitly supplied
    # empty mapping must not fall back to the real process environment.
    effective_environ = dict(os.environ) if environ is None else environ
    env_values, env_profile = _env_values(effective_environ)

    selected_profile = config.profile
    if env_profile is not None:
        selected_profile = env_profile
    if cli_profile is not None:
        selected_profile = cli_profile

    # "default" means no named profile, so no lookup is performed for it.
    if selected_profile != "default":
        if selected_profile not in profiles:
            raise _value_error(path, f"unknown profile: {selected_profile}")
        config = _apply_values(config, profiles[selected_profile])

    config = replace(config, profile=selected_profile)
    config = _apply_values(config, env_values)
    if cli_values:
        config = _apply_values(
            config,
            {key: value for key, value in cli_values.items() if value is not None},
        )
    return config.with_lists()
|