python-log-redactor 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- python_log_redactor-0.1.0/.gitignore +13 -0
- python_log_redactor-0.1.0/LICENSE +21 -0
- python_log_redactor-0.1.0/PKG-INFO +117 -0
- python_log_redactor-0.1.0/README.md +94 -0
- python_log_redactor-0.1.0/log_redactor/__init__.py +6 -0
- python_log_redactor-0.1.0/log_redactor/core.py +238 -0
- python_log_redactor-0.1.0/log_redactor/logging_filter.py +93 -0
- python_log_redactor-0.1.0/log_redactor/patterns.py +30 -0
- python_log_redactor-0.1.0/pyproject.toml +52 -0
- python_log_redactor-0.1.0/tests/test_core.py +47 -0
- python_log_redactor-0.1.0/tests/test_logging_filter.py +56 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 log-redactor contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: python-log-redactor
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Lightweight sensitive data redaction for accidental printing of sensitive Python strings, dicts, and logs.
|
|
5
|
+
Project-URL: Homepage, https://github.com/morgan-young/python-log-redactor
|
|
6
|
+
Author: Morgan Young
|
|
7
|
+
License: MIT
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Keywords: logging,privacy,redaction,security
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
21
|
+
Requires-Python: >=3.10
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
|
|
24
|
+
# log-redactor
|
|
25
|
+
|
|
26
|
+
[CI status](https://github.com/morgan-young/log-redactor/actions/workflows/ci.yml)
|
|
27
|
+
[PyPI version](https://pypi.org/project/python-log-redactor/)
|
|
28
|
+
[Supported Python versions](https://pypi.org/project/python-log-redactor/)
|
|
29
|
+
[License: MIT](https://github.com/morgan-young/log-redactor/blob/main/LICENSE)
|
|
30
|
+
|
|
31
|
+
Small, dependency-free redaction helpers for Python logs and payloads.
|
|
32
|
+
`log-redactor` helps prevent accidental exposure of secrets in log messages, strings, and nested dictionaries.
|
|
33
|
+
|
|
34
|
+
## Why use it?
|
|
35
|
+
|
|
36
|
+
- Redacts by **key name** and **regex pattern value matching**
|
|
37
|
+
- Supports nested `dict` / `list` / `tuple` structures
|
|
38
|
+
- Works with standard library `logging` and `%s`-style args
|
|
39
|
+
- Keeps runtime dependencies at zero (stdlib only)
|
|
40
|
+
|
|
41
|
+
## Installation
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
pip install python-log-redactor
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Quick start
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
import logging
|
|
51
|
+
from log_redactor import RedactingFilter, redact, redact_dict
|
|
52
|
+
|
|
53
|
+
logger = logging.getLogger("app")
|
|
54
|
+
logger.setLevel(logging.INFO)
|
|
55
|
+
logger.addFilter(RedactingFilter(patterns=["email", "jwt", "api_key"]))
|
|
56
|
+
|
|
57
|
+
logger.info("User %s used key %s", "alice@example.com", "sk-live-abc123")
|
|
58
|
+
|
|
59
|
+
print(redact("Contact: dev@example.com"))
|
|
60
|
+
|
|
61
|
+
payload = {
|
|
62
|
+
"username": "alice",
|
|
63
|
+
"password": "super-secret",
|
|
64
|
+
"profile": {"email": "alice@example.com"},
|
|
65
|
+
}
|
|
66
|
+
print(redact_dict(payload))
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## API
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
from log_redactor import RedactingFilter, redact, redact_dict
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
- `redact(text: str, patterns=None, custom_patterns=None, replacement="[REDACTED]") -> str`
|
|
76
|
+
- `redact_dict(data: dict, keys=None, patterns=None, custom_patterns=None, replacement="[REDACTED]") -> dict`
|
|
77
|
+
- `RedactingFilter(logging.Filter)`
|
|
78
|
+
|
|
79
|
+
## Built-in patterns
|
|
80
|
+
|
|
81
|
+
- `email`
|
|
82
|
+
- `ipv4`
|
|
83
|
+
- `jwt`
|
|
84
|
+
- `bearer_token`
|
|
85
|
+
- `api_key`
|
|
86
|
+
- `url_token`
|
|
87
|
+
- `credit_card_basic`
|
|
88
|
+
|
|
89
|
+
## Built-in sensitive keys
|
|
90
|
+
|
|
91
|
+
- `password`
|
|
92
|
+
- `passwd`
|
|
93
|
+
- `secret`
|
|
94
|
+
- `token`
|
|
95
|
+
- `access_token`
|
|
96
|
+
- `refresh_token`
|
|
97
|
+
- `api_key`
|
|
98
|
+
- `authorization`
|
|
99
|
+
|
|
100
|
+
## Development
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
python3 -m venv .venv
|
|
104
|
+
. .venv/bin/activate
|
|
105
|
+
pip install -e . pytest ruff
|
|
106
|
+
pytest
|
|
107
|
+
ruff check .
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
## Security note
|
|
111
|
+
|
|
112
|
+
This package is intended to reduce accidental leakage, not guarantee perfect anonymization.
|
|
113
|
+
Always validate your own threat model and pattern coverage for production systems.
|
|
114
|
+
|
|
115
|
+
## License
|
|
116
|
+
|
|
117
|
+
MIT
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# log-redactor
|
|
2
|
+
|
|
3
|
+
[CI status](https://github.com/morgan-young/log-redactor/actions/workflows/ci.yml)
|
|
4
|
+
[PyPI version](https://pypi.org/project/python-log-redactor/)
|
|
5
|
+
[Supported Python versions](https://pypi.org/project/python-log-redactor/)
|
|
6
|
+
[License: MIT](https://github.com/morgan-young/log-redactor/blob/main/LICENSE)
|
|
7
|
+
|
|
8
|
+
Small, dependency-free redaction helpers for Python logs and payloads.
|
|
9
|
+
`log-redactor` helps prevent accidental exposure of secrets in log messages, strings, and nested dictionaries.
|
|
10
|
+
|
|
11
|
+
## Why use it?
|
|
12
|
+
|
|
13
|
+
- Redacts by **key name** and **regex pattern value matching**
|
|
14
|
+
- Supports nested `dict` / `list` / `tuple` structures
|
|
15
|
+
- Works with standard library `logging` and `%s`-style args
|
|
16
|
+
- Keeps runtime dependencies at zero (stdlib only)
|
|
17
|
+
|
|
18
|
+
## Installation
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
pip install python-log-redactor
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Quick start
|
|
25
|
+
|
|
26
|
+
```python
|
|
27
|
+
import logging
|
|
28
|
+
from log_redactor import RedactingFilter, redact, redact_dict
|
|
29
|
+
|
|
30
|
+
logger = logging.getLogger("app")
|
|
31
|
+
logger.setLevel(logging.INFO)
|
|
32
|
+
logger.addFilter(RedactingFilter(patterns=["email", "jwt", "api_key"]))
|
|
33
|
+
|
|
34
|
+
logger.info("User %s used key %s", "alice@example.com", "sk-live-abc123")
|
|
35
|
+
|
|
36
|
+
print(redact("Contact: dev@example.com"))
|
|
37
|
+
|
|
38
|
+
payload = {
|
|
39
|
+
"username": "alice",
|
|
40
|
+
"password": "super-secret",
|
|
41
|
+
"profile": {"email": "alice@example.com"},
|
|
42
|
+
}
|
|
43
|
+
print(redact_dict(payload))
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## API
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
from log_redactor import RedactingFilter, redact, redact_dict
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
- `redact(text: str, patterns=None, custom_patterns=None, replacement="[REDACTED]") -> str`
|
|
53
|
+
- `redact_dict(data: dict, keys=None, patterns=None, custom_patterns=None, replacement="[REDACTED]") -> dict`
|
|
54
|
+
- `RedactingFilter(logging.Filter)`
|
|
55
|
+
|
|
56
|
+
## Built-in patterns
|
|
57
|
+
|
|
58
|
+
- `email`
|
|
59
|
+
- `ipv4`
|
|
60
|
+
- `jwt`
|
|
61
|
+
- `bearer_token`
|
|
62
|
+
- `api_key`
|
|
63
|
+
- `url_token`
|
|
64
|
+
- `credit_card_basic`
|
|
65
|
+
|
|
66
|
+
## Built-in sensitive keys
|
|
67
|
+
|
|
68
|
+
- `password`
|
|
69
|
+
- `passwd`
|
|
70
|
+
- `secret`
|
|
71
|
+
- `token`
|
|
72
|
+
- `access_token`
|
|
73
|
+
- `refresh_token`
|
|
74
|
+
- `api_key`
|
|
75
|
+
- `authorization`
|
|
76
|
+
|
|
77
|
+
## Development
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
python3 -m venv .venv
|
|
81
|
+
. .venv/bin/activate
|
|
82
|
+
pip install -e . pytest ruff
|
|
83
|
+
pytest
|
|
84
|
+
ruff check .
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## Security note
|
|
88
|
+
|
|
89
|
+
This package is intended to reduce accidental leakage, not guarantee perfect anonymization.
|
|
90
|
+
Always validate your own threat model and pattern coverage for production systems.
|
|
91
|
+
|
|
92
|
+
## License
|
|
93
|
+
|
|
94
|
+
MIT
|
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
"""Core string and dictionary redaction helpers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from collections.abc import Iterable, Mapping
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from .patterns import BUILTIN_PATTERNS, BUILTIN_SENSITIVE_KEYS
|
|
10
|
+
|
|
11
|
+
RegexLike = str | re.Pattern[str]
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _build_patterns(
    patterns: Iterable[str] | None = None,
    custom_patterns: Iterable[RegexLike] | None = None,
) -> tuple[re.Pattern[str], ...]:
    """Turn built-in pattern names and custom regexes into compiled patterns.

    Built-in patterns come first (in the order requested, or in registry
    order when ``patterns`` is ``None``), followed by the custom patterns in
    the order given.

    :param patterns:
        Names of built-in patterns to enable. ``None`` selects every built-in
        pattern; an empty iterable selects none.
    :param custom_patterns:
        Extra regexes, each either a pattern string (compiled here) or an
        already compiled :class:`re.Pattern` (used as-is).
    :returns:
        Tuple of compiled regex patterns in application order.
    :raises ValueError:
        If a name in ``patterns`` is not a known built-in pattern.
    """
    resolved: list[re.Pattern[str]] = []

    if patterns is None:
        resolved.extend(BUILTIN_PATTERNS.values())
    else:
        for name in patterns:
            try:
                builtin = BUILTIN_PATTERNS[name]
            except KeyError as error:
                message = f"Unknown built-in pattern: {name!r}"
                raise ValueError(message) from error
            resolved.append(builtin)

    # A falsy ``custom_patterns`` (``None`` or an empty container) adds nothing.
    resolved.extend(
        entry if isinstance(entry, re.Pattern) else re.compile(entry)
        for entry in custom_patterns or ()
    )
    return tuple(resolved)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _normalize_sensitive_keys(keys: Iterable[str] | None = None) -> set[str]:
    """Build the lower-cased set of key names that trigger full redaction.

    :param keys:
        Key names to treat as sensitive. When ``None``, a copy of the
        built-in sensitive key set is returned instead.
    :returns:
        Set of lower-cased key names, suitable for case-insensitive lookups.
    """
    if keys is not None:
        lowered: set[str] = set()
        for name in keys:
            lowered.add(name.lower())
        return lowered

    # Copy so callers can never mutate the shared module-level default.
    return set(BUILTIN_SENSITIVE_KEYS)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _redact_text_with_patterns(
    text: str,
    compiled_patterns: tuple[re.Pattern[str], ...],
    replacement: str,
) -> str:
    """Apply compiled redaction patterns to a string, one after another.

    ``replacement`` is always inserted literally. Substitution is done with
    callables rather than a template string, so backslashes or leading digits
    in ``replacement`` are never interpreted as regex escapes or group
    references (a template such as ``rf"\\1{replacement}"`` breaks as soon as
    ``replacement`` starts with a digit or contains ``\\``).

    :param text:
        Input text to scan for sensitive values.
    :param compiled_patterns:
        Compiled regex objects, applied in the given order. Each pattern runs
        on the output of the previous one.
    :param replacement:
        Literal text inserted in place of every match.
    :returns:
        Redacted string with all configured patterns applied.
    """
    # The url_token built-in captures the ``?token=`` style prefix in group 1;
    # that prefix is kept so only the secret value itself is replaced.
    url_token_source = BUILTIN_PATTERNS["url_token"].pattern

    def _whole_match(_match: re.Match[str]) -> str:
        return replacement

    def _keep_prefix(match: re.Match[str]) -> str:
        return match.group(1) + replacement

    redacted = text
    for pattern in compiled_patterns:
        substitute = _keep_prefix if pattern.pattern == url_token_source else _whole_match
        redacted = pattern.sub(substitute, redacted)
    return redacted
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _redact_value(
    value: Any,
    *,
    compiled_patterns: tuple[re.Pattern[str], ...],
    sensitive_keys: set[str],
    replacement: str,
    key_context: str | None = None,
) -> Any:
    """Return a redacted copy of ``value``, walking nested containers.

    A value stored under a sensitive key is replaced wholesale, whatever its
    type. Otherwise strings are scanned with the regex patterns, mappings,
    lists and tuples are rebuilt with their members redacted, and any other
    object is returned unchanged.

    :param value:
        Value to process.
    :param compiled_patterns:
        Compiled regex objects used to redact string values.
    :param sensitive_keys:
        Lower-cased key names that force full-value redaction.
    :param replacement:
        Replacement text inserted when data is redacted.
    :param key_context:
        Key under which ``value`` was found while traversing a mapping, if
        any. Compared case-insensitively against ``sensitive_keys``.
    :returns:
        The redacted value. Mappings come back as ``dict``, lists as ``list``
        and tuples as ``tuple``.
    """
    if key_context and key_context.lower() in sensitive_keys:
        return replacement

    if isinstance(value, str):
        return _redact_text_with_patterns(value, compiled_patterns, replacement)

    # Settings forwarded unchanged to every recursive call.
    shared = {
        "compiled_patterns": compiled_patterns,
        "sensitive_keys": sensitive_keys,
        "replacement": replacement,
    }

    if isinstance(value, Mapping):
        cleaned = {}
        for key, nested_value in value.items():
            # Only string keys can be matched against the sensitive key set.
            child_key = key if isinstance(key, str) else None
            cleaned[key] = _redact_value(nested_value, key_context=child_key, **shared)
        return cleaned

    if isinstance(value, list):
        return [_redact_value(item, **shared) for item in value]

    if isinstance(value, tuple):
        return tuple(_redact_value(item, **shared) for item in value)

    return value
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def redact(
    text: str,
    patterns: Iterable[str] | None = None,
    custom_patterns: Iterable[RegexLike] | None = None,
    replacement: str = "[REDACTED]",
) -> str:
    """Return ``text`` with every match of the selected patterns replaced.

    :param text:
        Text to redact.
    :param patterns:
        Names of built-in patterns to apply. ``None`` applies all of them.
    :param custom_patterns:
        Additional regexes, given as strings or compiled
        :class:`re.Pattern` objects.
    :param replacement:
        Text inserted in place of each matched sensitive value.
    :returns:
        Redacted text.
    :raises TypeError:
        If ``text`` is not a string.
    :raises ValueError:
        If ``patterns`` contains an unknown built-in pattern name.
    """
    if isinstance(text, str):
        active_patterns = _build_patterns(patterns, custom_patterns)
        return _redact_text_with_patterns(text, active_patterns, replacement)

    message = "text must be a string"
    raise TypeError(message)
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def redact_dict(
    data: dict[Any, Any],
    keys: Iterable[str] | None = None,
    patterns: Iterable[str] | None = None,
    custom_patterns: Iterable[RegexLike] | None = None,
    replacement: str = "[REDACTED]",
) -> dict[Any, Any]:
    """Build a redacted copy of ``data`` without modifying the input.

    Values stored under a sensitive key are replaced entirely; every other
    string value, at any nesting depth, is scanned with the regex patterns.

    :param data:
        Dictionary to redact.
    :param keys:
        Sensitive key names. ``None`` selects the built-in sensitive keys.
    :param patterns:
        Names of built-in patterns to apply. ``None`` applies all of them.
    :param custom_patterns:
        Additional regexes, given as strings or compiled
        :class:`re.Pattern` objects.
    :param replacement:
        Text used in place of redacted values.
    :returns:
        New dictionary with redacted values.
    :raises TypeError:
        If ``data`` is not a dictionary.
    :raises ValueError:
        If ``patterns`` contains an unknown built-in pattern name.
    """
    if not isinstance(data, dict):
        message = "data must be a dictionary"
        raise TypeError(message)

    # Resolve the configuration once and reuse it for every top-level entry.
    options = {
        "compiled_patterns": _build_patterns(patterns, custom_patterns),
        "sensitive_keys": _normalize_sensitive_keys(keys),
        "replacement": replacement,
    }

    result: dict[Any, Any] = {}
    for key, value in data.items():
        # Non-string keys cannot match a sensitive key name.
        key_name = key if isinstance(key, str) else None
        result[key] = _redact_value(value, key_context=key_name, **options)
    return result
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
"""Logging filter that redacts sensitive values in log records."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from collections.abc import Iterable
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from .core import RegexLike, _build_patterns, _normalize_sensitive_keys, _redact_value
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class RedactingFilter(logging.Filter):
    """Logging filter that scrubs sensitive data from records in place.

    Attach an instance to a logger or handler; each record passing through
    has its message and arguments redacted before formatting.

    :param name:
        Filter name passed to :class:`logging.Filter`.
    :param keys:
        Sensitive key names. ``None`` selects the built-in sensitive keys.
    :param patterns:
        Names of built-in patterns to apply. ``None`` applies all of them.
    :param custom_patterns:
        Additional regexes as strings or compiled regex objects.
    :param replacement:
        Text used in place of redacted values.
    :raises ValueError:
        If ``patterns`` contains an unknown built-in pattern name.
    """

    def __init__(
        self,
        name: str = "",
        *,
        keys: Iterable[str] | None = None,
        patterns: Iterable[str] | None = None,
        custom_patterns: Iterable[RegexLike] | None = None,
        replacement: str = "[REDACTED]",
    ) -> None:
        """Resolve and store the redaction configuration.

        Patterns and keys are resolved once here so that filtering a record
        does not repeat that work.

        :param name:
            Filter name passed to :class:`logging.Filter`.
        :param keys:
            Sensitive key names.
        :param patterns:
            Names of built-in patterns to apply.
        :param custom_patterns:
            Additional regexes as strings or compiled regex objects.
        :param replacement:
            Text used in place of redacted values.
        :raises ValueError:
            If ``patterns`` contains an unknown built-in pattern name.
        """
        super().__init__(name)
        self._replacement = replacement
        self._compiled_patterns = _build_patterns(patterns, custom_patterns)
        self._sensitive_keys = _normalize_sensitive_keys(keys)

    def _redact_any(self, value: Any) -> Any:
        """Redact a single value with this filter's configuration.

        :param value:
            Value to redact.
        :returns:
            Whatever ``_redact_value`` returns for ``value`` with the stored
            patterns, sensitive keys and replacement text.
        """
        return _redact_value(
            value,
            compiled_patterns=self._compiled_patterns,
            sensitive_keys=self._sensitive_keys,
            replacement=self._replacement,
        )

    def filter(self, record: logging.LogRecord) -> bool:
        """Redact the message and arguments of ``record`` in place.

        ``record.msg`` and ``record.args`` are overwritten so that the usual
        ``%``-style formatting step produces already redacted output.

        :param record:
            Log record to redact.
        :returns:
            Always ``True``; the record is never dropped.
        """
        record.msg = self._redact_any(record.msg)

        args = record.args
        if isinstance(args, tuple):
            # Positional arguments: redact each one independently.
            record.args = tuple(map(self._redact_any, args))
        elif isinstance(args, dict) or args:
            # A dict of named arguments, or any other non-empty args object.
            record.args = self._redact_any(args)
        return True
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""Built-in patterns and key configuration for redaction."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
# Regex source for every built-in pattern, keyed by the public pattern name.
BUILTIN_PATTERN_SOURCES: dict[str, str] = {
    "email": r"\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\b",
    "ipv4": r"\b(?:(?:25[0-5]|2[0-4]\d|1?\d?\d)\.){3}(?:25[0-5]|2[0-4]\d|1?\d?\d)\b",
    # Three base64url segments whose first one starts with "eyJ" (the encoding
    # of a JSON object opening with '{"'). Without that anchor the pattern also
    # matches any dotted triple such as "www.example.com" or "1.2.3". There is
    # no trailing \b: it would make the regex give back a final "-" and leave
    # it unredacted.
    "jwt": r"\beyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+",
    # No trailing \b here either: after "=" padding (or a final "/", "+", "-")
    # there is no word boundary, so the regex would backtrack and leak the tail.
    "bearer_token": r"\bBearer\s+[A-Za-z0-9\-._~+/]+=*",
    "api_key": r"\b(?:sk-(?:live|test)-[A-Za-z0-9]+|AKIA[0-9A-Z]{16}|AIza[A-Za-z0-9_-]{35})\b",
    # Group 1 captures the "?name=" / "&name=" prefix so the redaction code can
    # keep it and replace only the value.
    "url_token": r"([?&](?:token|access_token|refresh_token|api_key|key)=)[^&\s]+",
    # Deliberately loose: any run of 13-19 digits, optionally separated by
    # spaces or dashes. No checksum validation is performed.
    "credit_card_basic": r"\b(?:\d[ -]*?){13,19}\b",
}

# Compiled once at import time; insertion order is the default application order.
BUILTIN_PATTERNS: dict[str, re.Pattern[str]] = {
    name: re.compile(source) for name, source in BUILTIN_PATTERN_SOURCES.items()
}

# Lower-case key names whose values are always replaced in full.
BUILTIN_SENSITIVE_KEYS: set[str] = {
    "password",
    "passwd",
    "secret",
    "token",
    "access_token",
    "refresh_token",
    "api_key",
    "authorization",
}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "python-log-redactor"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Lightweight sensitive data redaction for accidental printing of sensitive Python strings, dicts, and logs."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [{ name = "Morgan Young" }]
|
|
13
|
+
keywords = ["logging", "security", "redaction", "privacy"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 3 - Alpha",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
20
|
+
"Programming Language :: Python :: 3.10",
|
|
21
|
+
"Programming Language :: Python :: 3.11",
|
|
22
|
+
"Programming Language :: Python :: 3.12",
|
|
23
|
+
"Programming Language :: Python :: 3.13",
|
|
24
|
+
"Programming Language :: Python :: 3.14",
|
|
25
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
[project.urls]
|
|
29
|
+
Homepage = "https://github.com/morgan-young/python-log-redactor"
|
|
30
|
+
|
|
31
|
+
[tool.pytest.ini_options]
|
|
32
|
+
addopts = "-q"
|
|
33
|
+
testpaths = ["tests"]
|
|
34
|
+
|
|
35
|
+
[tool.ruff]
|
|
36
|
+
target-version = "py310"
|
|
37
|
+
line-length = 100
|
|
38
|
+
|
|
39
|
+
[tool.ruff.lint]
|
|
40
|
+
select = ["E", "F", "I", "UP", "B"]
|
|
41
|
+
|
|
42
|
+
[tool.hatch.build.targets.wheel]
|
|
43
|
+
packages = ["log_redactor"]
|
|
44
|
+
|
|
45
|
+
[tool.hatch.build.targets.sdist]
|
|
46
|
+
include = [
|
|
47
|
+
"/log_redactor",
|
|
48
|
+
"/tests",
|
|
49
|
+
"/README.md",
|
|
50
|
+
"/LICENSE",
|
|
51
|
+
"/pyproject.toml",
|
|
52
|
+
]
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from log_redactor import redact, redact_dict
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_redact_uses_selected_builtin_patterns() -> None:
    """Only the requested built-in pattern is applied; other data stays visible."""
    result = redact("Email alice@example.com from 127.0.0.1", patterns=["email"])

    assert "alice@example.com" not in result
    assert "[REDACTED]" in result
    # The ipv4 pattern was not requested, so the address must survive.
    assert "127.0.0.1" in result
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def test_redact_supports_custom_patterns() -> None:
    """A custom regex is applied even when no built-in pattern is enabled."""
    result = redact(
        "order id: internal-12345",
        patterns=[],
        custom_patterns=[r"internal-\d+"],
    )

    assert result == "order id: [REDACTED]"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def test_redact_dict_is_recursive_and_non_mutating() -> None:
    """Nested containers are redacted in a copy; the input dict is untouched."""
    nested = {
        "token": "Bearer super-secret",
        "list": ["hello", "bob@example.com"],
        "tuple": ("api_key=abc123", 7),
    }
    original = {
        "user": "alice@example.com",
        "password": "p@ssw0rd",
        "nested": nested,
    }

    redacted = redact_dict(original)

    # The caller's data must not have been modified.
    assert original["password"] == "p@ssw0rd"

    # Top level: one key-based and one pattern-based redaction.
    assert redacted["password"] == "[REDACTED]"
    assert redacted["user"] == "[REDACTED]"

    # Nested mapping, list and tuple members.
    inner = redacted["nested"]
    assert inner["token"] == "[REDACTED]"
    assert inner["list"][1] == "[REDACTED]"
    assert inner["tuple"][1] == 7
    assert inner["tuple"][0].startswith("api_key=")
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def test_redact_dict_preserves_non_string_values_unless_key_sensitive() -> None:
    """Non-string values pass through unless their key is sensitive."""
    result = redact_dict({"count": 3, "active": True, "api_key": 9999})

    assert result["count"] == 3
    assert result["active"] is True
    # A sensitive key forces redaction whatever the value's type.
    assert result["api_key"] == "[REDACTED]"
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import io
|
|
4
|
+
import logging
|
|
5
|
+
|
|
6
|
+
from log_redactor import RedactingFilter
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _build_test_logger() -> tuple[logging.Logger, io.StringIO, logging.Handler]:
    """Create a clean, isolated logger that writes bare messages to a buffer.

    The named logger is shared between calls, so its handlers and filters are
    cleared before the new handler is attached.

    :returns:
        The logger, the in-memory stream capturing its output, and the
        handler attached to it.
    """
    buffer = io.StringIO()
    sink = logging.StreamHandler(buffer)
    sink.setFormatter(logging.Formatter("%(message)s"))

    test_logger = logging.getLogger("log_redactor_test_logger")
    test_logger.handlers.clear()
    test_logger.filters.clear()
    test_logger.setLevel(logging.INFO)
    # Keep records away from the root logger's handlers.
    test_logger.propagate = False
    test_logger.addHandler(sink)

    return test_logger, buffer, sink
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def test_filter_redacts_percent_style_args() -> None:
    """Positional ``%s`` arguments are redacted before the record is formatted."""
    logger, stream, handler = _build_test_logger()
    redactor = RedactingFilter(patterns=["email", "api_key"])
    logger.addFilter(redactor)

    logger.info("User %s used key %s", "alice@example.com", "sk-live-abc123")

    output = stream.getvalue()
    # Reset the shared logger before asserting.
    logger.removeHandler(handler)
    logger.handlers.clear()
    logger.filters.clear()

    for secret in ("alice@example.com", "sk-live-abc123"):
        assert secret not in output
    assert output.count("[REDACTED]") >= 2
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def test_filter_redacts_mapping_and_nested_values() -> None:
    """A dict argument is redacted by key and by pattern, including nested dicts."""
    logger, stream, handler = _build_test_logger()
    logger.addFilter(RedactingFilter(patterns=["email"]))

    payload = {"password": "letmein", "meta": {"email": "bob@example.com"}, "count": 2}
    logger.info("payload=%s", payload)

    output = stream.getvalue()
    # Reset the shared logger before asserting.
    logger.removeHandler(handler)
    logger.handlers.clear()
    logger.filters.clear()

    for secret in ("letmein", "bob@example.com"):
        assert secret not in output
    assert output.count("[REDACTED]") >= 2
    # The non-sensitive integer value must still be printed.
    assert "2" in output
|