secretscreen 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- secretscreen-0.1.0/.github/workflows/ci.yml +33 -0
- secretscreen-0.1.0/.github/workflows/publish.yml +28 -0
- secretscreen-0.1.0/.gitignore +16 -0
- secretscreen-0.1.0/LICENSE +26 -0
- secretscreen-0.1.0/PKG-INFO +78 -0
- secretscreen-0.1.0/README.md +51 -0
- secretscreen-0.1.0/pyproject.toml +51 -0
- secretscreen-0.1.0/src/secretscreen/__init__.py +37 -0
- secretscreen-0.1.0/src/secretscreen/_core.py +339 -0
- secretscreen-0.1.0/src/secretscreen/_entropy.py +66 -0
- secretscreen-0.1.0/src/secretscreen/_formats.py +116 -0
- secretscreen-0.1.0/src/secretscreen/_keys.py +110 -0
- secretscreen-0.1.0/src/secretscreen/_parsers.py +175 -0
- secretscreen-0.1.0/src/secretscreen/_urls.py +51 -0
- secretscreen-0.1.0/src/secretscreen/gitleaks.toml +3209 -0
- secretscreen-0.1.0/src/secretscreen/py.typed +0 -0
- secretscreen-0.1.0/tests/__init__.py +0 -0
- secretscreen-0.1.0/tests/test_core.py +284 -0
- secretscreen-0.1.0/tests/test_entropy.py +71 -0
- secretscreen-0.1.0/tests/test_formats.py +80 -0
- secretscreen-0.1.0/tests/test_keys.py +154 -0
- secretscreen-0.1.0/tests/test_parsers.py +105 -0
- secretscreen-0.1.0/tests/test_urls.py +63 -0
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
strategy:
|
|
13
|
+
matrix:
|
|
14
|
+
python-version: ["3.11", "3.12", "3.13"]
|
|
15
|
+
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
|
|
19
|
+
- uses: actions/setup-python@v5
|
|
20
|
+
with:
|
|
21
|
+
python-version: ${{ matrix.python-version }}
|
|
22
|
+
|
|
23
|
+
- name: Install package
|
|
24
|
+
run: pip install -e ".[dev]"
|
|
25
|
+
|
|
26
|
+
- name: Lint
|
|
27
|
+
run: ruff check src/ tests/
|
|
28
|
+
|
|
29
|
+
- name: Type check
|
|
30
|
+
run: mypy src/secretscreen/
|
|
31
|
+
|
|
32
|
+
- name: Test
|
|
33
|
+
run: pytest --tb=short -q
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
|
|
7
|
+
permissions:
|
|
8
|
+
id-token: write
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
publish:
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
environment: pypi
|
|
14
|
+
|
|
15
|
+
steps:
|
|
16
|
+
- uses: actions/checkout@v4
|
|
17
|
+
|
|
18
|
+
- uses: actions/setup-python@v5
|
|
19
|
+
with:
|
|
20
|
+
python-version: "3.13"
|
|
21
|
+
|
|
22
|
+
- name: Build
|
|
23
|
+
run: |
|
|
24
|
+
pip install build
|
|
25
|
+
python -m build
|
|
26
|
+
|
|
27
|
+
- name: Publish to PyPI
|
|
28
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Cron <cron@featurecreep.dev>
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
This package vendors pattern data from the gitleaks project
|
|
26
|
+
(https://github.com/gitleaks/gitleaks), which is also MIT-licensed.
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: secretscreen
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Detect and redact secrets in key-value pairs, dicts, and environment variables.
|
|
5
|
+
Project-URL: Homepage, https://github.com/featurecreep-cron/secretscreen
|
|
6
|
+
Project-URL: Issues, https://github.com/featurecreep-cron/secretscreen/issues
|
|
7
|
+
Author-email: Cron <cron@featurecreep.dev>
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: docker,environment,redaction,secrets,security
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Topic :: Security
|
|
19
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
20
|
+
Classifier: Typing :: Typed
|
|
21
|
+
Requires-Python: >=3.11
|
|
22
|
+
Provides-Extra: dev
|
|
23
|
+
Requires-Dist: mypy>=1.10; extra == 'dev'
|
|
24
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
25
|
+
Requires-Dist: ruff>=0.4; extra == 'dev'
|
|
26
|
+
Description-Content-Type: text/markdown
|
|
27
|
+
|
|
28
|
+
# secretscreen
|
|
29
|
+
|
|
30
|
+
[![CI](https://github.com/featurecreep-cron/secretscreen/actions/workflows/ci.yml/badge.svg)](https://github.com/featurecreep-cron/secretscreen/actions/workflows/ci.yml)
|
|
31
|
+
[![PyPI](https://img.shields.io/pypi/v/secretscreen)](https://pypi.org/project/secretscreen/)
|
|
32
|
+
[![License: MIT](https://img.shields.io/badge/license-MIT-blue)](LICENSE)
|
|
33
|
+
|
|
34
|
+
Detect and redact secrets in key-value pairs, dicts, and environment variables.
|
|
35
|
+
|
|
36
|
+
Best-effort defense-in-depth. Not a security boundary.
|
|
37
|
+
|
|
38
|
+
## Install
|
|
39
|
+
|
|
40
|
+
```
|
|
41
|
+
pip install secretscreen
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Quick start
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
from secretscreen import redact_pair, redact_dict, audit_dict, Mode
|
|
48
|
+
|
|
49
|
+
# Single pair
|
|
50
|
+
redact_pair("DB_PASSWORD", "hunter2") # → "[REDACTED]"
|
|
51
|
+
redact_pair("APP_NAME", "myapp") # → "myapp"
|
|
52
|
+
|
|
53
|
+
# Dict with recursion
|
|
54
|
+
redact_dict({"db": {"password": "x", "host": "localhost"}})
|
|
55
|
+
# → {"db": {"password": "[REDACTED]", "host": "localhost"}}
|
|
56
|
+
|
|
57
|
+
# Aggressive mode (adds entropy detection)
|
|
58
|
+
redact_dict(env, mode=Mode.AGGRESSIVE)
|
|
59
|
+
|
|
60
|
+
# Audit mode (structured findings, no mutation)
|
|
61
|
+
findings = audit_dict(env)
|
|
62
|
+
# → [Finding(key="DB_PASSWORD", reason="key_pattern:password", ...)]
|
|
63
|
+
|
|
64
|
+
# Custom safe suffixes (keys ending with these are never redacted)
|
|
65
|
+
redact_dict(env, safe_suffixes=("_config", "_enabled"))
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Detection layers
|
|
69
|
+
|
|
70
|
+
1. **Key-name denylist** — substring match against ~30 known secret key patterns
|
|
71
|
+
2. **Structured value parsing** — JSON, Python literals, DSN, INI, URL query params
|
|
72
|
+
3. **Value-format detection** — 222 known formats via vendored [gitleaks](https://github.com/gitleaks/gitleaks) patterns (MIT)
|
|
73
|
+
4. **URL credential detection** — partial redaction of `user:pass@host` URLs
|
|
74
|
+
5. **Entropy detection** — Shannon entropy for machine-generated strings (aggressive mode only)
|
|
75
|
+
|
|
76
|
+
## License
|
|
77
|
+
|
|
78
|
+
MIT. Gitleaks patterns are also MIT-licensed.
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# secretscreen
|
|
2
|
+
|
|
3
|
+
[![CI](https://github.com/featurecreep-cron/secretscreen/actions/workflows/ci.yml/badge.svg)](https://github.com/featurecreep-cron/secretscreen/actions/workflows/ci.yml)
|
|
4
|
+
[![PyPI](https://img.shields.io/pypi/v/secretscreen)](https://pypi.org/project/secretscreen/)
|
|
5
|
+
[![License: MIT](https://img.shields.io/badge/license-MIT-blue)](LICENSE)
|
|
6
|
+
|
|
7
|
+
Detect and redact secrets in key-value pairs, dicts, and environment variables.
|
|
8
|
+
|
|
9
|
+
Best-effort defense-in-depth. Not a security boundary.
|
|
10
|
+
|
|
11
|
+
## Install
|
|
12
|
+
|
|
13
|
+
```
|
|
14
|
+
pip install secretscreen
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Quick start
|
|
18
|
+
|
|
19
|
+
```python
|
|
20
|
+
from secretscreen import redact_pair, redact_dict, audit_dict, Mode
|
|
21
|
+
|
|
22
|
+
# Single pair
|
|
23
|
+
redact_pair("DB_PASSWORD", "hunter2") # → "[REDACTED]"
|
|
24
|
+
redact_pair("APP_NAME", "myapp") # → "myapp"
|
|
25
|
+
|
|
26
|
+
# Dict with recursion
|
|
27
|
+
redact_dict({"db": {"password": "x", "host": "localhost"}})
|
|
28
|
+
# → {"db": {"password": "[REDACTED]", "host": "localhost"}}
|
|
29
|
+
|
|
30
|
+
# Aggressive mode (adds entropy detection)
|
|
31
|
+
redact_dict(env, mode=Mode.AGGRESSIVE)
|
|
32
|
+
|
|
33
|
+
# Audit mode (structured findings, no mutation)
|
|
34
|
+
findings = audit_dict(env)
|
|
35
|
+
# → [Finding(key="DB_PASSWORD", reason="key_pattern:password", ...)]
|
|
36
|
+
|
|
37
|
+
# Custom safe suffixes (keys ending with these are never redacted)
|
|
38
|
+
redact_dict(env, safe_suffixes=("_config", "_enabled"))
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Detection layers
|
|
42
|
+
|
|
43
|
+
1. **Key-name denylist** — substring match against ~30 known secret key patterns
|
|
44
|
+
2. **Structured value parsing** — JSON, Python literals, DSN, INI, URL query params
|
|
45
|
+
3. **Value-format detection** — 222 known formats via vendored [gitleaks](https://github.com/gitleaks/gitleaks) patterns (MIT)
|
|
46
|
+
4. **URL credential detection** — partial redaction of `user:pass@host` URLs
|
|
47
|
+
5. **Entropy detection** — Shannon entropy for machine-generated strings (aggressive mode only)
|
|
48
|
+
|
|
49
|
+
## License
|
|
50
|
+
|
|
51
|
+
MIT. Gitleaks patterns are also MIT-licensed.
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "secretscreen"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Detect and redact secrets in key-value pairs, dicts, and environment variables."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
requires-python = ">=3.11"
|
|
12
|
+
authors = [{ name = "Cron", email = "cron@featurecreep.dev" }]
|
|
13
|
+
keywords = ["secrets", "redaction", "security", "environment", "docker"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 3 - Alpha",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Programming Language :: Python :: 3.11",
|
|
20
|
+
"Programming Language :: Python :: 3.12",
|
|
21
|
+
"Programming Language :: Python :: 3.13",
|
|
22
|
+
"Topic :: Security",
|
|
23
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
24
|
+
"Typing :: Typed",
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
[project.optional-dependencies]
|
|
28
|
+
dev = ["pytest>=7.0", "ruff>=0.4", "mypy>=1.10"]
|
|
29
|
+
|
|
30
|
+
[project.urls]
|
|
31
|
+
Homepage = "https://github.com/featurecreep-cron/secretscreen"
|
|
32
|
+
Issues = "https://github.com/featurecreep-cron/secretscreen/issues"
|
|
33
|
+
|
|
34
|
+
[tool.hatch.build.targets.wheel]
|
|
35
|
+
packages = ["src/secretscreen"]
|
|
36
|
+
|
|
37
|
+
[tool.pytest.ini_options]
|
|
38
|
+
testpaths = ["tests"]
|
|
39
|
+
|
|
40
|
+
[tool.mypy]
|
|
41
|
+
strict = true
|
|
42
|
+
warn_return_any = true
|
|
43
|
+
warn_unused_configs = true
|
|
44
|
+
|
|
45
|
+
[tool.ruff]
|
|
46
|
+
target-version = "py311"
|
|
47
|
+
line-length = 120
|
|
48
|
+
|
|
49
|
+
[tool.ruff.lint]
|
|
50
|
+
select = ["E", "F", "W", "I", "N", "UP", "B", "A", "SIM", "TCH"]
|
|
51
|
+
ignore = ["SIM102", "SIM108"]
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""Detect and redact secrets in key-value pairs, dicts, and environment variables.
|
|
2
|
+
|
|
3
|
+
Best-effort defense-in-depth. Not a security boundary.
|
|
4
|
+
|
|
5
|
+
Five detection layers:
|
|
6
|
+
1. Key-name denylist — substring match against known secret key patterns.
|
|
7
|
+
2. Structured value parsing — JSON, Python literals, INI, DSN, URL query params.
|
|
8
|
+
3. Value-format detection — 222 known secret formats via vendored gitleaks patterns.
|
|
9
|
+
4. URL credential detection — partial redaction of embedded passwords.
|
|
10
|
+
5. Entropy detection — Shannon entropy for machine-generated strings (aggressive mode).
|
|
11
|
+
|
|
12
|
+
Two modes:
|
|
13
|
+
- NORMAL: layers 1-4, zero false positives target.
|
|
14
|
+
- AGGRESSIVE: layers 1-5, adds entropy detection.
|
|
15
|
+
|
|
16
|
+
audit_pair() and audit_dict() return structured findings without mutating values.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from secretscreen._core import (
|
|
20
|
+
Finding,
|
|
21
|
+
Mode,
|
|
22
|
+
audit_dict,
|
|
23
|
+
audit_pair,
|
|
24
|
+
redact_dict,
|
|
25
|
+
redact_pair,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
# Package version string (matches ``version`` in pyproject.toml).
__version__ = "0.1.0"

# Names re-exported from secretscreen._core as the package's public API.
__all__ = [
    "Finding",
    "Mode",
    "audit_dict",
    "audit_pair",
    "redact_dict",
    "redact_pair",
]
|
|
@@ -0,0 +1,339 @@
|
|
|
1
|
+
"""Core orchestration — ties all detection layers together.
|
|
2
|
+
|
|
3
|
+
Public API: redact_pair, redact_dict, audit_pair, audit_dict.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import enum
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
|
|
11
|
+
from secretscreen._entropy import looks_like_secret
|
|
12
|
+
from secretscreen._formats import matches_known_format
|
|
13
|
+
from secretscreen._keys import (
|
|
14
|
+
DEFAULT_KEY_PATTERNS,
|
|
15
|
+
DEFAULT_SAFE_SUFFIXES,
|
|
16
|
+
matches_key_pattern,
|
|
17
|
+
)
|
|
18
|
+
from secretscreen._parsers import extract_pairs
|
|
19
|
+
from secretscreen._urls import has_url_credentials, redact_url_password
|
|
20
|
+
|
|
21
|
+
REDACTED = "[REDACTED]"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class Mode(enum.Enum):
    """Detection mode controlling which layers are active.

    NORMAL runs layers 1-4: key patterns, structured parsing, format
    detection, and URL credentials.

    AGGRESSIVE runs layers 1-5: everything in NORMAL plus Shannon entropy
    detection for machine-generated strings.
    """

    NORMAL = "normal"
    AGGRESSIVE = "aggressive"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass(frozen=True, slots=True)
|
|
35
|
+
class Finding:
|
|
36
|
+
"""A detected secret with metadata about how it was found."""
|
|
37
|
+
|
|
38
|
+
key: str
|
|
39
|
+
reason: str
|
|
40
|
+
layer: str
|
|
41
|
+
detail: str = ""
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass
class ScreenConfig:
    """Configuration for secret screening.

    Attributes:
        mode: Detection mode; entropy detection only runs in AGGRESSIVE.
        replacement: Text substituted for redacted content.
        extra_keys: Additional key patterns merged with DEFAULT_KEY_PATTERNS.
        safe_suffixes: Suffixes forwarded to matches_key_pattern.
        entropy_threshold: Threshold forwarded to looks_like_secret.
    """

    mode: Mode = Mode.NORMAL
    replacement: str = REDACTED
    extra_keys: tuple[str, ...] = ()
    safe_suffixes: tuple[str, ...] = DEFAULT_SAFE_SUFFIXES
    entropy_threshold: float = 4.5

    def __post_init__(self) -> None:
        """Pre-compute merged patterns to avoid recomputation per key.

        Extra keys are de-duplicated case-insensitively against both the
        defaults and each other, keeping the first spelling seen, so the
        merged tuple never holds the same pattern twice.
        """
        if not self.extra_keys:
            self._patterns: tuple[str, ...] = DEFAULT_KEY_PATTERNS
            return

        seen = {p.lower() for p in DEFAULT_KEY_PATTERNS}
        extra: list[str] = []
        for pattern in self.extra_keys:
            folded = pattern.lower()
            if folded in seen:
                continue
            # Record the extra too, so a later duplicate of it is skipped.
            seen.add(folded)
            extra.append(pattern)
        self._patterns = DEFAULT_KEY_PATTERNS + tuple(extra)

    @property
    def patterns(self) -> tuple[str, ...]:
        """Merged key patterns (defaults + extras)."""
        return self._patterns
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def redact_pair(
    key: str,
    value: str,
    *,
    mode: Mode = Mode.NORMAL,
    replacement: str = REDACTED,
    extra_keys: tuple[str, ...] = (),
    safe_suffixes: tuple[str, ...] = DEFAULT_SAFE_SUFFIXES,
    entropy_threshold: float = 4.5,
) -> str:
    """Screen one key-value pair and redact the value if it looks secret.

    Empty or non-string values are returned untouched. When nothing is
    detected the original value is returned; otherwise the result of the
    layer-specific redaction (full replacement, or partial redaction for
    URL credentials and structured values) is returned.
    """
    # Nothing to screen: hand back whatever was passed in.
    if not (isinstance(value, str) and value):
        return value

    settings = ScreenConfig(
        mode=mode,
        replacement=replacement,
        extra_keys=extra_keys,
        safe_suffixes=safe_suffixes,
        entropy_threshold=entropy_threshold,
    )
    hit = _detect(key, value, settings)
    return value if hit is None else _apply_redaction(hit, key, value, settings)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def redact_dict(
    data: dict[str, object] | list[object] | object,
    *,
    mode: Mode = Mode.NORMAL,
    replacement: str = REDACTED,
    extra_keys: tuple[str, ...] = (),
    safe_suffixes: tuple[str, ...] = DEFAULT_SAFE_SUFFIXES,
    entropy_threshold: float = 4.5,
) -> object:
    """Walk a dict, list, or nested mix of both and redact detected secrets.

    The input is left unmodified; dicts and lists are rebuilt as new
    containers with secret values replaced.
    """
    return _redact_recursive(
        data,
        ScreenConfig(
            mode=mode,
            replacement=replacement,
            extra_keys=extra_keys,
            safe_suffixes=safe_suffixes,
            entropy_threshold=entropy_threshold,
        ),
    )
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def audit_pair(
    key: str,
    value: str,
    *,
    mode: Mode = Mode.NORMAL,
    extra_keys: tuple[str, ...] = (),
    safe_suffixes: tuple[str, ...] = DEFAULT_SAFE_SUFFIXES,
    entropy_threshold: float = 4.5,
) -> Finding | None:
    """Inspect one key-value pair for a secret without redacting anything.

    Returns the Finding produced by detection, or None when the value is
    empty, not a string, or not detected as a secret.
    """
    if isinstance(value, str) and value:
        return _detect(
            key,
            value,
            ScreenConfig(
                mode=mode,
                extra_keys=extra_keys,
                safe_suffixes=safe_suffixes,
                entropy_threshold=entropy_threshold,
            ),
        )
    return None
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def audit_dict(
    data: dict[str, object] | list[object] | object,
    *,
    mode: Mode = Mode.NORMAL,
    extra_keys: tuple[str, ...] = (),
    safe_suffixes: tuple[str, ...] = DEFAULT_SAFE_SUFFIXES,
    entropy_threshold: float = 4.5,
) -> list[Finding]:
    """Walk a dict/list structure and report secrets without redacting.

    Returns every Finding collected during the walk, in traversal order.
    """
    settings = ScreenConfig(
        mode=mode,
        extra_keys=extra_keys,
        safe_suffixes=safe_suffixes,
        entropy_threshold=entropy_threshold,
    )
    collected: list[Finding] = []
    # The recursive walker appends to ``collected`` in place.
    _audit_recursive(data, settings, collected)
    return collected
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
# Upper bound on how many times _detect may re-enter itself through
# structured parsing, so crafted deeply nested JSON/Python literals cannot
# blow the stack.
_MAX_DETECT_DEPTH = 3


def _detect(
    key: str, value: str, config: ScreenConfig, _depth: int = 0
) -> Finding | None:
    """Run the detection layers on one pair and return the first hit.

    Layers are tried in this order: key pattern (1), URL credentials (4),
    structured parsing (2), known formats (3) and, in aggressive mode only,
    entropy (5). ``_depth`` counts nested structured-parsing calls.
    Returns None when no layer fires.
    """
    # Layer 1: key-name pattern match.
    pattern_hit = matches_key_pattern(key, config.patterns, config.safe_suffixes)
    if pattern_hit is not None:
        # A "*_url" key holding credentials is tagged for partial redaction
        # instead of replacing the whole URL.
        partial = key.lower().endswith("_url") and has_url_credentials(value)
        if partial:
            return Finding(
                key=key,
                reason=f"key_pattern:{pattern_hit}",
                layer="url_credentials",
                detail="URL with embedded credentials",
            )
        return Finding(
            key=key,
            reason=f"key_pattern:{pattern_hit}",
            layer="key_pattern",
        )

    # Layer 4: URL credentials, regardless of the key name.
    if has_url_credentials(value):
        return Finding(
            key=key,
            reason="url_credentials",
            layer="url_credentials",
            detail="URL with embedded credentials",
        )

    # Layer 2: structured value parsing, skipped once the depth cap is hit.
    parsed = extract_pairs(value) if _depth < _MAX_DETECT_DEPTH else []
    for inner_key, inner_value in parsed or ():
        inner_hit = _detect(inner_key, inner_value, config, _depth + 1)
        if inner_hit is None:
            continue
        return Finding(
            key=key,
            reason=f"structured:{inner_key}={inner_hit.reason}",
            layer="structured_parsing",
            detail=f"Found secret in parsed structure: {inner_key}",
        )

    # Layer 3: value-format detection (gitleaks patterns).
    known = matches_known_format(value)
    if known is not None:
        return Finding(
            key=key,
            reason=f"format:{known.id}",
            layer="format_detection",
            detail=known.description,
        )

    # Layer 5: entropy detection, aggressive mode only.
    if config.mode != Mode.AGGRESSIVE:
        return None
    score = looks_like_secret(value, config.entropy_threshold)
    if score is None:
        return None
    return Finding(
        key=key,
        reason=f"entropy:{score:.2f}",
        layer="entropy",
        detail=f"Shannon entropy {score:.2f} bits/char exceeds threshold",
    )
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def _apply_redaction(
|
|
250
|
+
finding: Finding, key: str, value: str, config: ScreenConfig
|
|
251
|
+
) -> str:
|
|
252
|
+
"""Apply the appropriate redaction strategy based on finding layer.
|
|
253
|
+
|
|
254
|
+
Single source of truth for layer-specific redaction behavior.
|
|
255
|
+
Used by both redact_pair and _redact_recursive.
|
|
256
|
+
"""
|
|
257
|
+
if finding.layer == "url_credentials":
|
|
258
|
+
return redact_url_password(value, config.replacement)
|
|
259
|
+
|
|
260
|
+
if finding.layer == "structured_parsing":
|
|
261
|
+
return _redact_structured(value, config)
|
|
262
|
+
|
|
263
|
+
return config.replacement
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def _redact_structured(value: str, config: ScreenConfig) -> str:
    """Redact secret portions within a structured value string.

    Re-parses ``value`` with extract_pairs, runs detection on every
    non-empty parsed sub-value, and replaces each occurrence of a detected
    secret in the raw text with ``config.replacement``.

    Replacement is textual: every verbatim occurrence of a secret string is
    redacted, including occurrences inside other keys or values. Longer
    secrets are handled first so a secret that contains another is redacted
    as a whole.

    Returns:
        ``value`` unchanged when re-detection finds no secrets; the whole
        ``config.replacement`` when any detected secret cannot be located
        verbatim in the raw text (for example because it is escaped or
        encoded there); otherwise the partially redacted string.
    """
    secrets: set[str] = set()
    for sub_key, sub_value in extract_pairs(value):
        if not sub_value:
            continue
        if _detect(sub_key, sub_value, config) is not None:
            secrets.add(sub_value)

    if not secrets:
        return value

    # Fail closed: a parsed secret that does not appear verbatim in the raw
    # text cannot be cut out selectively, and returning the text as-is would
    # leak it, so redact the whole value instead.
    if any(secret not in value for secret in secrets):
        return config.replacement

    # Split the text into raw pieces and redacted gaps (None). Only raw
    # pieces are searched on later passes, so inserted replacement text is
    # never re-scanned or corrupted by a shorter secret.
    pieces: list[str | None] = [value]
    for secret in sorted(secrets, key=len, reverse=True):
        rebuilt: list[str | None] = []
        for piece in pieces:
            if piece is None:
                rebuilt.append(None)
                continue
            for index, chunk in enumerate(piece.split(secret)):
                if index:
                    rebuilt.append(None)
                rebuilt.append(chunk)
        pieces = rebuilt

    return "".join(config.replacement if piece is None else piece for piece in pieces)
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def _redact_recursive(
    data: object,
    config: ScreenConfig,
) -> object:
    """Return a redacted copy of a nested dict/list structure.

    Dicts and lists are rebuilt; string values stored under dict keys are
    screened with the stringified key. Anything else, including strings
    that are direct list items, is passed through unchanged.
    """
    if isinstance(data, list):
        return [_redact_recursive(element, config) for element in data]

    if not isinstance(data, dict):
        return data

    result: dict[object, object] = {}
    for name, item in data.items():
        label = str(name)
        if isinstance(item, (dict, list)):
            result[name] = _redact_recursive(item, config)
            continue
        if not isinstance(item, str):
            result[name] = item
            continue
        hit = _detect(label, item, config)
        result[name] = item if hit is None else _apply_redaction(hit, label, item, config)
    return result
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
def _audit_recursive(
    data: object,
    config: ScreenConfig,
    findings: list[Finding],
) -> None:
    """Walk a nested dict/list structure and append findings in place.

    String values stored under dict keys are screened with the stringified
    key; nested dicts and lists are descended into; everything else is
    ignored. Results are appended to ``findings`` in traversal order.
    """
    if isinstance(data, list):
        for element in data:
            _audit_recursive(element, config, findings)
        return

    if not isinstance(data, dict):
        return

    for name, item in data.items():
        label = str(name)
        if isinstance(item, (dict, list)):
            _audit_recursive(item, config, findings)
        elif isinstance(item, str):
            hit = _detect(label, item, config)
            if hit is not None:
                findings.append(hit)
|