scrub-ai 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
scrub_ai-1.0.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Rajwinder Marwaha
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,218 @@
1
+ Metadata-Version: 2.4
2
+ Name: scrub-ai
3
+ Version: 1.0.0
4
+ Summary: Sanitize sensitive content from any text before sharing with AI assistants
5
+ Author: Rajwinder Marwaha
6
+ License: MIT License
7
+
8
+ Copyright (c) 2026 Rajwinder Marwaha
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://github.com/rajwindermarwaha/scrub-ai
29
+ Project-URL: Repository, https://github.com/rajwindermarwaha/scrub-ai
30
+ Project-URL: Issues, https://github.com/rajwindermarwaha/scrub-ai/issues
31
+ Keywords: security,privacy,ai,sanitize,secrets,redact
32
+ Classifier: Development Status :: 4 - Beta
33
+ Classifier: Environment :: Console
34
+ Classifier: License :: OSI Approved :: MIT License
35
+ Classifier: Operating System :: Microsoft :: Windows
36
+ Classifier: Operating System :: POSIX :: Linux
37
+ Classifier: Operating System :: MacOS
38
+ Classifier: Programming Language :: Python :: 3.10
39
+ Classifier: Programming Language :: Python :: 3.11
40
+ Classifier: Programming Language :: Python :: 3.12
41
+ Requires-Python: >=3.10
42
+ Description-Content-Type: text/markdown
43
+ License-File: LICENSE
44
+ Requires-Dist: click>=8.0
45
+ Requires-Dist: pyperclip>=1.8
46
+ Requires-Dist: keyboard>=0.13; sys_platform == "win32"
47
+ Requires-Dist: pystray>=0.19; sys_platform == "win32"
48
+ Requires-Dist: Pillow>=10.0; sys_platform == "win32"
49
+ Requires-Dist: win10toast>=0.9; sys_platform == "win32"
50
+ Provides-Extra: dev
51
+ Requires-Dist: pytest>=7.0; extra == "dev"
52
+ Requires-Dist: pytest-cov; extra == "dev"
53
+ Dynamic: license-file
54
+
55
+ # 🧹 scrub-ai
56
+
57
+ > Sanitize sensitive content from any text before sharing with AI assistants.
58
+
59
+ [![PyPI version](https://badge.fury.io/py/scrub-ai.svg)](https://badge.fury.io/py/scrub-ai)
60
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
61
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
62
+ [![Platform](https://img.shields.io/badge/platform-Windows%20%7C%20Linux%20%7C%20macOS-blue.svg)]()
63
+ [![CI](https://github.com/rajwindermarwaha/scrub-ai/actions/workflows/ci.yml/badge.svg)](https://github.com/rajwindermarwaha/scrub-ai/actions/workflows/ci.yml)
64
+
65
+ ---
66
+
67
+ ## The Problem
68
+
69
+ Every day, developers copy sensitive content into AI assistants without thinking:
70
+
71
+ ```
72
+ โŒ Stack trace with internal hostnames โ†’ pasted into ChatGPT
73
+ โŒ Application logs with session tokens โ†’ pasted into Copilot
74
+ โŒ Config files with database passwords โ†’ pasted into Claude
75
+ โŒ kubectl output with cluster names โ†’ pasted into AI
76
+ โŒ AWS CLI output with account IDs โ†’ pasted into ChatGPT
77
+ ```
78
+
79
+ Once that data leaves your machine, you have no control over it.
80
+
81
+ **scrub-ai fixes this** — it detects and masks sensitive content before you share it with any AI tool.
82
+
83
+ ---
84
+
85
+ ## Features
86
+
87
+ - 🔑 **Secrets detection** — API keys, tokens, passwords, private keys
88
+ - ☁️ **Cloud detection** — AWS account IDs, ARNs, GCP project IDs, Azure subscriptions
89
+ - 🌐 **Network detection** — IP addresses, internal hostnames, internal URLs
90
+ - ⌨️ **Windows hotkey** — press `Ctrl+Alt+S` to sanitize clipboard instantly
91
+ - 🖥️ **System tray** — runs quietly in the background
92
+ - 📋 **CLI** — pipe any text through it from the terminal
93
+ - 📦 **PyPI** — install with a single `pip install scrub-ai`
94
+
95
+ ---
96
+
97
+ ## Quick Start
98
+
99
+ ### Install
100
+
101
+ ```bash
102
+ pip install scrub-ai
103
+ ```
104
+
105
+ ### CLI Usage
106
+
107
+ ```bash
108
+ # Pipe any text through it
109
+ cat error.log | scrub-ai
110
+
111
+ # Sanitize a file
112
+ scrub-ai --file crash.log
113
+
114
+ # See what would be detected without changing anything
115
+ scrub-ai --dry-run --file logs.txt
116
+
117
+ # Sanitize and copy result to clipboard
118
+ scrub-ai --file logs.txt --copy
119
+ ```
120
+
121
+ ### Hotkey Usage (Windows only)
122
+
123
+ ```bash
124
+ # Start scrub-ai in the background
125
+ scrub-ai --start
126
+
127
+ # Icon appears in system tray (bottom right)
128
+ # Copy any text with Ctrl+C as normal
129
+ # Press Ctrl+Alt+S to sanitize clipboard
130
+ # Paste clean text with Ctrl+V
131
+ ```
132
+
133
+ ---
134
+
135
+ ## Example
136
+
137
+ **Input:**
138
+ ```
139
+ ERROR 2024-01-15 14:32:01 - Connection failed
140
+ host: db01.prod.internal
141
+ password: myS3cretP@ss123
142
+ aws_access_key_id: AKIAIOSFODNN7EXAMPLE
143
+ aws_account_id: 123456789012
144
+ ip: 10.0.1.45
145
+ ```
146
+
147
+ **Output:**
148
+ ```
149
+ ERROR 2024-01-15 14:32:01 - Connection failed
150
+ host: [INTERNAL_HOST]
151
+ password: [REDACTED]
152
+ aws_access_key_id: [AWS_ACCESS_KEY]
153
+ aws_account_id: [AWS_ACCOUNT_ID]
154
+ ip: [IP_ADDRESS]
155
+ ```
156
+
157
+ **Detection summary (stderr):**
158
+ ```
159
+ Detected 5 sensitive value(s): aws_access_key=1, aws_account_id=1, internal_host=1, ipv4=1, password=1
160
+ ```
161
+
162
+ ---
163
+
164
+ ## What Gets Detected
165
+
166
+ | Category | Examples |
167
+ |---|---|
168
+ | AWS credentials | Access keys, secret keys, session tokens |
169
+ | AWS infrastructure | Account IDs, ARNs, S3 URLs |
170
+ | GCP credentials | Service account keys, project IDs |
171
+ | Azure credentials | Subscription IDs, connection strings |
172
+ | Generic secrets | API keys, bearer tokens, JWTs, private keys, hex tokens |
173
+ | Passwords | `password=`, `passwd=`, `pwd=` key-value patterns |
174
+ | Network | IPv4, IPv6, internal hostnames, internal URLs |
175
+
176
+ ---
177
+
178
+ ## Roadmap
179
+
180
+ - [x] Project setup
181
+ - [x] **v1.0** — CLI + secrets + cloud + network detection + Windows hotkey + system tray
182
+ - [ ] **v1.1** — PII detection (emails, phones) via Presidio
183
+ - [ ] **v1.2** — Watch mode (automatic clipboard monitoring)
184
+ - [ ] **v2.0** — VS Code extension
185
+ - [ ] **v2.1** — Browser extension (warns before pasting into ChatGPT)
186
+ - [ ] **v3.0** — Team policies + audit log
187
+
188
+ ---
189
+
190
+ ## Contributing
191
+
192
+ Contributions are welcome! Please read [CONTRIBUTING.md](CONTRIBUTING.md) first.
193
+
194
+ ```bash
195
+ # Clone
196
+ git clone https://github.com/rajwindermarwaha/scrub-ai
197
+ cd scrub-ai
198
+
199
+ # Install dev dependencies
200
+ pip install -e ".[dev]"
201
+
202
+ # Run tests
203
+ pytest
204
+ ```
205
+
206
+ ---
207
+
208
+ ## License
209
+
210
+ MIT — see [LICENSE](LICENSE)
211
+
212
+ ---
213
+
214
+ ## Author
215
+
216
+ Built by [@rajwindermarwaha](https://github.com/rajwindermarwaha)
217
+
218
+ > *Built this because I had to put in the extra effort of copying everything into Notepad first and manually scrubbing it before sharing with AI tools. Figured others do the same.*
@@ -0,0 +1,164 @@
1
+ # 🧹 scrub-ai
2
+
3
+ > Sanitize sensitive content from any text before sharing with AI assistants.
4
+
5
+ [![PyPI version](https://badge.fury.io/py/scrub-ai.svg)](https://badge.fury.io/py/scrub-ai)
6
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
7
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
8
+ [![Platform](https://img.shields.io/badge/platform-Windows%20%7C%20Linux%20%7C%20macOS-blue.svg)]()
9
+ [![CI](https://github.com/rajwindermarwaha/scrub-ai/actions/workflows/ci.yml/badge.svg)](https://github.com/rajwindermarwaha/scrub-ai/actions/workflows/ci.yml)
10
+
11
+ ---
12
+
13
+ ## The Problem
14
+
15
+ Every day, developers copy sensitive content into AI assistants without thinking:
16
+
17
+ ```
18
+ โŒ Stack trace with internal hostnames โ†’ pasted into ChatGPT
19
+ โŒ Application logs with session tokens โ†’ pasted into Copilot
20
+ โŒ Config files with database passwords โ†’ pasted into Claude
21
+ โŒ kubectl output with cluster names โ†’ pasted into AI
22
+ โŒ AWS CLI output with account IDs โ†’ pasted into ChatGPT
23
+ ```
24
+
25
+ Once that data leaves your machine, you have no control over it.
26
+
27
+ **scrub-ai fixes this** — it detects and masks sensitive content before you share it with any AI tool.
28
+
29
+ ---
30
+
31
+ ## Features
32
+
33
+ - 🔑 **Secrets detection** — API keys, tokens, passwords, private keys
34
+ - ☁️ **Cloud detection** — AWS account IDs, ARNs, GCP project IDs, Azure subscriptions
35
+ - 🌐 **Network detection** — IP addresses, internal hostnames, internal URLs
36
+ - ⌨️ **Windows hotkey** — press `Ctrl+Alt+S` to sanitize clipboard instantly
37
+ - 🖥️ **System tray** — runs quietly in the background
38
+ - 📋 **CLI** — pipe any text through it from the terminal
39
+ - 📦 **PyPI** — install with a single `pip install scrub-ai`
40
+
41
+ ---
42
+
43
+ ## Quick Start
44
+
45
+ ### Install
46
+
47
+ ```bash
48
+ pip install scrub-ai
49
+ ```
50
+
51
+ ### CLI Usage
52
+
53
+ ```bash
54
+ # Pipe any text through it
55
+ cat error.log | scrub-ai
56
+
57
+ # Sanitize a file
58
+ scrub-ai --file crash.log
59
+
60
+ # See what would be detected without changing anything
61
+ scrub-ai --dry-run --file logs.txt
62
+
63
+ # Sanitize and copy result to clipboard
64
+ scrub-ai --file logs.txt --copy
65
+ ```
66
+
67
+ ### Hotkey Usage (Windows only)
68
+
69
+ ```bash
70
+ # Start scrub-ai in the background
71
+ scrub-ai --start
72
+
73
+ # Icon appears in system tray (bottom right)
74
+ # Copy any text with Ctrl+C as normal
75
+ # Press Ctrl+Alt+S to sanitize clipboard
76
+ # Paste clean text with Ctrl+V
77
+ ```
78
+
79
+ ---
80
+
81
+ ## Example
82
+
83
+ **Input:**
84
+ ```
85
+ ERROR 2024-01-15 14:32:01 - Connection failed
86
+ host: db01.prod.internal
87
+ password: myS3cretP@ss123
88
+ aws_access_key_id: AKIAIOSFODNN7EXAMPLE
89
+ aws_account_id: 123456789012
90
+ ip: 10.0.1.45
91
+ ```
92
+
93
+ **Output:**
94
+ ```
95
+ ERROR 2024-01-15 14:32:01 - Connection failed
96
+ host: [INTERNAL_HOST]
97
+ password: [REDACTED]
98
+ aws_access_key_id: [AWS_ACCESS_KEY]
99
+ aws_account_id: [AWS_ACCOUNT_ID]
100
+ ip: [IP_ADDRESS]
101
+ ```
102
+
103
+ **Detection summary (stderr):**
104
+ ```
105
+ Detected 5 sensitive value(s): aws_access_key=1, aws_account_id=1, internal_host=1, ipv4=1, password=1
106
+ ```
107
+
108
+ ---
109
+
110
+ ## What Gets Detected
111
+
112
+ | Category | Examples |
113
+ |---|---|
114
+ | AWS credentials | Access keys, secret keys, session tokens |
115
+ | AWS infrastructure | Account IDs, ARNs, S3 URLs |
116
+ | GCP credentials | Service account keys, project IDs |
117
+ | Azure credentials | Subscription IDs, connection strings |
118
+ | Generic secrets | API keys, bearer tokens, JWTs, private keys, hex tokens |
119
+ | Passwords | `password=`, `passwd=`, `pwd=` key-value patterns |
120
+ | Network | IPv4, IPv6, internal hostnames, internal URLs |
121
+
122
+ ---
123
+
124
+ ## Roadmap
125
+
126
+ - [x] Project setup
127
+ - [x] **v1.0** — CLI + secrets + cloud + network detection + Windows hotkey + system tray
128
+ - [ ] **v1.1** — PII detection (emails, phones) via Presidio
129
+ - [ ] **v1.2** — Watch mode (automatic clipboard monitoring)
130
+ - [ ] **v2.0** — VS Code extension
131
+ - [ ] **v2.1** — Browser extension (warns before pasting into ChatGPT)
132
+ - [ ] **v3.0** — Team policies + audit log
133
+
134
+ ---
135
+
136
+ ## Contributing
137
+
138
+ Contributions are welcome! Please read [CONTRIBUTING.md](CONTRIBUTING.md) first.
139
+
140
+ ```bash
141
+ # Clone
142
+ git clone https://github.com/rajwindermarwaha/scrub-ai
143
+ cd scrub-ai
144
+
145
+ # Install dev dependencies
146
+ pip install -e ".[dev]"
147
+
148
+ # Run tests
149
+ pytest
150
+ ```
151
+
152
+ ---
153
+
154
+ ## License
155
+
156
+ MIT — see [LICENSE](LICENSE)
157
+
158
+ ---
159
+
160
+ ## Author
161
+
162
+ Built by [@rajwindermarwaha](https://github.com/rajwindermarwaha)
163
+
164
+ > *Built this because I had to put in the extra effort of copying everything into Notepad first and manually scrubbing it before sharing with AI tools. Figured others do the same.*
@@ -0,0 +1,50 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "scrub-ai"
7
+ version = "1.0.0"
8
+ description = "Sanitize sensitive content from any text before sharing with AI assistants"
9
+ readme = "README.md"
10
+ license = { file = "LICENSE" }
11
+ authors = [{ name = "Rajwinder Marwaha" }]
12
+ requires-python = ">=3.10"
13
+ keywords = ["security", "privacy", "ai", "sanitize", "secrets", "redact"]
14
+ classifiers = [
15
+ "Development Status :: 4 - Beta",
16
+ "Environment :: Console",
17
+ "License :: OSI Approved :: MIT License",
18
+ "Operating System :: Microsoft :: Windows",
19
+ "Operating System :: POSIX :: Linux",
20
+ "Operating System :: MacOS",
21
+ "Programming Language :: Python :: 3.10",
22
+ "Programming Language :: Python :: 3.11",
23
+ "Programming Language :: Python :: 3.12",
24
+ ]
25
+ dependencies = [
26
+ "click>=8.0",
27
+ "pyperclip>=1.8",
28
+ "keyboard>=0.13; sys_platform == 'win32'",
29
+ "pystray>=0.19; sys_platform == 'win32'",
30
+ "Pillow>=10.0; sys_platform == 'win32'",
31
+ "win10toast>=0.9; sys_platform == 'win32'",
32
+ ]
33
+
34
+ [project.urls]
35
+ Homepage = "https://github.com/rajwindermarwaha/scrub-ai"
36
+ Repository = "https://github.com/rajwindermarwaha/scrub-ai"
37
+ Issues = "https://github.com/rajwindermarwaha/scrub-ai/issues"
38
+
39
+ [project.optional-dependencies]
40
+ dev = ["pytest>=7.0", "pytest-cov"]
41
+
42
+ [project.scripts]
43
+ scrub-ai = "scrub_ai.cli:main"
44
+
45
+ [tool.setuptools.packages.find]
46
+ where = ["."]
47
+ include = ["scrub_ai*"]
48
+
49
+ [tool.pytest.ini_options]
50
+ testpaths = ["tests"]
@@ -0,0 +1 @@
1
+ __version__ = "1.0.0"
@@ -0,0 +1,73 @@
1
+ from __future__ import annotations
2
+
3
+ import sys
4
+ from pathlib import Path
5
+
6
+ import click
7
+ import pyperclip
8
+
9
+ from scrub_ai.sanitizer import sanitize_text
10
+ from scrub_ai import config as cfg
11
+
12
+
13
+ def _load_input(file_path: str | None) -> str:
14
+ if file_path is not None:
15
+ return Path(file_path).read_text(encoding="utf-8")
16
+
17
+ if not sys.stdin.isatty():
18
+ return sys.stdin.read()
19
+
20
+ raise click.ClickException("No input provided. Use --file or pipe text via stdin.")
21
+
22
+
23
+ def _format_report(report: dict[str, object]) -> str:
24
+ total = int(report.get("total_matches", 0))
25
+ if total == 0:
26
+ return "No sensitive content detected."
27
+
28
+ by_label = report.get("by_label", {})
29
+ if isinstance(by_label, dict) and by_label:
30
+ details = ", ".join(f"{label}={count}" for label, count in sorted(by_label.items()))
31
+ return f"Detected {total} sensitive value(s): {details}"
32
+
33
+ return f"Detected {total} sensitive value(s)."
34
+
35
+
36
@click.command()
@click.option("--file", "file_path", type=click.Path(exists=True, dir_okay=False, path_type=str), help="Read input from a file.")
@click.option("--dry-run", is_flag=True, help="Show detections but do not modify the output text.")
@click.option("--copy", "copy_output", is_flag=True, help="Copy output text to clipboard.")
@click.option("--start", is_flag=True, help="Start background hotkey listener and system tray (Windows only).")
def main(file_path: str | None, dry_run: bool, copy_output: bool, start: bool) -> None:
    """Sanitize sensitive content from text."""
    # The docstring above is the command's help text; keep it as is.

    # --- Background mode: hand over to the tray module and stop here. ---
    if start:
        if sys.platform != "win32":
            raise click.ClickException("--start is only supported on Windows.")
        # Imported lazily, only once we know we are on Windows.
        from scrub_ai import tray
        click.echo("scrub-ai running. Press Ctrl+Alt+S to sanitize clipboard. Right-click the tray icon to quit.", err=True)
        tray.start()
        return

    # --- One-shot mode: read, sanitize, print. ---
    raw_text = _load_input(file_path)
    sanitized, findings = sanitize_text(raw_text)

    # A dry run echoes the input unchanged; only the report differs.
    if dry_run:
        output_text = raw_text
    else:
        output_text = sanitized
    sys.stdout.write(output_text)

    # The report goes to stderr so stdout stays clean for piping.
    click.echo("", err=True)
    summary = _format_report(findings)
    prefix = "Dry run: " if dry_run else ""
    click.echo(f"{prefix}{summary}", err=True)

    if not copy_output:
        return

    try:
        pyperclip.copy(output_text)
        click.echo("Copied output to clipboard.", err=True)
    except pyperclip.PyperclipException as clip_err:
        raise click.ClickException(f"Clipboard copy failed: {clip_err}") from clip_err
70
+
71
+
72
+ if __name__ == "__main__":
73
+ main()
@@ -0,0 +1,68 @@
1
+ """
2
+ config.py — Persistent user configuration for scrub-ai.
3
+
4
+ Settings are stored in a JSON file:
5
+ - Windows: %APPDATA%\\scrub-ai\\config.json
6
+ - Linux/macOS: ~/.config/scrub-ai/config.json
7
+
8
+ Only a small set of knobs exist in v1:
9
+ - enabled (bool) — whether the hotkey listener is active
10
+ - hotkey (str) — the keyboard shortcut string (default "ctrl+alt+s")
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import json
16
+ import os
17
+ import sys
18
+ from pathlib import Path
19
+
20
+
21
# Built-in settings; load() overlays the user's config file on top of these.
_DEFAULTS: dict[str, object] = {
    "enabled": True,
    "hotkey": "ctrl+alt+s",
}


def _config_dir() -> Path:
    """Return the per-user directory that holds the scrub-ai config file."""
    if sys.platform == "win32":
        # APPDATA may be unset or empty; fall back to the conventional path.
        appdata = os.environ.get("APPDATA") or Path.home() / "AppData" / "Roaming"
        return Path(appdata) / "scrub-ai"
    return Path.home() / ".config" / "scrub-ai"


def _config_path() -> Path:
    """Return the full path of ``config.json`` inside the config directory."""
    return _config_dir() / "config.json"


def load() -> dict[str, object]:
    """Return the current config, falling back to defaults for any missing key.

    The defaults are returned unchanged (as a fresh copy) when the file is
    missing, unreadable, not valid UTF-8, not valid JSON, or valid JSON whose
    top-level value is not an object.
    """
    path = _config_path()
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError covers a missing or unreadable file. ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        return dict(_DEFAULTS)
    if not isinstance(data, dict):
        # e.g. a file containing `[]` or `null`: merging it would raise.
        return dict(_DEFAULTS)
    return {**_DEFAULTS, **data}


def save(config: dict[str, object]) -> None:
    """Persist config to disk, creating directories as needed."""
    path = _config_path()
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(json.dumps(config, indent=2), encoding="utf-8")


def set_enabled(value: bool) -> None:
    """Store ``value`` under the ``enabled`` key, keeping every other setting."""
    cfg = load()
    cfg["enabled"] = value
    save(cfg)


def is_enabled() -> bool:
    """Return the ``enabled`` setting coerced to ``bool`` (default ``True``)."""
    return bool(load().get("enabled", True))


def get_hotkey() -> str:
    """Return the ``hotkey`` setting coerced to ``str`` (default from ``_DEFAULTS``)."""
    return str(load().get("hotkey", _DEFAULTS["hotkey"]))
@@ -0,0 +1,5 @@
1
+ from .secrets import SecretsDetector
2
+ from .cloud import CloudDetector
3
+ from .network import NetworkDetector
4
+
5
+ __all__ = ["SecretsDetector", "CloudDetector", "NetworkDetector"]
@@ -0,0 +1,34 @@
1
+ from __future__ import annotations
2
+ import re
3
+ from dataclasses import dataclass
4
+
5
+
6
@dataclass
class Match:
    """One sensitive span found in a piece of text."""

    start: int          # index of the first character of the span
    end: int            # index one past the last character of the span
    original: str       # the text found at [start:end]
    replacement: str    # placeholder to substitute for the span
    category: str       # name of the detector that produced the match
    label: str          # label of the pattern that matched
    confidence: float = 1.0


class BaseDetector:
    """Regex-driven detector; subclasses supply ``name``, ``priority`` and ``patterns``."""

    name: str = ""
    priority: int = 99
    patterns: list[tuple[re.Pattern, str, str]] = []  # (pattern, replacement, label)

    def detect(self, text: str) -> list[Match]:
        """Return a ``Match`` for every occurrence of every pattern in ``text``.

        When a pattern defines a capture group, the reported span is group 1
        rather than the whole match. Context patterns such as
        ``account_id\\s*[=:]\\s*(\\d{12})`` wrap only the sensitive value in
        the group; reporting the whole match would also replace the key name
        and separator. Patterns without a capture group, or whose group 1 did
        not take part in the match, report the whole match.

        NOTE(review): this assumes group 1 is always the sensitive value --
        confirm for detectors defined outside this module.
        """
        found: list[Match] = []
        for pattern, replacement, label in self.patterns:
            has_value_group = pattern.groups > 0
            for m in pattern.finditer(text):
                group = 1 if has_value_group and m.group(1) is not None else 0
                found.append(Match(
                    start=m.start(group),
                    end=m.end(group),
                    original=m.group(group),
                    replacement=replacement,
                    category=self.name,
                    label=label,
                ))
        return found
@@ -0,0 +1,51 @@
1
+ from __future__ import annotations
2
+ import re
3
+ from .base import BaseDetector
4
+
5
+
6
class CloudDetector(BaseDetector):
    """Patterns for AWS, GCP and Azure identifiers and credentials.

    Each entry is ``(compiled pattern, replacement placeholder, label)``.
    Context patterns (``key = value``) wrap only the value in a capture group.
    """

    name = "cloud"
    priority = 2
    patterns = [
        # ── AWS ──────────────────────────────────────────────────────────────

        # AWS Access Key IDs (AKIA / ASIA / AROA / AIDA / ANPA / ANVA / APKA prefixes)
        (re.compile(r"\b(?:AKIA|ASIA|AROA|AIDA|ANPA|ANVA|APKA)[0-9A-Z]{16}\b"), "[AWS_ACCESS_KEY_ID]", "aws_access_key_id"),

        # AWS Secret Access Keys — 40-char base64 in key=value context
        (re.compile(r"(?i)aws[_-]?secret[_-]?access[_-]?key\s*[=:]\s*['\"]?([A-Za-z0-9+/]{40})['\"]?"), "[AWS_SECRET_ACCESS_KEY]", "aws_secret_access_key"),

        # AWS Account IDs — 12-digit numbers in account_id= / aws_account= context
        (re.compile(r"(?i)(?:account[_-]?id|aws[_-]?account)\s*[=:]\s*['\"]?(\d{12})['\"]?"), "[AWS_ACCOUNT_ID]", "aws_account_id"),

        # ARNs — arn:aws:service:region:account-id:resource
        # The account field may be empty (S3: arn:aws:s3:::bucket) or the
        # literal "aws" (AWS-managed policies), not only a 12-digit ID.
        (re.compile(r"\barn:aws[a-z0-9-]*:[a-z0-9\-]*:[a-z0-9\-]*:(?:\d{12}|aws)?:[^\s\"']+"), "[AWS_ARN]", "aws_arn"),

        # AWS Session Tokens (base64, 100–300 chars, in token= context)
        (re.compile(r"(?i)(?:aws[_-]?session[_-]?token|session[_-]?token)\s*[=:]\s*['\"]?([A-Za-z0-9+/=]{100,300})['\"]?"), "[AWS_SESSION_TOKEN]", "aws_session_token"),

        # ── GCP ──────────────────────────────────────────────────────────────

        # GCP API keys (AIza prefix, 39 chars total)
        # A key may end in "-", after which a plain \b never matches when
        # whitespace or end of text follows, so accept that case explicitly.
        (re.compile(r"\bAIza[0-9A-Za-z\-_]{35}(?:\b|(?<=-))"), "[GCP_API_KEY]", "gcp_api_key"),

        # GCP service account email
        (re.compile(r"\b[a-z0-9\-]+@[a-z0-9\-]+\.iam\.gserviceaccount\.com\b"), "[GCP_SERVICE_ACCOUNT]", "gcp_service_account"),

        # GCP project IDs in project_id= / gcp_project= context
        (re.compile(r"(?i)(?:project[_-]?id|gcp[_-]?project)\s*[=:]\s*['\"]?([a-z][a-z0-9\-]{4,28}[a-z0-9])['\"]?"), "[GCP_PROJECT_ID]", "gcp_project_id"),

        # ── Azure ────────────────────────────────────────────────────────────

        # Azure Subscription / Tenant / Client IDs (UUIDs in context)
        (re.compile(r"(?i)(?:subscription[_-]?id|tenant[_-]?id|client[_-]?id)\s*[=:]\s*['\"]?([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})['\"]?"), "[AZURE_ID]", "azure_id"),

        # Azure Client Secrets (34+ char random string in client_secret= context)
        (re.compile(r"(?i)client[_-]?secret\s*[=:]\s*['\"]?([A-Za-z0-9~._\-]{34,})['\"]?"), "[AZURE_CLIENT_SECRET]", "azure_client_secret"),

        # Azure Storage connection strings
        # Accept http as well as https, and a string that ends right after
        # AccountKey without a trailing ";".
        (re.compile(r"DefaultEndpointsProtocol=https?;AccountName=[^;]+;AccountKey=[A-Za-z0-9+/=]{88}(?:;[^\s\"']*)?"), "[AZURE_STORAGE_CONNECTION_STRING]", "azure_storage_connection_string"),

        # Azure SAS tokens (sv=...&sig=... in URL or standalone)
        # Values contain "-", ":" and "." (e.g. sv=2020-08-04, ISO timestamps),
        # and tokens interleave ss / srt / st / spr / si / sip with the rest.
        (re.compile(
            r"(?i)(?:sv|ss|srt|sp|se|st|spr|sr|si|sip|sig)=[A-Za-z0-9%+/=:._~\-]+"
            r"(?:&(?:sv|ss|srt|sp|se|st|spr|sr|si|sip|sig)=[A-Za-z0-9%+/=:._~\-]+){3,}"
        ), "[AZURE_SAS_TOKEN]", "azure_sas_token"),
    ]