scrub-ai 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scrub_ai-1.0.0/LICENSE +21 -0
- scrub_ai-1.0.0/PKG-INFO +218 -0
- scrub_ai-1.0.0/README.md +164 -0
- scrub_ai-1.0.0/pyproject.toml +50 -0
- scrub_ai-1.0.0/scrub_ai/__init__.py +1 -0
- scrub_ai-1.0.0/scrub_ai/cli.py +73 -0
- scrub_ai-1.0.0/scrub_ai/config.py +68 -0
- scrub_ai-1.0.0/scrub_ai/detectors/__init__.py +5 -0
- scrub_ai-1.0.0/scrub_ai/detectors/base.py +34 -0
- scrub_ai-1.0.0/scrub_ai/detectors/cloud.py +51 -0
- scrub_ai-1.0.0/scrub_ai/detectors/network.py +49 -0
- scrub_ai-1.0.0/scrub_ai/detectors/secrets.py +27 -0
- scrub_ai-1.0.0/scrub_ai/hotkey.py +84 -0
- scrub_ai-1.0.0/scrub_ai/notifier.py +56 -0
- scrub_ai-1.0.0/scrub_ai/sanitizer.py +85 -0
- scrub_ai-1.0.0/scrub_ai/tray.py +128 -0
- scrub_ai-1.0.0/scrub_ai.egg-info/PKG-INFO +218 -0
- scrub_ai-1.0.0/scrub_ai.egg-info/SOURCES.txt +30 -0
- scrub_ai-1.0.0/scrub_ai.egg-info/dependency_links.txt +1 -0
- scrub_ai-1.0.0/scrub_ai.egg-info/entry_points.txt +2 -0
- scrub_ai-1.0.0/scrub_ai.egg-info/requires.txt +12 -0
- scrub_ai-1.0.0/scrub_ai.egg-info/top_level.txt +1 -0
- scrub_ai-1.0.0/setup.cfg +4 -0
- scrub_ai-1.0.0/tests/test_base_detector.py +49 -0
- scrub_ai-1.0.0/tests/test_cli.py +51 -0
- scrub_ai-1.0.0/tests/test_cloud_detector.py +66 -0
- scrub_ai-1.0.0/tests/test_config.py +175 -0
- scrub_ai-1.0.0/tests/test_hotkey.py +212 -0
- scrub_ai-1.0.0/tests/test_network_detector.py +34 -0
- scrub_ai-1.0.0/tests/test_notifier.py +188 -0
- scrub_ai-1.0.0/tests/test_sanitizer.py +51 -0
- scrub_ai-1.0.0/tests/test_secrets_detector.py +61 -0
scrub_ai-1.0.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Rajwinder Marwaha
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
scrub_ai-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: scrub-ai
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Sanitize sensitive content from any text before sharing with AI assistants
|
|
5
|
+
Author: Rajwinder Marwaha
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2026 Rajwinder Marwaha
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
|
|
28
|
+
Project-URL: Homepage, https://github.com/rajwindermarwaha/scrub-ai
|
|
29
|
+
Project-URL: Repository, https://github.com/rajwindermarwaha/scrub-ai
|
|
30
|
+
Project-URL: Issues, https://github.com/rajwindermarwaha/scrub-ai/issues
|
|
31
|
+
Keywords: security,privacy,ai,sanitize,secrets,redact
|
|
32
|
+
Classifier: Development Status :: 4 - Beta
|
|
33
|
+
Classifier: Environment :: Console
|
|
34
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
35
|
+
Classifier: Operating System :: Microsoft :: Windows
|
|
36
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
37
|
+
Classifier: Operating System :: MacOS
|
|
38
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
39
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
40
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
41
|
+
Requires-Python: >=3.10
|
|
42
|
+
Description-Content-Type: text/markdown
|
|
43
|
+
License-File: LICENSE
|
|
44
|
+
Requires-Dist: click>=8.0
|
|
45
|
+
Requires-Dist: pyperclip>=1.8
|
|
46
|
+
Requires-Dist: keyboard>=0.13; sys_platform == "win32"
|
|
47
|
+
Requires-Dist: pystray>=0.19; sys_platform == "win32"
|
|
48
|
+
Requires-Dist: Pillow>=10.0; sys_platform == "win32"
|
|
49
|
+
Requires-Dist: win10toast>=0.9; sys_platform == "win32"
|
|
50
|
+
Provides-Extra: dev
|
|
51
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
52
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
53
|
+
Dynamic: license-file
|
|
54
|
+
|
|
55
|
+
# 🧹 scrub-ai
|
|
56
|
+
|
|
57
|
+
> Sanitize sensitive content from any text before sharing with AI assistants.
|
|
58
|
+
|
|
59
|
+
[](https://badge.fury.io/py/scrub-ai)
|
|
60
|
+
[](https://www.python.org/downloads/)
|
|
61
|
+
[](https://opensource.org/licenses/MIT)
|
|
62
|
+
[]()
|
|
63
|
+
[](https://github.com/rajwindermarwaha/scrub-ai/actions/workflows/ci.yml)
|
|
64
|
+
|
|
65
|
+
---
|
|
66
|
+
|
|
67
|
+
## The Problem
|
|
68
|
+
|
|
69
|
+
Every day, developers copy sensitive content into AI assistants without thinking:
|
|
70
|
+
|
|
71
|
+
```
|
|
72
|
+
โ Stack trace with internal hostnames โ pasted into ChatGPT
|
|
73
|
+
โ Application logs with session tokens โ pasted into Copilot
|
|
74
|
+
โ Config files with database passwords โ pasted into Claude
|
|
75
|
+
โ kubectl output with cluster names โ pasted into AI
|
|
76
|
+
โ AWS CLI output with account IDs โ pasted into ChatGPT
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
Once that data leaves your machine, you have no control over it.
|
|
80
|
+
|
|
81
|
+
**scrub-ai fixes this** — it detects and masks sensitive content before you share it with any AI tool.
|
|
82
|
+
|
|
83
|
+
---
|
|
84
|
+
|
|
85
|
+
## Features
|
|
86
|
+
|
|
87
|
+
- 🔑 **Secrets detection** — API keys, tokens, passwords, private keys
|
|
88
|
+
- ☁️ **Cloud detection** — AWS account IDs, ARNs, GCP project IDs, Azure subscriptions
|
|
89
|
+
- 🌐 **Network detection** — IP addresses, internal hostnames, internal URLs
|
|
90
|
+
- ⌨️ **Windows hotkey** — press `Ctrl+Alt+S` to sanitize clipboard instantly
|
|
91
|
+
- 🖥️ **System tray** — runs quietly in the background
|
|
92
|
+
- 📋 **CLI** — pipe any text through it from the terminal
|
|
93
|
+
- 📦 **PyPI** — install with a single `pip install scrub-ai`
|
|
94
|
+
|
|
95
|
+
---
|
|
96
|
+
|
|
97
|
+
## Quick Start
|
|
98
|
+
|
|
99
|
+
### Install
|
|
100
|
+
|
|
101
|
+
```bash
|
|
102
|
+
pip install scrub-ai
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
### CLI Usage
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
# Pipe any text through it
|
|
109
|
+
cat error.log | scrub-ai
|
|
110
|
+
|
|
111
|
+
# Sanitize a file
|
|
112
|
+
scrub-ai --file crash.log
|
|
113
|
+
|
|
114
|
+
# See what would be detected without changing anything
|
|
115
|
+
scrub-ai --dry-run --file logs.txt
|
|
116
|
+
|
|
117
|
+
# Sanitize and copy result to clipboard
|
|
118
|
+
scrub-ai --file logs.txt --copy
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
### Hotkey Usage (Windows only)
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
# Start scrub-ai in the background
|
|
125
|
+
scrub-ai --start
|
|
126
|
+
|
|
127
|
+
# Icon appears in system tray (bottom right)
|
|
128
|
+
# Copy any text with Ctrl+C as normal
|
|
129
|
+
# Press Ctrl+Alt+S to sanitize clipboard
|
|
130
|
+
# Paste clean text with Ctrl+V
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
---
|
|
134
|
+
|
|
135
|
+
## Example
|
|
136
|
+
|
|
137
|
+
**Input:**
|
|
138
|
+
```
|
|
139
|
+
ERROR 2024-01-15 14:32:01 - Connection failed
|
|
140
|
+
host: db01.prod.internal
|
|
141
|
+
password: myS3cretP@ss123
|
|
142
|
+
aws_access_key_id: AKIAIOSFODNN7EXAMPLE
|
|
143
|
+
aws_account_id: 123456789012
|
|
144
|
+
ip: 10.0.1.45
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
**Output:**
|
|
148
|
+
```
|
|
149
|
+
ERROR 2024-01-15 14:32:01 - Connection failed
|
|
150
|
+
host: [INTERNAL_HOST]
|
|
151
|
+
password: [REDACTED]
|
|
152
|
+
aws_access_key_id: [AWS_ACCESS_KEY]
|
|
153
|
+
aws_account_id: [AWS_ACCOUNT_ID]
|
|
154
|
+
ip: [IP_ADDRESS]
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
**Detection summary (stderr):**
|
|
158
|
+
```
|
|
159
|
+
Detected 5 sensitive value(s): aws_access_key=1, aws_account_id=1, internal_host=1, ipv4=1, password=1
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
---
|
|
163
|
+
|
|
164
|
+
## What Gets Detected
|
|
165
|
+
|
|
166
|
+
| Category | Examples |
|
|
167
|
+
|---|---|
|
|
168
|
+
| AWS credentials | Access keys, secret keys, session tokens |
|
|
169
|
+
| AWS infrastructure | Account IDs, ARNs, S3 URLs |
|
|
170
|
+
| GCP credentials | Service account keys, project IDs |
|
|
171
|
+
| Azure credentials | Subscription IDs, connection strings |
|
|
172
|
+
| Generic secrets | API keys, bearer tokens, JWTs, private keys, hex tokens |
|
|
173
|
+
| Passwords | `password=`, `passwd=`, `pwd=` key-value patterns |
|
|
174
|
+
| Network | IPv4, IPv6, internal hostnames, internal URLs |
|
|
175
|
+
|
|
176
|
+
---
|
|
177
|
+
|
|
178
|
+
## Roadmap
|
|
179
|
+
|
|
180
|
+
- [x] Project setup
|
|
181
|
+
- [x] **v1.0** โ CLI + secrets + cloud + network detection + Windows hotkey + system tray
|
|
182
|
+
- [ ] **v1.1** โ PII detection (emails, phones) via Presidio
|
|
183
|
+
- [ ] **v1.2** โ Watch mode (automatic clipboard monitoring)
|
|
184
|
+
- [ ] **v2.0** โ VS Code extension
|
|
185
|
+
- [ ] **v2.1** โ Browser extension (warns before pasting into ChatGPT)
|
|
186
|
+
- [ ] **v3.0** โ Team policies + audit log
|
|
187
|
+
|
|
188
|
+
---
|
|
189
|
+
|
|
190
|
+
## Contributing
|
|
191
|
+
|
|
192
|
+
Contributions are welcome! Please read [CONTRIBUTING.md](CONTRIBUTING.md) first.
|
|
193
|
+
|
|
194
|
+
```bash
|
|
195
|
+
# Clone
|
|
196
|
+
git clone https://github.com/rajwindermarwaha/scrub-ai
|
|
197
|
+
cd scrub-ai
|
|
198
|
+
|
|
199
|
+
# Install dev dependencies
|
|
200
|
+
pip install -e ".[dev]"
|
|
201
|
+
|
|
202
|
+
# Run tests
|
|
203
|
+
pytest
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
---
|
|
207
|
+
|
|
208
|
+
## License
|
|
209
|
+
|
|
210
|
+
MIT — see [LICENSE](LICENSE)
|
|
211
|
+
|
|
212
|
+
---
|
|
213
|
+
|
|
214
|
+
## Author
|
|
215
|
+
|
|
216
|
+
Built by [@rajwindermarwaha](https://github.com/rajwindermarwaha)
|
|
217
|
+
|
|
218
|
+
> *Built this because I had to put in the extra effort of copying everything into Notepad first and manually scrubbing it before sharing with AI tools. Figured others do the same.*
|
scrub_ai-1.0.0/README.md
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
# 🧹 scrub-ai
|
|
2
|
+
|
|
3
|
+
> Sanitize sensitive content from any text before sharing with AI assistants.
|
|
4
|
+
|
|
5
|
+
[](https://badge.fury.io/py/scrub-ai)
|
|
6
|
+
[](https://www.python.org/downloads/)
|
|
7
|
+
[](https://opensource.org/licenses/MIT)
|
|
8
|
+
[]()
|
|
9
|
+
[](https://github.com/rajwindermarwaha/scrub-ai/actions/workflows/ci.yml)
|
|
10
|
+
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## The Problem
|
|
14
|
+
|
|
15
|
+
Every day, developers copy sensitive content into AI assistants without thinking:
|
|
16
|
+
|
|
17
|
+
```
|
|
18
|
+
โ Stack trace with internal hostnames โ pasted into ChatGPT
|
|
19
|
+
โ Application logs with session tokens โ pasted into Copilot
|
|
20
|
+
โ Config files with database passwords โ pasted into Claude
|
|
21
|
+
โ kubectl output with cluster names โ pasted into AI
|
|
22
|
+
โ AWS CLI output with account IDs โ pasted into ChatGPT
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
Once that data leaves your machine, you have no control over it.
|
|
26
|
+
|
|
27
|
+
**scrub-ai fixes this** — it detects and masks sensitive content before you share it with any AI tool.
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## Features
|
|
32
|
+
|
|
33
|
+
- 🔑 **Secrets detection** — API keys, tokens, passwords, private keys
|
|
34
|
+
- ☁️ **Cloud detection** — AWS account IDs, ARNs, GCP project IDs, Azure subscriptions
|
|
35
|
+
- 🌐 **Network detection** — IP addresses, internal hostnames, internal URLs
|
|
36
|
+
- ⌨️ **Windows hotkey** — press `Ctrl+Alt+S` to sanitize clipboard instantly
|
|
37
|
+
- 🖥️ **System tray** — runs quietly in the background
|
|
38
|
+
- 📋 **CLI** — pipe any text through it from the terminal
|
|
39
|
+
- 📦 **PyPI** — install with a single `pip install scrub-ai`
|
|
40
|
+
|
|
41
|
+
---
|
|
42
|
+
|
|
43
|
+
## Quick Start
|
|
44
|
+
|
|
45
|
+
### Install
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
pip install scrub-ai
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
### CLI Usage
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
# Pipe any text through it
|
|
55
|
+
cat error.log | scrub-ai
|
|
56
|
+
|
|
57
|
+
# Sanitize a file
|
|
58
|
+
scrub-ai --file crash.log
|
|
59
|
+
|
|
60
|
+
# See what would be detected without changing anything
|
|
61
|
+
scrub-ai --dry-run --file logs.txt
|
|
62
|
+
|
|
63
|
+
# Sanitize and copy result to clipboard
|
|
64
|
+
scrub-ai --file logs.txt --copy
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### Hotkey Usage (Windows only)
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
# Start scrub-ai in the background
|
|
71
|
+
scrub-ai --start
|
|
72
|
+
|
|
73
|
+
# Icon appears in system tray (bottom right)
|
|
74
|
+
# Copy any text with Ctrl+C as normal
|
|
75
|
+
# Press Ctrl+Alt+S to sanitize clipboard
|
|
76
|
+
# Paste clean text with Ctrl+V
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
---
|
|
80
|
+
|
|
81
|
+
## Example
|
|
82
|
+
|
|
83
|
+
**Input:**
|
|
84
|
+
```
|
|
85
|
+
ERROR 2024-01-15 14:32:01 - Connection failed
|
|
86
|
+
host: db01.prod.internal
|
|
87
|
+
password: myS3cretP@ss123
|
|
88
|
+
aws_access_key_id: AKIAIOSFODNN7EXAMPLE
|
|
89
|
+
aws_account_id: 123456789012
|
|
90
|
+
ip: 10.0.1.45
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
**Output:**
|
|
94
|
+
```
|
|
95
|
+
ERROR 2024-01-15 14:32:01 - Connection failed
|
|
96
|
+
host: [INTERNAL_HOST]
|
|
97
|
+
password: [REDACTED]
|
|
98
|
+
aws_access_key_id: [AWS_ACCESS_KEY]
|
|
99
|
+
aws_account_id: [AWS_ACCOUNT_ID]
|
|
100
|
+
ip: [IP_ADDRESS]
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
**Detection summary (stderr):**
|
|
104
|
+
```
|
|
105
|
+
Detected 5 sensitive value(s): aws_access_key=1, aws_account_id=1, internal_host=1, ipv4=1, password=1
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
---
|
|
109
|
+
|
|
110
|
+
## What Gets Detected
|
|
111
|
+
|
|
112
|
+
| Category | Examples |
|
|
113
|
+
|---|---|
|
|
114
|
+
| AWS credentials | Access keys, secret keys, session tokens |
|
|
115
|
+
| AWS infrastructure | Account IDs, ARNs, S3 URLs |
|
|
116
|
+
| GCP credentials | Service account keys, project IDs |
|
|
117
|
+
| Azure credentials | Subscription IDs, connection strings |
|
|
118
|
+
| Generic secrets | API keys, bearer tokens, JWTs, private keys, hex tokens |
|
|
119
|
+
| Passwords | `password=`, `passwd=`, `pwd=` key-value patterns |
|
|
120
|
+
| Network | IPv4, IPv6, internal hostnames, internal URLs |
|
|
121
|
+
|
|
122
|
+
---
|
|
123
|
+
|
|
124
|
+
## Roadmap
|
|
125
|
+
|
|
126
|
+
- [x] Project setup
|
|
127
|
+
- [x] **v1.0** โ CLI + secrets + cloud + network detection + Windows hotkey + system tray
|
|
128
|
+
- [ ] **v1.1** โ PII detection (emails, phones) via Presidio
|
|
129
|
+
- [ ] **v1.2** โ Watch mode (automatic clipboard monitoring)
|
|
130
|
+
- [ ] **v2.0** โ VS Code extension
|
|
131
|
+
- [ ] **v2.1** โ Browser extension (warns before pasting into ChatGPT)
|
|
132
|
+
- [ ] **v3.0** โ Team policies + audit log
|
|
133
|
+
|
|
134
|
+
---
|
|
135
|
+
|
|
136
|
+
## Contributing
|
|
137
|
+
|
|
138
|
+
Contributions are welcome! Please read [CONTRIBUTING.md](CONTRIBUTING.md) first.
|
|
139
|
+
|
|
140
|
+
```bash
|
|
141
|
+
# Clone
|
|
142
|
+
git clone https://github.com/rajwindermarwaha/scrub-ai
|
|
143
|
+
cd scrub-ai
|
|
144
|
+
|
|
145
|
+
# Install dev dependencies
|
|
146
|
+
pip install -e ".[dev]"
|
|
147
|
+
|
|
148
|
+
# Run tests
|
|
149
|
+
pytest
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
---
|
|
153
|
+
|
|
154
|
+
## License
|
|
155
|
+
|
|
156
|
+
MIT — see [LICENSE](LICENSE)
|
|
157
|
+
|
|
158
|
+
---
|
|
159
|
+
|
|
160
|
+
## Author
|
|
161
|
+
|
|
162
|
+
Built by [@rajwindermarwaha](https://github.com/rajwindermarwaha)
|
|
163
|
+
|
|
164
|
+
> *Built this because I had to put in the extra effort of copying everything into Notepad first and manually scrubbing it before sharing with AI tools. Figured others do the same.*
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "scrub-ai"
|
|
7
|
+
version = "1.0.0"
|
|
8
|
+
description = "Sanitize sensitive content from any text before sharing with AI assistants"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { file = "LICENSE" }
|
|
11
|
+
authors = [{ name = "Rajwinder Marwaha" }]
|
|
12
|
+
requires-python = ">=3.10"
|
|
13
|
+
keywords = ["security", "privacy", "ai", "sanitize", "secrets", "redact"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 4 - Beta",
|
|
16
|
+
"Environment :: Console",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Operating System :: Microsoft :: Windows",
|
|
19
|
+
"Operating System :: POSIX :: Linux",
|
|
20
|
+
"Operating System :: MacOS",
|
|
21
|
+
"Programming Language :: Python :: 3.10",
|
|
22
|
+
"Programming Language :: Python :: 3.11",
|
|
23
|
+
"Programming Language :: Python :: 3.12",
|
|
24
|
+
]
|
|
25
|
+
dependencies = [
|
|
26
|
+
"click>=8.0",
|
|
27
|
+
"pyperclip>=1.8",
|
|
28
|
+
"keyboard>=0.13; sys_platform == 'win32'",
|
|
29
|
+
"pystray>=0.19; sys_platform == 'win32'",
|
|
30
|
+
"Pillow>=10.0; sys_platform == 'win32'",
|
|
31
|
+
"win10toast>=0.9; sys_platform == 'win32'",
|
|
32
|
+
]
|
|
33
|
+
|
|
34
|
+
[project.urls]
|
|
35
|
+
Homepage = "https://github.com/rajwindermarwaha/scrub-ai"
|
|
36
|
+
Repository = "https://github.com/rajwindermarwaha/scrub-ai"
|
|
37
|
+
Issues = "https://github.com/rajwindermarwaha/scrub-ai/issues"
|
|
38
|
+
|
|
39
|
+
[project.optional-dependencies]
|
|
40
|
+
dev = ["pytest>=7.0", "pytest-cov"]
|
|
41
|
+
|
|
42
|
+
[project.scripts]
|
|
43
|
+
scrub-ai = "scrub_ai.cli:main"
|
|
44
|
+
|
|
45
|
+
[tool.setuptools.packages.find]
|
|
46
|
+
where = ["."]
|
|
47
|
+
include = ["scrub_ai*"]
|
|
48
|
+
|
|
49
|
+
[tool.pytest.ini_options]
|
|
50
|
+
testpaths = ["tests"]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.0.0"
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import click
|
|
7
|
+
import pyperclip
|
|
8
|
+
|
|
9
|
+
from scrub_ai.sanitizer import sanitize_text
|
|
10
|
+
from scrub_ai import config as cfg
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _load_input(file_path: str | None) -> str:
|
|
14
|
+
if file_path is not None:
|
|
15
|
+
return Path(file_path).read_text(encoding="utf-8")
|
|
16
|
+
|
|
17
|
+
if not sys.stdin.isatty():
|
|
18
|
+
return sys.stdin.read()
|
|
19
|
+
|
|
20
|
+
raise click.ClickException("No input provided. Use --file or pipe text via stdin.")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _format_report(report: dict[str, object]) -> str:
|
|
24
|
+
total = int(report.get("total_matches", 0))
|
|
25
|
+
if total == 0:
|
|
26
|
+
return "No sensitive content detected."
|
|
27
|
+
|
|
28
|
+
by_label = report.get("by_label", {})
|
|
29
|
+
if isinstance(by_label, dict) and by_label:
|
|
30
|
+
details = ", ".join(f"{label}={count}" for label, count in sorted(by_label.items()))
|
|
31
|
+
return f"Detected {total} sensitive value(s): {details}"
|
|
32
|
+
|
|
33
|
+
return f"Detected {total} sensitive value(s)."
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@click.command()
@click.option("--file", "file_path", type=click.Path(exists=True, dir_okay=False, path_type=str), help="Read input from a file.")
@click.option("--dry-run", is_flag=True, help="Show detections but do not modify the output text.")
@click.option("--copy", "copy_output", is_flag=True, help="Copy output text to clipboard.")
@click.option("--start", is_flag=True, help="Start background hotkey listener and system tray (Windows only).")
def main(file_path: str | None, dry_run: bool, copy_output: bool, start: bool) -> None:
    """Sanitize sensitive content from text."""
    # NOTE: the docstring above is the CLI help text shown by click; keep it short.

    # --start takes over entirely: the other options are ignored and the
    # process hands control to the tray module.
    if start:
        if sys.platform != "win32":
            raise click.ClickException("--start is only supported on Windows.")
        # Imported lazily, and only after the platform check has passed.
        from scrub_ai import tray
        click.echo("scrub-ai running. Press Ctrl+Alt+S to sanitize clipboard. Right-click the tray icon to quit.", err=True)
        tray.start()
        return

    raw_text = _load_input(file_path)
    scrubbed_text, report = sanitize_text(raw_text)

    # A dry run still performs detection (for the report) but echoes the
    # untouched input.
    output_text = raw_text if dry_run else scrubbed_text
    sys.stdout.write(output_text)

    # The report goes to stderr so stdout carries only the text itself.
    click.echo("", err=True)
    summary = _format_report(report)
    click.echo(f"Dry run: {summary}" if dry_run else summary, err=True)

    if not copy_output:
        return
    try:
        pyperclip.copy(output_text)
        click.echo("Copied output to clipboard.", err=True)
    except pyperclip.PyperclipException as exc:
        raise click.ClickException(f"Clipboard copy failed: {exc}") from exc


# Allow running the module directly as a script.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"""
|
|
2
|
+
config.py — Persistent user configuration for scrub-ai.
|
|
3
|
+
|
|
4
|
+
Settings are stored in a JSON file:
|
|
5
|
+
- Windows: %APPDATA%\\scrub-ai\\config.json
|
|
6
|
+
- Linux/macOS: ~/.config/scrub-ai/config.json
|
|
7
|
+
|
|
8
|
+
Only a small set of knobs exist in v1:
|
|
9
|
+
- enabled (bool) — whether the hotkey listener is active
|
|
10
|
+
- hotkey (str) — the keyboard shortcut string (default "ctrl+alt+s")
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import json
|
|
16
|
+
import os
|
|
17
|
+
import sys
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
_DEFAULTS: dict[str, object] = {
|
|
22
|
+
"enabled": True,
|
|
23
|
+
"hotkey": "ctrl+alt+s",
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _config_dir() -> Path:
|
|
28
|
+
if sys.platform == "win32":
|
|
29
|
+
appdata = os.environ.get("APPDATA") or Path.home() / "AppData" / "Roaming"
|
|
30
|
+
return Path(appdata) / "scrub-ai"
|
|
31
|
+
return Path.home() / ".config" / "scrub-ai"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _config_path() -> Path:
    """Return the full path of ``config.json`` inside the config directory."""
    return _config_dir().joinpath("config.json")
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def load() -> dict[str, object]:
    """Return the current config, falling back to defaults for any missing key.

    The config file is read as UTF-8 JSON and layered over ``_DEFAULTS``.
    A fresh copy of the defaults is returned whenever the file cannot be used:
    it is missing or unreadable, is not valid UTF-8, is not valid JSON, or its
    top-level JSON value is not an object.

    Returns:
        A new dict the caller may mutate freely.
    """
    path = _config_path()
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError: missing or unreadable file. ValueError: covers both
        # json.JSONDecodeError and UnicodeDecodeError, which are subclasses.
        return dict(_DEFAULTS)

    # Valid JSON need not be an object (e.g. `[]` or `"x"`); unpacking such a
    # value with ** would raise TypeError, so treat it like a corrupt file.
    if not isinstance(data, dict):
        return dict(_DEFAULTS)
    return {**_DEFAULTS, **data}
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def save(config: dict[str, object]) -> None:
    """Write *config* to the config file as indented JSON (UTF-8).

    The parent directory is created first if it does not exist yet.

    Args:
        config: The settings mapping to serialize with ``json.dumps``.
    """
    target = _config_path()
    target.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(config, indent=2)
    target.write_text(serialized, encoding="utf-8")
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def set_enabled(value: bool) -> None:
    """Store *value* under the ``"enabled"`` key and persist the config.

    The current config is loaded first, so every other setting is written
    back unchanged.
    """
    settings = load()
    settings.update(enabled=value)
    save(settings)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def is_enabled() -> bool:
    """Return the truthiness of the stored ``"enabled"`` setting (default True)."""
    settings = load()
    return bool(settings.get("enabled", True))
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def get_hotkey() -> str:
    """Return the configured hotkey string, or the built-in default if unset."""
    settings = load()
    hotkey = settings.get("hotkey", _DEFAULTS["hotkey"])
    return str(hotkey)
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
import re
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@dataclass
class Match:
    """One detected span of text together with what should replace it."""

    # Offsets of the hit within the scanned text (``end`` is exclusive, as
    # produced by ``re.Match.start()`` / ``re.Match.end()`` in BaseDetector).
    start: int
    end: int
    # The matched text exactly as it appeared.
    original: str
    # Placeholder to substitute for ``original``.
    replacement: str
    # Name of the detector that produced the hit.
    category: str
    # Pattern-specific label for the hit.
    label: str
    # NOTE(review): nothing in this file sets a value other than the default.
    confidence: float = 1.0
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class BaseDetector:
    """Regex-driven detector; subclasses supply ``name`` and ``patterns``."""

    # Copied into ``Match.category`` for every hit.
    name: str = ""
    # Not read by this class. NOTE(review): presumably an ordering hint used
    # by whatever combines several detectors; confirm against the caller.
    priority: int = 99
    patterns: list[tuple[re.Pattern, str, str]] = []  # (pattern, replacement, label)

    def detect(self, text: str) -> list[Match]:
        """Return a ``Match`` for every hit of every pattern in *text*.

        Results are grouped by pattern in ``patterns`` order, and within a
        pattern follow ``finditer`` order. Each ``Match`` records the span and
        text of the whole regex match; capture groups are not consulted.
        """
        return [
            Match(
                start=hit.start(),
                end=hit.end(),
                original=hit.group(),
                replacement=replacement,
                category=self.name,
                label=label,
            )
            for regex, replacement, label in self.patterns
            for hit in regex.finditer(text)
        ]
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
import re
|
|
3
|
+
from .base import BaseDetector
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class CloudDetector(BaseDetector):
    """Detects AWS, GCP and Azure identifiers and credentials.

    Each entry in ``patterns`` is ``(compiled regex, replacement, label)``,
    the shape consumed by ``BaseDetector.detect``.
    """

    name = "cloud"
    priority = 2
    patterns = [
        # -- AWS ---------------------------------------------------------------

        # AWS Access Key IDs (AKIA / ASIA / AROA / AIDA / ANPA / ANVA / APKA prefixes)
        (re.compile(r"\b(?:AKIA|ASIA|AROA|AIDA|ANPA|ANVA|APKA)[0-9A-Z]{16}\b"), "[AWS_ACCESS_KEY_ID]", "aws_access_key_id"),

        # AWS Secret Access Keys: 40-char base64 in key=value context
        (re.compile(r"(?i)aws[_-]?secret[_-]?access[_-]?key\s*[=:]\s*['\"]?([A-Za-z0-9+/]{40})['\"]?"), "[AWS_SECRET_ACCESS_KEY]", "aws_secret_access_key"),

        # AWS Account IDs: 12-digit numbers in account_id= / aws_account= context
        (re.compile(r"(?i)(?:account[_-]?id|aws[_-]?account)\s*[=:]\s*['\"]?(\d{12})['\"]?"), "[AWS_ACCOUNT_ID]", "aws_account_id"),

        # ARNs: arn:aws:service:region:account-id:resource
        (re.compile(r"\barn:aws[a-z0-9-]*:[a-z0-9\-]*:[a-z0-9\-]*:\d{12}:[^\s\"']+"), "[AWS_ARN]", "aws_arn"),

        # AWS Session Tokens (base64, at least 100 chars, in token= context).
        # There is deliberately no upper bound: a capped quantifier would match
        # only a prefix of a longer token and leave the tail unredacted.
        (re.compile(r"(?i)(?:aws[_-]?session[_-]?token|session[_-]?token)\s*[=:]\s*['\"]?([A-Za-z0-9+/=]{100,})['\"]?"), "[AWS_SESSION_TOKEN]", "aws_session_token"),

        # -- GCP ---------------------------------------------------------------

        # GCP API keys (AIza prefix, 39 chars total)
        (re.compile(r"\bAIza[0-9A-Za-z\-_]{35}\b"), "[GCP_API_KEY]", "gcp_api_key"),

        # GCP service account email
        (re.compile(r"\b[a-z0-9\-]+@[a-z0-9\-]+\.iam\.gserviceaccount\.com\b"), "[GCP_SERVICE_ACCOUNT]", "gcp_service_account"),

        # GCP project IDs in project_id= / gcp_project= context
        (re.compile(r"(?i)(?:project[_-]?id|gcp[_-]?project)\s*[=:]\s*['\"]?([a-z][a-z0-9\-]{4,28}[a-z0-9])['\"]?"), "[GCP_PROJECT_ID]", "gcp_project_id"),

        # -- Azure -------------------------------------------------------------

        # Azure Subscription / Tenant / Client IDs (UUIDs in context)
        (re.compile(r"(?i)(?:subscription[_-]?id|tenant[_-]?id|client[_-]?id)\s*[=:]\s*['\"]?([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})['\"]?"), "[AZURE_ID]", "azure_id"),

        # Azure Client Secrets (34+ char random string in client_secret= context)
        (re.compile(r"(?i)client[_-]?secret\s*[=:]\s*['\"]?([A-Za-z0-9~._\-]{34,})['\"]?"), "[AZURE_CLIENT_SECRET]", "azure_client_secret"),

        # Azure Storage connection strings. The trailing ";..." section is
        # optional so that a string ending with AccountKey is matched too.
        (re.compile(r"DefaultEndpointsProtocol=https;AccountName=[^;]+;AccountKey=[A-Za-z0-9+/=]{88}(?:;[^\s\"']*)?"), "[AZURE_STORAGE_CONNECTION_STRING]", "azure_storage_connection_string"),

        # Azure SAS tokens (sv=...&sig=... in URL or standalone)
        (re.compile(r"(?i)(?:sv|se|sr|sp|sig)=[A-Za-z0-9%+/=]+(?:&(?:sv|se|sr|sp|sig)=[A-Za-z0-9%+/=]+){3,}"), "[AZURE_SAS_TOKEN]", "azure_sas_token"),
    ]
|