agentsweep 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. agentsweep-0.1.0/.gitignore +36 -0
  2. agentsweep-0.1.0/LICENSE +21 -0
  3. agentsweep-0.1.0/PKG-INFO +139 -0
  4. agentsweep-0.1.0/README.md +111 -0
  5. agentsweep-0.1.0/pyproject.toml +63 -0
  6. agentsweep-0.1.0/src/agentsweep/__init__.py +1 -0
  7. agentsweep-0.1.0/src/agentsweep/bip39_words.py +2055 -0
  8. agentsweep-0.1.0/src/agentsweep/cli.py +130 -0
  9. agentsweep-0.1.0/src/agentsweep/ignore.py +75 -0
  10. agentsweep-0.1.0/src/agentsweep/menu.py +139 -0
  11. agentsweep-0.1.0/src/agentsweep/mnemonic.py +118 -0
  12. agentsweep-0.1.0/src/agentsweep/pipeline.py +439 -0
  13. agentsweep-0.1.0/src/agentsweep/preflight.py +80 -0
  14. agentsweep-0.1.0/src/agentsweep/redactor.py +183 -0
  15. agentsweep-0.1.0/src/agentsweep/scanner.py +859 -0
  16. agentsweep-0.1.0/src/agentsweep/sources.py +181 -0
  17. agentsweep-0.1.0/src/agentsweep/tips.py +32 -0
  18. agentsweep-0.1.0/src/agentsweep/ui/__init__.py +35 -0
  19. agentsweep-0.1.0/src/agentsweep/ui/banner.py +278 -0
  20. agentsweep-0.1.0/src/agentsweep/ui/console.py +73 -0
  21. agentsweep-0.1.0/src/agentsweep/ui/progress.py +188 -0
  22. agentsweep-0.1.0/src/agentsweep/ui/shutdown.py +54 -0
  23. agentsweep-0.1.0/src/agentsweep/ui/widgets.py +166 -0
  24. agentsweep-0.1.0/tests/fixtures/claude-code/sample.jsonl +4 -0
  25. agentsweep-0.1.0/tests/fixtures/codex/rollout-sample.jsonl +4 -0
  26. agentsweep-0.1.0/tests/test_cli_gates.py +59 -0
  27. agentsweep-0.1.0/tests/test_codex_source.py +108 -0
  28. agentsweep-0.1.0/tests/test_ignore.py +548 -0
  29. agentsweep-0.1.0/tests/test_mnemonic.py +115 -0
  30. agentsweep-0.1.0/tests/test_output.py +376 -0
  31. agentsweep-0.1.0/tests/test_ported_rules.py +234 -0
  32. agentsweep-0.1.0/tests/test_preflight.py +45 -0
  33. agentsweep-0.1.0/tests/test_redactor.py +162 -0
  34. agentsweep-0.1.0/tests/test_scan_performance.py +116 -0
  35. agentsweep-0.1.0/tests/test_sources.py +71 -0
  36. agentsweep-0.1.0/tests/test_ui_output.py +500 -0
  37. agentsweep-0.1.0/tests/test_verbs.py +458 -0
@@ -0,0 +1,36 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ *.so
5
+ .Python
6
+ build/
7
+ dist/
8
+ *.egg-info/
9
+ *.egg
10
+ .pytest_cache/
11
+ .mypy_cache/
12
+ .ruff_cache/
13
+ .venv/
14
+ venv/
15
+ env/
16
+ .coverage
17
+ htmlcov/
18
+ *.bak
19
+ .idea/
20
+ .vscode/
21
+ *.swp
22
+ .DS_Store
23
+ Thumbs.db
24
+
25
+ # agentsweep's own default output files (anti-flood / -o defaults)
26
+ agentsweep-report.txt
27
+ agentsweep-findings.json
28
+ *.stackdump
29
+
30
+ # Scratch/debug scripts (often contain unsplit fake secrets that trip
31
+ # push protection) — keep them out of the repo entirely.
32
+ # Note: _[a-z]*.py (not _*.py) so dunder files like __init__.py are NOT
33
+ # matched — the broader _*.py glob silently dropped packages from the built wheel.
34
+ debug_*.py
35
+ verify_*.py
36
+ _[a-z]*.py
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 agentsweep contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,139 @@
1
+ Metadata-Version: 2.4
2
+ Name: agentsweep
3
+ Version: 0.1.0
4
+ Summary: Find and redact secrets in AI coding agent histories (Claude Code, and more).
5
+ Project-URL: Homepage, https://github.com/Ishannaik/agent-sweep
6
+ Project-URL: Issues, https://github.com/Ishannaik/agent-sweep/issues
7
+ Author: agentsweep contributors
8
+ License: MIT
9
+ License-File: LICENSE
10
+ Keywords: claude-code,dlp,llm,redaction,secrets,security
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Environment :: Console
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Security
21
+ Classifier: Topic :: Software Development
22
+ Requires-Python: >=3.11
23
+ Requires-Dist: pyahocorasick>=2.0
24
+ Requires-Dist: rich>=13.0
25
+ Provides-Extra: dev
26
+ Requires-Dist: pytest>=7.0; extra == 'dev'
27
+ Description-Content-Type: text/markdown
28
+
29
+ # AgentSweep
30
+
31
+ ![AgentSweep](docs/wordmark.jpg)
32
+
33
+ > Find and redact secrets (API keys, tokens, private keys, DB URLs) that got pasted into your AI coding agent's local history.
34
+
35
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
36
+ [![Python 3.11+](https://img.shields.io/badge/python-3.11%2B-blue.svg)](https://www.python.org/downloads/)
37
+ [![Status: Alpha](https://img.shields.io/badge/status-alpha-orange.svg)](https://github.com/Ishannaik/agent-sweep)
38
+
39
+ **Status:** alpha. Works on **Claude Code** and **OpenAI Codex** today. Aider, Cursor, and Continue can be added via contributed `Source` adapters — see [CONTRIBUTING.md](CONTRIBUTING.md).
40
+
41
+ ## The problem
42
+
43
+ Claude Code (and every other AI coding CLI) stores your full conversation history as plain-text JSONL on disk — under `~/.claude/projects/` for Claude Code, `~/.codex/sessions/` for OpenAI Codex. Anything you paste — an AWS key, a `.env` file, a database URL — sits in clear text indefinitely. A typical dev's history accumulates dozens of secrets over months, often without them realizing.
44
+
45
+ `agentsweep` scans that history, tells you what leaked, and can redact the secret values in place while preserving the JSONL structure byte-for-byte. It also tells you which keys to rotate, with the right revocation URL for each provider.
46
+
47
+ ## Install
48
+
49
+ ```
50
+ pip install agentsweep
51
+ ```
52
+
53
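+ Requires Python 3.11+. Two dependencies: [`rich`](https://github.com/Textualize/rich), for the pipeline terminal UI, and [`pyahocorasick`](https://pypi.org/project/pyahocorasick/), for the keyword pre-filter that speeds up scanning. Output degrades to plain text automatically when piped, and `--json` is always styling-free.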
54
+
55
+ ## Usage
56
+
57
+ ### Interactive mode
58
+
59
+ Run with no arguments in a terminal and you get the full experience — banner,
60
+ numbered menu, typed confirmations before anything destructive, and one-key
61
+ undo (restores the `.bak` backups). Any interactive scan that finds secrets
62
+ ends with an offer to redact them on the spot (type `REDACT` to confirm):
63
+
64
+ ```
65
+ agentsweep
66
+ ```
67
+
68
+ Scripting is unaffected: any flag, or a piped/redirected stream, skips the
69
+ menu entirely and behaves exactly as documented below.
70
+
71
+ ### Flags
72
+
73
+ Scan (read-only, safe):
74
+
75
+ ```
76
+ agentsweep --source claude-code
77
+ agentsweep --source codex
78
+ ```
79
+
80
+ Redact in place (creates `.bak` backups):
81
+
82
+ ```
83
+ agentsweep --fix --allow-production
84
+ ```
85
+
86
+ Point at an arbitrary folder (e.g. a copy of your history):
87
+
88
+ ```
89
+ agentsweep --root /path/to/jsonl-files --fix
90
+ ```
91
+
92
+ Machine-readable output:
93
+
94
+ ```
95
+ agentsweep --json
96
+ ```
97
+
98
+ ## Corruption-prevention guarantees
99
+
100
+ A redactor that corrupts your history is strictly worse than the leak it's fixing. agentsweep enforces these invariants on every `--fix`:
101
+
102
+ 1. **Redaction happens in parsed JSON, not on raw bytes.** Secrets are replaced as string *values* inside the parsed structure, then re-serialized. Structural damage is impossible by construction.
103
+ 2. **Atomic writes.** Every rewrite goes: temp file → `fsync()` → `os.replace()` over the original. A crash at any instant leaves either the complete old file or the complete new file — never a torn write.
104
+ 3. **Post-write validation.** Before committing, every non-empty line in the new content must parse as JSON, and the line count must match the original. If either check fails, the write aborts and the original is untouched.
105
+ 4. **`.bak` backup by default.** Refuses to run if a `.bak` already exists (so prior backups can't be clobbered).
106
+ 5. **Path containment.** Refuses any target that doesn't resolve inside the source's root.
107
+ 6. **Symlink rejection.** Refuses symlinks outright.
108
+ 7. **mtime window.** Refuses files modified in the last 60 seconds (likely an active session). `--force` overrides.
109
+ 8. **Running-process check.** Refuses if a Claude Code process appears to be running. `--force` overrides.
110
+ 9. **Alpha-stage production gate.** `--fix` against the default `~/.claude/projects/` root requires `--allow-production` until v1.0.
111
+ 10. **Audit log.** Every write appends SHA256 before/after and path to `~/.claude/agentsweep-audit.jsonl`.
112
+
113
+ ## Recovery
114
+
115
+ Every redacted file has a sibling `*.bak` with the original bytes. To undo:
116
+
117
+ ```
118
+ mv session.jsonl.bak session.jsonl
119
+ ```
120
+
121
+ ## What's detected
122
+
123
+ 189 high-confidence patterns **plus a checksum-validated crypto seed-phrase detector** — BIP-39 mnemonics (12/15/18/21/24 words; the wallet format behind BTC, ETH, SOL, BNB, ADA, DOGE, LTC, DOT, AVAX and virtually every major chain) and Electrum seeds are confirmed cryptographically (BIP-39 checksum / Electrum version tag), so English prose that happens to use wallet words never false-positives.
124
+
125
+ The patterns: AWS access keys, GitHub tokens (PAT/OAuth/App/fine-grained), Stripe live/test, OpenAI, Anthropic, Google API, Slack bot/user/webhook, Hugging Face, JWT, PEM private keys, DB URLs with embedded passwords, npm/PyPI/SendGrid/Twilio tokens — plus 167 rules ported from the [gitleaks](https://github.com/gitleaks/gitleaks) pack covering GitLab, Grafana, HashiCorp Vault/Terraform, DigitalOcean, Shopify, PlanetScale, Databricks, Atlassian, Azure AD, 1Password, Sentry, New Relic, Mailgun, Datadog, Twilio, Twitter/X, Twitch, Yandex, JFrog, Snyk, Mailchimp, curl credentials on the command line, and many more. Patterns are high-precision — false positives are rare, and provider-context rules are keyword-gated so large pastes stay fast.
126
+
127
+ ## What's NOT detected
128
+
129
+ - Custom/proprietary secrets without a recognizable prefix.
130
+ - Monero seed phrases (25 words from Monero's own wordlist — planned).
131
+ - Unknown tokens that look like arbitrary base64.
132
+ - Secrets split across multiple messages.
133
+ - Anything inside a binary/non-UTF-8 file.
134
+
135
+ For deeper detection, run `gitleaks` or `trufflehog` alongside agentsweep — their rule packs are more exhaustive. agentsweep's value is the **agent-history-specific surface**, not the detection engine.
136
+
137
+ ## License
138
+
139
+ MIT. See LICENSE.
@@ -0,0 +1,111 @@
1
+ # AgentSweep
2
+
3
+ ![AgentSweep](docs/wordmark.jpg)
4
+
5
+ > Find and redact secrets (API keys, tokens, private keys, DB URLs) that got pasted into your AI coding agent's local history.
6
+
7
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
8
+ [![Python 3.11+](https://img.shields.io/badge/python-3.11%2B-blue.svg)](https://www.python.org/downloads/)
9
+ [![Status: Alpha](https://img.shields.io/badge/status-alpha-orange.svg)](https://github.com/Ishannaik/agent-sweep)
10
+
11
+ **Status:** alpha. Works on **Claude Code** and **OpenAI Codex** today. Aider, Cursor, and Continue can be added via contributed `Source` adapters — see [CONTRIBUTING.md](CONTRIBUTING.md).
12
+
13
+ ## The problem
14
+
15
+ Claude Code (and every other AI coding CLI) stores your full conversation history as plain-text JSONL on disk — under `~/.claude/projects/` for Claude Code, `~/.codex/sessions/` for OpenAI Codex. Anything you paste — an AWS key, a `.env` file, a database URL — sits in clear text indefinitely. A typical dev's history accumulates dozens of secrets over months, often without them realizing.
16
+
17
+ `agentsweep` scans that history, tells you what leaked, and can redact the secret values in place while preserving the JSONL structure byte-for-byte. It also tells you which keys to rotate, with the right revocation URL for each provider.
18
+
19
+ ## Install
20
+
21
+ ```
22
+ pip install agentsweep
23
+ ```
24
+
25
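+ Requires Python 3.11+. Two dependencies: [`rich`](https://github.com/Textualize/rich), for the pipeline terminal UI, and [`pyahocorasick`](https://pypi.org/project/pyahocorasick/), for the keyword pre-filter that speeds up scanning. Output degrades to plain text automatically when piped, and `--json` is always styling-free.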
26
+
27
+ ## Usage
28
+
29
+ ### Interactive mode
30
+
31
+ Run with no arguments in a terminal and you get the full experience — banner,
32
+ numbered menu, typed confirmations before anything destructive, and one-key
33
+ undo (restores the `.bak` backups). Any interactive scan that finds secrets
34
+ ends with an offer to redact them on the spot (type `REDACT` to confirm):
35
+
36
+ ```
37
+ agentsweep
38
+ ```
39
+
40
+ Scripting is unaffected: any flag, or a piped/redirected stream, skips the
41
+ menu entirely and behaves exactly as documented below.
42
+
43
+ ### Flags
44
+
45
+ Scan (read-only, safe):
46
+
47
+ ```
48
+ agentsweep --source claude-code
49
+ agentsweep --source codex
50
+ ```
51
+
52
+ Redact in place (creates `.bak` backups):
53
+
54
+ ```
55
+ agentsweep --fix --allow-production
56
+ ```
57
+
58
+ Point at an arbitrary folder (e.g. a copy of your history):
59
+
60
+ ```
61
+ agentsweep --root /path/to/jsonl-files --fix
62
+ ```
63
+
64
+ Machine-readable output:
65
+
66
+ ```
67
+ agentsweep --json
68
+ ```
69
+
70
+ ## Corruption-prevention guarantees
71
+
72
+ A redactor that corrupts your history is strictly worse than the leak it's fixing. agentsweep enforces these invariants on every `--fix`:
73
+
74
+ 1. **Redaction happens in parsed JSON, not on raw bytes.** Secrets are replaced as string *values* inside the parsed structure, then re-serialized. Structural damage is impossible by construction.
75
+ 2. **Atomic writes.** Every rewrite goes: temp file → `fsync()` → `os.replace()` over the original. A crash at any instant leaves either the complete old file or the complete new file — never a torn write.
76
+ 3. **Post-write validation.** Before committing, every non-empty line in the new content must parse as JSON, and the line count must match the original. If either check fails, the write aborts and the original is untouched.
77
+ 4. **`.bak` backup by default.** Refuses to run if a `.bak` already exists (so prior backups can't be clobbered).
78
+ 5. **Path containment.** Refuses any target that doesn't resolve inside the source's root.
79
+ 6. **Symlink rejection.** Refuses symlinks outright.
80
+ 7. **mtime window.** Refuses files modified in the last 60 seconds (likely an active session). `--force` overrides.
81
+ 8. **Running-process check.** Refuses if a Claude Code process appears to be running. `--force` overrides.
82
+ 9. **Alpha-stage production gate.** `--fix` against the default `~/.claude/projects/` root requires `--allow-production` until v1.0.
83
+ 10. **Audit log.** Every write appends SHA256 before/after and path to `~/.claude/agentsweep-audit.jsonl`.
84
+
85
+ ## Recovery
86
+
87
+ Every redacted file has a sibling `*.bak` with the original bytes. To undo:
88
+
89
+ ```
90
+ mv session.jsonl.bak session.jsonl
91
+ ```
92
+
93
+ ## What's detected
94
+
95
+ 189 high-confidence patterns **plus a checksum-validated crypto seed-phrase detector** — BIP-39 mnemonics (12/15/18/21/24 words; the wallet format behind BTC, ETH, SOL, BNB, ADA, DOGE, LTC, DOT, AVAX and virtually every major chain) and Electrum seeds are confirmed cryptographically (BIP-39 checksum / Electrum version tag), so English prose that happens to use wallet words never false-positives.
96
+
97
+ The patterns: AWS access keys, GitHub tokens (PAT/OAuth/App/fine-grained), Stripe live/test, OpenAI, Anthropic, Google API, Slack bot/user/webhook, Hugging Face, JWT, PEM private keys, DB URLs with embedded passwords, npm/PyPI/SendGrid/Twilio tokens — plus 167 rules ported from the [gitleaks](https://github.com/gitleaks/gitleaks) pack covering GitLab, Grafana, HashiCorp Vault/Terraform, DigitalOcean, Shopify, PlanetScale, Databricks, Atlassian, Azure AD, 1Password, Sentry, New Relic, Mailgun, Datadog, Twilio, Twitter/X, Twitch, Yandex, JFrog, Snyk, Mailchimp, curl credentials on the command line, and many more. Patterns are high-precision — false positives are rare, and provider-context rules are keyword-gated so large pastes stay fast.
98
+
99
+ ## What's NOT detected
100
+
101
+ - Custom/proprietary secrets without a recognizable prefix.
102
+ - Monero seed phrases (25 words from Monero's own wordlist — planned).
103
+ - Unknown tokens that look like arbitrary base64.
104
+ - Secrets split across multiple messages.
105
+ - Anything inside a binary/non-UTF-8 file.
106
+
107
+ For deeper detection, run `gitleaks` or `trufflehog` alongside agentsweep — their rule packs are more exhaustive. agentsweep's value is the **agent-history-specific surface**, not the detection engine.
108
+
109
+ ## License
110
+
111
+ MIT. See LICENSE.
@@ -0,0 +1,63 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "agentsweep"
7
+ version = "0.1.0"
8
+ description = "Find and redact secrets in AI coding agent histories (Claude Code, and more)."
9
+ readme = { file = "README.md", content-type = "text/markdown" }
10
+ license = { text = "MIT" }
11
+ # 3.11 floor: ported gitleaks rules use scoped inline flags like (?i:...),
12
+ # which Python's re only supports from 3.11. NOTE(review): the re docs list (?i:...) as new in 3.6 — confirm the actual 3.11 requirement.
13
+ requires-python = ">=3.11"
14
+ authors = [{ name = "agentsweep contributors" }]
15
+ keywords = ["security", "secrets", "dlp", "claude-code", "llm", "redaction"]
16
+ classifiers = [
17
+ "Development Status :: 3 - Alpha",
18
+ "Environment :: Console",
19
+ "Intended Audience :: Developers",
20
+ "License :: OSI Approved :: MIT License",
21
+ "Operating System :: OS Independent",
22
+ "Programming Language :: Python :: 3",
23
+ "Programming Language :: Python :: 3.11",
24
+ "Programming Language :: Python :: 3.12",
25
+ "Programming Language :: Python :: 3.13",
26
+ "Topic :: Security",
27
+ "Topic :: Software Development",
28
+ ]
29
+ # pyahocorasick: single-pass keyword pre-filter (~4.7x scan speedup). Ships
30
+ # prebuilt wheels for Linux (manylinux x86_64/aarch64), macOS (x86_64/arm64)
31
+ # and Windows (amd64) on CPython 3.8-3.13 — no compiler needed on install.
32
+ # scanner.py degrades to a pure-Python substring fallback if it's absent.
33
+ dependencies = ["rich>=13.0", "pyahocorasick>=2.0"]
34
+
35
+ [project.optional-dependencies]
36
+ dev = ["pytest>=7.0"]
37
+
38
+ [project.scripts]
39
+ agentsweep = "agentsweep.cli:main"
40
+
41
+ [project.urls]
42
+ Homepage = "https://github.com/Ishannaik/agent-sweep"
43
+ Issues = "https://github.com/Ishannaik/agent-sweep/issues"
44
+
45
+ [tool.hatch.build.targets.wheel]
46
+ packages = ["src/agentsweep"]
47
+
48
+ [tool.hatch.build.targets.wheel.force-include]
49
+ "src/agentsweep/__init__.py" = "agentsweep/__init__.py"
50
+ "src/agentsweep/ui/__init__.py" = "agentsweep/ui/__init__.py"
51
+
52
+ [tool.hatch.build.targets.sdist]
53
+ include = [
54
+ "/src",
55
+ "/tests",
56
+ "/LICENSE",
57
+ "/README.md",
58
+ "/pyproject.toml",
59
+ ]
60
+
61
+ [tool.hatch.build.targets.sdist.force-include]
62
+ "src/agentsweep/__init__.py" = "src/agentsweep/__init__.py"
63
+ "src/agentsweep/ui/__init__.py" = "src/agentsweep/ui/__init__.py"
@@ -0,0 +1 @@
1
+ __version__ = "0.1.0"