fix-cli 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent.py +232 -0
- client.py +160 -0
- contract.py +293 -0
- crypto.py +64 -0
- fix.py +2596 -0
- fix_cli-0.4.0.dist-info/METADATA +108 -0
- fix_cli-0.4.0.dist-info/RECORD +20 -0
- fix_cli-0.4.0.dist-info/WHEEL +5 -0
- fix_cli-0.4.0.dist-info/entry_points.txt +2 -0
- fix_cli-0.4.0.dist-info/licenses/LICENSE +21 -0
- fix_cli-0.4.0.dist-info/top_level.txt +8 -0
- protocol.py +142 -0
- scrubber.py +355 -0
- server/__init__.py +8 -0
- server/app.py +1142 -0
- server/escrow.py +391 -0
- server/judge.py +269 -0
- server/nano.py +164 -0
- server/reputation.py +142 -0
- server/store.py +154 -0
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: fix-cli
|
|
3
|
+
Version: 0.4.0
|
|
4
|
+
Summary: AI-powered command fixer with contract-based dispute resolution
|
|
5
|
+
Author-email: Karan Sharma <karans4@protonmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Requires-Python: >=3.10
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Requires-Dist: httpx>=0.24
|
|
11
|
+
Provides-Extra: server
|
|
12
|
+
Requires-Dist: fastapi>=0.100; extra == "server"
|
|
13
|
+
Requires-Dist: uvicorn>=0.20; extra == "server"
|
|
14
|
+
Requires-Dist: starlette>=0.27; extra == "server"
|
|
15
|
+
Dynamic: license-file
|
|
16
|
+
|
|
17
|
+
# fix
|
|
18
|
+
|
|
19
|
+
AI-powered command fixer. A command fails, an LLM diagnoses it, proposes a fix, and a contract system tracks the whole thing. Disputes go to an AI judge.
|
|
20
|
+
|
|
21
|
+
## Quick start
|
|
22
|
+
|
|
23
|
+
```sh
|
|
24
|
+
pip install git+https://github.com/karans4/fix.git
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
### Local mode (you need an API key)
|
|
28
|
+
|
|
29
|
+
```sh
|
|
30
|
+
export ANTHROPIC_API_KEY=sk-ant-... # or OPENAI_API_KEY, or run Ollama
|
|
31
|
+
|
|
32
|
+
fix "gcc foo.c" # run command, fix if it fails
|
|
33
|
+
fix it # fix the last failed command
|
|
34
|
+
fix --explain "make" # just explain the error
|
|
35
|
+
fix --dry-run "make" # show fix without running
|
|
36
|
+
fix --local "make" # force Ollama (free, local)
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
### Remote mode (free platform agent)
|
|
40
|
+
|
|
41
|
+
Post a contract to the platform. A free AI agent picks it up and proposes a fix.
|
|
42
|
+
|
|
43
|
+
```sh
|
|
44
|
+
fix --remote "gcc foo.c"
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
Platform: `https://fix.notruefireman.org` (free during testing)
|
|
48
|
+
|
|
49
|
+
Configure in `~/.fix/config.py`:
|
|
50
|
+
```python
|
|
51
|
+
platform_url = "https://fix.notruefireman.org"
|
|
52
|
+
remote = True # default to remote mode
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## Shell integration
|
|
56
|
+
|
|
57
|
+
For `fix it` / `fix !!` to work, add to your shell config:
|
|
58
|
+
|
|
59
|
+
```sh
|
|
60
|
+
# bash/zsh
|
|
61
|
+
eval "$(fix shell)"
|
|
62
|
+
|
|
63
|
+
# fish
|
|
64
|
+
fix shell fish | source
|
|
65
|
+
|
|
66
|
+
# or auto-install
|
|
67
|
+
fix shell --install
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## Safe mode (sandbox)
|
|
71
|
+
|
|
72
|
+
Default on Linux. Fixes run inside an OverlayFS sandbox -- changes are committed only if verification passes.
|
|
73
|
+
|
|
74
|
+
```sh
|
|
75
|
+
fix "make build" # sandbox on Linux by default
|
|
76
|
+
fix --no-safe "make" # skip sandbox
|
|
77
|
+
fix --safe "make" # force sandbox
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
## Verification
|
|
81
|
+
|
|
82
|
+
```sh
|
|
83
|
+
fix "gcc foo.c" # default: re-run, exit 0 = success
|
|
84
|
+
fix --verify=human "python3 render.py" # human judges
|
|
85
|
+
fix --verify="pytest tests/" "pip install x" # custom command
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
## How it works
|
|
89
|
+
|
|
90
|
+
1. Command fails, stderr captured
|
|
91
|
+
2. Contract built (task, environment, verification terms, escrow)
|
|
92
|
+
3. Agent investigates (read-only commands), then proposes fix
|
|
93
|
+
4. Fix applied, verified mechanically
|
|
94
|
+
5. Multi-attempt: up to 3 tries, feeding failures back as context
|
|
95
|
+
6. Disputes go to an AI judge who reviews the full transcript
|
|
96
|
+
|
|
97
|
+
## Architecture
|
|
98
|
+
|
|
99
|
+
- `fix` -- CLI entry point
|
|
100
|
+
- `server/` -- FastAPI platform (contracts, escrow, reputation, judge)
|
|
101
|
+
- `protocol.py` -- state machine, constants
|
|
102
|
+
- `scrubber.py` -- redacts secrets from error output before sending to LLM
|
|
103
|
+
- `contract.py` -- builds structured contracts
|
|
104
|
+
- `client.py` / `agent.py` -- remote mode client and agent
|
|
105
|
+
|
|
106
|
+
## License
|
|
107
|
+
|
|
108
|
+
MIT
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
agent.py,sha256=lJgfoGi8sG7aDOW4oZzLhKooocfjxA93GFFYYXq6L9c,9011
|
|
2
|
+
client.py,sha256=RiIfNfVEjBUQjSQWCwuF9sLyPWDPQBdEL1d3Co1OBWc,6161
|
|
3
|
+
contract.py,sha256=ByfDlNrLIK1NX1143Dam3UyTuhJUK-jvERcCgnYXOio,8923
|
|
4
|
+
crypto.py,sha256=QFeg-6M_Of9ngwbSKBmZElZIMrD9yNuZDkwtlZvgwxY,1997
|
|
5
|
+
fix.py,sha256=Ht-Y_xIZhIqbaMQ_bs0hX2U-OU212ILwmyXFxIl23aA,106096
|
|
6
|
+
protocol.py,sha256=yVkqn-Bf4vT9R-kO7XI9aOQkEwSOJI8VWTngFad6r_0,4775
|
|
7
|
+
scrubber.py,sha256=M0_D6jQu9UxkTif97IhuzrvS13aaTEHfFj34CME1YXA,10585
|
|
8
|
+
fix_cli-0.4.0.dist-info/licenses/LICENSE,sha256=mH6C1emk9xIuoqHqFMJVHAjB2tj7IGoNlrBe1BvZ_Z4,1069
|
|
9
|
+
server/__init__.py,sha256=Ps8KIJbZjQ1hLiux6Z12U6KIzYJ2px2DSIPgTUznIt4,136
|
|
10
|
+
server/app.py,sha256=5zeQSh4-xiZ41AgNWmOja4ydbWTqzx5qzPGM6jlxO_0,44070
|
|
11
|
+
server/escrow.py,sha256=4su1y2bJeXU209IRtR13Lhwe3d2BYBi_oNN77-9PdF0,16922
|
|
12
|
+
server/judge.py,sha256=JPBycMixyQRrB9sAHl5CB7rRm_k-R0YyFxTKMrWRx6E,9804
|
|
13
|
+
server/nano.py,sha256=EaLVkSD1GmJ_OW9vcPXHbuP2oISYSK9qDn6V5sUSWLc,6183
|
|
14
|
+
server/reputation.py,sha256=6SLQO1cgs_m4fD_-G6Kv0hApxCNPAxM7K3EuDVP-r90,5171
|
|
15
|
+
server/store.py,sha256=nDEB4X1AiX1R1JnaYgMK__DhdXIDTc8p7zhHeARiSns,6014
|
|
16
|
+
fix_cli-0.4.0.dist-info/METADATA,sha256=yP-4cjjwgCVVCtsmadb92bIpY3csVjuIG_XNdcttvZY,2885
|
|
17
|
+
fix_cli-0.4.0.dist-info/WHEEL,sha256=YCfwYGOYMi5Jhw2fU4yNgwErybb2IX5PEwBKV4ZbdBo,91
|
|
18
|
+
fix_cli-0.4.0.dist-info/entry_points.txt,sha256=lJgaB_4xLWdxPgGogXU4ktdItAjb132Z_wzBI8UChTg,33
|
|
19
|
+
fix_cli-0.4.0.dist-info/top_level.txt,sha256=LEKHSOT7frBfNsQzRT9SlVYgKjEXE2q7p_iDaMPv5zo,58
|
|
20
|
+
fix_cli-0.4.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Karan Sharma
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
protocol.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
"""Shared constants and interfaces for fix v2 protocol.
|
|
2
|
+
|
|
3
|
+
All modules import from here to avoid circular dependencies.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from decimal import Decimal
|
|
7
|
+
from enum import Enum
|
|
8
|
+
|
|
9
|
+
# --- Protocol Constants ---
|
|
10
|
+
|
|
11
|
+
PROTOCOL_VERSION = 2

# Monetary amounts below are kept as decimal strings rather than floats
# (DISPUTE_BOND further down parses the tier fees with Decimal).
DEFAULT_BOUNTY = "0.01"
DEFAULT_CURRENCY = "XNO"
DEFAULT_CHAIN = "nano"
DEFAULT_CANCEL_FEE = "0.002"
GRACE_PERIOD_SECONDS = 30
ABANDONMENT_TIMEOUT = 120  # presumably seconds, like GRACE_PERIOD_SECONDS -- TODO confirm
MAX_INVESTIGATION_ROUNDS = 5
DEFAULT_MAX_ATTEMPTS = 5
XNO_RAW_PER_UNIT = 10**30  # number of raw units in one XNO

# Execution modes
MODE_SUPERVISED = "supervised"
MODE_AUTONOMOUS = "autonomous"

# Review window (autonomous mode): seconds before auto-fulfill
DEFAULT_REVIEW_WINDOW = 7200  # 2 hours

# Judge defaults
# NOTE(review): this literal equals the computed DISPUTE_BOND below; if the
# tier fees change, the two values must be kept in sync by hand.
DEFAULT_JUDGE_FEE = "0.026"  # XNO -- each side stakes this as dispute bond
DEFAULT_RULING_TIMEOUT = 60  # seconds judge has to rule

# Tiered court system: escalating models and fees
# Each entry is a dict with "name", "model" and "fee" (decimal string) keys;
# the list index is the dispute level (0 = district).
COURT_TIERS = [
    {"name": "district", "model": "claude-haiku-4-5-20251001", "fee": "0.001"},
    {"name": "appeals", "model": "claude-sonnet-4-6", "fee": "0.005"},
    {"name": "supreme", "model": "claude-opus-4-6", "fee": "0.02"},
]
MAX_DISPUTE_LEVEL = len(COURT_TIERS) - 1  # supreme is final
# Bond = sum of all tier fees (covers worst-case full appeal)
DISPUTE_BOND = str(sum(Decimal(t["fee"]) for t in COURT_TIERS))  # "0.026"

# Platform fee: small deduction from BOTH sides on every resolution
# Covers platform costs (agent LLM, hosting). Non-refundable.
PLATFORM_FEE = "0.001"  # XNO per side

# Response window: seconds the other side has to counter-argue in a dispute
DISPUTE_RESPONSE_WINDOW = 30  # seconds

# Investigation rate limiting
DEFAULT_INVESTIGATION_RATE = 5  # seconds between commands
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# --- State Machine ---
|
|
56
|
+
|
|
57
|
+
class ContractState(Enum):
    """Lifecycle states of a contract.

    Each member's value is the lowercase string form of the state. The
    transitions allowed between states are listed in STATE_TRANSITIONS.
    """

    OPEN = "open"
    INVESTIGATING = "investigating"  # agent bonded, inspecting before accept
    IN_PROGRESS = "in_progress"
    REVIEW = "review"  # autonomous mode: fix submitted, awaiting accept/dispute/timeout
    FULFILLED = "fulfilled"
    CANCELED = "canceled"
    BACKED_OUT = "backed_out"
    DISPUTED = "disputed"
    HALTED = "halted"
    RESOLVED = "resolved"
    VOIDED = "voided"  # judge timeout, all funds returned
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
# Valid state transitions: current_state -> set of valid next states.
# Every ContractState member appears as a key; states mapped to an empty set
# (FULFILLED, CANCELED, RESOLVED, VOIDED) are terminal.
STATE_TRANSITIONS = {
    ContractState.OPEN: {ContractState.INVESTIGATING, ContractState.IN_PROGRESS, ContractState.CANCELED},
    ContractState.INVESTIGATING: {ContractState.IN_PROGRESS, ContractState.OPEN},  # accept or decline
    ContractState.IN_PROGRESS: {
        ContractState.FULFILLED,
        ContractState.CANCELED,
        ContractState.BACKED_OUT,
        ContractState.DISPUTED,
        ContractState.HALTED,
        ContractState.REVIEW,
    },
    ContractState.REVIEW: {ContractState.FULFILLED, ContractState.DISPUTED, ContractState.CANCELED},
    ContractState.BACKED_OUT: {ContractState.OPEN},  # reopen
    ContractState.DISPUTED: {ContractState.RESOLVED, ContractState.VOIDED},
    ContractState.HALTED: {ContractState.RESOLVED},
    ContractState.FULFILLED: set(),
    ContractState.CANCELED: set(),
    ContractState.RESOLVED: set(),
    ContractState.VOIDED: set(),
}
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
# --- Feedback Message Types ---
|
|
95
|
+
|
|
96
|
+
class FeedbackType(Enum):
    """Kinds of feedback message; each value is the lowercase string form."""

    ACCEPT = "accept"
    DECLINE = "decline"
    INVESTIGATE = "investigate"
    RESULT = "result"
    VERDICT = "verdict"
    BACK_OUT = "back_out"
    HALT = "halt"  # emergency kill by principal
    ASK = "ask"  # agent asks principal a question
    ANSWER = "answer"  # principal answers agent
    MESSAGE = "message"  # general chat (either direction)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
# --- Verdict Rulings ---
|
|
110
|
+
|
|
111
|
+
class Ruling(Enum):
    """Possible verdict rulings.

    The three ``evil_*`` values are the same strings collected in EVIL_FLAGS.
    """

    FULFILLED = "fulfilled"
    CANCELED = "canceled"
    IMPOSSIBLE = "impossible"
    EVIL_AGENT = "evil_agent"
    EVIL_PRINCIPAL = "evil_principal"
    EVIL_BOTH = "evil_both"
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
# --- Evil Flags ---
|
|
121
|
+
|
|
122
|
+
# String values of the Ruling members that flag bad-faith behaviour.
EVIL_FLAGS = {"evil_agent", "evil_principal", "evil_both"}


# --- Investigation Command Whitelist ---
# (re-exported from main fix CLI for agent use)
#
# Set of bare command names (no paths, no arguments).
# NOTE(review): several entries (e.g. sed, awk, find, env, timeout, make,
# python/python3, node, pip, npm, apt) can modify files or launch arbitrary
# programs depending on their arguments. If investigation is meant to be
# read-only, confirm that the code consuming this set also restricts arguments.

INVESTIGATE_WHITELIST = {
    "cat", "head", "tail", "less", "file", "wc", "stat", "md5sum", "sha256sum",
    "ls", "find", "tree", "du",
    "grep", "rg", "ag", "awk", "sed",
    "which", "whereis", "type", "command", "uname", "arch", "lsb_release", "hostnamectl",
    "dpkg", "apt", "apt-cache", "apt-file", "apt-list", "rpm", "pacman",
    "pip", "pip3", "npm", "gem", "cargo", "rustc",
    "python3", "python", "node", "gcc", "g++", "make", "cmake", "java", "go", "ruby",
    "clang", "clang++", "ld", "as", "nasm",
    "env", "printenv", "echo", "id", "whoami", "pwd", "hostname",
    "lsmod", "lscpu", "free", "df", "mount", "ip", "ss", "ps",
    "journalctl", "dmesg",
    "readlink", "realpath", "basename", "dirname", "diff", "cmp",
    "strings", "nm", "ldd", "objdump", "pkg-config", "test", "timeout",
}
|
scrubber.py
ADDED
|
@@ -0,0 +1,355 @@
|
|
|
1
|
+
"""Output redaction engine for fix v2.
|
|
2
|
+
|
|
3
|
+
Scrubs sensitive data from text before it leaves the machine.
|
|
4
|
+
Runs on every outbound message: investigation results, error output, contracts.
|
|
5
|
+
|
|
6
|
+
Each category can be independently toggled. False positives are safer than leaks.
|
|
7
|
+
This is a best-effort seatbelt, not a security boundary. The real protection
|
|
8
|
+
is the overlay sandbox hiding sensitive files from the command in the first place.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import re
|
|
12
|
+
import os
|
|
13
|
+
import math
|
|
14
|
+
|
|
15
|
+
# --- Pattern categories ---
|
|
16
|
+
|
|
17
|
+
# Environment variable assignments: KEY=value
# Groups: 1 = variable name (upper-case, at least 3 chars), 2 = optional
# opening quote, 3 = value. The match also consumes ONE trailing whitespace
# character when present (otherwise it ends at end of line/text).
_RE_ENV_ASSIGN = re.compile(
    r'''(?:^|(?<=\s))([A-Z_][A-Z0-9_]{2,})=(["']?)(.+?)\2(?:\s|$)''',
    re.MULTILINE
)

# Known secret prefixes: API keys, tokens, passwords
# One alternation built from adjacent raw-string literals; alternatives are
# tried left to right.
_RE_TOKENS = re.compile(
    r'(?:'
    # Cloud provider keys
    r'sk-[A-Za-z0-9_-]{20,}'           # Anthropic/OpenAI
    r'|sk_live_[A-Za-z0-9]{20,}'       # Stripe secret
    r'|pk_live_[A-Za-z0-9]{20,}'       # Stripe publishable
    r'|rk_live_[A-Za-z0-9]{20,}'       # Stripe restricted
    r'|AKIA[A-Z0-9]{16}(?:/[A-Za-z0-9/+]{20,})?'  # AWS access key (+ optional secret)
    r'|AIza[A-Za-z0-9_-]{35}'          # Google API key
    r'|ya29\.[A-Za-z0-9_-]+'           # Google OAuth token
    r'|SG\.[A-Za-z0-9_-]{20,}\.[A-Za-z0-9_-]{20,}'  # SendGrid
    # Anything this alternative matches is already matched by the generic
    # 'sk-' alternative above, so it never wins; kept as documentation.
    r'|sk-ant-[A-Za-z0-9_-]{20,}'      # Anthropic specific
    # Git forges
    r'|ghp_[A-Za-z0-9]{30,}'           # GitHub PAT
    r'|gho_[A-Za-z0-9]{30,}'           # GitHub OAuth
    r'|ghu_[A-Za-z0-9]{30,}'           # GitHub user token
    r'|ghs_[A-Za-z0-9]{30,}'           # GitHub server token
    r'|github_pat_[A-Za-z0-9_]{30,}'   # GitHub fine-grained PAT
    r'|glpat-[A-Za-z0-9_-]{20,}'       # GitLab PAT
    # Messaging/SaaS
    r'|xox[bsapr]-[A-Za-z0-9-]+'       # Slack tokens
    r'|SK[a-f0-9]{32}'                 # Twilio API key
    r'|AC[a-f0-9]{32}'                 # Twilio account SID
    r'|sq0[a-z]{3}-[A-Za-z0-9_-]{22,}' # Square
    # Generic patterns
    r'|Bearer\s+[A-Za-z0-9._~+/=-]{20,}'  # Bearer tokens
    r'|token=[A-Za-z0-9._~+/=-]{10,}'
    r'|password=[^\s&]{3,}'
    r'|passwd=[^\s&]{3,}'
    r'|secret=[^\s&]{3,}'
    r'|api[_-]?key=[^\s&]{3,}'
    r')'
)

# Private key blocks (PEM format)
# [\s\S]*? spans newlines and stops at the first END marker.
_RE_PRIVATE_KEY = re.compile(
    r'-----BEGIN[A-Z ]*PRIVATE KEY-----[\s\S]*?-----END[A-Z ]*PRIVATE KEY-----'
)

# JWTs: three base64url segments separated by dots
# The first two segments must start with "eyJ"; every segment is 10+ chars.
_RE_JWT = re.compile(
    r'\beyJ[A-Za-z0-9_-]{10,}\.eyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\b'
)

# Connection strings with credentials
# Requires a "user:password@" style section after the scheme.
_RE_CONN_STRING = re.compile(
    r'(?:mongodb(?:\+srv)?|postgres(?:ql)?|mysql|redis|amqp|mssql)'
    r'://[^\s]*:[^\s]*@[^\s]+'
)

# Git remote URLs with embedded credentials
# Any http(s) URL of the form scheme://user:secret@rest-of-url.
_RE_GIT_CRED_URL = re.compile(
    r'https?://[A-Za-z0-9._%-]+:[A-Za-z0-9._%-]+@[^\s]+'
)

# HTTP auth headers in output
# Case-insensitive; matches the header name, the ':' or '=' separator and the
# rest of the line ('.' does not match a newline).
_RE_HTTP_AUTH = re.compile(
    r'(?:Authorization|Cookie|X-API-Key|X-Auth-Token|X-Secret)'
    r'\s*[:=]\s*.+',
    re.IGNORECASE
)

# Credit card numbers (4 groups of 4 digits, with optional separators)
_RE_CREDIT_CARD = re.compile(
    r'\b(?:\d{4}[\s-]?){3}\d{4}\b'
)

# SSN
# Digits in the 3-2-4 dash-separated layout.
_RE_SSN = re.compile(
    r'\b\d{3}-\d{2}-\d{4}\b'
)

# Phone numbers (US and international)
_RE_PHONE = re.compile(
    r'(?:'
    r'\+\d{1,3}[\s.-]?\(?\d{1,4}\)?[\s.-]?\d{1,4}[\s.-]?\d{1,9}'  # international
    r'|\b\(?\d{3}\)?[\s.-]\d{3}[\s.-]\d{4}\b'  # US format
    r')'
)

# TOTP/OTP URIs
_RE_TOTP = re.compile(
    r'otpauth://[^\s]+'
)

# Home directory paths: /home/username/ -> /home/[USER]/
_RE_HOME_PATH = None  # compiled lazily with actual username

# IPv4 addresses
# The first three alternatives spell out the private ranges (10/8, 172.16/12,
# 192.168/16); the last alternative matches any dotted quad and therefore
# already covers them.
_RE_IPV4 = re.compile(
    r'\b(?:'
    r'(?:10\.(?:\d{1,3}\.){2}\d{1,3})'
    r'|(?:172\.(?:1[6-9]|2\d|3[01])\.\d{1,3}\.\d{1,3})'
    r'|(?:192\.168\.\d{1,3}\.\d{1,3})'
    r'|(?:\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'
    r')\b'
)
|
|
121
|
+
|
|
122
|
+
# Email addresses: local-part@domain.tld, where the TLD is 2+ ASCII letters.
# The TLD class is [A-Za-z]; a '|' inside a character class is a literal pipe,
# not alternation, and would let a match run on through "|" into following
# text (e.g. "user@example.com|grep").
_RE_EMAIL = re.compile(
    r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
)
|
|
126
|
+
|
|
127
|
+
# Long hex strings (64+ hex chars, either case) delimited by word boundaries.
# The pattern itself only checks length and alphabet; no entropy test is
# encoded in the regex.
_RE_HEX_SECRET = re.compile(
    r'\b[0-9a-fA-F]{64,}\b'
)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _get_home_re():
    """Return the home-directory regex, compiling and caching it on first use."""
    global _RE_HOME_PATH
    if _RE_HOME_PATH is not None:
        return _RE_HOME_PATH

    env = os.environ
    if env.get("USER") or env.get("LOGNAME"):
        # A user name is known: match this user's literal home directory.
        prefix = re.escape(os.path.expanduser("~"))
    else:
        # No user name available: fall back to any /home/<name> path.
        prefix = r'/home/[a-z_][a-z0-9_-]*'

    # Only match when followed by '/', end of text, or whitespace.
    _RE_HOME_PATH = re.compile(prefix + r'(?=/|$|\s)')
    return _RE_HOME_PATH
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def _luhn_check(num_str):
    """Return True if num_str holds exactly 16 digits that pass the Luhn checksum.

    Non-digit characters (spaces, dashes) are ignored.
    """
    numerals = [int(ch) for ch in num_str if ch.isdigit()]
    if len(numerals) != 16:
        return False

    total = 0
    # Walk from the rightmost digit; every second digit is doubled, and a
    # doubled value above 9 is reduced by 9.
    for position, value in enumerate(numerals[::-1]):
        if position % 2:
            value = value * 2
            value = value - 9 if value > 9 else value
        total += value
    return total % 10 == 0
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def _entropy(s):
    """Return the Shannon entropy of s in bits per character (0 for empty input)."""
    if not s:
        return 0

    total = len(s)
    counts = {}
    for ch in s:
        if ch in counts:
            counts[ch] += 1
        else:
            counts[ch] = 1

    # p = relative frequency of each distinct character.
    probabilities = (n / total for n in counts.values())
    return -sum(p * math.log2(p) for p in probabilities)
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
# Environment variables that are NOT sensitive.
# Assignments whose KEY is in this set are left untouched by _scrub_env_vars;
# every other KEY=value assignment has its value redacted.
_ENV_SAFE = {
    "DISPLAY", "WAYLAND_DISPLAY", "XDG_RUNTIME_DIR", "XDG_SESSION_TYPE",
    "XDG_SESSION_CLASS", "XDG_SESSION_ID", "XDG_SEAT", "XDG_VTNR",
    "XDG_SEAT_PATH", "XDG_SESSION_PATH", "XDG_CONFIG_DIRS", "XDG_DATA_DIRS",
    "XDG_CURRENT_DESKTOP", "XDG_SESSION_DESKTOP", "XDG_MENU_PREFIX",
    "SHELL", "TERM", "LANG", "LANGUAGE", "LC_ALL", "LC_CTYPE",
    "HOME", "USER", "LOGNAME", "PATH", "PWD", "OLDPWD", "HOSTNAME",
    "EDITOR", "VISUAL", "PAGER", "COLORTERM", "TERM_PROGRAM",
    "DBUS_SESSION_BUS_ADDRESS", "SSH_AUTH_SOCK",
    "DESKTOP_SESSION", "SESSION_MANAGER", "GDMSESSION",
    "QT_ACCESSIBILITY", "QT_IM_MODULE", "GTK_IM_MODULE",
}
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
# --- Scrub functions ---
|
|
189
|
+
|
|
190
|
+
def _scrub_env_vars(text):
    """Redact KEY=value assignments, preserving non-sensitive system vars.

    Assignments whose key is listed in _ENV_SAFE are returned unchanged.
    For all others the value is replaced by ``[REDACTED]`` while the key,
    any surrounding quote character, and the whitespace that followed the
    assignment are kept, so line structure of the input is preserved.
    """
    def repl(m):
        key = m.group(1)
        if key in _ENV_SAFE:
            return m.group(0)
        quote = m.group(2)
        # _RE_ENV_ASSIGN consumes one trailing whitespace character (often the
        # newline) or nothing at end of line/text. Re-emit exactly what was
        # consumed instead of a hard-coded space, so newlines are not turned
        # into spaces and no stray space is appended at end of text.
        trailing = m.string[m.end(3) + len(quote):m.end()]
        return f"{key}={quote}[REDACTED]{quote}{trailing}"
    return _RE_ENV_ASSIGN.sub(repl, text)
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def _scrub_tokens(text):
    """Replace every match of the known-secret token patterns with a placeholder."""
    placeholder = "[REDACTED]"
    return _RE_TOKENS.sub(placeholder, text)
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def _scrub_private_keys(text):
    """Replace each PEM private-key block with a placeholder."""
    placeholder = "[REDACTED_PRIVATE_KEY]"
    return _RE_PRIVATE_KEY.sub(placeholder, text)
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def _scrub_jwts(text):
    """Replace each JSON Web Token with a placeholder."""
    placeholder = "[REDACTED_JWT]"
    return _RE_JWT.sub(placeholder, text)
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def _scrub_conn_strings(text):
    """Replace credential-bearing database/service connection strings with a placeholder."""
    placeholder = "[REDACTED_CONNECTION_STRING]"
    return _RE_CONN_STRING.sub(placeholder, text)
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def _scrub_git_creds(text):
    """Replace http(s) URLs that embed user:secret credentials with a placeholder."""
    placeholder = "[REDACTED_URL]"
    return _RE_GIT_CRED_URL.sub(placeholder, text)
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def _scrub_http_auth(text):
    """Replace HTTP auth header lines (name and value) with a placeholder."""
    placeholder = "[REDACTED_HEADER]"
    return _RE_HTTP_AUTH.sub(placeholder, text)
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def _scrub_credit_cards(text):
    """Redact 16-digit card-like numbers, but only those passing the Luhn check."""
    def _mask_if_valid(match):
        candidate = match.group(0)
        # Numbers failing the checksum are left as they were, to cut false positives.
        return "[REDACTED_CC]" if _luhn_check(candidate) else candidate

    return _RE_CREDIT_CARD.sub(_mask_if_valid, text)
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def _scrub_ssn(text):
    """Replace Social Security Number patterns with a placeholder."""
    placeholder = "[REDACTED_SSN]"
    return _RE_SSN.sub(placeholder, text)
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def _scrub_phone(text):
    """Replace phone-number patterns with a placeholder."""
    placeholder = "[REDACTED_PHONE]"
    return _RE_PHONE.sub(placeholder, text)
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def _scrub_totp(text):
    """Replace otpauth:// URIs with a placeholder."""
    placeholder = "[REDACTED_TOTP]"
    return _RE_TOTP.sub(placeholder, text)
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def _scrub_paths(text):
    """Rewrite home-directory prefixes matched by _get_home_re() to /home/[USER]."""
    return _get_home_re().sub("/home/[USER]", text)
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def _scrub_ips(text):
    """Redact IPv4 addresses, leaving 127.0.0.1 and 0.0.0.0 untouched."""
    keep = ("127.0.0.1", "0.0.0.0")

    def _mask(match):
        addr = match.group(0)
        return addr if addr in keep else "[REDACTED_IP]"

    return _RE_IPV4.sub(_mask, text)
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def _scrub_emails(text):
    """Replace email addresses with a placeholder."""
    placeholder = "[REDACTED_EMAIL]"
    return _RE_EMAIL.sub(placeholder, text)
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def _scrub_hex_secrets(text):
    """Redact long hex runs whose per-character entropy exceeds 3.5 bits."""
    def _mask(match):
        run = match.group(0)
        # Random hex is ~4.0 bits/char; repeated patterns score lower and are kept.
        if _entropy(run) <= 3.5:
            return run
        return "[REDACTED_HEX]"

    return _RE_HEX_SECRET.sub(_mask, text)
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
# Category name -> scrub function (order matters: specific before generic).
# Each function takes the text and returns the scrubbed text. The dict's
# insertion order is the intended application order.
SCRUBBERS = {
    "private_keys": _scrub_private_keys,
    "tokens": _scrub_tokens,
    "jwts": _scrub_jwts,
    "conn_strings": _scrub_conn_strings,
    "git_creds": _scrub_git_creds,
    "http_auth": _scrub_http_auth,
    "totp": _scrub_totp,
    "credit_cards": _scrub_credit_cards,
    "ssn": _scrub_ssn,
    "phone": _scrub_phone,
    "env_vars": _scrub_env_vars,
    "paths": _scrub_paths,
    "ips": _scrub_ips,
    "emails": _scrub_emails,
    "hex_secrets": _scrub_hex_secrets,
}

# All categories enabled by default.
# This is a set: it records which categories are enabled, not the order in
# which they should run. Code that cares about order should walk SCRUBBERS.
DEFAULT_CATEGORIES = set(SCRUBBERS.keys())
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
def scrub(text, config=None):
    """Scrub sensitive data from text.

    Built-in scrubbers always run in the order they are declared in
    SCRUBBERS (specific patterns before generic ones), regardless of the
    order in which categories are listed in ``config``; custom patterns run
    afterwards in the order given.

    Args:
        text: The text to scrub. ``None`` is treated as an empty string.
            ``bytes`` input is returned unchanged and is NOT scrubbed.
        config: Optional dict with:
            - categories: list of category names to enable (default: all).
              Names not present in SCRUBBERS are ignored.
            - custom_patterns: list of (pattern, replacement) tuples

    Returns:
        (scrubbed_text, matched_categories) tuple.
        matched_categories is a set of category names that had matches;
        it contains "custom" if any custom pattern changed the text.
    """
    # NOTE(review): bytes are passed through without redaction; callers that
    # may hold secrets in bytes should decode before calling.
    if isinstance(text, bytes):
        return text, set()
    if text is None:
        return "", set()
    if not text:
        return text, set()

    if config and "categories" in config:
        categories = set(config["categories"])
    else:
        categories = DEFAULT_CATEGORIES

    matched = set()
    result = text

    # Walk the registry, not the category set: set iteration order is
    # arbitrary, while SCRUBBERS' insertion order encodes the required
    # "specific before generic" sequencing and keeps output deterministic.
    for cat_name, fn in SCRUBBERS.items():
        if cat_name not in categories:
            continue
        scrubbed = fn(result)
        if scrubbed != result:
            matched.add(cat_name)
            result = scrubbed

    # Custom patterns
    if config and config.get("custom_patterns"):
        for pattern, replacement in config["custom_patterns"]:
            compiled = re.compile(pattern)
            new_result = compiled.sub(replacement, result)
            if new_result != result:
                matched.add("custom")
                result = new_result

    return result, matched
|