dbveil 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dbveil-0.1.0/.github/workflows/publish.yml +61 -0
- dbveil-0.1.0/.gitignore +14 -0
- dbveil-0.1.0/LICENSE +21 -0
- dbveil-0.1.0/PKG-INFO +194 -0
- dbveil-0.1.0/README.md +166 -0
- dbveil-0.1.0/examples/veil.example.yaml +44 -0
- dbveil-0.1.0/pyproject.toml +42 -0
- dbveil-0.1.0/tests/test_guard.py +73 -0
- dbveil-0.1.0/tests/test_redact.py +67 -0
- dbveil-0.1.0/veil/__init__.py +14 -0
- dbveil-0.1.0/veil/__main__.py +3 -0
- dbveil-0.1.0/veil/audit.py +31 -0
- dbveil-0.1.0/veil/cli.py +284 -0
- dbveil-0.1.0/veil/config.py +101 -0
- dbveil-0.1.0/veil/executor.py +76 -0
- dbveil-0.1.0/veil/guard.py +107 -0
- dbveil-0.1.0/veil/mcp_server.py +59 -0
- dbveil-0.1.0/veil/pipeline.py +79 -0
- dbveil-0.1.0/veil/redact/__init__.py +43 -0
- dbveil-0.1.0/veil/redact/column_rules.py +47 -0
- dbveil-0.1.0/veil/redact/llm.py +50 -0
- dbveil-0.1.0/veil/redact/ner.py +36 -0
- dbveil-0.1.0/veil/redact/patterns.py +59 -0
- dbveil-0.1.0/veil/result.py +11 -0
- dbveil-0.1.0/veil/serialize.py +27 -0
- dbveil-0.1.0/veil/tui.py +68 -0
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
|
|
7
|
+
permissions:
|
|
8
|
+
contents: read
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
build:
|
|
12
|
+
name: Build & test
|
|
13
|
+
runs-on: ubuntu-latest
|
|
14
|
+
steps:
|
|
15
|
+
- uses: actions/checkout@v4
|
|
16
|
+
|
|
17
|
+
- uses: astral-sh/setup-uv@v5
|
|
18
|
+
|
|
19
|
+
- name: Create venv & install (with dev extras)
|
|
20
|
+
run: |
|
|
21
|
+
uv venv --python 3.12
|
|
22
|
+
uv pip install -e '.[dev]'
|
|
23
|
+
|
|
24
|
+
- name: Test
|
|
25
|
+
run: .venv/bin/pytest -q
|
|
26
|
+
|
|
27
|
+
- name: Verify version matches release tag
|
|
28
|
+
run: |
|
|
29
|
+
TAG="${GITHUB_REF_NAME#v}"
|
|
30
|
+
PKG=$(grep -m1 '^version = ' pyproject.toml | sed -E 's/^version = "(.*)"/\1/')
|
|
31
|
+
echo "release tag: $TAG | pyproject version: $PKG"
|
|
32
|
+
if [ "$TAG" != "$PKG" ]; then
|
|
33
|
+
echo "::error::Release tag ($TAG) != pyproject version ($PKG). Bump version in pyproject.toml before releasing."
|
|
34
|
+
exit 1
|
|
35
|
+
fi
|
|
36
|
+
|
|
37
|
+
- name: Build sdist + wheel
|
|
38
|
+
run: uv build
|
|
39
|
+
|
|
40
|
+
- uses: actions/upload-artifact@v4
|
|
41
|
+
with:
|
|
42
|
+
name: dist
|
|
43
|
+
path: dist/
|
|
44
|
+
|
|
45
|
+
publish:
|
|
46
|
+
name: Publish to PyPI
|
|
47
|
+
needs: build
|
|
48
|
+
runs-on: ubuntu-latest
|
|
49
|
+
environment:
|
|
50
|
+
name: pypi
|
|
51
|
+
url: https://pypi.org/p/dbveil
|
|
52
|
+
permissions:
|
|
53
|
+
id-token: write # required for trusted publishing (OIDC)
|
|
54
|
+
steps:
|
|
55
|
+
- uses: actions/download-artifact@v4
|
|
56
|
+
with:
|
|
57
|
+
name: dist
|
|
58
|
+
path: dist/
|
|
59
|
+
|
|
60
|
+
- name: Publish to PyPI
|
|
61
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
dbveil-0.1.0/.gitignore
ADDED
dbveil-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Mathusan Selvarajah
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
dbveil-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dbveil
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A local read-only, PII-redacting proxy that lets AI agents query your database safely.
|
|
5
|
+
Project-URL: Homepage, https://github.com/mathu97/dbveil
|
|
6
|
+
Project-URL: Repository, https://github.com/mathu97/dbveil
|
|
7
|
+
Author: Mathusan Selvarajah
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: ai,claude,database,mcp,pii,postgres,proxy,read-only,redaction
|
|
11
|
+
Requires-Python: >=3.10
|
|
12
|
+
Requires-Dist: asyncpg>=0.29
|
|
13
|
+
Requires-Dist: mcp>=1.2
|
|
14
|
+
Requires-Dist: pglast>=6.0
|
|
15
|
+
Requires-Dist: pydantic>=2.6
|
|
16
|
+
Requires-Dist: pyyaml>=6.0
|
|
17
|
+
Requires-Dist: rich>=13.7
|
|
18
|
+
Requires-Dist: typer>=0.12
|
|
19
|
+
Provides-Extra: dev
|
|
20
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
|
|
21
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
22
|
+
Provides-Extra: llm
|
|
23
|
+
Requires-Dist: httpx>=0.27; extra == 'llm'
|
|
24
|
+
Provides-Extra: ner
|
|
25
|
+
Requires-Dist: presidio-analyzer>=2.2; extra == 'ner'
|
|
26
|
+
Requires-Dist: presidio-anonymizer>=2.2; extra == 'ner'
|
|
27
|
+
Description-Content-Type: text/markdown
|
|
28
|
+
|
|
29
|
+
# veil
|
|
30
|
+
|
|
31
|
+
**A local read-only, PII-redacting proxy that lets AI agents query your database safely.**
|
|
32
|
+
|
|
33
|
+
Point Claude Code (or any MCP client) at `veil` instead of your database. Every query is
|
|
34
|
+
forced through three deterministic guarantees before a single row reaches the model:
|
|
35
|
+
|
|
36
|
+
1. **Read-only guard** — the query is parsed with Postgres's real grammar (`libpg_query`).
|
|
37
|
+
Only `SELECT` / `SHOW` / `EXPLAIN` (without `ANALYZE`) survive. Writes, DDL, multi-statements, data-modifying
|
|
38
|
+
CTEs, `SELECT INTO`, and row locks are rejected *before execution* — not by asking the model
|
|
39
|
+
nicely, by refusing to run them.
|
|
40
|
+
2. **PII redaction** — results are scrubbed before they leave your machine: deterministic
|
|
41
|
+
column rules + always-on regex for structured PII (emails, phones, cards, SSNs), with an
|
|
42
|
+
optional NER/LLM backstop for free-text.
|
|
43
|
+
3. **Audit** — every query and verdict is appended to a log you can tail live in a TUI.
|
|
44
|
+
|
|
45
|
+
A guarded chokepoint in front of the DB, shrunk to a single open-source command with zero
|
|
46
|
+
infrastructure to stand up.
|
|
47
|
+
|
|
48
|
+
```
|
|
49
|
+
Claude Code ──MCP──▶ veil ──READ ONLY txn──▶ your database
|
|
50
|
+
│
|
|
51
|
+
├─ guard: parse → allow SELECT / SHOW / EXPLAIN only
|
|
52
|
+
├─ redact: column rules + regex + (optional) NER/LLM
|
|
53
|
+
└─ audit: veil-audit.jsonl
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Why
|
|
57
|
+
|
|
58
|
+
You want an agent to act as a data analyst over real tables — "compare what we drafted vs what
|
|
59
|
+
was actually sent" — without (a) risking a destructive query or (b) shipping customer PII to a
|
|
60
|
+
model provider. Handing an agent raw DB credentials and hoping it only writes `SELECT` is not a
|
|
61
|
+
control. `veil` makes the unsafe paths impossible at the layer the agent can't talk its way past.
|
|
62
|
+
|
|
63
|
+
## Install
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
pip install dbveil # or: uv pip install dbveil
|
|
67
|
+
# optional extras:
|
|
68
|
+
pip install 'dbveil[ner]' # Presidio NER backstop for names/addresses
|
|
69
|
+
pip install 'dbveil[llm]' # local-LLM (Ollama) redaction
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
## Quickstart
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
veil init # interactive: DB URL + auto-detect PII columns → writes veil.yaml
|
|
76
|
+
veil doctor # verify guard, connectivity, and that READ ONLY actually blocks writes
|
|
77
|
+
veil test-query "SELECT email, created_at FROM users LIMIT 5" # try it without an agent
|
|
78
|
+
veil up # run the MCP proxy on stdio (what Claude Code connects to)
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Try a write to see the guard refuse it:
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
veil test-query "DELETE FROM users"
|
|
85
|
+
# BLOCKED — write or DDL operation detected: DELETE
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### Connect Claude Code
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
claude mcp add veil -- veil up
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
or commit a `.mcp.json` so your whole team gets it:
|
|
95
|
+
|
|
96
|
+
```json
|
|
97
|
+
{
|
|
98
|
+
"mcpServers": {
|
|
99
|
+
"veil": { "command": "veil", "args": ["up"], "env": { "VEIL_CONFIG": "veil.yaml" } }
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
Now the agent has three tools — `query`, `list_tables`, `describe_table` — and physically
|
|
105
|
+
cannot write or see raw PII.
|
|
106
|
+
|
|
107
|
+
### Watch it live
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
veil monitor # TUI tailing veil-audit.jsonl: allowed / blocked / redaction counts
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
## Configuration
|
|
114
|
+
|
|
115
|
+
`veil init` writes a commented `veil.yaml`. Full reference in
|
|
116
|
+
[`examples/veil.example.yaml`](examples/veil.example.yaml). The essentials:
|
|
117
|
+
|
|
118
|
+
```yaml
|
|
119
|
+
database:
|
|
120
|
+
url: ${DATABASE_URL} # env refs kept out of the file
|
|
121
|
+
|
|
122
|
+
guard:
|
|
123
|
+
allow_select_star: false # block SELECT * on PII tables; force explicit columns
|
|
124
|
+
max_rows: 1000
|
|
125
|
+
statement_timeout_ms: 15000
|
|
126
|
+
pii_tables: [contacts, users]
|
|
127
|
+
|
|
128
|
+
redact:
|
|
129
|
+
builtin_patterns: { email: true, phone: true, credit_card: true, ssn: true, ip: false }
|
|
130
|
+
columns:
|
|
131
|
+
- { column: email, strategy: hash } # sha256, still join-able
|
|
132
|
+
- { column: full_name, strategy: mask } # -> [redacted]
|
|
133
|
+
- { column: ssn, strategy: partial, keep: 4 }
|
|
134
|
+
ner: { enabled: false, engine: presidio } # optional backstop
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
## How redaction is layered (and its honest limits)
|
|
138
|
+
|
|
139
|
+
`veil` defends from the **deterministic** side first, because that's the only kind you can trust
|
|
140
|
+
not to leak:
|
|
141
|
+
|
|
142
|
+
| Layer | What it catches | Deterministic? |
|
|
143
|
+
|---|---|---|
|
|
144
|
+
| **Column rules** | Known PII columns (`email`, `ssn`, …) by name | ✅ yes |
|
|
145
|
+
| **Built-in regex** | Emails, phones, Luhn-valid cards, SSNs, IPs — even aliased or in free-text | ✅ yes |
|
|
146
|
+
| **NER (Presidio)** | Names / addresses in free-text the above miss | ⚠️ probabilistic |
|
|
147
|
+
| **LLM (Ollama)** | Same, via a local model | ⚠️ probabilistic, experimental |
|
|
148
|
+
|
|
149
|
+
**Use the probabilistic layers only as a backstop.** ML/NER *will* eventually miss a name or an
|
|
150
|
+
oddly-formatted address — that's a leak. For columns you already know are sensitive, the column
|
|
151
|
+
rules are the real control. The LLM redactor fails *closed*: if the model errors, the cell is
|
|
152
|
+
masked, never passed through.
|
|
153
|
+
|
|
154
|
+
## Security model
|
|
155
|
+
|
|
156
|
+
- **Two independent read-only layers.** The parser rejects non-reads, *and* every query runs
|
|
157
|
+
inside a `SET TRANSACTION READ ONLY` transaction — so even a parser gap can't write.
|
|
158
|
+
- **Give veil a least-privilege credential.** Best practice is a `GRANT SELECT`-only database
|
|
159
|
+
role (ideally on a read replica). Then "read-only" is enforced by the database itself, and the
|
|
160
|
+
credential `veil` holds is low-blast-radius: a leak exposes already-masked reads and can write
|
|
161
|
+
nothing. `veil doctor` confirms the READ ONLY transaction rejects writes against your DB.
|
|
162
|
+
- **PII never leaves your machine unmasked.** Redaction happens in-process, before results are
|
|
163
|
+
serialized to the MCP client.
|
|
164
|
+
|
|
165
|
+
## Secure connectivity
|
|
166
|
+
|
|
167
|
+
`veil` connects to whatever DSN you give it, so the network path is yours to choose:
|
|
168
|
+
|
|
169
|
+
- **Tailscale** — put your DB behind a tailnet and point `database.url` at the tailnet host. No
|
|
170
|
+
public DB port.
|
|
171
|
+
- **Short-lived credentials** — `${DATABASE_URL}` is expanded at load, so you can inject an
|
|
172
|
+
ephemeral token (RDS IAM auth, Cloud SQL IAM, a Vault dynamic user) instead of a static
|
|
173
|
+
password.
|
|
174
|
+
- **Railway / managed PaaS** — use the provided TLS endpoint with a dedicated read-only role.
|
|
175
|
+
|
|
176
|
+
## Roadmap
|
|
177
|
+
|
|
178
|
+
- **Postgres wire-protocol frontend** — so `psql`, BI tools, and any client (not just MCP) get
|
|
179
|
+
the same guard + redaction. The pipeline is already frontend-agnostic.
|
|
180
|
+
- **More engines** — MySQL, SQLite (the guard's parser is the only Postgres-specific piece; it's
|
|
181
|
+
a pluggable backend).
|
|
182
|
+
- **Schema-aware lineage** — resolve aliased PII columns back to their source table.
|
|
183
|
+
|
|
184
|
+
## Development
|
|
185
|
+
|
|
186
|
+
```bash
|
|
187
|
+
uv venv && source .venv/bin/activate
|
|
188
|
+
uv pip install -e '.[dev]'
|
|
189
|
+
pytest
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
## License
|
|
193
|
+
|
|
194
|
+
MIT
|
dbveil-0.1.0/README.md
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
# veil
|
|
2
|
+
|
|
3
|
+
**A local read-only, PII-redacting proxy that lets AI agents query your database safely.**
|
|
4
|
+
|
|
5
|
+
Point Claude Code (or any MCP client) at `veil` instead of your database. Every query is
|
|
6
|
+
forced through three deterministic guarantees before a single row reaches the model:
|
|
7
|
+
|
|
8
|
+
1. **Read-only guard** — the query is parsed with Postgres's real grammar (`libpg_query`).
|
|
9
|
+
Only `SELECT` / `SHOW` / `EXPLAIN` (without `ANALYZE`) survive. Writes, DDL, multi-statements, data-modifying
|
|
10
|
+
CTEs, `SELECT INTO`, and row locks are rejected *before execution* — not by asking the model
|
|
11
|
+
nicely, by refusing to run them.
|
|
12
|
+
2. **PII redaction** — results are scrubbed before they leave your machine: deterministic
|
|
13
|
+
column rules + always-on regex for structured PII (emails, phones, cards, SSNs), with an
|
|
14
|
+
optional NER/LLM backstop for free-text.
|
|
15
|
+
3. **Audit** — every query and verdict is appended to a log you can tail live in a TUI.
|
|
16
|
+
|
|
17
|
+
A guarded chokepoint in front of the DB, shrunk to a single open-source command with zero
|
|
18
|
+
infrastructure to stand up.
|
|
19
|
+
|
|
20
|
+
```
|
|
21
|
+
Claude Code ──MCP──▶ veil ──READ ONLY txn──▶ your database
|
|
22
|
+
│
|
|
23
|
+
├─ guard: parse → allow SELECT / SHOW / EXPLAIN only
|
|
24
|
+
├─ redact: column rules + regex + (optional) NER/LLM
|
|
25
|
+
└─ audit: veil-audit.jsonl
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Why
|
|
29
|
+
|
|
30
|
+
You want an agent to act as a data analyst over real tables — "compare what we drafted vs what
|
|
31
|
+
was actually sent" — without (a) risking a destructive query or (b) shipping customer PII to a
|
|
32
|
+
model provider. Handing an agent raw DB credentials and hoping it only writes `SELECT` is not a
|
|
33
|
+
control. `veil` makes the unsafe paths impossible at the layer the agent can't talk its way past.
|
|
34
|
+
|
|
35
|
+
## Install
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
pip install dbveil # or: uv pip install dbveil
|
|
39
|
+
# optional extras:
|
|
40
|
+
pip install 'dbveil[ner]' # Presidio NER backstop for names/addresses
|
|
41
|
+
pip install 'dbveil[llm]' # local-LLM (Ollama) redaction
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Quickstart
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
veil init # interactive: DB URL + auto-detect PII columns → writes veil.yaml
|
|
48
|
+
veil doctor # verify guard, connectivity, and that READ ONLY actually blocks writes
|
|
49
|
+
veil test-query "SELECT email, created_at FROM users LIMIT 5" # try it without an agent
|
|
50
|
+
veil up # run the MCP proxy on stdio (what Claude Code connects to)
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
Try a write to see the guard refuse it:
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
veil test-query "DELETE FROM users"
|
|
57
|
+
# BLOCKED — write or DDL operation detected: DELETE
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
### Connect Claude Code
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
claude mcp add veil -- veil up
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
or commit a `.mcp.json` so your whole team gets it:
|
|
67
|
+
|
|
68
|
+
```json
|
|
69
|
+
{
|
|
70
|
+
"mcpServers": {
|
|
71
|
+
"veil": { "command": "veil", "args": ["up"], "env": { "VEIL_CONFIG": "veil.yaml" } }
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
Now the agent has three tools — `query`, `list_tables`, `describe_table` — and physically
|
|
77
|
+
cannot write or see raw PII.
|
|
78
|
+
|
|
79
|
+
### Watch it live
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
veil monitor # TUI tailing veil-audit.jsonl: allowed / blocked / redaction counts
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
## Configuration
|
|
86
|
+
|
|
87
|
+
`veil init` writes a commented `veil.yaml`. Full reference in
|
|
88
|
+
[`examples/veil.example.yaml`](examples/veil.example.yaml). The essentials:
|
|
89
|
+
|
|
90
|
+
```yaml
|
|
91
|
+
database:
|
|
92
|
+
url: ${DATABASE_URL} # env refs kept out of the file
|
|
93
|
+
|
|
94
|
+
guard:
|
|
95
|
+
allow_select_star: false # block SELECT * on PII tables; force explicit columns
|
|
96
|
+
max_rows: 1000
|
|
97
|
+
statement_timeout_ms: 15000
|
|
98
|
+
pii_tables: [contacts, users]
|
|
99
|
+
|
|
100
|
+
redact:
|
|
101
|
+
builtin_patterns: { email: true, phone: true, credit_card: true, ssn: true, ip: false }
|
|
102
|
+
columns:
|
|
103
|
+
- { column: email, strategy: hash } # sha256, still join-able
|
|
104
|
+
- { column: full_name, strategy: mask } # -> [redacted]
|
|
105
|
+
- { column: ssn, strategy: partial, keep: 4 }
|
|
106
|
+
ner: { enabled: false, engine: presidio } # optional backstop
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
## How redaction is layered (and its honest limits)
|
|
110
|
+
|
|
111
|
+
`veil` defends from the **deterministic** side first, because that's the only kind you can trust
|
|
112
|
+
not to leak:
|
|
113
|
+
|
|
114
|
+
| Layer | What it catches | Deterministic? |
|
|
115
|
+
|---|---|---|
|
|
116
|
+
| **Column rules** | Known PII columns (`email`, `ssn`, …) by name | ✅ yes |
|
|
117
|
+
| **Built-in regex** | Emails, phones, Luhn-valid cards, SSNs, IPs — even aliased or in free-text | ✅ yes |
|
|
118
|
+
| **NER (Presidio)** | Names / addresses in free-text the above miss | ⚠️ probabilistic |
|
|
119
|
+
| **LLM (Ollama)** | Same, via a local model | ⚠️ probabilistic, experimental |
|
|
120
|
+
|
|
121
|
+
**Use the probabilistic layers only as a backstop.** ML/NER *will* eventually miss a name or an
|
|
122
|
+
oddly-formatted address — that's a leak. For columns you already know are sensitive, the column
|
|
123
|
+
rules are the real control. The LLM redactor fails *closed*: if the model errors, the cell is
|
|
124
|
+
masked, never passed through.
|
|
125
|
+
|
|
126
|
+
## Security model
|
|
127
|
+
|
|
128
|
+
- **Two independent read-only layers.** The parser rejects non-reads, *and* every query runs
|
|
129
|
+
inside a `SET TRANSACTION READ ONLY` transaction — so even a parser gap can't write.
|
|
130
|
+
- **Give veil a least-privilege credential.** Best practice is a `GRANT SELECT`-only database
|
|
131
|
+
role (ideally on a read replica). Then "read-only" is enforced by the database itself, and the
|
|
132
|
+
credential `veil` holds is low-blast-radius: a leak exposes already-masked reads and can write
|
|
133
|
+
nothing. `veil doctor` confirms the READ ONLY transaction rejects writes against your DB.
|
|
134
|
+
- **PII never leaves your machine unmasked.** Redaction happens in-process, before results are
|
|
135
|
+
serialized to the MCP client.
|
|
136
|
+
|
|
137
|
+
## Secure connectivity
|
|
138
|
+
|
|
139
|
+
`veil` connects to whatever DSN you give it, so the network path is yours to choose:
|
|
140
|
+
|
|
141
|
+
- **Tailscale** — put your DB behind a tailnet and point `database.url` at the tailnet host. No
|
|
142
|
+
public DB port.
|
|
143
|
+
- **Short-lived credentials** — `${DATABASE_URL}` is expanded at load, so you can inject an
|
|
144
|
+
ephemeral token (RDS IAM auth, Cloud SQL IAM, a Vault dynamic user) instead of a static
|
|
145
|
+
password.
|
|
146
|
+
- **Railway / managed PaaS** — use the provided TLS endpoint with a dedicated read-only role.
|
|
147
|
+
|
|
148
|
+
## Roadmap
|
|
149
|
+
|
|
150
|
+
- **Postgres wire-protocol frontend** — so `psql`, BI tools, and any client (not just MCP) get
|
|
151
|
+
the same guard + redaction. The pipeline is already frontend-agnostic.
|
|
152
|
+
- **More engines** — MySQL, SQLite (the guard's parser is the only Postgres-specific piece; it's
|
|
153
|
+
a pluggable backend).
|
|
154
|
+
- **Schema-aware lineage** — resolve aliased PII columns back to their source table.
|
|
155
|
+
|
|
156
|
+
## Development
|
|
157
|
+
|
|
158
|
+
```bash
|
|
159
|
+
uv venv && source .venv/bin/activate
|
|
160
|
+
uv pip install -e '.[dev]'
|
|
161
|
+
pytest
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
## License
|
|
165
|
+
|
|
166
|
+
MIT
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# veil configuration — https://github.com/mathu97/dbveil
|
|
2
|
+
# Copy to veil.yaml and edit. Secrets should stay in env vars (${VAR} is expanded at load).
|
|
3
|
+
|
|
4
|
+
database:
|
|
5
|
+
url: ${DATABASE_URL} # e.g. postgresql://ai_analyst:***@db.internal:5432/app
|
|
6
|
+
|
|
7
|
+
guard:
|
|
8
|
+
allow_select_star: false # block SELECT * on PII tables; force explicit column lists
|
|
9
|
+
max_rows: 1000 # cap rows returned to the agent
|
|
10
|
+
statement_timeout_ms: 15000 # kill slow queries
|
|
11
|
+
pii_tables: # SELECT * is rejected on these unless allow_select_star is true
|
|
12
|
+
- contacts
|
|
13
|
+
- users
|
|
14
|
+
|
|
15
|
+
redact:
|
|
16
|
+
# Deterministic, always-on regex redaction for structured PII. Catches values
|
|
17
|
+
# even when aliased (SELECT email AS e) or buried in free-text columns.
|
|
18
|
+
builtin_patterns:
|
|
19
|
+
email: true
|
|
20
|
+
phone: true
|
|
21
|
+
credit_card: true # Luhn-checked to cut false positives
|
|
22
|
+
ssn: true
|
|
23
|
+
ip: false
|
|
24
|
+
hash_salt: "" # set a stable secret to keep hashed values join-able across runs
|
|
25
|
+
|
|
26
|
+
# Column-level rules applied by output column name.
|
|
27
|
+
# strategy: mask (-> [redacted]) | null | hash (sha256, join-able) | partial (keep last N)
|
|
28
|
+
columns:
|
|
29
|
+
- { column: email, strategy: hash }
|
|
30
|
+
- { column: phone, strategy: mask }
|
|
31
|
+
- { column: full_name, strategy: mask }
|
|
32
|
+
- { column: ssn, strategy: partial, keep: 4 }
|
|
33
|
+
|
|
34
|
+
# Optional probabilistic NER for free-text PII (names, addresses) the rules above miss.
|
|
35
|
+
# Backstop only, never the sole control. Needs: pip install 'dbveil[ner]' (or [llm])
|
|
36
|
+
ner:
|
|
37
|
+
enabled: false
|
|
38
|
+
engine: presidio # presidio | llm
|
|
39
|
+
entities: [PERSON, LOCATION, EMAIL_ADDRESS, PHONE_NUMBER]
|
|
40
|
+
score_threshold: 0.5
|
|
41
|
+
ollama_url: http://localhost:11434
|
|
42
|
+
ollama_model: llama3.2
|
|
43
|
+
|
|
44
|
+
audit_log: veil-audit.jsonl
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "dbveil"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "A local read-only, PII-redacting proxy that lets AI agents query your database safely."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
requires-python = ">=3.10"
|
|
12
|
+
authors = [{ name = "Mathusan Selvarajah" }]
|
|
13
|
+
keywords = ["database", "postgres", "mcp", "pii", "redaction", "ai", "read-only", "proxy", "claude"]
|
|
14
|
+
dependencies = [
|
|
15
|
+
"asyncpg>=0.29",
|
|
16
|
+
"pglast>=6.0",
|
|
17
|
+
"pydantic>=2.6",
|
|
18
|
+
"pyyaml>=6.0",
|
|
19
|
+
"typer>=0.12",
|
|
20
|
+
"rich>=13.7",
|
|
21
|
+
"mcp>=1.2",
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
[project.optional-dependencies]
|
|
25
|
+
ner = ["presidio-analyzer>=2.2", "presidio-anonymizer>=2.2"]
|
|
26
|
+
llm = ["httpx>=0.27"]
|
|
27
|
+
dev = ["pytest>=8.0", "pytest-asyncio>=0.23"]
|
|
28
|
+
|
|
29
|
+
[project.scripts]
|
|
30
|
+
veil = "veil.cli:app"
|
|
31
|
+
dbveil = "veil.cli:app"
|
|
32
|
+
|
|
33
|
+
[project.urls]
|
|
34
|
+
Homepage = "https://github.com/mathu97/dbveil"
|
|
35
|
+
Repository = "https://github.com/mathu97/dbveil"
|
|
36
|
+
|
|
37
|
+
[tool.hatch.build.targets.wheel]
|
|
38
|
+
packages = ["veil"]
|
|
39
|
+
|
|
40
|
+
[tool.pytest.ini_options]
|
|
41
|
+
asyncio_mode = "auto"
|
|
42
|
+
testpaths = ["tests"]
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
from veil.guard import check_query
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def test_plain_select_allowed():
    """A SELECT with an explicit column list passes the guard."""
    verdict = check_query("SELECT id, name FROM users")
    assert verdict.allowed
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def test_join_and_aggregate_allowed():
    """A JOIN combined with a GROUP BY aggregate passes the guard."""
    verdict = check_query(
        "SELECT u.id, count(*) FROM users u JOIN orders o ON o.user_id = u.id GROUP BY u.id"
    )
    assert verdict.allowed
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def test_cte_select_allowed():
    """A WITH clause whose body is a plain SELECT passes the guard."""
    verdict = check_query(
        "WITH recent AS (SELECT * FROM logs WHERE ts > now() - interval '1 day') SELECT count(*) FROM recent"
    )
    assert verdict.allowed
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def test_show_allowed():
    """A SHOW statement passes the guard."""
    verdict = check_query("SHOW server_version")
    assert verdict.allowed
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def test_explain_allowed_but_analyze_blocked():
    """Plain EXPLAIN passes the guard; EXPLAIN ANALYZE is rejected."""
    plain = check_query("EXPLAIN SELECT 1")
    assert plain.allowed

    analyzed = check_query("EXPLAIN ANALYZE SELECT 1")
    assert not analyzed.allowed
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def test_writes_blocked():
    """Each listed DML, DDL, and GRANT statement is rejected by the guard."""
    rejected_statements = (
        "INSERT INTO t (x) VALUES (1)",
        "UPDATE t SET x = 1",
        "DELETE FROM t",
        "DROP TABLE t",
        "TRUNCATE t",
        "ALTER TABLE t ADD COLUMN y int",
        "CREATE TABLE t (x int)",
        "GRANT SELECT ON t TO public",
    )
    for sql in rejected_statements:
        verdict = check_query(sql)
        # The statement doubles as the failure message so the offender is obvious.
        assert not verdict.allowed, sql
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def test_data_modifying_cte_blocked():
    """A DELETE hidden inside a CTE is rejected even though the outer statement is a SELECT."""
    verdict = check_query("WITH w AS (DELETE FROM t RETURNING *) SELECT * FROM w")
    assert not verdict.allowed
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def test_multi_statement_blocked():
    """Two statements separated by a semicolon are rejected."""
    verdict = check_query("SELECT 1; DROP TABLE t")
    assert not verdict.allowed
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def test_select_into_blocked():
    """SELECT ... INTO is rejected by the guard."""
    verdict = check_query("SELECT * INTO backup FROM users")
    assert not verdict.allowed
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def test_locking_clause_blocked():
    """A SELECT carrying a FOR UPDATE locking clause is rejected."""
    verdict = check_query("SELECT * FROM users FOR UPDATE")
    assert not verdict.allowed
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def test_select_star_on_pii_table_blocked():
    """SELECT * against a table listed in pii_tables is rejected by default."""
    assert not check_query("SELECT * FROM contacts", pii_tables=["contacts"]).allowed
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def test_select_star_on_non_pii_table_allowed():
    """SELECT * against a table that is not in pii_tables passes the guard."""
    verdict = check_query("SELECT * FROM metrics", pii_tables=["contacts"])
    assert verdict.allowed
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def test_select_star_allowed_when_configured():
    """With allow_select_star=True, SELECT * passes even on a table listed in pii_tables."""
    verdict = check_query(
        "SELECT * FROM contacts",
        allow_select_star=True,
        pii_tables=["contacts"],
    )
    assert verdict.allowed
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def test_garbage_blocked():
    """Non-SQL text and the empty string are both rejected."""
    for bogus in ("this is not sql", ""):
        assert not check_query(bogus).allowed
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
from veil.config import BuiltinPatterns, ColumnRule, RedactConfig, RedactStrategy
|
|
2
|
+
from veil.redact import Redactor
|
|
3
|
+
from veil.redact.column_rules import apply_column_rules
|
|
4
|
+
from veil.redact.patterns import redact_text
|
|
5
|
+
from veil.result import ResultSet
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def test_column_mask():
    """A rule with no explicit strategy replaces the matching cell with "[redacted]"."""
    table = [[1, "alice@example.com"]]
    rules = [ColumnRule(column="email")]

    redacted_count = apply_column_rules(["id", "email"], table, rules)

    # Rows are rewritten in place; the return value is the redaction count.
    assert table[0][1] == "[redacted]"
    assert redacted_count == 1
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def test_column_null():
    """The NULL strategy replaces the matching cell with None."""
    table = [["secret"]]
    null_rule = ColumnRule(column="x", strategy=RedactStrategy.NULL)

    apply_column_rules(["x"], table, [null_rule])

    assert table[0][0] is None
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def test_column_hash_is_deterministic():
    """Hashing the same value twice yields the same "sha256:"-prefixed output."""
    first = [["a@b.com"]]
    second = [["a@b.com"]]
    hash_rules = [ColumnRule(column="e", strategy=RedactStrategy.HASH)]

    for table in (first, second):
        apply_column_rules(["e"], table, hash_rules)

    assert first[0][0] == second[0][0]
    assert first[0][0].startswith("sha256:")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def test_column_partial():
    """The PARTIAL strategy with keep=4 keeps the last four characters and stars the front."""
    table = [["123456789"]]
    partial_rule = ColumnRule(column="ssn", strategy=RedactStrategy.PARTIAL, keep=4)

    apply_column_rules(["ssn"], table, [partial_rule])

    masked = table[0][0]
    assert masked.endswith("6789")
    assert masked.startswith("*")
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def test_pattern_email_phone_ssn():
    """Default patterns replace an email, a phone number, and an SSN in free text."""
    sample = "reach alice@example.com or 415-555-2671, ssn 123-45-6789"

    text, hits = redact_text(sample, BuiltinPatterns())

    for placeholder in ("[email]", "[phone]", "[ssn]"):
        assert placeholder in text
    assert hits == 3
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def test_pattern_credit_card_luhn():
    """4111111111111111 is redacted as a card; 1234567890123456 (fails Luhn) is not counted."""
    text, hits = redact_text("card 4111111111111111 here", BuiltinPatterns())
    assert "[card]" in text
    assert hits == 1

    # Only the redaction count matters for the non-card number.
    _, misses = redact_text("not a card 1234567890123456", BuiltinPatterns())
    assert misses == 0
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def test_pattern_ip_opt_in():
    """IP redaction happens only when BuiltinPatterns is built with ip=True."""
    sample = "host 10.0.0.1"

    _, disabled_hits = redact_text(sample, BuiltinPatterns(ip=False))
    enabled_text, _ = redact_text(sample, BuiltinPatterns(ip=True))

    assert disabled_hits == 0
    assert "[ip]" in enabled_text
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def test_redactor_end_to_end():
    """Redactor.apply hashes the configured column and pattern-redacts a phone number in free text."""
    hash_email = ColumnRule(column="email", strategy=RedactStrategy.HASH)
    config = RedactConfig(columns=[hash_email])
    result = ResultSet(
        columns=["id", "email", "note"],
        rows=[[1, "a@b.com", "call me at 415-555-2671"]],
        row_count=1,
    )

    total = Redactor(config).apply(result)

    row = result.rows[0]
    assert row[1].startswith("sha256:")
    assert "[phone]" in row[2]
    # One column-rule hit plus one pattern hit at minimum.
    assert total >= 2
|