dbveil 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dbveil-0.1.0.dist-info/METADATA +194 -0
- dbveil-0.1.0.dist-info/RECORD +22 -0
- dbveil-0.1.0.dist-info/WHEEL +4 -0
- dbveil-0.1.0.dist-info/entry_points.txt +3 -0
- dbveil-0.1.0.dist-info/licenses/LICENSE +21 -0
- veil/__init__.py +14 -0
- veil/__main__.py +3 -0
- veil/audit.py +31 -0
- veil/cli.py +284 -0
- veil/config.py +101 -0
- veil/executor.py +76 -0
- veil/guard.py +107 -0
- veil/mcp_server.py +59 -0
- veil/pipeline.py +79 -0
- veil/redact/__init__.py +43 -0
- veil/redact/column_rules.py +47 -0
- veil/redact/llm.py +50 -0
- veil/redact/ner.py +36 -0
- veil/redact/patterns.py +59 -0
- veil/result.py +11 -0
- veil/serialize.py +27 -0
- veil/tui.py +68 -0
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dbveil
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A local read-only, PII-redacting proxy that lets AI agents query your database safely.
|
|
5
|
+
Project-URL: Homepage, https://github.com/mathu97/dbveil
|
|
6
|
+
Project-URL: Repository, https://github.com/mathu97/dbveil
|
|
7
|
+
Author: Mathusan Selvarajah
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: ai,claude,database,mcp,pii,postgres,proxy,read-only,redaction
|
|
11
|
+
Requires-Python: >=3.10
|
|
12
|
+
Requires-Dist: asyncpg>=0.29
|
|
13
|
+
Requires-Dist: mcp>=1.2
|
|
14
|
+
Requires-Dist: pglast>=6.0
|
|
15
|
+
Requires-Dist: pydantic>=2.6
|
|
16
|
+
Requires-Dist: pyyaml>=6.0
|
|
17
|
+
Requires-Dist: rich>=13.7
|
|
18
|
+
Requires-Dist: typer>=0.12
|
|
19
|
+
Provides-Extra: dev
|
|
20
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
|
|
21
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
22
|
+
Provides-Extra: llm
|
|
23
|
+
Requires-Dist: httpx>=0.27; extra == 'llm'
|
|
24
|
+
Provides-Extra: ner
|
|
25
|
+
Requires-Dist: presidio-analyzer>=2.2; extra == 'ner'
|
|
26
|
+
Requires-Dist: presidio-anonymizer>=2.2; extra == 'ner'
|
|
27
|
+
Description-Content-Type: text/markdown
|
|
28
|
+
|
|
29
|
+
# veil
|
|
30
|
+
|
|
31
|
+
**A local read-only, PII-redacting proxy that lets AI agents query your database safely.**
|
|
32
|
+
|
|
33
|
+
Point Claude Code (or any MCP client) at `veil` instead of your database. Every query is
|
|
34
|
+
forced through three deterministic guarantees before a single row reaches the model:
|
|
35
|
+
|
|
36
|
+
1. **Read-only guard** — the query is parsed with Postgres's real grammar (`libpg_query`).
|
|
37
|
+
Only `SELECT` / `SHOW` / `EXPLAIN` survive. Writes, DDL, multi-statements, data-modifying
|
|
38
|
+
CTEs, `SELECT INTO`, and row locks are rejected *before execution* — not by asking the model
|
|
39
|
+
nicely, by refusing to run them.
|
|
40
|
+
2. **PII redaction** — results are scrubbed before they leave your machine: deterministic
|
|
41
|
+
column rules + always-on regex for structured PII (emails, phones, cards, SSNs), with an
|
|
42
|
+
optional NER/LLM backstop for free-text.
|
|
43
|
+
3. **Audit** — every query and verdict is appended to a log you can tail live in a TUI.
|
|
44
|
+
|
|
45
|
+
A guarded chokepoint in front of the DB, shrunk to a single open-source command with zero
|
|
46
|
+
infrastructure to stand up.
|
|
47
|
+
|
|
48
|
+
```
|
|
49
|
+
Claude Code ──MCP──▶ veil ──READ ONLY txn──▶ your database
|
|
50
|
+
│
|
|
51
|
+
├─ guard: parse → allow SELECT only
|
|
52
|
+
├─ redact: column rules + regex + (optional) NER/LLM
|
|
53
|
+
└─ audit: veil-audit.jsonl
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Why
|
|
57
|
+
|
|
58
|
+
You want an agent to act as a data analyst over real tables — "compare what we drafted vs what
|
|
59
|
+
was actually sent" — without (a) risking a destructive query or (b) shipping customer PII to a
|
|
60
|
+
model provider. Handing an agent raw DB credentials and hoping it only writes `SELECT` is not a
|
|
61
|
+
control. `veil` makes the unsafe paths impossible at the layer the agent can't talk its way past.
|
|
62
|
+
|
|
63
|
+
## Install
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
pip install dbveil # or: uv pip install dbveil
|
|
67
|
+
# optional extras:
|
|
68
|
+
pip install 'dbveil[ner]' # Presidio NER backstop for names/addresses
|
|
69
|
+
pip install 'dbveil[llm]' # local-LLM (Ollama) redaction
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
## Quickstart
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
veil init # interactive: DB URL + auto-detect PII columns → writes veil.yaml
|
|
76
|
+
veil doctor # verify guard, connectivity, and that READ ONLY actually blocks writes
|
|
77
|
+
veil test-query "SELECT email, created_at FROM users LIMIT 5" # try it without an agent
|
|
78
|
+
veil up # run the MCP proxy on stdio (what Claude Code connects to)
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Try a write to see the guard refuse it:
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
veil test-query "DELETE FROM users"
|
|
85
|
+
# BLOCKED — write or DDL operation detected: DELETE
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### Connect Claude Code
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
claude mcp add veil -- veil up
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
or commit a `.mcp.json` so your whole team gets it:
|
|
95
|
+
|
|
96
|
+
```json
|
|
97
|
+
{
|
|
98
|
+
"mcpServers": {
|
|
99
|
+
"veil": { "command": "veil", "args": ["up"], "env": { "VEIL_CONFIG": "veil.yaml" } }
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
Now the agent has three tools — `query`, `list_tables`, `describe_table` — and physically
|
|
105
|
+
cannot write or see raw PII.
|
|
106
|
+
|
|
107
|
+
### Watch it live
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
veil monitor # TUI tailing veil-audit.jsonl: allowed / blocked / redaction counts
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
## Configuration
|
|
114
|
+
|
|
115
|
+
`veil init` writes a commented `veil.yaml`. Full reference in
|
|
116
|
+
[`examples/veil.example.yaml`](examples/veil.example.yaml). The essentials:
|
|
117
|
+
|
|
118
|
+
```yaml
|
|
119
|
+
database:
|
|
120
|
+
url: ${DATABASE_URL} # env refs kept out of the file
|
|
121
|
+
|
|
122
|
+
guard:
|
|
123
|
+
allow_select_star: false # block SELECT * on PII tables; force explicit columns
|
|
124
|
+
max_rows: 1000
|
|
125
|
+
statement_timeout_ms: 15000
|
|
126
|
+
pii_tables: [contacts, users]
|
|
127
|
+
|
|
128
|
+
redact:
|
|
129
|
+
builtin_patterns: { email: true, phone: true, credit_card: true, ssn: true, ip: false }
|
|
130
|
+
columns:
|
|
131
|
+
- { column: email, strategy: hash } # sha256, still join-able
|
|
132
|
+
- { column: full_name, strategy: mask } # -> [redacted]
|
|
133
|
+
- { column: ssn, strategy: partial, keep: 4 }
|
|
134
|
+
ner: { enabled: false, engine: presidio } # optional backstop
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
## How redaction is layered (and its honest limits)
|
|
138
|
+
|
|
139
|
+
`veil` defends from the **deterministic** side first, because that's the only kind you can trust
|
|
140
|
+
not to leak:
|
|
141
|
+
|
|
142
|
+
| Layer | What it catches | Deterministic? |
|
|
143
|
+
|---|---|---|
|
|
144
|
+
| **Column rules** | Known PII columns (`email`, `ssn`, …) by name | ✅ yes |
|
|
145
|
+
| **Built-in regex** | Emails, phones, Luhn-valid cards, SSNs, IPs — even aliased or in free-text | ✅ yes |
|
|
146
|
+
| **NER (Presidio)** | Names / addresses in free-text the above miss | ⚠️ probabilistic |
|
|
147
|
+
| **LLM (Ollama)** | Same, via a local model | ⚠️ probabilistic, experimental |
|
|
148
|
+
|
|
149
|
+
**Use the probabilistic layers only as a backstop.** ML/NER *will* eventually miss a name or an
|
|
150
|
+
oddly-formatted address — that's a leak. For columns you already know are sensitive, the column
|
|
151
|
+
rules are the real control. The LLM redactor fails *closed*: if the model errors, the cell is
|
|
152
|
+
masked, never passed through.
|
|
153
|
+
|
|
154
|
+
## Security model
|
|
155
|
+
|
|
156
|
+
- **Two independent read-only layers.** The parser rejects non-reads, *and* every query runs
|
|
157
|
+
inside a `SET TRANSACTION READ ONLY` transaction — so even a parser gap can't write.
|
|
158
|
+
- **Give veil a least-privilege credential.** Best practice is a `GRANT SELECT`-only database
|
|
159
|
+
role (ideally on a read replica). Then "read-only" is enforced by the database itself, and the
|
|
160
|
+
credential `veil` holds is low-blast-radius: a leak exposes already-masked reads and can write
|
|
161
|
+
nothing. `veil doctor` confirms the READ ONLY transaction rejects writes against your DB.
|
|
162
|
+
- **PII never leaves your machine unmasked.** Redaction happens in-process, before results are
|
|
163
|
+
serialized to the MCP client.
|
|
164
|
+
|
|
165
|
+
## Secure connectivity
|
|
166
|
+
|
|
167
|
+
`veil` connects to whatever DSN you give it, so the network path is yours to choose:
|
|
168
|
+
|
|
169
|
+
- **Tailscale** — put your DB behind a tailnet and point `database.url` at the tailnet host. No
|
|
170
|
+
public DB port.
|
|
171
|
+
- **Short-lived credentials** — `${DATABASE_URL}` is expanded at load, so you can inject an
|
|
172
|
+
ephemeral token (RDS IAM auth, Cloud SQL IAM, a Vault dynamic user) instead of a static
|
|
173
|
+
password.
|
|
174
|
+
- **Railway / managed PaaS** — use the provided TLS endpoint with a dedicated read-only role.
|
|
175
|
+
|
|
176
|
+
## Roadmap
|
|
177
|
+
|
|
178
|
+
- **Postgres wire-protocol frontend** — so `psql`, BI tools, and any client (not just MCP) get
|
|
179
|
+
the same guard + redaction. The pipeline is already frontend-agnostic.
|
|
180
|
+
- **More engines** — MySQL, SQLite (the guard's parser is the only Postgres-specific piece; it's
|
|
181
|
+
a pluggable backend).
|
|
182
|
+
- **Schema-aware lineage** — resolve aliased PII columns back to their source table.
|
|
183
|
+
|
|
184
|
+
## Development
|
|
185
|
+
|
|
186
|
+
```bash
|
|
187
|
+
uv venv && source .venv/bin/activate
|
|
188
|
+
uv pip install -e '.[dev]'
|
|
189
|
+
pytest
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
## License
|
|
193
|
+
|
|
194
|
+
MIT
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
veil/__init__.py,sha256=2TDNw-XaEYh2swJ1VNHhXslRwR7YVNzLjq6jj-RBIwI,384
|
|
2
|
+
veil/__main__.py,sha256=wcCrL4PjG51r5wVKqJhcoJPTLfHW0wNbD31DrUN0MWI,28
|
|
3
|
+
veil/audit.py,sha256=tpwBYHpzvj-68bqmxvWW4XSEuQshjBp2tLwA-zgbZYc,932
|
|
4
|
+
veil/cli.py,sha256=Qnjy8_XxWZ9zAJTL19vtxMe1LUZ69y_SyTQOMv9_3U0,9425
|
|
5
|
+
veil/config.py,sha256=CBHQlarJ6By3ChbrY0v3vZXx7b6q6cJopA_j595xS1k,2643
|
|
6
|
+
veil/executor.py,sha256=ryoWalRhEagXBxO7I66PeuCBh7OghqkOlz2QfY2E19g,2805
|
|
7
|
+
veil/guard.py,sha256=-ffgmNmcEDLGCGE-07uNMA5ednSjzU5fR5uRheNG1-I,2967
|
|
8
|
+
veil/mcp_server.py,sha256=McnkOwomGxVNfcjre2eda7abDcz7wthCK2yHH1eDlpw,1960
|
|
9
|
+
veil/pipeline.py,sha256=8LPSYtHEe2bJnow6oM6s3DH2Ig95ky7eqFzk_7W-hP0,2218
|
|
10
|
+
veil/result.py,sha256=i2VR_VRrrdzr1cakYYrrHDviVB04j48DADonzfdE0cA,202
|
|
11
|
+
veil/serialize.py,sha256=oZoBRM93EZ4jOaLRsXFdrTFAnWyVSk63-pyjLiXC_3U,771
|
|
12
|
+
veil/tui.py,sha256=elYFEoUzWRqXX8KYjhZ7hXQSIz5hDB8NGMXV85jdxn8,2207
|
|
13
|
+
veil/redact/__init__.py,sha256=VsAYimZ1hSqNLzwuj1-_Sz5osYj8WuzeOZZI30Sf0HE,1373
|
|
14
|
+
veil/redact/column_rules.py,sha256=Es1AzrOK1T3AJTt-CEc2M3DjTas7v64zaYJJ9DXG8ts,1255
|
|
15
|
+
veil/redact/llm.py,sha256=mIcqaFvae42FMZHebKr3_NqH_Mc9YofSI43MIqicKO0,1726
|
|
16
|
+
veil/redact/ner.py,sha256=MJWtHkAEUqVvQwmGb6ZWCGHxXsr3-JPDI6rxiDlPNPY,1238
|
|
17
|
+
veil/redact/patterns.py,sha256=21DlmfhmFyhZCW03cNNg_ALtIOVzm9RNRwk7LTvunt4,1494
|
|
18
|
+
dbveil-0.1.0.dist-info/METADATA,sha256=P_avXOBFoN_koHHwIH-8NFqEvClV4S9dcH1Vd77KZHM,7411
|
|
19
|
+
dbveil-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
20
|
+
dbveil-0.1.0.dist-info/entry_points.txt,sha256=YUbYmDRANeZqcDa4NUDoEspe3QBgQiAw3Z7aNijGRco,60
|
|
21
|
+
dbveil-0.1.0.dist-info/licenses/LICENSE,sha256=zQYPO93UqEzYO4Sc_JjNqKUpWCqJooEcdbQkCbIelas,1076
|
|
22
|
+
dbveil-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Mathusan Selvarajah
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
veil/__init__.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
4
|
+
|
|
5
|
+
from .config import Config
|
|
6
|
+
from .guard import check_query
|
|
7
|
+
from .pipeline import Pipeline, QueryOutcome
|
|
8
|
+
|
|
9
|
+
# Expose the installed distribution's version as ``__version__``. When the
# "dbveil" distribution metadata cannot be found, use a local placeholder.
try:
    __version__ = version("dbveil")
except PackageNotFoundError:
    __version__ = "0.0.0+local"
|
|
13
|
+
|
|
14
|
+
__all__ = ["Config", "Pipeline", "QueryOutcome", "check_query", "__version__"]
|
veil/__main__.py
ADDED
veil/audit.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from datetime import datetime, timezone
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class AuditLog:
    """Append-only JSON Lines log with one entry per processed query."""

    def __init__(self, path: str) -> None:
        """Store the log location; the file itself is only opened on write."""
        self.path = Path(path)

    def record(self, sql: str, outcome) -> None:
        """Append a single JSON object describing ``sql`` and its ``outcome``.

        ``outcome`` must expose ``blocked_reason``, ``error``, ``row_count``,
        ``redactions``, ``truncated`` and ``duration_ms``. The stored SQL is
        stripped and capped at 2000 characters.
        """
        # A block reason wins over an error; anything else counts as allowed.
        status = "allowed"
        if outcome.blocked_reason:
            status = "blocked"
        elif outcome.error:
            status = "error"

        payload = {
            "ts": datetime.now(timezone.utc).isoformat(),
            "status": status,
            "sql": sql.strip()[:2000],
            "reason": outcome.blocked_reason,
            "error": outcome.error,
            "rows": outcome.row_count,
            "redactions": outcome.redactions,
            "truncated": outcome.truncated,
            "duration_ms": round(outcome.duration_ms, 1),
        }
        with self.path.open("a") as handle:
            handle.write(f"{json.dumps(payload)}\n")
|
veil/cli.py
ADDED
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import os
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
try:
|
|
8
|
+
import readline # noqa: F401 — enables arrow-key line editing in prompts
|
|
9
|
+
except ImportError:
|
|
10
|
+
pass
|
|
11
|
+
|
|
12
|
+
import typer
|
|
13
|
+
from rich.console import Console
|
|
14
|
+
from rich.panel import Panel
|
|
15
|
+
from rich.table import Table
|
|
16
|
+
|
|
17
|
+
from . import __version__
|
|
18
|
+
from .config import Config
|
|
19
|
+
from .executor import Executor
|
|
20
|
+
from .guard import check_query
|
|
21
|
+
|
|
22
|
+
# Typer application; the functions decorated with @app.command() in this
# module are registered on it as sub-commands.
app = typer.Typer(
    add_completion=False,
    help="veil — a local read-only, PII-redacting proxy for safe AI database access.",
)
# Two consoles: `console` for regular output, `err` bound to stderr for
# diagnostics and status messages.
console = Console()
err = Console(stderr=True)

# Lower-case fragments tested as substrings of column names when `veil init`
# introspects a database for likely PII columns.
_PII_HINTS = (
    "email", "e_mail", "phone", "mobile", "fax", "name", "first", "last",
    "address", "street", "city", "zip", "postal", "ssn", "social",
    "dob", "birth", "passport", "license", "ip_address",
)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _resolve_env(url: str) -> str:
|
|
37
|
+
return re.sub(r"\$\{([A-Z0-9_]+)\}", lambda m: os.environ.get(m.group(1), m.group(0)), url)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _load(path):
    """Load the veil config, or report the problem and exit with status 1.

    Args:
        path: Location of the config file, or ``None`` to let ``Config.load``
            fall back to its default path.

    Returns:
        The loaded ``Config``.

    Raises:
        typer.Exit: With code 1 when the file is missing or cannot be loaded;
            the reason is printed on stderr first.
    """
    from rich.markup import escape

    try:
        return Config.load(path)
    except FileNotFoundError as exc:
        # The message is plain text, so escape it before embedding it in markup.
        err.print(f"[red]config error:[/] {escape(str(exc))}")
        raise typer.Exit(1)
    except Exception as exc:
        # Validation errors carry bracketed detail such as "[type=missing, ...]";
        # unescaped, Rich would treat that as a style tag and drop it.
        err.print(f"[red]could not load config:[/] {escape(str(exc))}")
        raise typer.Exit(1)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@app.command()
def version() -> None:
    """Print the veil version."""
    # The docstring is left as-is because Typer shows it as the command help.
    banner = f"veil {__version__}"
    console.print(banner)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@app.command()
def init(
    force: bool = typer.Option(False, "--force", "-f", help="Overwrite an existing config."),
) -> None:
    """Create a veil.yaml config, optionally auto-detecting PII columns."""
    # The docstring doubles as the command's help text, so details live here.
    # Flow: refuse to overwrite an existing config unless --force is given,
    # prompt for the database URL, optionally introspect the database for
    # likely PII columns, then write the rendered config to disk.
    from rich.markup import escape

    path = Config.default_path()
    if path.exists() and not force:
        # The path is arbitrary text; escape it so brackets are not read as markup.
        err.print(f"[yellow]{escape(str(path))} already exists. Use --force to overwrite.[/]")
        raise typer.Exit(1)

    console.print(Panel.fit("[bold]veil init[/] — let's set up safe database access", border_style="cyan"))
    db_url = typer.prompt(
        "Database URL (env refs like ${DATABASE_URL} are kept as-is in the file)",
        default="${DATABASE_URL}",
    )

    rules: list[tuple[str, str, str]] = []
    pii_tables: list[str] = []
    if typer.confirm("Introspect the database now to auto-suggest PII columns?", default=False):
        try:
            # Env references are resolved only for this connection; the file
            # written below keeps the URL exactly as the user typed it.
            rules, pii_tables = asyncio.run(_introspect(_resolve_env(db_url)))
            console.print(f"[green]Found {len(rules)} likely PII column(s) across {len(pii_tables)} table(s).[/]")
        except Exception as exc:
            # Best effort: a failed introspection still yields an editable template.
            err.print(
                f"[yellow]Introspection failed ({escape(str(exc))}). "
                "Writing a template you can edit by hand.[/]"
            )

    path.write_text(_render_config(db_url, rules, pii_tables))
    console.print(f"[bold green]Wrote {escape(str(path))}[/]")
    console.print("Next: [cyan]veil doctor[/] to verify, then [cyan]veil up[/] to run the proxy.")
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
@app.command()
def doctor(
    config: str = typer.Option(None, "--config", "-c", help="Path to veil.yaml."),
) -> None:
    """Verify the guard, database connectivity, and read-only enforcement."""
    # The docstring doubles as the command's help text, so details live here.
    # Three checks are tabulated; the command exits with status 1 unless all pass.
    from rich.markup import escape

    cfg = _load(config)

    table = Table(title="veil doctor", show_header=True, header_style="bold")
    table.add_column("check")
    table.add_column("result")

    # Static self-test of the parser guard: one read must pass, and a DDL
    # statement, a write, a data-modifying CTE and a multi-statement must not.
    guard_ok = (
        check_query("SELECT 1").allowed
        and not check_query("DROP TABLE users").allowed
        and not check_query("UPDATE t SET x = 1").allowed
        and not check_query("WITH w AS (DELETE FROM t RETURNING *) SELECT * FROM w").allowed
        and not check_query("SELECT 1; DROP TABLE t").allowed
    )
    table.add_row("read-only guard (SELECT allowed, writes blocked)", _mark(guard_ok))

    conn_ok = False
    readonly_ok = False
    detail = ""
    try:
        conn_ok, readonly_ok = asyncio.run(_probe(cfg))
    except Exception as exc:
        # Any failure while probing is reported next to the connection row.
        detail = str(exc)

    # The error text is plain text; escape it so bracketed fragments are shown
    # verbatim instead of being interpreted as Rich markup.
    table.add_row(
        "database connection",
        _mark(conn_ok) + (f" [dim]{escape(detail)}[/]" if detail else ""),
    )
    table.add_row("server-side READ ONLY transaction rejects writes", _mark(readonly_ok))

    console.print(table)
    if not (guard_ok and conn_ok and readonly_ok):
        raise typer.Exit(1)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
@app.command(name="test-query")
def test_query(
    sql: str = typer.Argument(..., help="A read-only SQL query to run through veil."),
    config: str = typer.Option(None, "--config", "-c"),
) -> None:
    """Run one query through the full guard + redact pipeline and print the result."""
    # The docstring doubles as the command's help text, so details live here.
    # Blocked queries and errors are shown in a red panel and exit with status 1;
    # otherwise the rows are printed as a table followed by a summary line.
    from rich.markup import escape

    cfg = _load(config)
    outcome = asyncio.run(_run_one(cfg, sql))

    # Reasons, errors, column names and cell values are all data, not markup.
    # Escape every one of them so bracketed text is displayed literally.
    if outcome.blocked_reason:
        console.print(
            Panel(f"[bold red]BLOCKED[/]\n{escape(str(outcome.blocked_reason))}", border_style="red")
        )
        raise typer.Exit(1)
    if outcome.error:
        console.print(
            Panel(f"[bold red]ERROR[/]\n{escape(str(outcome.error))}", border_style="red")
        )
        raise typer.Exit(1)

    from .serialize import to_jsonable

    table = Table(show_header=True, header_style="bold")
    for col in outcome.columns:
        # Headers come from the query (aliases included), so escape them as well.
        table.add_column(escape(str(col)))

    for row in to_jsonable(outcome.rows):
        table.add_row(*[("∅" if c is None else escape(str(c))) for c in row])
    console.print(table)
    console.print(
        f"[dim]{outcome.row_count} row(s) · {outcome.redactions} redaction(s)"
        f"{' · truncated' if outcome.truncated else ''} · {outcome.duration_ms:.0f} ms[/]"
    )
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
@app.command()
def up(
    config: str = typer.Option(None, "--config", "-c", help="Path to veil.yaml."),
) -> None:
    """Run the MCP proxy on stdio (this is what Claude Code connects to)."""
    # The docstring is left as-is because Typer shows it as the command help.
    cfg = _load(config)

    # The startup banner is printed via the stderr console.
    redact_mode = "on" if cfg.redact.columns or cfg.redact.ner.enabled else "patterns-only"
    banner = (
        f"[bold green]veil[/] up · stdio · guard=read-only · "
        f"redact={redact_mode} · "
        f"audit→{cfg.audit_log}"
    )
    err.print(banner)

    # Imported here rather than at module import time, as in the original.
    from .mcp_server import build_server

    server = build_server(cfg)
    server.run()
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
@app.command()
def monitor(
    config: str = typer.Option(None, "--config", "-c"),
) -> None:
    """Live view of the audit log (allowed / blocked / redactions). Ctrl-C to quit."""
    # The docstring is left as-is because Typer shows it as the command help.
    audit_path = _load(config).audit_log

    # The TUI module is imported only after the config has loaded.
    from .tui import run_monitor

    run_monitor(audit_path)
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
async def _introspect(dsn: str) -> tuple[list[tuple[str, str, str]], list[str]]:
    """Suggest redaction rules by scanning column names for PII-like fragments.

    Args:
        dsn: Connection string handed to ``Executor``.

    Returns:
        ``(rules, pii_tables)``. ``rules`` holds one ``(column, strategy, "")``
        tuple per distinct matching column name, in query order. ``pii_tables``
        is the sorted list of table names with at least one matching column.

    Raises:
        Whatever ``Executor._fetch_meta`` or ``Executor.close`` raises.
    """
    ex = Executor(dsn)
    try:
        rs = await ex._fetch_meta(
            "SELECT table_schema, table_name, column_name "
            "FROM information_schema.columns "
            "WHERE table_schema NOT IN ('pg_catalog', 'information_schema') "
            "ORDER BY table_schema, table_name, ordinal_position"
        )
    finally:
        # Close the executor even when the metadata query fails.
        await ex.close()

    rules: list[tuple[str, str, str]] = []
    pii_tables: set[str] = set()
    seen: set[str] = set()
    for _schema, tname, col in rs.rows:
        low = col.lower()
        # Substring match: a column qualifies when any hint occurs in its name.
        if any(h in low for h in _PII_HINTS):
            # One rule per column name, even if several tables share that name.
            if col not in seen:
                # Names containing "email" or "id" get "hash"; the rest get "mask".
                strategy = "hash" if ("email" in low or "id" in low) else "mask"
                rules.append((col, strategy, ""))
                seen.add(col)
            pii_tables.add(tname)
    return rules, sorted(pii_tables)
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
async def _probe(cfg: Config) -> tuple[bool, bool]:
    """Check connectivity and that a write attempt is rejected as read-only.

    Returns ``(conn_ok, readonly_ok)``. A failure of the initial ``SELECT 1``
    propagates to the caller. ``readonly_ok`` is true only when the
    ``CREATE TEMP TABLE`` attempt fails with a message mentioning
    "read-only" or "cannot execute".
    """
    executor = Executor(cfg.database.url)
    try:
        await executor.run("SELECT 1")

        write_rejected = False
        try:
            await executor.run("CREATE TEMP TABLE _veil_probe (x int)")
        except Exception as exc:
            message = str(exc).lower()
            write_rejected = "read-only" in message or "cannot execute" in message

        # Reaching this point means the SELECT succeeded, so the connection works.
        return True, write_rejected
    finally:
        await executor.close()
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
async def _run_one(cfg: Config, sql: str):
    """Run ``sql`` through a fresh ``Pipeline`` and return its outcome.

    The pipeline is closed afterwards whether or not the query raised.
    """
    from .pipeline import Pipeline

    runner = Pipeline(cfg)
    try:
        outcome = await runner.query(sql)
    finally:
        await runner.close()
    return outcome
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def _mark(ok: bool) -> str:
|
|
232
|
+
return "[green]PASS[/]" if ok else "[red]FAIL[/]"
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def _render_config(db_url: str, rules: list[tuple[str, str, str]], pii_tables: list[str]) -> str:
|
|
236
|
+
lines = [
|
|
237
|
+
"# veil configuration — https://github.com/mathu97/dbveil",
|
|
238
|
+
"database:",
|
|
239
|
+
f" url: {db_url}",
|
|
240
|
+
"",
|
|
241
|
+
"guard:",
|
|
242
|
+
" allow_select_star: false # block SELECT * on PII tables; force explicit columns",
|
|
243
|
+
" max_rows: 1000",
|
|
244
|
+
" statement_timeout_ms: 15000",
|
|
245
|
+
" pii_tables:",
|
|
246
|
+
]
|
|
247
|
+
if pii_tables:
|
|
248
|
+
lines += [f" - {t}" for t in pii_tables]
|
|
249
|
+
else:
|
|
250
|
+
lines.append(" [] # tables where SELECT * is always rejected")
|
|
251
|
+
|
|
252
|
+
lines += [
|
|
253
|
+
"",
|
|
254
|
+
"redact:",
|
|
255
|
+
" # Deterministic, always-on regex redaction for structured PII.",
|
|
256
|
+
" builtin_patterns:",
|
|
257
|
+
" email: true",
|
|
258
|
+
" phone: true",
|
|
259
|
+
" credit_card: true",
|
|
260
|
+
" ssn: true",
|
|
261
|
+
" ip: false",
|
|
262
|
+
" hash_salt: \"\" # set a stable secret to keep hashed values join-able across runs",
|
|
263
|
+
" # Column-level rules. strategy: mask | null | hash | partial",
|
|
264
|
+
" columns:",
|
|
265
|
+
]
|
|
266
|
+
if rules:
|
|
267
|
+
for col, strategy, _ in rules:
|
|
268
|
+
lines.append(f" - {{ column: {col}, strategy: {strategy} }}")
|
|
269
|
+
else:
|
|
270
|
+
lines.append(" [] # e.g. - { column: email, strategy: hash }")
|
|
271
|
+
|
|
272
|
+
lines += [
|
|
273
|
+
" # Optional probabilistic NER for free-text PII (names/addresses).",
|
|
274
|
+
" # Backstop only — never the sole control. Needs: pip install 'dbveil[ner]'",
|
|
275
|
+
" ner:",
|
|
276
|
+
" enabled: false",
|
|
277
|
+
" engine: presidio # presidio | llm",
|
|
278
|
+
" entities: [PERSON, LOCATION, EMAIL_ADDRESS, PHONE_NUMBER]",
|
|
279
|
+
" score_threshold: 0.5",
|
|
280
|
+
"",
|
|
281
|
+
"audit_log: veil-audit.jsonl",
|
|
282
|
+
"",
|
|
283
|
+
]
|
|
284
|
+
return "\n".join(lines)
|
veil/config.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import re
|
|
5
|
+
from enum import Enum
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
import yaml
|
|
9
|
+
from pydantic import BaseModel, Field
|
|
10
|
+
|
|
11
|
+
# Matches ${NAME} placeholders, where NAME consists of A-Z, 0-9 and underscores;
# group 1 captures NAME.
_ENV_PATTERN = re.compile(r"\$\{([A-Z0-9_]+)\}")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class RedactStrategy(str, Enum):
    # Strategies a column rule may name; members are str-valued.
    MASK = "mask"  # README example: full_name -> [redacted]
    NULL = "null"  # presumably replaces the value with null — confirm in redact/column_rules.py
    HASH = "hash"  # README example: sha256, still join-able
    PARTIAL = "partial"  # README pairs this with `keep`; exact rule not visible here — confirm
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class ColumnRule(BaseModel):
    # One column-level redaction rule from the `redact.columns` list.
    column: str  # column name the rule applies to
    strategy: RedactStrategy = RedactStrategy.MASK  # defaults to masking
    keep: int = 4  # presumably the number of characters kept by `partial` — TODO confirm
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class BuiltinPatterns(BaseModel):
    # On/off switches for the built-in regex patterns; every pattern except
    # `ip` is enabled by default.
    email: bool = True
    phone: bool = True
    credit_card: bool = True
    ssn: bool = True
    ip: bool = False
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class NerConfig(BaseModel):
    # Settings for the optional NER/LLM redaction layer; disabled by default.
    enabled: bool = False
    engine: str = "presidio"  # the generated config documents "presidio | llm"
    entities: list[str] = Field(
        default_factory=lambda: ["PERSON", "LOCATION", "EMAIL_ADDRESS", "PHONE_NUMBER"]
    )
    score_threshold: float = 0.5  # presumably the minimum detection score — TODO confirm
    # NOTE(review): the two Ollama settings are presumably read only when the
    # engine is "llm" — confirm in redact/llm.py.
    ollama_url: str = "http://localhost:11434"
    ollama_model: str = "llama3.2"
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class RedactConfig(BaseModel):
    # The `redact:` section of the config; every field has a default.
    columns: list[ColumnRule] = Field(default_factory=list)  # column-level rules
    builtin_patterns: BuiltinPatterns = Field(default_factory=BuiltinPatterns)
    ner: NerConfig = Field(default_factory=NerConfig)
    # The generated config advises setting a stable secret here to keep hashed
    # values join-able across runs; how the salt is applied is not visible here.
    hash_salt: str = ""
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class GuardConfig(BaseModel):
    # The `guard:` section of the config; every field has a default.
    # README: false blocks SELECT * on PII tables and forces explicit columns.
    allow_select_star: bool = False
    # The generated config describes these as tables where SELECT * is always rejected.
    pii_tables: list[str] = Field(default_factory=list)
    max_rows: int = 1000  # presumably a cap on returned rows — enforcement not visible here
    statement_timeout_ms: int = 15000  # presumably a per-statement timeout in ms — TODO confirm
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class DatabaseConfig(BaseModel):
    # Required connection string. Config.load expands ${VAR} references in the
    # parsed YAML before this model is validated.
    url: str
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class Config(BaseModel):
    # Root model for veil.yaml: `database` is required, every other section
    # falls back to its defaults.
    database: DatabaseConfig
    guard: GuardConfig = Field(default_factory=GuardConfig)
    redact: RedactConfig = Field(default_factory=RedactConfig)
    audit_log: str = "veil-audit.jsonl"

    @classmethod
    def default_path(cls) -> Path:
        """Return the config location: ``$VEIL_CONFIG`` if set, else ``veil.yaml``."""
        location = os.environ.get("VEIL_CONFIG", "veil.yaml")
        return Path(location)

    @classmethod
    def load(cls, path: str | Path | None = None) -> "Config":
        """Read, env-expand and validate the config file.

        Args:
            path: File to read; a falsy value selects ``default_path()``.

        Raises:
            FileNotFoundError: If the file does not exist.
        """
        source = cls.default_path() if not path else Path(path)
        if not source.exists():
            raise FileNotFoundError(
                f"config not found at {source}. Run `veil init` to create one."
            )
        parsed = yaml.safe_load(source.read_text())
        data = parsed or {}
        # Mapping values are rewritten on the same dict, so the return value is not needed.
        _expand_env(data)
        return cls(**data)

    def dump_yaml(self) -> str:
        """Serialise this config to YAML, keeping the field order."""
        payload = self.model_dump(mode="json")
        return yaml.safe_dump(payload, sort_keys=False)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _expand_env(node):
    """Recursively expand ``${NAME}`` placeholders inside a parsed YAML value.

    Dicts are updated in place and returned; lists are rebuilt; strings get
    every placeholder replaced by the matching environment variable, with
    unset variables left untouched. Any other value is returned as-is.
    """
    if isinstance(node, str):
        return _ENV_PATTERN.sub(
            lambda match: os.environ.get(match.group(1), match.group(0)),
            node,
        )
    if isinstance(node, list):
        return [_expand_env(item) for item in node]
    if isinstance(node, dict):
        # Existing keys are reassigned, so the caller's dict object is mutated.
        for key in node:
            node[key] = _expand_env(node[key])
    return node
|