dbveil 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,61 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ permissions:
8
+ contents: read
9
+
10
+ jobs:
11
+ build:
12
+ name: Build & test
13
+ runs-on: ubuntu-latest
14
+ steps:
15
+ - uses: actions/checkout@v4
16
+
17
+ - uses: astral-sh/setup-uv@v5
18
+
19
+ - name: Create venv & install (with dev extras)
20
+ run: |
21
+ uv venv --python 3.12
22
+ uv pip install -e '.[dev]'
23
+
24
+ - name: Test
25
+ run: .venv/bin/pytest -q
26
+
27
+ - name: Verify version matches release tag
28
+ run: |
29
+ TAG="${GITHUB_REF_NAME#v}"
30
+ PKG=$(grep -m1 '^version = ' pyproject.toml | sed -E 's/^version = "(.*)"/\1/')
31
+ echo "release tag: $TAG | pyproject version: $PKG"
32
+ if [ "$TAG" != "$PKG" ]; then
33
+ echo "::error::Release tag ($TAG) != pyproject version ($PKG). Bump version in pyproject.toml before releasing."
34
+ exit 1
35
+ fi
36
+
37
+ - name: Build sdist + wheel
38
+ run: uv build
39
+
40
+ - uses: actions/upload-artifact@v4
41
+ with:
42
+ name: dist
43
+ path: dist/
44
+
45
+ publish:
46
+ name: Publish to PyPI
47
+ needs: build
48
+ runs-on: ubuntu-latest
49
+ environment:
50
+ name: pypi
51
+ url: https://pypi.org/p/dbveil
52
+ permissions:
53
+ id-token: write # required for trusted publishing (OIDC)
54
+ steps:
55
+ - uses: actions/download-artifact@v4
56
+ with:
57
+ name: dist
58
+ path: dist/
59
+
60
+ - name: Publish to PyPI
61
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,14 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ .venv/
4
+ venv/
5
+ *.egg-info/
6
+ dist/
7
+ build/
8
+ .pytest_cache/
9
+ .ruff_cache/
10
+
11
+ # local config + secrets + audit output
12
+ veil.yaml
13
+ *-audit.jsonl
14
+ .env
dbveil-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Mathusan Selvarajah
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
dbveil-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,194 @@
1
+ Metadata-Version: 2.4
2
+ Name: dbveil
3
+ Version: 0.1.0
4
+ Summary: A local read-only, PII-redacting proxy that lets AI agents query your database safely.
5
+ Project-URL: Homepage, https://github.com/mathu97/dbveil
6
+ Project-URL: Repository, https://github.com/mathu97/dbveil
7
+ Author: Mathusan Selvarajah
8
+ License-Expression: MIT
9
+ License-File: LICENSE
10
+ Keywords: ai,claude,database,mcp,pii,postgres,proxy,read-only,redaction
11
+ Requires-Python: >=3.10
12
+ Requires-Dist: asyncpg>=0.29
13
+ Requires-Dist: mcp>=1.2
14
+ Requires-Dist: pglast>=6.0
15
+ Requires-Dist: pydantic>=2.6
16
+ Requires-Dist: pyyaml>=6.0
17
+ Requires-Dist: rich>=13.7
18
+ Requires-Dist: typer>=0.12
19
+ Provides-Extra: dev
20
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
21
+ Requires-Dist: pytest>=8.0; extra == 'dev'
22
+ Provides-Extra: llm
23
+ Requires-Dist: httpx>=0.27; extra == 'llm'
24
+ Provides-Extra: ner
25
+ Requires-Dist: presidio-analyzer>=2.2; extra == 'ner'
26
+ Requires-Dist: presidio-anonymizer>=2.2; extra == 'ner'
27
+ Description-Content-Type: text/markdown
28
+
29
+ # veil
30
+
31
+ **A local read-only, PII-redacting proxy that lets AI agents query your database safely.**
32
+
33
+ Point Claude Code (or any MCP client) at `veil` instead of your database. Every query is
34
+ forced through three deterministic guarantees before a single row reaches the model:
35
+
36
+ 1. **Read-only guard** — the query is parsed with Postgres's real grammar (`libpg_query`).
37
+ Only `SELECT` / `SHOW` / `EXPLAIN` (without `ANALYZE`) survive. Writes, DDL, multi-statements, data-modifying
38
+ CTEs, `SELECT INTO`, and row locks are rejected *before execution* — not by asking the model
39
+ nicely, by refusing to run them.
40
+ 2. **PII redaction** — results are scrubbed before they leave your machine: deterministic
41
+ column rules + always-on regex for structured PII (emails, phones, cards, SSNs), with an
42
+ optional NER/LLM backstop for free-text.
43
+ 3. **Audit** — every query and verdict is appended to a log you can tail live in a TUI.
44
+
45
+ A guarded chokepoint in front of the DB, shrunk to a single open-source command with zero
46
+ infrastructure to stand up.
47
+
48
+ ```
49
+ Claude Code ──MCP──▶ veil ──READ ONLY txn──▶ your database
50
+
51
+ ├─ guard: parse → allow SELECT / SHOW / EXPLAIN only
52
+ ├─ redact: column rules + regex + (optional) NER/LLM
53
+ └─ audit: veil-audit.jsonl
54
+ ```
55
+
56
+ ## Why
57
+
58
+ You want an agent to act as a data analyst over real tables — "compare what we drafted vs what
59
+ was actually sent" — without (a) risking a destructive query or (b) shipping customer PII to a
60
+ model provider. Handing an agent raw DB credentials and hoping it only writes `SELECT` is not a
61
+ control. `veil` makes the unsafe paths impossible at the layer the agent can't talk its way past.
62
+
63
+ ## Install
64
+
65
+ ```bash
66
+ pip install dbveil # or: uv pip install dbveil
67
+ # optional extras:
68
+ pip install 'dbveil[ner]' # Presidio NER backstop for names/addresses
69
+ pip install 'dbveil[llm]' # local-LLM (Ollama) redaction
70
+ ```
71
+
72
+ ## Quickstart
73
+
74
+ ```bash
75
+ veil init # interactive: DB URL + auto-detect PII columns → writes veil.yaml
76
+ veil doctor # verify guard, connectivity, and that READ ONLY actually blocks writes
77
+ veil test-query "SELECT email, created_at FROM users LIMIT 5" # try it without an agent
78
+ veil up # run the MCP proxy on stdio (what Claude Code connects to)
79
+ ```
80
+
81
+ Try a write to see the guard refuse it:
82
+
83
+ ```bash
84
+ veil test-query "DELETE FROM users"
85
+ # BLOCKED — write or DDL operation detected: DELETE
86
+ ```
87
+
88
+ ### Connect Claude Code
89
+
90
+ ```bash
91
+ claude mcp add veil -- veil up
92
+ ```
93
+
94
+ or commit a `.mcp.json` so your whole team gets it:
95
+
96
+ ```json
97
+ {
98
+ "mcpServers": {
99
+ "veil": { "command": "veil", "args": ["up"], "env": { "VEIL_CONFIG": "veil.yaml" } }
100
+ }
101
+ }
102
+ ```
103
+
104
+ Now the agent has three tools — `query`, `list_tables`, `describe_table` — and physically
105
+ cannot write or see raw PII.
106
+
107
+ ### Watch it live
108
+
109
+ ```bash
110
+ veil monitor # TUI tailing veil-audit.jsonl: allowed / blocked / redaction counts
111
+ ```
112
+
113
+ ## Configuration
114
+
115
+ `veil init` writes a commented `veil.yaml`. Full reference in
116
+ [`examples/veil.example.yaml`](examples/veil.example.yaml). The essentials:
117
+
118
+ ```yaml
119
+ database:
120
+ url: ${DATABASE_URL} # env var reference keeps the secret out of the file
121
+
122
+ guard:
123
+ allow_select_star: false # block SELECT * on PII tables; force explicit columns
124
+ max_rows: 1000
125
+ statement_timeout_ms: 15000
126
+ pii_tables: [contacts, users]
127
+
128
+ redact:
129
+ builtin_patterns: { email: true, phone: true, credit_card: true, ssn: true, ip: false }
130
+ columns:
131
+ - { column: email, strategy: hash } # sha256, still join-able
132
+ - { column: full_name, strategy: mask } # -> [redacted]
133
+ - { column: ssn, strategy: partial, keep: 4 }
134
+ ner: { enabled: false, engine: presidio } # optional backstop
135
+ ```
136
+
137
+ ## How redaction is layered (and its honest limits)
138
+
139
+ `veil` defends from the **deterministic** side first, because that's the only kind you can trust
140
+ not to leak:
141
+
142
+ | Layer | What it catches | Deterministic? |
143
+ |---|---|---|
144
+ | **Column rules** | Known PII columns (`email`, `ssn`, …) by name | ✅ yes |
145
+ | **Built-in regex** | Emails, phones, Luhn-valid cards, SSNs, IPs — even aliased or in free-text | ✅ yes |
146
+ | **NER (Presidio)** | Names / addresses in free-text the above miss | ⚠️ probabilistic |
147
+ | **LLM (Ollama)** | Same, via a local model | ⚠️ probabilistic, experimental |
148
+
149
+ **Use the probabilistic layers only as a backstop.** ML/NER *will* eventually miss a name or an
150
+ oddly-formatted address — that's a leak. For columns you already know are sensitive, the column
151
+ rules are the real control. The LLM redactor fails *closed*: if the model errors, the cell is
152
+ masked, never passed through.
153
+
154
+ ## Security model
155
+
156
+ - **Two independent read-only layers.** The parser rejects non-reads, *and* every query runs
157
+ inside a `SET TRANSACTION READ ONLY` transaction — so even a parser gap can't write.
158
+ - **Give veil a least-privilege credential.** Best practice is a `GRANT SELECT`-only database
159
+ role (ideally on a read replica). Then "read-only" is enforced by the database itself, and the
160
+ credential `veil` holds is low-blast-radius: a leak exposes already-masked reads and can write
161
+ nothing. `veil doctor` confirms the READ ONLY transaction rejects writes against your DB.
162
+ - **PII never leaves your machine unmasked.** Redaction happens in-process, before results are
163
+ serialized to the MCP client.
164
+
165
+ ## Secure connectivity
166
+
167
+ `veil` connects to whatever DSN you give it, so the network path is yours to choose:
168
+
169
+ - **Tailscale** — put your DB behind a tailnet and point `database.url` at the tailnet host. No
170
+ public DB port.
171
+ - **Short-lived credentials** — `${DATABASE_URL}` is expanded at load, so you can inject an
172
+ ephemeral token (RDS IAM auth, Cloud SQL IAM, a Vault dynamic user) instead of a static
173
+ password.
174
+ - **Railway / managed PaaS** — use the provided TLS endpoint with a dedicated read-only role.
175
+
176
+ ## Roadmap
177
+
178
+ - **Postgres wire-protocol frontend** — so `psql`, BI tools, and any client (not just MCP) get
179
+ the same guard + redaction. The pipeline is already frontend-agnostic.
180
+ - **More engines** — MySQL, SQLite (the guard's parser is the only Postgres-specific piece; it's
181
+ a pluggable backend).
182
+ - **Schema-aware lineage** — resolve aliased PII columns back to their source table.
183
+
184
+ ## Development
185
+
186
+ ```bash
187
+ uv venv && source .venv/bin/activate
188
+ uv pip install -e '.[dev]'
189
+ pytest
190
+ ```
191
+
192
+ ## License
193
+
194
+ MIT
dbveil-0.1.0/README.md ADDED
@@ -0,0 +1,166 @@
1
+ # veil
2
+
3
+ **A local read-only, PII-redacting proxy that lets AI agents query your database safely.**
4
+
5
+ Point Claude Code (or any MCP client) at `veil` instead of your database. Every query is
6
+ forced through three deterministic guarantees before a single row reaches the model:
7
+
8
+ 1. **Read-only guard** — the query is parsed with Postgres's real grammar (`libpg_query`).
9
+ Only `SELECT` / `SHOW` / `EXPLAIN` (without `ANALYZE`) survive. Writes, DDL, multi-statements, data-modifying
10
+ CTEs, `SELECT INTO`, and row locks are rejected *before execution* — not by asking the model
11
+ nicely, by refusing to run them.
12
+ 2. **PII redaction** — results are scrubbed before they leave your machine: deterministic
13
+ column rules + always-on regex for structured PII (emails, phones, cards, SSNs), with an
14
+ optional NER/LLM backstop for free-text.
15
+ 3. **Audit** — every query and verdict is appended to a log you can tail live in a TUI.
16
+
17
+ A guarded chokepoint in front of the DB, shrunk to a single open-source command with zero
18
+ infrastructure to stand up.
19
+
20
+ ```
21
+ Claude Code ──MCP──▶ veil ──READ ONLY txn──▶ your database
22
+
23
+ ├─ guard: parse → allow SELECT / SHOW / EXPLAIN only
24
+ ├─ redact: column rules + regex + (optional) NER/LLM
25
+ └─ audit: veil-audit.jsonl
26
+ ```
27
+
28
+ ## Why
29
+
30
+ You want an agent to act as a data analyst over real tables — "compare what we drafted vs what
31
+ was actually sent" — without (a) risking a destructive query or (b) shipping customer PII to a
32
+ model provider. Handing an agent raw DB credentials and hoping it only writes `SELECT` is not a
33
+ control. `veil` makes the unsafe paths impossible at the layer the agent can't talk its way past.
34
+
35
+ ## Install
36
+
37
+ ```bash
38
+ pip install dbveil # or: uv pip install dbveil
39
+ # optional extras:
40
+ pip install 'dbveil[ner]' # Presidio NER backstop for names/addresses
41
+ pip install 'dbveil[llm]' # local-LLM (Ollama) redaction
42
+ ```
43
+
44
+ ## Quickstart
45
+
46
+ ```bash
47
+ veil init # interactive: DB URL + auto-detect PII columns → writes veil.yaml
48
+ veil doctor # verify guard, connectivity, and that READ ONLY actually blocks writes
49
+ veil test-query "SELECT email, created_at FROM users LIMIT 5" # try it without an agent
50
+ veil up # run the MCP proxy on stdio (what Claude Code connects to)
51
+ ```
52
+
53
+ Try a write to see the guard refuse it:
54
+
55
+ ```bash
56
+ veil test-query "DELETE FROM users"
57
+ # BLOCKED — write or DDL operation detected: DELETE
58
+ ```
59
+
60
+ ### Connect Claude Code
61
+
62
+ ```bash
63
+ claude mcp add veil -- veil up
64
+ ```
65
+
66
+ or commit a `.mcp.json` so your whole team gets it:
67
+
68
+ ```json
69
+ {
70
+ "mcpServers": {
71
+ "veil": { "command": "veil", "args": ["up"], "env": { "VEIL_CONFIG": "veil.yaml" } }
72
+ }
73
+ }
74
+ ```
75
+
76
+ Now the agent has three tools — `query`, `list_tables`, `describe_table` — and physically
77
+ cannot write or see raw PII.
78
+
79
+ ### Watch it live
80
+
81
+ ```bash
82
+ veil monitor # TUI tailing veil-audit.jsonl: allowed / blocked / redaction counts
83
+ ```
84
+
85
+ ## Configuration
86
+
87
+ `veil init` writes a commented `veil.yaml`. Full reference in
88
+ [`examples/veil.example.yaml`](examples/veil.example.yaml). The essentials:
89
+
90
+ ```yaml
91
+ database:
92
+ url: ${DATABASE_URL} # env var reference keeps the secret out of the file
93
+
94
+ guard:
95
+ allow_select_star: false # block SELECT * on PII tables; force explicit columns
96
+ max_rows: 1000
97
+ statement_timeout_ms: 15000
98
+ pii_tables: [contacts, users]
99
+
100
+ redact:
101
+ builtin_patterns: { email: true, phone: true, credit_card: true, ssn: true, ip: false }
102
+ columns:
103
+ - { column: email, strategy: hash } # sha256, still join-able
104
+ - { column: full_name, strategy: mask } # -> [redacted]
105
+ - { column: ssn, strategy: partial, keep: 4 }
106
+ ner: { enabled: false, engine: presidio } # optional backstop
107
+ ```
108
+
109
+ ## How redaction is layered (and its honest limits)
110
+
111
+ `veil` defends from the **deterministic** side first, because that's the only kind you can trust
112
+ not to leak:
113
+
114
+ | Layer | What it catches | Deterministic? |
115
+ |---|---|---|
116
+ | **Column rules** | Known PII columns (`email`, `ssn`, …) by name | ✅ yes |
117
+ | **Built-in regex** | Emails, phones, Luhn-valid cards, SSNs, IPs — even aliased or in free-text | ✅ yes |
118
+ | **NER (Presidio)** | Names / addresses in free-text the above miss | ⚠️ probabilistic |
119
+ | **LLM (Ollama)** | Same, via a local model | ⚠️ probabilistic, experimental |
120
+
121
+ **Use the probabilistic layers only as a backstop.** ML/NER *will* eventually miss a name or an
122
+ oddly-formatted address — that's a leak. For columns you already know are sensitive, the column
123
+ rules are the real control. The LLM redactor fails *closed*: if the model errors, the cell is
124
+ masked, never passed through.
125
+
126
+ ## Security model
127
+
128
+ - **Two independent read-only layers.** The parser rejects non-reads, *and* every query runs
129
+ inside a `SET TRANSACTION READ ONLY` transaction — so even a parser gap can't write.
130
+ - **Give veil a least-privilege credential.** Best practice is a `GRANT SELECT`-only database
131
+ role (ideally on a read replica). Then "read-only" is enforced by the database itself, and the
132
+ credential `veil` holds is low-blast-radius: a leak exposes already-masked reads and can write
133
+ nothing. `veil doctor` confirms the READ ONLY transaction rejects writes against your DB.
134
+ - **PII never leaves your machine unmasked.** Redaction happens in-process, before results are
135
+ serialized to the MCP client.
136
+
137
+ ## Secure connectivity
138
+
139
+ `veil` connects to whatever DSN you give it, so the network path is yours to choose:
140
+
141
+ - **Tailscale** — put your DB behind a tailnet and point `database.url` at the tailnet host. No
142
+ public DB port.
143
+ - **Short-lived credentials** — `${DATABASE_URL}` is expanded at load, so you can inject an
144
+ ephemeral token (RDS IAM auth, Cloud SQL IAM, a Vault dynamic user) instead of a static
145
+ password.
146
+ - **Railway / managed PaaS** — use the provided TLS endpoint with a dedicated read-only role.
147
+
148
+ ## Roadmap
149
+
150
+ - **Postgres wire-protocol frontend** — so `psql`, BI tools, and any client (not just MCP) get
151
+ the same guard + redaction. The pipeline is already frontend-agnostic.
152
+ - **More engines** — MySQL, SQLite (the guard's parser is the only Postgres-specific piece; it's
153
+ a pluggable backend).
154
+ - **Schema-aware lineage** — resolve aliased PII columns back to their source table.
155
+
156
+ ## Development
157
+
158
+ ```bash
159
+ uv venv && source .venv/bin/activate
160
+ uv pip install -e '.[dev]'
161
+ pytest
162
+ ```
163
+
164
+ ## License
165
+
166
+ MIT
@@ -0,0 +1,44 @@
1
+ # veil configuration — https://github.com/mathu97/dbveil
2
+ # Copy to veil.yaml and edit. Secrets should stay in env vars (${VAR} is expanded at load).
3
+
4
+ database:
5
+ url: ${DATABASE_URL} # e.g. postgresql://ai_analyst:***@db.internal:5432/app
6
+
7
+ guard:
8
+ allow_select_star: false # block SELECT * on PII tables; force explicit column lists
9
+ max_rows: 1000 # cap rows returned to the agent
10
+ statement_timeout_ms: 15000 # kill slow queries
11
+ pii_tables: # SELECT * is rejected on these unless allow_select_star is true
12
+ - contacts
13
+ - users
14
+
15
+ redact:
16
+ # Deterministic, always-on regex redaction for structured PII. Catches values
17
+ # even when aliased (SELECT email AS e) or buried in free-text columns.
18
+ builtin_patterns:
19
+ email: true
20
+ phone: true
21
+ credit_card: true # Luhn-checked to cut false positives
22
+ ssn: true
23
+ ip: false
24
+ hash_salt: "" # set a stable secret to keep hashed values join-able across runs
25
+
26
+ # Column-level rules applied by output column name.
27
+ # strategy: mask (-> [redacted]) | null | hash (sha256, join-able) | partial (keep last N)
28
+ columns:
29
+ - { column: email, strategy: hash }
30
+ - { column: phone, strategy: mask }
31
+ - { column: full_name, strategy: mask }
32
+ - { column: ssn, strategy: partial, keep: 4 }
33
+
34
+ # Optional probabilistic NER for free-text PII (names, addresses) the rules above miss.
35
+ # Backstop only, never the sole control. Needs: pip install 'dbveil[ner]' (or [llm])
36
+ ner:
37
+ enabled: false
38
+ engine: presidio # presidio | llm
39
+ entities: [PERSON, LOCATION, EMAIL_ADDRESS, PHONE_NUMBER]
40
+ score_threshold: 0.5
41
+ ollama_url: http://localhost:11434
42
+ ollama_model: llama3.2
43
+
44
+ audit_log: veil-audit.jsonl
@@ -0,0 +1,42 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "dbveil"
7
+ version = "0.1.0"
8
+ description = "A local read-only, PII-redacting proxy that lets AI agents query your database safely."
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ requires-python = ">=3.10"
12
+ authors = [{ name = "Mathusan Selvarajah" }]
13
+ keywords = ["database", "postgres", "mcp", "pii", "redaction", "ai", "read-only", "proxy", "claude"]
14
+ dependencies = [
15
+ "asyncpg>=0.29",
16
+ "pglast>=6.0",
17
+ "pydantic>=2.6",
18
+ "pyyaml>=6.0",
19
+ "typer>=0.12",
20
+ "rich>=13.7",
21
+ "mcp>=1.2",
22
+ ]
23
+
24
+ [project.optional-dependencies]
25
+ ner = ["presidio-analyzer>=2.2", "presidio-anonymizer>=2.2"]
26
+ llm = ["httpx>=0.27"]
27
+ dev = ["pytest>=8.0", "pytest-asyncio>=0.23"]
28
+
29
+ [project.scripts]
30
+ veil = "veil.cli:app"
31
+ dbveil = "veil.cli:app"
32
+
33
+ [project.urls]
34
+ Homepage = "https://github.com/mathu97/dbveil"
35
+ Repository = "https://github.com/mathu97/dbveil"
36
+
37
+ [tool.hatch.build.targets.wheel]
38
+ packages = ["veil"]
39
+
40
+ [tool.pytest.ini_options]
41
+ asyncio_mode = "auto"
42
+ testpaths = ["tests"]
@@ -0,0 +1,73 @@
1
+ from veil.guard import check_query
2
+
3
+
4
+ def test_plain_select_allowed():  # a plain column-list SELECT must pass the guard
5
+ assert check_query("SELECT id, name FROM users").allowed
6
+
7
+
8
+ def test_join_and_aggregate_allowed():  # a JOIN with GROUP BY / count(*) is still a read and must be allowed
9
+ sql = "SELECT u.id, count(*) FROM users u JOIN orders o ON o.user_id = u.id GROUP BY u.id"
10
+ assert check_query(sql).allowed
11
+
12
+
13
+ def test_cte_select_allowed():  # a WITH clause whose body is only a SELECT must be allowed
14
+ sql = "WITH recent AS (SELECT * FROM logs WHERE ts > now() - interval '1 day') SELECT count(*) FROM recent"
15
+ assert check_query(sql).allowed
16
+
17
+
18
+ def test_show_allowed():  # SHOW is accepted by the guard, not only SELECT
19
+ assert check_query("SHOW server_version").allowed
20
+
21
+
22
+ def test_explain_allowed_but_analyze_blocked():  # plain EXPLAIN passes; EXPLAIN ANALYZE must be rejected
23
+ assert check_query("EXPLAIN SELECT 1").allowed
24
+ assert not check_query("EXPLAIN ANALYZE SELECT 1").allowed  # presumably because ANALYZE executes the statement; confirm in veil.guard
25
+
26
+
27
+ def test_writes_blocked():  # every DML, DDL and privilege statement listed below must be rejected
28
+ for sql in [
29
+ "INSERT INTO t (x) VALUES (1)",
30
+ "UPDATE t SET x = 1",
31
+ "DELETE FROM t",
32
+ "DROP TABLE t",
33
+ "TRUNCATE t",
34
+ "ALTER TABLE t ADD COLUMN y int",
35
+ "CREATE TABLE t (x int)",
36
+ "GRANT SELECT ON t TO public",
37
+ ]:
38
+ assert not check_query(sql).allowed, sql  # sql is the assertion message, so a failure names the offending statement
39
+
40
+
41
+ def test_data_modifying_cte_blocked():  # a DELETE inside a CTE must be rejected even though the outer statement is a SELECT
42
+ sql = "WITH w AS (DELETE FROM t RETURNING *) SELECT * FROM w"
43
+ assert not check_query(sql).allowed
44
+
45
+
46
+ def test_multi_statement_blocked():  # a read followed by a second statement must be rejected as a whole
47
+ assert not check_query("SELECT 1; DROP TABLE t").allowed
48
+
49
+
50
+ def test_select_into_blocked():  # SELECT ... INTO creates a table in Postgres, so it must not count as a read
51
+ assert not check_query("SELECT * INTO backup FROM users").allowed
52
+
53
+
54
+ def test_locking_clause_blocked():  # a FOR UPDATE row-locking clause must be rejected
55
+ assert not check_query("SELECT * FROM users FOR UPDATE").allowed
56
+
57
+
58
+ def test_select_star_on_pii_table_blocked():  # SELECT * on a table listed in pii_tables is rejected when allow_select_star is not passed
59
+ r = check_query("SELECT * FROM contacts", pii_tables=["contacts"])
60
+ assert not r.allowed
61
+
62
+
63
+ def test_select_star_on_non_pii_table_allowed():  # SELECT * stays allowed on a table that is not in pii_tables
64
+ assert check_query("SELECT * FROM metrics", pii_tables=["contacts"]).allowed
65
+
66
+
67
+ def test_select_star_allowed_when_configured():  # allow_select_star=True lifts the SELECT * restriction even for a PII table
68
+ assert check_query("SELECT * FROM contacts", allow_select_star=True, pii_tables=["contacts"]).allowed
69
+
70
+
71
+ def test_garbage_blocked():  # non-SQL text and the empty string must both be rejected
72
+ assert not check_query("this is not sql").allowed
73
+ assert not check_query("").allowed
@@ -0,0 +1,67 @@
1
+ from veil.config import BuiltinPatterns, ColumnRule, RedactConfig, RedactStrategy
2
+ from veil.redact import Redactor
3
+ from veil.redact.column_rules import apply_column_rules
4
+ from veil.redact.patterns import redact_text
5
+ from veil.result import ResultSet
6
+
7
+
8
+ def test_column_mask():  # a ColumnRule with no explicit strategy replaces the cell with "[redacted]"
9
+ rows = [[1, "alice@example.com"]]
10
+ n = apply_column_rules(["id", "email"], rows, [ColumnRule(column="email")])
11
+ assert rows[0][1] == "[redacted]"  # rows is modified in place
12
+ assert n == 1  # return value presumably counts redacted cells; exactly one here
13
+
14
+
15
+ def test_column_null():  # the NULL strategy replaces the cell with None
16
+ rows = [["secret"]]
17
+ apply_column_rules(["x"], rows, [ColumnRule(column="x", strategy=RedactStrategy.NULL)])
18
+ assert rows[0][0] is None
19
+
20
+
21
+ def test_column_hash_is_deterministic():  # hashing the same input twice yields the same "sha256:"-prefixed value
22
+ rows1 = [["a@b.com"]]
23
+ rows2 = [["a@b.com"]]
24
+ rule = [ColumnRule(column="e", strategy=RedactStrategy.HASH)]
25
+ apply_column_rules(["e"], rows1, rule)
26
+ apply_column_rules(["e"], rows2, rule)
27
+ assert rows1[0][0] == rows2[0][0]  # equal inputs must stay equal after hashing
28
+ assert rows1[0][0].startswith("sha256:")
29
+
30
+
31
+ def test_column_partial():  # PARTIAL with keep=4 keeps the last four characters and starts the result with "*"
32
+ rows = [["123456789"]]
33
+ apply_column_rules(["ssn"], rows, [ColumnRule(column="ssn", strategy=RedactStrategy.PARTIAL, keep=4)])
34
+ assert rows[0][0].endswith("6789")
35
+ assert rows[0][0].startswith("*")
36
+
37
+
38
+ def test_pattern_email_phone_ssn():  # default BuiltinPatterns() replaces email, phone and SSN with [email] / [phone] / [ssn]
39
+ text, n = redact_text("reach alice@example.com or 415-555-2671, ssn 123-45-6789", BuiltinPatterns())
40
+ assert "[email]" in text and "[phone]" in text and "[ssn]" in text
41
+ assert n == 3  # one replacement for each of the three values
42
+
43
+
44
+ def test_pattern_credit_card_luhn():  # a Luhn-valid 16-digit number is redacted; a Luhn-invalid one produces no redaction
45
+ text, n = redact_text("card 4111111111111111 here", BuiltinPatterns())
46
+ assert "[card]" in text and n == 1
47
+ text2, n2 = redact_text("not a card 1234567890123456", BuiltinPatterns())  # text2 is unused; only the count is checked
48
+ assert n2 == 0
49
+
50
+
51
+ def test_pattern_ip_opt_in():  # IP addresses are redacted only when BuiltinPatterns(ip=True)
52
+ off, n_off = redact_text("host 10.0.0.1", BuiltinPatterns(ip=False))  # off is unused; only the count is checked
53
+ on, n_on = redact_text("host 10.0.0.1", BuiltinPatterns(ip=True))  # n_on is unused; only the text is checked
54
+ assert n_off == 0 and "[ip]" in on
55
+
56
+
57
+ def test_redactor_end_to_end():  # a HASH column rule and the built-in phone pattern both apply through Redactor.apply
58
+ cfg = RedactConfig(columns=[ColumnRule(column="email", strategy=RedactStrategy.HASH)])
59
+ rs = ResultSet(
60
+ columns=["id", "email", "note"],
61
+ rows=[[1, "a@b.com", "call me at 415-555-2671"]],
62
+ row_count=1,
63
+ )
64
+ n = Redactor(cfg).apply(rs)  # rs.rows is inspected afterwards, so apply modifies the result set in place
65
+ assert rs.rows[0][1].startswith("sha256:")
66
+ assert "[phone]" in rs.rows[0][2]  # the note column has no column rule; the phone number is caught by pattern
67
+ assert n >= 2  # presumably a redaction count: at least the hashed email and the phone match