askql 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- askql-0.2.0/LICENSE +21 -0
- askql-0.2.0/PKG-INFO +251 -0
- askql-0.2.0/README.md +198 -0
- askql-0.2.0/pyproject.toml +82 -0
- askql-0.2.0/setup.cfg +4 -0
- askql-0.2.0/src/askql/__init__.py +103 -0
- askql-0.2.0/src/askql/api.py +215 -0
- askql-0.2.0/src/askql/audit.py +121 -0
- askql-0.2.0/src/askql/capability.py +120 -0
- askql-0.2.0/src/askql/cli.py +243 -0
- askql-0.2.0/src/askql/compressor.py +106 -0
- askql-0.2.0/src/askql/config.py +227 -0
- askql-0.2.0/src/askql/dialects/__init__.py +26 -0
- askql-0.2.0/src/askql/dialects/mssql.py +58 -0
- askql-0.2.0/src/askql/dialects/oracle.py +50 -0
- askql-0.2.0/src/askql/dialects/postgres.py +59 -0
- askql-0.2.0/src/askql/doctor.py +262 -0
- askql-0.2.0/src/askql/drivers/__init__.py +97 -0
- askql-0.2.0/src/askql/drivers/base.py +22 -0
- askql-0.2.0/src/askql/drivers/jdbc.py +223 -0
- askql-0.2.0/src/askql/drivers/mssql.py +37 -0
- askql-0.2.0/src/askql/drivers/oracle.py +49 -0
- askql-0.2.0/src/askql/drivers/postgres.py +37 -0
- askql-0.2.0/src/askql/embeddings.py +86 -0
- askql-0.2.0/src/askql/executor.py +337 -0
- askql-0.2.0/src/askql/generate.py +292 -0
- askql-0.2.0/src/askql/orchestrator.py +171 -0
- askql-0.2.0/src/askql/policy.py +92 -0
- askql-0.2.0/src/askql/py.typed +0 -0
- askql-0.2.0/src/askql/ratelimit.py +53 -0
- askql-0.2.0/src/askql/retriever.py +168 -0
- askql-0.2.0/src/askql/schema_graph.py +163 -0
- askql-0.2.0/src/askql/scraper.py +152 -0
- askql-0.2.0/src/askql/tokenize.py +116 -0
- askql-0.2.0/src/askql/validator.py +361 -0
- askql-0.2.0/src/askql.egg-info/PKG-INFO +251 -0
- askql-0.2.0/src/askql.egg-info/SOURCES.txt +55 -0
- askql-0.2.0/src/askql.egg-info/dependency_links.txt +1 -0
- askql-0.2.0/src/askql.egg-info/entry_points.txt +3 -0
- askql-0.2.0/src/askql.egg-info/requires.txt +40 -0
- askql-0.2.0/src/askql.egg-info/top_level.txt +1 -0
- askql-0.2.0/tests/test_api.py +79 -0
- askql-0.2.0/tests/test_audit_read.py +52 -0
- askql-0.2.0/tests/test_capability.py +70 -0
- askql-0.2.0/tests/test_compressor.py +54 -0
- askql-0.2.0/tests/test_doctor.py +28 -0
- askql-0.2.0/tests/test_domain_rules.py +34 -0
- askql-0.2.0/tests/test_embeddings.py +88 -0
- askql-0.2.0/tests/test_executor.py +64 -0
- askql-0.2.0/tests/test_generate.py +108 -0
- askql-0.2.0/tests/test_jdbc_transport.py +78 -0
- askql-0.2.0/tests/test_orchestrator.py +150 -0
- askql-0.2.0/tests/test_policy.py +152 -0
- askql-0.2.0/tests/test_public_api.py +67 -0
- askql-0.2.0/tests/test_relocatable.py +30 -0
- askql-0.2.0/tests/test_schema_graph.py +75 -0
- askql-0.2.0/tests/test_validator.py +166 -0
askql-0.2.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 askql authors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
askql-0.2.0/PKG-INFO
ADDED
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: askql
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Safe natural-language-to-SQL: validated, read-only queries across many SQL engines.
|
|
5
|
+
Author: askql maintainers
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/TestAutomationArchitect/askql
|
|
8
|
+
Project-URL: Source, https://github.com/TestAutomationArchitect/askql
|
|
9
|
+
Project-URL: Changelog, https://github.com/TestAutomationArchitect/askql/blob/main/CHANGELOG.md
|
|
10
|
+
Keywords: sql,nl2sql,text-to-sql,llm,guardrails,read-only,database
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: Database
|
|
18
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
19
|
+
Classifier: Typing :: Typed
|
|
20
|
+
Requires-Python: >=3.11
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
License-File: LICENSE
|
|
23
|
+
Requires-Dist: sqlglot>=23.0
|
|
24
|
+
Requires-Dist: pyyaml>=6.0
|
|
25
|
+
Provides-Extra: postgres
|
|
26
|
+
Requires-Dist: psycopg[binary]>=3.1; extra == "postgres"
|
|
27
|
+
Provides-Extra: oracle
|
|
28
|
+
Requires-Dist: python-oracledb>=2.0; extra == "oracle"
|
|
29
|
+
Provides-Extra: mssql
|
|
30
|
+
Requires-Dist: pymssql>=2.2; extra == "mssql"
|
|
31
|
+
Provides-Extra: jdbc
|
|
32
|
+
Requires-Dist: JayDeBeApi>=1.2; extra == "jdbc"
|
|
33
|
+
Requires-Dist: JPype1>=1.5; extra == "jdbc"
|
|
34
|
+
Provides-Extra: llm
|
|
35
|
+
Requires-Dist: anthropic>=0.40; extra == "llm"
|
|
36
|
+
Provides-Extra: llm-aws
|
|
37
|
+
Requires-Dist: anthropic[bedrock]>=0.40; extra == "llm-aws"
|
|
38
|
+
Provides-Extra: llm-vertex
|
|
39
|
+
Requires-Dist: anthropic[vertex]>=0.40; extra == "llm-vertex"
|
|
40
|
+
Provides-Extra: llm-openai
|
|
41
|
+
Requires-Dist: openai>=1.40; extra == "llm-openai"
|
|
42
|
+
Provides-Extra: embeddings
|
|
43
|
+
Requires-Dist: sentence-transformers>=2.2; extra == "embeddings"
|
|
44
|
+
Provides-Extra: api
|
|
45
|
+
Requires-Dist: fastapi>=0.110; extra == "api"
|
|
46
|
+
Requires-Dist: uvicorn[standard]>=0.27; extra == "api"
|
|
47
|
+
Provides-Extra: dev
|
|
48
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
49
|
+
Requires-Dist: ruff>=0.5; extra == "dev"
|
|
50
|
+
Requires-Dist: mypy>=1.10; extra == "dev"
|
|
51
|
+
Requires-Dist: httpx>=0.27; extra == "dev"
|
|
52
|
+
Dynamic: license-file
|
|
53
|
+
|
|
54
|
+
# askql — Safe Natural-Language → SQL
|
|
55
|
+
|
|
56
|
+
Translate plain-English questions into **validated, read-only SELECT** queries and run them
|
|
57
|
+
against a relational database. Designed for **QA / dev environments only**, with
|
|
58
|
+
defense-in-depth guardrails so an AI agent can never modify data or leak PII.
|
|
59
|
+
|
|
60
|
+
> ⚠️ **Non-production by design.** This system executes only read-only `SELECT`
|
|
61
|
+
> statements through a least-privilege DB user. Do not point it at a production
|
|
62
|
+
> database without the Phase-3 approval workflow (see [ARCHITECTURE.md](ARCHITECTURE.md)).
|
|
63
|
+
|
|
64
|
+
---
|
|
65
|
+
|
|
66
|
+
## How it works
|
|
67
|
+
|
|
68
|
+
```
|
|
69
|
+
question ─▶ compress (pick ~10 relevant tables) ─▶ LLM writes SQL
|
|
70
|
+
─▶ validate (AST guardrails) ─▶ execute (read-only, rollback, timeout)
|
|
71
|
+
─▶ markdown/CSV + audit log
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
Five defense layers: **AST validation → read-only DB user → connection hygiene →
|
|
75
|
+
output caps → audit trail**. See [ARCHITECTURE.md](ARCHITECTURE.md) for the full design and
|
|
76
|
+
the security hardening applied on top of the source playbook.
|
|
77
|
+
|
|
78
|
+
> **Credentials:** use whatever your org gives you — the tool runs with any user. A read-only
|
|
79
|
+
> user is a recommended nice-to-have (defense in depth), not a requirement; if the connected
|
|
80
|
+
> user can write, you get a one-line advisory, never a block.
|
|
81
|
+
|
|
82
|
+
---
|
|
83
|
+
|
|
84
|
+
## Quick start
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
# 1. Install (Python 3.11+; default driver is PostgreSQL)
|
|
88
|
+
python -m venv .venv && .venv\Scripts\activate # Windows PowerShell (macOS/Linux: source .venv/bin/activate)
|
|
89
|
+
pip install -e ".[postgres,dev]"
|
|
90
|
+
|
|
91
|
+
# 2. Configure
|
|
92
|
+
copy .env.example .env # then edit credentials
|
|
93
|
+
# edit config/settings.yaml # set dialect + schemas
|
|
94
|
+
|
|
95
|
+
# 3. Build the metadata pipeline (one command)
|
|
96
|
+
python scripts/scrape_schema.py --schemas public --build-graph
|
|
97
|
+
|
|
98
|
+
# 4. Ask a question (the orchestrator runs compress→generate→validate→execute)
|
|
99
|
+
askql ask "show me the 10 newest active accounts"
|
|
100
|
+
|
|
101
|
+
# …or drive the steps manually:
|
|
102
|
+
python scripts/compress_metadata.py --question "active accounts" --graph docs/schema-graph.json
|
|
103
|
+
python scripts/validate_sql.py build/query.sql --max-rows 100
|
|
104
|
+
python scripts/execute_sql.py build/query.sql --limit 50
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
No database handy? Everything except `execute` runs fully offline, and the executor
|
|
108
|
+
has an **offline render mode** (`--rows-json`) used by the test suite.
|
|
109
|
+
|
|
110
|
+
Verify your setup any time with the built-in health check:
|
|
111
|
+
|
|
112
|
+
```bash
|
|
113
|
+
python scripts/doctor.py # config, credentials, transport, live connectivity, graph
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
---
|
|
117
|
+
|
|
118
|
+
## Run in Docker (zero local install)
|
|
119
|
+
|
|
120
|
+
The image bundles Python **and a JRE**, so the universal JDBC transport works without
|
|
121
|
+
installing Java or native drivers on your machine. Mount your config, `.env`, and vendor JDBC
|
|
122
|
+
jars at runtime:
|
|
123
|
+
|
|
124
|
+
```bash
|
|
125
|
+
docker build -t askql .
|
|
126
|
+
|
|
127
|
+
# health check
|
|
128
|
+
docker run --rm --env-file .env \
|
|
129
|
+
-v "$PWD/config:/app/config" -v "$PWD/jdbc-drivers:/app/jdbc-drivers" askql doctor
|
|
130
|
+
|
|
131
|
+
# or via compose
|
|
132
|
+
docker compose run --rm askql doctor
|
|
133
|
+
docker compose run --rm askql execute build/query.sql --limit 50
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
Vendor JDBC jars aren't baked into the image (licensing) — drop them in `jdbc-drivers/` and
|
|
137
|
+
they're mounted in. This is the "usable by all" path: one image, any database engine/version.
|
|
138
|
+
|
|
139
|
+
---
|
|
140
|
+
|
|
141
|
+
## REST API (connectivity-as-a-service)
|
|
142
|
+
|
|
143
|
+
Run askql as a service so clients (a web UI, a Slack bot, scripts, other teams) hold **no**
|
|
144
|
+
drivers, JVM, or credentials — they just call HTTP. The service applies the same validator,
|
|
145
|
+
RBAC, read-only execution, and audit.
|
|
146
|
+
|
|
147
|
+
```bash
|
|
148
|
+
pip install -e ".[api,postgres,jdbc]"
|
|
149
|
+
export T2S_API_KEYS="devkey:alice@corp" # identity drives RBAC; for local dev, omit this and set T2S_API_ALLOW_OPEN=true
|
|
150
|
+
askql-api # or: uvicorn askql.api:app --port 8000
|
|
151
|
+
# or containerized: docker compose up api
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
| Method & path | Auth | Purpose |
|
|
155
|
+
|---------------|------|---------|
|
|
156
|
+
| `GET /health` | public | liveness |
|
|
157
|
+
| `GET /api/v1/databases` | public | registry names (no creds/conn strings) |
|
|
158
|
+
| `POST /api/v1/validate` | required | `{sql, database?}` → guardrail check |
|
|
159
|
+
| `POST /api/v1/compress` | required | `{question}` → focused schema slice |
|
|
160
|
+
| `POST /api/v1/query` | required | `{sql, database?, max_rows?, format?}` → execute read-only |
|
|
161
|
+
| `POST /api/v1/ask` | required | `{question, database?}` → compress→LLM→validate→retry→execute (needs an LLM provider configured, e.g. `ANTHROPIC_API_KEY`) |
|
|
162
|
+
|
|
163
|
+
```bash
|
|
164
|
+
curl -s -X POST localhost:8000/api/v1/query -H "X-API-Key: devkey" \
|
|
165
|
+
-H "Content-Type: application/json" \
|
|
166
|
+
-d '{"sql":"SELECT first_name, salary FROM HR.EMPLOYEES WHERE ROWNUM <= 3","database":"oracle-prod"}'
|
|
167
|
+
# → {"ok":true,"columns":["FIRST_NAME","SALARY"],"rows":[["Steven",24000], ...],"latency_ms":104}
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
Auth: `T2S_API_KEYS="key1:alice@corp,key2:bob@corp"` maps a key → identity (→ role in
|
|
171
|
+
`config/access-control.yaml`). The auth-required `POST` endpoints refuse to run unless keys are set (or
|
|
172
|
+
`T2S_API_ALLOW_OPEN=true` for local dev). A non-SELECT returns `400` before any DB work.
|
|
173
|
+
|
|
174
|
+
### Choosing the model (provider-agnostic / BYOM)
|
|
175
|
+
|
|
176
|
+
`/ask` (and `askql ask`) generates SQL through a pluggable provider — **you bring your own
|
|
177
|
+
model.** The safety pipeline is identical regardless of provider; only `T2S_LLM_PROVIDER`
|
|
178
|
+
changes. With nothing configured, generation falls back to the **BYO-LLM / IDE lane** (your
|
|
179
|
+
Copilot / Claude Code agent does it — no backend model, no key).
|
|
180
|
+
|
|
181
|
+
| `T2S_LLM_PROVIDER` | What it uses | Config |
|
|
182
|
+
|--------------------|--------------|--------|
|
|
183
|
+
| `anthropic` (auto) | Anthropic API | `ANTHROPIC_API_KEY` · `pip install '.[llm]'` |
|
|
184
|
+
| `bedrock` | Claude on **AWS** | AWS creds/role · `'.[llm-aws]'` · `T2S_LLM_MODEL=anthropic.claude-…` |
|
|
185
|
+
| `vertex` | Claude on **GCP** | GCP creds · `'.[llm-vertex]'` |
|
|
186
|
+
| `openai` | OpenAI GPT | `OPENAI_API_KEY` · `'.[llm-openai]'` |
|
|
187
|
+
| `azure-openai` | Azure OpenAI | `AZURE_OPENAI_API_KEY/_ENDPOINT/OPENAI_API_VERSION` · `'.[llm-openai]'` |
|
|
188
|
+
| `openai-compatible` | **any** self-hosted / OSS (vLLM, Ollama, LM Studio, OpenRouter, GitHub Models) | `T2S_LLM_BASE_URL=…` + `T2S_LLM_MODEL=…` · `'.[llm-openai]'` |
|
|
189
|
+
| `custom` | your own gateway | `T2S_LLM_FACTORY=module:callable` |
|
|
190
|
+
| _unset / `none`_ | **BYO-LLM** — IDE agent (Copilot/Claude Code) | nothing |
|
|
191
|
+
|
|
192
|
+
> **Copilot note:** Copilot has no server-side completions API, so it can't power the `/ask`
|
|
193
|
+
> *service* — but it *is* the model in the **IDE lane** (the agent runs the scripts/skill with
|
|
194
|
+
> its own model). For a token-based API in the GitHub/MS ecosystem, use Azure OpenAI or
|
|
195
|
+
> "GitHub Models" via the `openai-compatible` provider.
|
|
196
|
+
|
|
197
|
+
---
|
|
198
|
+
|
|
199
|
+
## Use as a library
|
|
200
|
+
|
|
201
|
+
`askql` is a clean, typed, importable package (`py.typed`) — build it into your own app. The
|
|
202
|
+
public API is everything in `askql.__all__`; heavy/optional deps (DB drivers, LLM SDKs,
|
|
203
|
+
FastAPI) load lazily, so `import askql` is light.
|
|
204
|
+
|
|
205
|
+
```python
|
|
206
|
+
from askql import validate, compress, execute_sql_text, ask, Settings, load_graph
|
|
207
|
+
|
|
208
|
+
s = Settings(dialect="postgres", max_rows=100)
|
|
209
|
+
|
|
210
|
+
# 1) Guardrail check (pure, no DB)
|
|
211
|
+
r = validate("SELECT id, name FROM app.users LIMIT 10", settings=s)
|
|
212
|
+
assert r.ok, r.errors
|
|
213
|
+
|
|
214
|
+
# 2) Pick relevant tables for a question
|
|
215
|
+
slice_ = compress(load_graph("schema-graph.json"), "active users this week", max_tables=8)
|
|
216
|
+
|
|
217
|
+
# 3) Full NL->SQL (compress -> LLM -> validate -> retry -> execute), provider via env
|
|
218
|
+
result = ask("how many active users this week", settings=s) # needs an LLM provider configured
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
Everything is config-injectable (pass `Settings` / `DatabaseEntry` / a custom `SqlGenerator`) —
|
|
222
|
+
no checked-out repo required. For pip-installed use, set `T2S_HOME` (config/data root) and/or
|
|
223
|
+
`T2S_DATA_DIR` (writable dir for the audit log + caches) so nothing is written next to the
|
|
224
|
+
installed package. Packaging/publishing details: [SHIPPING.md](SHIPPING.md).
|
|
225
|
+
|
|
226
|
+
## Repository map
|
|
227
|
+
|
|
228
|
+
| Path | What it is |
|
|
229
|
+
|------|------------|
|
|
230
|
+
| [src/askql/](src/askql/) | The library: validator, compressor, executor, drivers, scraper |
|
|
231
|
+
| [scripts/](scripts/) | Thin CLI wrappers (the commands the agent/humans call) |
|
|
232
|
+
| [config/](config/) | `settings.yaml`, `sensitive-columns.yaml`, `databases.yaml` |
|
|
233
|
+
| [docs/domain-rules.md](docs/domain-rules.md) | Business logic hints the LLM reads |
|
|
234
|
+
| [tests/](tests/) | Offline unit tests (validator / compressor / graph) |
|
|
235
|
+
| [.claude/](.claude/) | Claude Code skill + permission settings |
|
|
236
|
+
| [CLAUDE.md](CLAUDE.md) | Operating instructions for the AI agent |
|
|
237
|
+
| [QUICKSTART.md](QUICKSTART.md) | Install + first run in your org (≈15 min) |
|
|
238
|
+
| [SHIPPING.md](SHIPPING.md) | How to package, ship, and install (wheel / index / Docker) |
|
|
239
|
+
| [PB/](PB/) | The original source playbook (reference) |
|
|
240
|
+
|
|
241
|
+
---
|
|
242
|
+
|
|
243
|
+
## Safety rules (non-negotiable)
|
|
244
|
+
|
|
245
|
+
- Only `SELECT` (incl. `UNION`/`INTERSECT`/`EXCEPT`). No DDL/DML, no procedural code.
|
|
246
|
+
- No `SELECT *` — explicit columns only (`COUNT(*)` is fine).
|
|
247
|
+
- A row limit is **required** and capped at `max_rows`.
|
|
248
|
+
- No system schemas, no sensitive columns (SSN, PASSWORD, …), no dangerous functions.
|
|
249
|
+
- Every execution rolls back, times out, and is audited.
|
|
250
|
+
|
|
251
|
+
See [CLAUDE.md](CLAUDE.md) for the agent workflow and [tests/](tests/) for the enforced behavior.
|
askql-0.2.0/README.md
ADDED
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
# askql — Safe Natural-Language → SQL
|
|
2
|
+
|
|
3
|
+
Translate plain-English questions into **validated, read-only SELECT** queries and run them
|
|
4
|
+
against a relational database. Designed for **QA / dev environments only**, with
|
|
5
|
+
defense-in-depth guardrails so an AI agent can never modify data or leak PII.
|
|
6
|
+
|
|
7
|
+
> ⚠️ **Non-production by design.** This system executes only read-only `SELECT`
|
|
8
|
+
> statements through a least-privilege DB user. Do not point it at a production
|
|
9
|
+
> database without the Phase-3 approval workflow (see [ARCHITECTURE.md](ARCHITECTURE.md)).
|
|
10
|
+
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## How it works
|
|
14
|
+
|
|
15
|
+
```
|
|
16
|
+
question ─▶ compress (pick ~10 relevant tables) ─▶ LLM writes SQL
|
|
17
|
+
─▶ validate (AST guardrails) ─▶ execute (read-only, rollback, timeout)
|
|
18
|
+
─▶ markdown/CSV + audit log
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
Five defense layers: **AST validation → read-only DB user → connection hygiene →
|
|
22
|
+
output caps → audit trail**. See [ARCHITECTURE.md](ARCHITECTURE.md) for the full design and
|
|
23
|
+
the security hardening applied on top of the source playbook.
|
|
24
|
+
|
|
25
|
+
> **Credentials:** use whatever your org gives you — the tool runs with any user. A read-only
|
|
26
|
+
> user is a recommended nice-to-have (defense in depth), not a requirement; if the connected
|
|
27
|
+
> user can write, you get a one-line advisory, never a block.
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## Quick start
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
# 1. Install (Python 3.11+; default driver is PostgreSQL)
|
|
35
|
+
python -m venv .venv && .venv\Scripts\activate # Windows PowerShell (macOS/Linux: source .venv/bin/activate)
|
|
36
|
+
pip install -e ".[postgres,dev]"
|
|
37
|
+
|
|
38
|
+
# 2. Configure
|
|
39
|
+
copy .env.example .env # then edit credentials
|
|
40
|
+
# edit config/settings.yaml # set dialect + schemas
|
|
41
|
+
|
|
42
|
+
# 3. Build the metadata pipeline (one command)
|
|
43
|
+
python scripts/scrape_schema.py --schemas public --build-graph
|
|
44
|
+
|
|
45
|
+
# 4. Ask a question (the orchestrator runs compress→generate→validate→execute)
|
|
46
|
+
askql ask "show me the 10 newest active accounts"
|
|
47
|
+
|
|
48
|
+
# …or drive the steps manually:
|
|
49
|
+
python scripts/compress_metadata.py --question "active accounts" --graph docs/schema-graph.json
|
|
50
|
+
python scripts/validate_sql.py build/query.sql --max-rows 100
|
|
51
|
+
python scripts/execute_sql.py build/query.sql --limit 50
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
No database handy? Everything except `execute` runs fully offline, and the executor
|
|
55
|
+
has an **offline render mode** (`--rows-json`) used by the test suite.
|
|
56
|
+
|
|
57
|
+
Verify your setup any time with the built-in health check:
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
python scripts/doctor.py # config, credentials, transport, live connectivity, graph
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
---
|
|
64
|
+
|
|
65
|
+
## Run in Docker (zero local install)
|
|
66
|
+
|
|
67
|
+
The image bundles Python **and a JRE**, so the universal JDBC transport works without
|
|
68
|
+
installing Java or native drivers on your machine. Mount your config, `.env`, and vendor JDBC
|
|
69
|
+
jars at runtime:
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
docker build -t askql .
|
|
73
|
+
|
|
74
|
+
# health check
|
|
75
|
+
docker run --rm --env-file .env \
|
|
76
|
+
-v "$PWD/config:/app/config" -v "$PWD/jdbc-drivers:/app/jdbc-drivers" askql doctor
|
|
77
|
+
|
|
78
|
+
# or via compose
|
|
79
|
+
docker compose run --rm askql doctor
|
|
80
|
+
docker compose run --rm askql execute build/query.sql --limit 50
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
Vendor JDBC jars aren't baked into the image (licensing) — drop them in `jdbc-drivers/` and
|
|
84
|
+
they're mounted in. This is the "usable by all" path: one image, any database engine/version.
|
|
85
|
+
|
|
86
|
+
---
|
|
87
|
+
|
|
88
|
+
## REST API (connectivity-as-a-service)
|
|
89
|
+
|
|
90
|
+
Run askql as a service so clients (a web UI, a Slack bot, scripts, other teams) hold **no**
|
|
91
|
+
drivers, JVM, or credentials — they just call HTTP. The service applies the same validator,
|
|
92
|
+
RBAC, read-only execution, and audit.
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
pip install -e ".[api,postgres,jdbc]"
|
|
96
|
+
export T2S_API_KEYS="devkey:alice@corp" # identity drives RBAC; for local dev, omit this and set T2S_API_ALLOW_OPEN=true
|
|
97
|
+
askql-api # or: uvicorn askql.api:app --port 8000
|
|
98
|
+
# or containerized: docker compose up api
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
| Method & path | Auth | Purpose |
|
|
102
|
+
|---------------|------|---------|
|
|
103
|
+
| `GET /health` | public | liveness |
|
|
104
|
+
| `GET /api/v1/databases` | public | registry names (no creds/conn strings) |
|
|
105
|
+
| `POST /api/v1/validate` | required | `{sql, database?}` → guardrail check |
|
|
106
|
+
| `POST /api/v1/compress` | required | `{question}` → focused schema slice |
|
|
107
|
+
| `POST /api/v1/query` | required | `{sql, database?, max_rows?, format?}` → execute read-only |
|
|
108
|
+
| `POST /api/v1/ask` | required | `{question, database?}` → compress→LLM→validate→retry→execute (needs an LLM provider configured, e.g. `ANTHROPIC_API_KEY`) |
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
curl -s -X POST localhost:8000/api/v1/query -H "X-API-Key: devkey" \
|
|
112
|
+
-H "Content-Type: application/json" \
|
|
113
|
+
-d '{"sql":"SELECT first_name, salary FROM HR.EMPLOYEES WHERE ROWNUM <= 3","database":"oracle-prod"}'
|
|
114
|
+
# → {"ok":true,"columns":["FIRST_NAME","SALARY"],"rows":[["Steven",24000], ...],"latency_ms":104}
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
Auth: `T2S_API_KEYS="key1:alice@corp,key2:bob@corp"` maps a key → identity (→ role in
|
|
118
|
+
`config/access-control.yaml`). The auth-required `POST` endpoints refuse to run unless keys are set (or
|
|
119
|
+
`T2S_API_ALLOW_OPEN=true` for local dev). A non-SELECT returns `400` before any DB work.
|
|
120
|
+
|
|
121
|
+
### Choosing the model (provider-agnostic / BYOM)
|
|
122
|
+
|
|
123
|
+
`/ask` (and `askql ask`) generates SQL through a pluggable provider — **you bring your own
|
|
124
|
+
model.** The safety pipeline is identical regardless of provider; only `T2S_LLM_PROVIDER`
|
|
125
|
+
changes. With nothing configured, generation falls back to the **BYO-LLM / IDE lane** (your
|
|
126
|
+
Copilot / Claude Code agent does it — no backend model, no key).
|
|
127
|
+
|
|
128
|
+
| `T2S_LLM_PROVIDER` | What it uses | Config |
|
|
129
|
+
|--------------------|--------------|--------|
|
|
130
|
+
| `anthropic` (auto) | Anthropic API | `ANTHROPIC_API_KEY` · `pip install '.[llm]'` |
|
|
131
|
+
| `bedrock` | Claude on **AWS** | AWS creds/role · `'.[llm-aws]'` · `T2S_LLM_MODEL=anthropic.claude-…` |
|
|
132
|
+
| `vertex` | Claude on **GCP** | GCP creds · `'.[llm-vertex]'` |
|
|
133
|
+
| `openai` | OpenAI GPT | `OPENAI_API_KEY` · `'.[llm-openai]'` |
|
|
134
|
+
| `azure-openai` | Azure OpenAI | `AZURE_OPENAI_API_KEY/_ENDPOINT/OPENAI_API_VERSION` · `'.[llm-openai]'` |
|
|
135
|
+
| `openai-compatible` | **any** self-hosted / OSS (vLLM, Ollama, LM Studio, OpenRouter, GitHub Models) | `T2S_LLM_BASE_URL=…` + `T2S_LLM_MODEL=…` · `'.[llm-openai]'` |
|
|
136
|
+
| `custom` | your own gateway | `T2S_LLM_FACTORY=module:callable` |
|
|
137
|
+
| _unset / `none`_ | **BYO-LLM** — IDE agent (Copilot/Claude Code) | nothing |
|
|
138
|
+
|
|
139
|
+
> **Copilot note:** Copilot has no server-side completions API, so it can't power the `/ask`
|
|
140
|
+
> *service* — but it *is* the model in the **IDE lane** (the agent runs the scripts/skill with
|
|
141
|
+
> its own model). For a token-based API in the GitHub/MS ecosystem, use Azure OpenAI or
|
|
142
|
+
> "GitHub Models" via the `openai-compatible` provider.
|
|
143
|
+
|
|
144
|
+
---
|
|
145
|
+
|
|
146
|
+
## Use as a library
|
|
147
|
+
|
|
148
|
+
`askql` is a clean, typed, importable package (`py.typed`) — build it into your own app. The
|
|
149
|
+
public API is everything in `askql.__all__`; heavy/optional deps (DB drivers, LLM SDKs,
|
|
150
|
+
FastAPI) load lazily, so `import askql` is light.
|
|
151
|
+
|
|
152
|
+
```python
|
|
153
|
+
from askql import validate, compress, execute_sql_text, ask, Settings, load_graph
|
|
154
|
+
|
|
155
|
+
s = Settings(dialect="postgres", max_rows=100)
|
|
156
|
+
|
|
157
|
+
# 1) Guardrail check (pure, no DB)
|
|
158
|
+
r = validate("SELECT id, name FROM app.users LIMIT 10", settings=s)
|
|
159
|
+
assert r.ok, r.errors
|
|
160
|
+
|
|
161
|
+
# 2) Pick relevant tables for a question
|
|
162
|
+
slice_ = compress(load_graph("schema-graph.json"), "active users this week", max_tables=8)
|
|
163
|
+
|
|
164
|
+
# 3) Full NL->SQL (compress -> LLM -> validate -> retry -> execute), provider via env
|
|
165
|
+
result = ask("how many active users this week", settings=s) # needs an LLM provider configured
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
Everything is config-injectable (pass `Settings` / `DatabaseEntry` / a custom `SqlGenerator`) —
|
|
169
|
+
no checked-out repo required. For pip-installed use, set `T2S_HOME` (config/data root) and/or
|
|
170
|
+
`T2S_DATA_DIR` (writable dir for the audit log + caches) so nothing is written next to the
|
|
171
|
+
installed package. Packaging/publishing details: [SHIPPING.md](SHIPPING.md).
|
|
172
|
+
|
|
173
|
+
## Repository map
|
|
174
|
+
|
|
175
|
+
| Path | What it is |
|
|
176
|
+
|------|------------|
|
|
177
|
+
| [src/askql/](src/askql/) | The library: validator, compressor, executor, drivers, scraper |
|
|
178
|
+
| [scripts/](scripts/) | Thin CLI wrappers (the commands the agent/humans call) |
|
|
179
|
+
| [config/](config/) | `settings.yaml`, `sensitive-columns.yaml`, `databases.yaml` |
|
|
180
|
+
| [docs/domain-rules.md](docs/domain-rules.md) | Business logic hints the LLM reads |
|
|
181
|
+
| [tests/](tests/) | Offline unit tests (validator / compressor / graph) |
|
|
182
|
+
| [.claude/](.claude/) | Claude Code skill + permission settings |
|
|
183
|
+
| [CLAUDE.md](CLAUDE.md) | Operating instructions for the AI agent |
|
|
184
|
+
| [QUICKSTART.md](QUICKSTART.md) | Install + first run in your org (≈15 min) |
|
|
185
|
+
| [SHIPPING.md](SHIPPING.md) | How to package, ship, and install (wheel / index / Docker) |
|
|
186
|
+
| [PB/](PB/) | The original source playbook (reference) |
|
|
187
|
+
|
|
188
|
+
---
|
|
189
|
+
|
|
190
|
+
## Safety rules (non-negotiable)
|
|
191
|
+
|
|
192
|
+
- Only `SELECT` (incl. `UNION`/`INTERSECT`/`EXCEPT`). No DDL/DML, no procedural code.
|
|
193
|
+
- No `SELECT *` — explicit columns only (`COUNT(*)` is fine).
|
|
194
|
+
- A row limit is **required** and capped at `max_rows`.
|
|
195
|
+
- No system schemas, no sensitive columns (SSN, PASSWORD, …), no dangerous functions.
|
|
196
|
+
- Every execution rolls back, times out, and is audited.
|
|
197
|
+
|
|
198
|
+
See [CLAUDE.md](CLAUDE.md) for the agent workflow and [tests/](tests/) for the enforced behavior.
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "askql"
|
|
3
|
+
version = "0.2.0"
|
|
4
|
+
description = "Safe natural-language-to-SQL: validated, read-only queries across many SQL engines."
|
|
5
|
+
requires-python = ">=3.11"
|
|
6
|
+
readme = "README.md"
|
|
7
|
+
dependencies = ["sqlglot>=23.0", "pyyaml>=6.0"]
|
|
8
|
+
keywords = ["sql", "nl2sql", "text-to-sql", "llm", "guardrails", "read-only", "database"]
|
|
9
|
+
authors = [{ name = "askql maintainers" }]
|
|
10
|
+
license = { text = "MIT" }
|
|
11
|
+
classifiers = [
|
|
12
|
+
"Development Status :: 4 - Beta",
|
|
13
|
+
"Intended Audience :: Developers",
|
|
14
|
+
"License :: OSI Approved :: MIT License",
|
|
15
|
+
"Programming Language :: Python :: 3",
|
|
16
|
+
"Programming Language :: Python :: 3.11",
|
|
17
|
+
"Programming Language :: Python :: 3.12",
|
|
18
|
+
"Topic :: Database",
|
|
19
|
+
"Topic :: Software Development :: Libraries",
|
|
20
|
+
"Typing :: Typed",
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
[project.urls]
|
|
24
|
+
# TODO: point these at your repo/docs when published.
|
|
25
|
+
Homepage = "https://github.com/TestAutomationArchitect/askql"
|
|
26
|
+
Source = "https://github.com/TestAutomationArchitect/askql"
|
|
27
|
+
Changelog = "https://github.com/TestAutomationArchitect/askql/blob/main/CHANGELOG.md"
|
|
28
|
+
|
|
29
|
+
[project.optional-dependencies]
|
|
30
|
+
postgres = ["psycopg[binary]>=3.1"]
|
|
31
|
+
oracle = ["python-oracledb>=2.0"]
|
|
32
|
+
mssql = ["pymssql>=2.2"]
|
|
33
|
+
jdbc = ["JayDeBeApi>=1.2", "JPype1>=1.5"] # universal transport (needs a JVM on host)
|
|
34
|
+
# NL->SQL generation providers (pick what your org uses; the pipeline is identical):
|
|
35
|
+
llm = ["anthropic>=0.40"] # Anthropic API
|
|
36
|
+
llm-aws = ["anthropic[bedrock]>=0.40"] # Claude on Amazon Bedrock
|
|
37
|
+
llm-vertex = ["anthropic[vertex]>=0.40"] # Claude on Google Vertex
|
|
38
|
+
llm-openai = ["openai>=1.40"] # OpenAI / Azure OpenAI / any OpenAI-compatible (BYOM)
|
|
39
|
+
embeddings = ["sentence-transformers>=2.2"] # local semantic table retrieval (or use openai embeddings)
|
|
40
|
+
api = ["fastapi>=0.110", "uvicorn[standard]>=0.27"]
|
|
41
|
+
dev = ["pytest>=8.0", "ruff>=0.5", "mypy>=1.10", "httpx>=0.27"]
|
|
42
|
+
|
|
43
|
+
[project.scripts]
|
|
44
|
+
# `askql`: single orchestrator entrypoint (compress -> generate -> validate -> execute); `askql-api`: REST service launcher.
|
|
45
|
+
askql = "askql.cli:main"
|
|
46
|
+
askql-api = "askql.api:main"
|
|
47
|
+
|
|
48
|
+
[build-system]
|
|
49
|
+
requires = ["setuptools>=68"]
|
|
50
|
+
build-backend = "setuptools.build_meta"
|
|
51
|
+
|
|
52
|
+
[tool.setuptools.packages.find]
|
|
53
|
+
where = ["src"]
|
|
54
|
+
|
|
55
|
+
[tool.setuptools.package-data]
|
|
56
|
+
askql = ["py.typed"] # ship type-hint marker so consumers get types (PEP 561)
|
|
57
|
+
|
|
58
|
+
[tool.pytest.ini_options]
|
|
59
|
+
testpaths = ["tests"]
|
|
60
|
+
markers = [
|
|
61
|
+
"integration: requires a live database connection (skipped by default)",
|
|
62
|
+
]
|
|
63
|
+
|
|
64
|
+
[tool.ruff]
|
|
65
|
+
line-length = 100
|
|
66
|
+
src = ["src", "scripts", "tests"]
|
|
67
|
+
|
|
68
|
+
[tool.ruff.lint]
|
|
69
|
+
select = ["E", "F", "I", "B", "UP", "S"] # incl. flake8-bandit (S) for security smells
|
|
70
|
+
# S101: asserts are fine. S110: try/except/pass is a deliberate, documented pattern in the
|
|
71
|
+
# audit/advisory/cleanup paths (those must never raise and break a query).
|
|
72
|
+
ignore = ["S101", "S110"]
|
|
73
|
+
|
|
74
|
+
[tool.ruff.lint.per-file-ignores]
|
|
75
|
+
"tests/*" = ["S", "E501"] # tests may embed long CSV/SQL fixtures
|
|
76
|
+
|
|
77
|
+
[tool.mypy]
|
|
78
|
+
python_version = "3.11"
|
|
79
|
+
mypy_path = "src"
|
|
80
|
+
packages = ["askql"]
|
|
81
|
+
ignore_missing_imports = true
|
|
82
|
+
explicit_package_bases = true
|
askql-0.2.0/setup.cfg
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
"""askql — safe natural-language-to-SQL with defense-in-depth guardrails.
|
|
2
|
+
|
|
3
|
+
A library teams can build on: turn questions into validated, read-only SELECTs and run them
|
|
4
|
+
safely against many SQL engines. Read-only by design; the validator is the single source of
|
|
5
|
+
truth for what is safe to execute.
|
|
6
|
+
|
|
7
|
+
Public API (stable surface — internals may change between minor versions)::
|
|
8
|
+
|
|
9
|
+
from askql import validate, compress, ask, execute_sql_text, Settings
|
|
10
|
+
|
|
11
|
+
Quick start (library use)::
|
|
12
|
+
|
|
13
|
+
from askql import validate, Settings
|
|
14
|
+
r = validate("SELECT id FROM s.t LIMIT 10", settings=Settings(dialect="postgres"))
|
|
15
|
+
assert r.ok
|
|
16
|
+
|
|
17
|
+
Optional features install via extras: `askql[postgres]`, `[jdbc]`, `[llm]`, `[llm-openai]`,
|
|
18
|
+
`[api]`. Heavy/optional deps (DB drivers, LLM SDKs, FastAPI, JPype) are imported lazily, so
|
|
19
|
+
`import askql` is light and never fails on a missing optional dependency.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
__version__ = "0.2.0"
|
|
25
|
+
|
|
26
|
+
# Per-question schema compression, then config / models
|
|
27
|
+
# (the schema-graph helpers are imported further down, after the RBAC imports)
|
|
28
|
+
from .compressor import compress
|
|
29
|
+
from .config import (
|
|
30
|
+
DatabaseEntry,
|
|
31
|
+
Settings,
|
|
32
|
+
data_dir,
|
|
33
|
+
load_database,
|
|
34
|
+
load_env_file,
|
|
35
|
+
load_sensitive_patterns,
|
|
36
|
+
load_settings,
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
# Connectivity (transport-agnostic)
|
|
40
|
+
from .drivers import DatabaseDriver, get_driver
|
|
41
|
+
|
|
42
|
+
# Execution (read-only)
|
|
43
|
+
from .executor import (
|
|
44
|
+
ExecutionResult,
|
|
45
|
+
execute_sql,
|
|
46
|
+
execute_sql_text,
|
|
47
|
+
format_csv,
|
|
48
|
+
format_markdown,
|
|
49
|
+
sanitize_error,
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
# NL->SQL generation (provider-agnostic / BYOM)
|
|
53
|
+
from .generate import GeneratorUnavailable, SqlGenerator, get_generator
|
|
54
|
+
|
|
55
|
+
# Orchestration
|
|
56
|
+
from .orchestrator import ask
|
|
57
|
+
|
|
58
|
+
# RBAC (optional, opt-in by identity)
|
|
59
|
+
from .policy import AccessDenied, Policy, get_policy, resolve_current_user
|
|
60
|
+
from .schema_graph import build_graph, load_graph, write_graph
|
|
61
|
+
|
|
62
|
+
# Validation (the safety core)
|
|
63
|
+
from .validator import ValidationResult, validate
|
|
64
|
+
|
|
65
|
+
__all__ = [
|
|
66
|
+
"__version__",
|
|
67
|
+
# config
|
|
68
|
+
"Settings",
|
|
69
|
+
"DatabaseEntry",
|
|
70
|
+
"load_settings",
|
|
71
|
+
"load_database",
|
|
72
|
+
"load_env_file",
|
|
73
|
+
"load_sensitive_patterns",
|
|
74
|
+
"data_dir",
|
|
75
|
+
# validation
|
|
76
|
+
"validate",
|
|
77
|
+
"ValidationResult",
|
|
78
|
+
# schema
|
|
79
|
+
"compress",
|
|
80
|
+
"build_graph",
|
|
81
|
+
"load_graph",
|
|
82
|
+
"write_graph",
|
|
83
|
+
# execution
|
|
84
|
+
"execute_sql",
|
|
85
|
+
"execute_sql_text",
|
|
86
|
+
"ExecutionResult",
|
|
87
|
+
"format_markdown",
|
|
88
|
+
"format_csv",
|
|
89
|
+
"sanitize_error",
|
|
90
|
+
# connectivity
|
|
91
|
+
"get_driver",
|
|
92
|
+
"DatabaseDriver",
|
|
93
|
+
# rbac
|
|
94
|
+
"get_policy",
|
|
95
|
+
"Policy",
|
|
96
|
+
"AccessDenied",
|
|
97
|
+
"resolve_current_user",
|
|
98
|
+
# orchestration + generation
|
|
99
|
+
"ask",
|
|
100
|
+
"get_generator",
|
|
101
|
+
"SqlGenerator",
|
|
102
|
+
"GeneratorUnavailable",
|
|
103
|
+
]
|