askql 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. askql-0.2.0/LICENSE +21 -0
  2. askql-0.2.0/PKG-INFO +251 -0
  3. askql-0.2.0/README.md +198 -0
  4. askql-0.2.0/pyproject.toml +82 -0
  5. askql-0.2.0/setup.cfg +4 -0
  6. askql-0.2.0/src/askql/__init__.py +103 -0
  7. askql-0.2.0/src/askql/api.py +215 -0
  8. askql-0.2.0/src/askql/audit.py +121 -0
  9. askql-0.2.0/src/askql/capability.py +120 -0
  10. askql-0.2.0/src/askql/cli.py +243 -0
  11. askql-0.2.0/src/askql/compressor.py +106 -0
  12. askql-0.2.0/src/askql/config.py +227 -0
  13. askql-0.2.0/src/askql/dialects/__init__.py +26 -0
  14. askql-0.2.0/src/askql/dialects/mssql.py +58 -0
  15. askql-0.2.0/src/askql/dialects/oracle.py +50 -0
  16. askql-0.2.0/src/askql/dialects/postgres.py +59 -0
  17. askql-0.2.0/src/askql/doctor.py +262 -0
  18. askql-0.2.0/src/askql/drivers/__init__.py +97 -0
  19. askql-0.2.0/src/askql/drivers/base.py +22 -0
  20. askql-0.2.0/src/askql/drivers/jdbc.py +223 -0
  21. askql-0.2.0/src/askql/drivers/mssql.py +37 -0
  22. askql-0.2.0/src/askql/drivers/oracle.py +49 -0
  23. askql-0.2.0/src/askql/drivers/postgres.py +37 -0
  24. askql-0.2.0/src/askql/embeddings.py +86 -0
  25. askql-0.2.0/src/askql/executor.py +337 -0
  26. askql-0.2.0/src/askql/generate.py +292 -0
  27. askql-0.2.0/src/askql/orchestrator.py +171 -0
  28. askql-0.2.0/src/askql/policy.py +92 -0
  29. askql-0.2.0/src/askql/py.typed +0 -0
  30. askql-0.2.0/src/askql/ratelimit.py +53 -0
  31. askql-0.2.0/src/askql/retriever.py +168 -0
  32. askql-0.2.0/src/askql/schema_graph.py +163 -0
  33. askql-0.2.0/src/askql/scraper.py +152 -0
  34. askql-0.2.0/src/askql/tokenize.py +116 -0
  35. askql-0.2.0/src/askql/validator.py +361 -0
  36. askql-0.2.0/src/askql.egg-info/PKG-INFO +251 -0
  37. askql-0.2.0/src/askql.egg-info/SOURCES.txt +55 -0
  38. askql-0.2.0/src/askql.egg-info/dependency_links.txt +1 -0
  39. askql-0.2.0/src/askql.egg-info/entry_points.txt +3 -0
  40. askql-0.2.0/src/askql.egg-info/requires.txt +40 -0
  41. askql-0.2.0/src/askql.egg-info/top_level.txt +1 -0
  42. askql-0.2.0/tests/test_api.py +79 -0
  43. askql-0.2.0/tests/test_audit_read.py +52 -0
  44. askql-0.2.0/tests/test_capability.py +70 -0
  45. askql-0.2.0/tests/test_compressor.py +54 -0
  46. askql-0.2.0/tests/test_doctor.py +28 -0
  47. askql-0.2.0/tests/test_domain_rules.py +34 -0
  48. askql-0.2.0/tests/test_embeddings.py +88 -0
  49. askql-0.2.0/tests/test_executor.py +64 -0
  50. askql-0.2.0/tests/test_generate.py +108 -0
  51. askql-0.2.0/tests/test_jdbc_transport.py +78 -0
  52. askql-0.2.0/tests/test_orchestrator.py +150 -0
  53. askql-0.2.0/tests/test_policy.py +152 -0
  54. askql-0.2.0/tests/test_public_api.py +67 -0
  55. askql-0.2.0/tests/test_relocatable.py +30 -0
  56. askql-0.2.0/tests/test_schema_graph.py +75 -0
  57. askql-0.2.0/tests/test_validator.py +166 -0
askql-0.2.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 askql authors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
askql-0.2.0/PKG-INFO ADDED
@@ -0,0 +1,251 @@
1
+ Metadata-Version: 2.4
2
+ Name: askql
3
+ Version: 0.2.0
4
+ Summary: Safe natural-language-to-SQL: validated, read-only queries across many SQL engines.
5
+ Author: askql maintainers
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/TestAutomationArchitect/askql
8
+ Project-URL: Source, https://github.com/TestAutomationArchitect/askql
9
+ Project-URL: Changelog, https://github.com/TestAutomationArchitect/askql/blob/main/CHANGELOG.md
10
+ Keywords: sql,nl2sql,text-to-sql,llm,guardrails,read-only,database
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Topic :: Database
18
+ Classifier: Topic :: Software Development :: Libraries
19
+ Classifier: Typing :: Typed
20
+ Requires-Python: >=3.11
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE
23
+ Requires-Dist: sqlglot>=23.0
24
+ Requires-Dist: pyyaml>=6.0
25
+ Provides-Extra: postgres
26
+ Requires-Dist: psycopg[binary]>=3.1; extra == "postgres"
27
+ Provides-Extra: oracle
28
+ Requires-Dist: python-oracledb>=2.0; extra == "oracle"
29
+ Provides-Extra: mssql
30
+ Requires-Dist: pymssql>=2.2; extra == "mssql"
31
+ Provides-Extra: jdbc
32
+ Requires-Dist: JayDeBeApi>=1.2; extra == "jdbc"
33
+ Requires-Dist: JPype1>=1.5; extra == "jdbc"
34
+ Provides-Extra: llm
35
+ Requires-Dist: anthropic>=0.40; extra == "llm"
36
+ Provides-Extra: llm-aws
37
+ Requires-Dist: anthropic[bedrock]>=0.40; extra == "llm-aws"
38
+ Provides-Extra: llm-vertex
39
+ Requires-Dist: anthropic[vertex]>=0.40; extra == "llm-vertex"
40
+ Provides-Extra: llm-openai
41
+ Requires-Dist: openai>=1.40; extra == "llm-openai"
42
+ Provides-Extra: embeddings
43
+ Requires-Dist: sentence-transformers>=2.2; extra == "embeddings"
44
+ Provides-Extra: api
45
+ Requires-Dist: fastapi>=0.110; extra == "api"
46
+ Requires-Dist: uvicorn[standard]>=0.27; extra == "api"
47
+ Provides-Extra: dev
48
+ Requires-Dist: pytest>=8.0; extra == "dev"
49
+ Requires-Dist: ruff>=0.5; extra == "dev"
50
+ Requires-Dist: mypy>=1.10; extra == "dev"
51
+ Requires-Dist: httpx>=0.27; extra == "dev"
52
+ Dynamic: license-file
53
+
54
+ # askql — Safe Natural-Language → SQL
55
+
56
+ Translate plain-English questions into **validated, read-only SELECT** queries and run them
57
+ against a relational database. Designed for **QA / dev environments only**, with
58
+ defense-in-depth guardrails so an AI agent can never modify data or leak PII.
59
+
60
+ > ⚠️ **Non-production by design.** This system executes only read-only `SELECT`
61
+ > statements, ideally through a least-privilege DB user. Do not point it at a production
62
+ > database without the Phase-3 approval workflow (see [ARCHITECTURE.md](ARCHITECTURE.md)).
63
+
64
+ ---
65
+
66
+ ## How it works
67
+
68
+ ```
69
+ question ─▶ compress (pick ~10 relevant tables) ─▶ LLM writes SQL
70
+ ─▶ validate (AST guardrails) ─▶ execute (read-only, rollback, timeout)
71
+ ─▶ markdown/CSV + audit log
72
+ ```
73
+
74
+ Five defense layers: **AST validation → read-only DB user → connection hygiene →
75
+ output caps → audit trail**. See [ARCHITECTURE.md](ARCHITECTURE.md) for the full design and
76
+ the security hardening applied on top of the source playbook.
77
+
78
+ > **Credentials:** use whatever your org gives you — the tool runs with any user. A read-only
79
+ > user is a recommended nice-to-have (defense in depth), not a requirement; if the connected
80
+ > user can write, you get a one-line advisory, never a block.
81
+
82
+ ---
83
+
84
+ ## Quick start
85
+
86
+ ```bash
87
+ # 1. Install (Python 3.11+; default driver is PostgreSQL)
88
+ python -m venv .venv && .venv\Scripts\activate # Windows; on macOS/Linux: source .venv/bin/activate
89
+ pip install -e ".[postgres,dev]"
90
+
91
+ # 2. Configure
92
+ copy .env.example .env # then edit credentials
93
+ # edit config/settings.yaml # set dialect + schemas
94
+
95
+ # 3. Build the metadata pipeline (one command)
96
+ python scripts/scrape_schema.py --schemas public --build-graph
97
+
98
+ # 4. Ask a question (the orchestrator runs compress→validate→execute)
99
+ askql ask "show me the 10 newest active accounts"
100
+
101
+ # …or drive the steps manually:
102
+ python scripts/compress_metadata.py --question "active accounts" --graph docs/schema-graph.json
103
+ python scripts/validate_sql.py build/query.sql --max-rows 100
104
+ python scripts/execute_sql.py build/query.sql --limit 50
105
+ ```
106
+
107
+ No database handy? Everything except `execute` runs fully offline, and the executor
108
+ has an **offline render mode** (`--rows-json`) used by the test suite.
109
+
110
+ Verify your setup any time with the built-in health check:
111
+
112
+ ```bash
113
+ python scripts/doctor.py # config, credentials, transport, live connectivity, graph
114
+ ```
115
+
116
+ ---
117
+
118
+ ## Run in Docker (zero local install)
119
+
120
+ The image bundles Python **and a JRE**, so the universal JDBC transport works without
121
+ installing Java or native drivers on your machine. Mount your config, `.env`, and vendor JDBC
122
+ jars at runtime:
123
+
124
+ ```bash
125
+ docker build -t askql .
126
+
127
+ # health check
128
+ docker run --rm --env-file .env \
129
+ -v "$PWD/config:/app/config" -v "$PWD/jdbc-drivers:/app/jdbc-drivers" askql doctor
130
+
131
+ # or via compose
132
+ docker compose run --rm askql doctor
133
+ docker compose run --rm askql execute build/query.sql --limit 50
134
+ ```
135
+
136
+ Vendor JDBC jars aren't baked into the image (licensing) — drop them in `jdbc-drivers/` and
137
+ they're mounted in. This is the "usable by all" path: one image, any database engine/version.
138
+
139
+ ---
140
+
141
+ ## REST API (connectivity-as-a-service)
142
+
143
+ Run askql as a service so clients (a web UI, a Slack bot, scripts, other teams) hold **no**
144
+ drivers, JVM, or credentials — they just call HTTP. The service applies the same validator,
145
+ RBAC, read-only execution, and audit.
146
+
147
+ ```bash
148
+ pip install -e ".[api,postgres,jdbc]"
149
+ export T2S_API_KEYS="devkey:alice@corp" # identity drives RBAC; for local dev, leave unset and set T2S_API_ALLOW_OPEN=true
150
+ askql-api # or: uvicorn askql.api:app --port 8000
151
+ # or containerized: docker compose up api
152
+ ```
153
+
154
+ | Method & path | Auth | Purpose |
155
+ |---------------|------|---------|
156
+ | `GET /health` | public | liveness |
157
+ | `GET /api/v1/databases` | public | registry names (no creds/conn strings) |
158
+ | `POST /api/v1/validate` | required | `{sql, database?}` → guardrail check |
159
+ | `POST /api/v1/compress` | required | `{question}` → focused schema slice |
160
+ | `POST /api/v1/query` | required | `{sql, database?, max_rows?, format?}` → execute read-only |
161
+ | `POST /api/v1/ask` | required | `{question, database?}` → compress→LLM→validate→retry→execute (needs a configured LLM provider, e.g. `ANTHROPIC_API_KEY` — see "Choosing the model") |
162
+
163
+ ```bash
164
+ curl -s -X POST localhost:8000/api/v1/query -H "X-API-Key: devkey" \
165
+ -H "Content-Type: application/json" \
166
+ -d '{"sql":"SELECT first_name, salary FROM HR.EMPLOYEES WHERE ROWNUM <= 3","database":"oracle-prod"}'
167
+ # → {"ok":true,"columns":["FIRST_NAME","SALARY"],"rows":[["Steven",24000], ...],"latency_ms":104}
168
+ ```
169
+
170
+ Auth: `T2S_API_KEYS="key1:alice@corp,key2:bob@corp"` maps a key → identity (→ role in
171
+ `config/access-control.yaml`). Auth-required endpoints refuse to run unless keys are set (or
172
+ `T2S_API_ALLOW_OPEN=true` for local dev). A non-SELECT returns `400` before any DB work.
173
+
174
+ ### Choosing the model (provider-agnostic / BYOM)
175
+
176
+ `/ask` (and `askql ask`) generates SQL through a pluggable provider — **you bring your own
177
+ model.** The safety pipeline is identical regardless of provider; only `T2S_LLM_PROVIDER`
178
+ changes. With nothing configured, generation falls back to the **BYO-LLM / IDE lane** (your
179
+ Copilot / Claude Code agent does it — no backend model, no key).
180
+
181
+ | `T2S_LLM_PROVIDER` | What it uses | Config |
182
+ |--------------------|--------------|--------|
183
+ | `anthropic` (auto) | Anthropic API | `ANTHROPIC_API_KEY` · `pip install '.[llm]'` |
184
+ | `bedrock` | Claude on **AWS** | AWS creds/role · `'.[llm-aws]'` · `T2S_LLM_MODEL=anthropic.claude-…` |
185
+ | `vertex` | Claude on **GCP** | GCP creds · `'.[llm-vertex]'` |
186
+ | `openai` | OpenAI GPT | `OPENAI_API_KEY` · `'.[llm-openai]'` |
187
+ | `azure-openai` | Azure OpenAI | `AZURE_OPENAI_API_KEY/_ENDPOINT/OPENAI_API_VERSION` · `'.[llm-openai]'` |
188
+ | `openai-compatible` | **any** self-hosted / OSS (vLLM, Ollama, LM Studio, OpenRouter, GitHub Models) | `T2S_LLM_BASE_URL=…` + `T2S_LLM_MODEL=…` · `'.[llm-openai]'` |
189
+ | `custom` | your own gateway | `T2S_LLM_FACTORY=module:callable` |
190
+ | _unset / `none`_ | **BYO-LLM** — IDE agent (Copilot/Claude Code) | nothing |
191
+
192
+ > **Copilot note:** Copilot has no server-side completions API, so it can't power the `/ask`
193
+ > *service* — but it *is* the model in the **IDE lane** (the agent runs the scripts/skill with
194
+ > its own model). For a token-based API in the GitHub/MS ecosystem, use Azure OpenAI or
195
+ > "GitHub Models" via the `openai-compatible` provider.
196
+
197
+ ---
198
+
199
+ ## Use as a library
200
+
201
+ `askql` is a clean, typed, importable package (`py.typed`) — build it into your own app. The
202
+ public API is everything in `askql.__all__`; heavy/optional deps (DB drivers, LLM SDKs,
203
+ FastAPI) load lazily, so `import askql` is light.
204
+
205
+ ```python
206
+ from askql import validate, compress, execute_sql_text, ask, Settings, load_graph
207
+
208
+ s = Settings(dialect="postgres", max_rows=100)
209
+
210
+ # 1) Guardrail check (pure, no DB)
211
+ r = validate("SELECT id, name FROM app.users LIMIT 10", settings=s)
212
+ assert r.ok, r.errors
213
+
214
+ # 2) Pick relevant tables for a question
215
+ slice_ = compress(load_graph("schema-graph.json"), "active users this week", max_tables=8)
216
+
217
+ # 3) Full NL->SQL (compress -> LLM -> validate -> retry -> execute), provider via env
218
+ result = ask("how many active users this week", settings=s) # needs an LLM provider configured
219
+ ```
220
+
221
+ Everything is config-injectable (pass `Settings` / `DatabaseEntry` / a custom `SqlGenerator`) —
222
+ no checked-out repo required. For pip-installed use, set `T2S_HOME` (config/data root) and/or
223
+ `T2S_DATA_DIR` (writable dir for the audit log + caches) so nothing is written next to the
224
+ installed package. Packaging/publishing details: [SHIPPING.md](SHIPPING.md).
225
+
226
+ ## Repository map
227
+
228
+ | Path | What it is |
229
+ |------|------------|
230
+ | [src/askql/](src/askql/) | The library: validator, compressor, executor, drivers, scraper |
231
+ | [scripts/](scripts/) | Thin CLI wrappers (the commands the agent/humans call) |
232
+ | [config/](config/) | `settings.yaml`, `sensitive-columns.yaml`, `databases.yaml` |
233
+ | [docs/domain-rules.md](docs/domain-rules.md) | Business logic hints the LLM reads |
234
+ | [tests/](tests/) | Offline unit tests (validator / compressor / graph) |
235
+ | [.claude/](.claude/) | Claude Code skill + permission settings |
236
+ | [CLAUDE.md](CLAUDE.md) | Operating instructions for the AI agent |
237
+ | [QUICKSTART.md](QUICKSTART.md) | Install + first run in your org (≈15 min) |
238
+ | [SHIPPING.md](SHIPPING.md) | How to package, ship, and install (wheel / index / Docker) |
239
+ | [PB/](PB/) | The original source playbook (reference) |
240
+
241
+ ---
242
+
243
+ ## Safety rules (non-negotiable)
244
+
245
+ - Only `SELECT` (incl. `UNION`/`INTERSECT`/`EXCEPT`). No DDL/DML, no procedural code.
246
+ - No `SELECT *` — explicit columns only (`COUNT(*)` is fine).
247
+ - A row limit is **required** and capped at `max_rows`.
248
+ - No system schemas, no sensitive columns (SSN, PASSWORD, …), no dangerous functions.
249
+ - Every execution rolls back, times out, and is audited.
250
+
251
+ See [CLAUDE.md](CLAUDE.md) for the agent workflow and [tests/](tests/) for the enforced behavior.
askql-0.2.0/README.md ADDED
@@ -0,0 +1,198 @@
1
+ # askql — Safe Natural-Language → SQL
2
+
3
+ Translate plain-English questions into **validated, read-only SELECT** queries and run them
4
+ against a relational database. Designed for **QA / dev environments only**, with
5
+ defense-in-depth guardrails so an AI agent can never modify data or leak PII.
6
+
7
+ > ⚠️ **Non-production by design.** This system executes only read-only `SELECT`
8
+ > statements, ideally through a least-privilege DB user. Do not point it at a production
9
+ > database without the Phase-3 approval workflow (see [ARCHITECTURE.md](ARCHITECTURE.md)).
10
+
11
+ ---
12
+
13
+ ## How it works
14
+
15
+ ```
16
+ question ─▶ compress (pick ~10 relevant tables) ─▶ LLM writes SQL
17
+ ─▶ validate (AST guardrails) ─▶ execute (read-only, rollback, timeout)
18
+ ─▶ markdown/CSV + audit log
19
+ ```
20
+
21
+ Five defense layers: **AST validation → read-only DB user → connection hygiene →
22
+ output caps → audit trail**. See [ARCHITECTURE.md](ARCHITECTURE.md) for the full design and
23
+ the security hardening applied on top of the source playbook.
24
+
25
+ > **Credentials:** use whatever your org gives you — the tool runs with any user. A read-only
26
+ > user is a recommended nice-to-have (defense in depth), not a requirement; if the connected
27
+ > user can write, you get a one-line advisory, never a block.
28
+
29
+ ---
30
+
31
+ ## Quick start
32
+
33
+ ```bash
34
+ # 1. Install (Python 3.11+; default driver is PostgreSQL)
35
+ python -m venv .venv && .venv\Scripts\activate # Windows; on macOS/Linux: source .venv/bin/activate
36
+ pip install -e ".[postgres,dev]"
37
+
38
+ # 2. Configure
39
+ copy .env.example .env # then edit credentials
40
+ # edit config/settings.yaml # set dialect + schemas
41
+
42
+ # 3. Build the metadata pipeline (one command)
43
+ python scripts/scrape_schema.py --schemas public --build-graph
44
+
45
+ # 4. Ask a question (the orchestrator runs compress→validate→execute)
46
+ askql ask "show me the 10 newest active accounts"
47
+
48
+ # …or drive the steps manually:
49
+ python scripts/compress_metadata.py --question "active accounts" --graph docs/schema-graph.json
50
+ python scripts/validate_sql.py build/query.sql --max-rows 100
51
+ python scripts/execute_sql.py build/query.sql --limit 50
52
+ ```
53
+
54
+ No database handy? Everything except `execute` runs fully offline, and the executor
55
+ has an **offline render mode** (`--rows-json`) used by the test suite.
56
+
57
+ Verify your setup any time with the built-in health check:
58
+
59
+ ```bash
60
+ python scripts/doctor.py # config, credentials, transport, live connectivity, graph
61
+ ```
62
+
63
+ ---
64
+
65
+ ## Run in Docker (zero local install)
66
+
67
+ The image bundles Python **and a JRE**, so the universal JDBC transport works without
68
+ installing Java or native drivers on your machine. Mount your config, `.env`, and vendor JDBC
69
+ jars at runtime:
70
+
71
+ ```bash
72
+ docker build -t askql .
73
+
74
+ # health check
75
+ docker run --rm --env-file .env \
76
+ -v "$PWD/config:/app/config" -v "$PWD/jdbc-drivers:/app/jdbc-drivers" askql doctor
77
+
78
+ # or via compose
79
+ docker compose run --rm askql doctor
80
+ docker compose run --rm askql execute build/query.sql --limit 50
81
+ ```
82
+
83
+ Vendor JDBC jars aren't baked into the image (licensing) — drop them in `jdbc-drivers/` and
84
+ they're mounted in. This is the "usable by all" path: one image, any database engine/version.
85
+
86
+ ---
87
+
88
+ ## REST API (connectivity-as-a-service)
89
+
90
+ Run askql as a service so clients (a web UI, a Slack bot, scripts, other teams) hold **no**
91
+ drivers, JVM, or credentials — they just call HTTP. The service applies the same validator,
92
+ RBAC, read-only execution, and audit.
93
+
94
+ ```bash
95
+ pip install -e ".[api,postgres,jdbc]"
96
+ export T2S_API_KEYS="devkey:alice@corp" # identity drives RBAC; for local dev, leave unset and set T2S_API_ALLOW_OPEN=true
97
+ askql-api # or: uvicorn askql.api:app --port 8000
98
+ # or containerized: docker compose up api
99
+ ```
100
+
101
+ | Method & path | Auth | Purpose |
102
+ |---------------|------|---------|
103
+ | `GET /health` | public | liveness |
104
+ | `GET /api/v1/databases` | public | registry names (no creds/conn strings) |
105
+ | `POST /api/v1/validate` | required | `{sql, database?}` → guardrail check |
106
+ | `POST /api/v1/compress` | required | `{question}` → focused schema slice |
107
+ | `POST /api/v1/query` | required | `{sql, database?, max_rows?, format?}` → execute read-only |
108
+ | `POST /api/v1/ask` | required | `{question, database?}` → compress→LLM→validate→retry→execute (needs a configured LLM provider, e.g. `ANTHROPIC_API_KEY` — see "Choosing the model") |
109
+
110
+ ```bash
111
+ curl -s -X POST localhost:8000/api/v1/query -H "X-API-Key: devkey" \
112
+ -H "Content-Type: application/json" \
113
+ -d '{"sql":"SELECT first_name, salary FROM HR.EMPLOYEES WHERE ROWNUM <= 3","database":"oracle-prod"}'
114
+ # → {"ok":true,"columns":["FIRST_NAME","SALARY"],"rows":[["Steven",24000], ...],"latency_ms":104}
115
+ ```
116
+
117
+ Auth: `T2S_API_KEYS="key1:alice@corp,key2:bob@corp"` maps a key → identity (→ role in
118
+ `config/access-control.yaml`). Auth-required endpoints refuse to run unless keys are set (or
119
+ `T2S_API_ALLOW_OPEN=true` for local dev). A non-SELECT returns `400` before any DB work.
120
+
121
+ ### Choosing the model (provider-agnostic / BYOM)
122
+
123
+ `/ask` (and `askql ask`) generates SQL through a pluggable provider — **you bring your own
124
+ model.** The safety pipeline is identical regardless of provider; only `T2S_LLM_PROVIDER`
125
+ changes. With nothing configured, generation falls back to the **BYO-LLM / IDE lane** (your
126
+ Copilot / Claude Code agent does it — no backend model, no key).
127
+
128
+ | `T2S_LLM_PROVIDER` | What it uses | Config |
129
+ |--------------------|--------------|--------|
130
+ | `anthropic` (auto) | Anthropic API | `ANTHROPIC_API_KEY` · `pip install '.[llm]'` |
131
+ | `bedrock` | Claude on **AWS** | AWS creds/role · `'.[llm-aws]'` · `T2S_LLM_MODEL=anthropic.claude-…` |
132
+ | `vertex` | Claude on **GCP** | GCP creds · `'.[llm-vertex]'` |
133
+ | `openai` | OpenAI GPT | `OPENAI_API_KEY` · `'.[llm-openai]'` |
134
+ | `azure-openai` | Azure OpenAI | `AZURE_OPENAI_API_KEY/_ENDPOINT/OPENAI_API_VERSION` · `'.[llm-openai]'` |
135
+ | `openai-compatible` | **any** self-hosted / OSS (vLLM, Ollama, LM Studio, OpenRouter, GitHub Models) | `T2S_LLM_BASE_URL=…` + `T2S_LLM_MODEL=…` · `'.[llm-openai]'` |
136
+ | `custom` | your own gateway | `T2S_LLM_FACTORY=module:callable` |
137
+ | _unset / `none`_ | **BYO-LLM** — IDE agent (Copilot/Claude Code) | nothing |
138
+
139
+ > **Copilot note:** Copilot has no server-side completions API, so it can't power the `/ask`
140
+ > *service* — but it *is* the model in the **IDE lane** (the agent runs the scripts/skill with
141
+ > its own model). For a token-based API in the GitHub/MS ecosystem, use Azure OpenAI or
142
+ > "GitHub Models" via the `openai-compatible` provider.
143
+
144
+ ---
145
+
146
+ ## Use as a library
147
+
148
+ `askql` is a clean, typed, importable package (`py.typed`) — build it into your own app. The
149
+ public API is everything in `askql.__all__`; heavy/optional deps (DB drivers, LLM SDKs,
150
+ FastAPI) load lazily, so `import askql` is light.
151
+
152
+ ```python
153
+ from askql import validate, compress, execute_sql_text, ask, Settings, load_graph
154
+
155
+ s = Settings(dialect="postgres", max_rows=100)
156
+
157
+ # 1) Guardrail check (pure, no DB)
158
+ r = validate("SELECT id, name FROM app.users LIMIT 10", settings=s)
159
+ assert r.ok, r.errors
160
+
161
+ # 2) Pick relevant tables for a question
162
+ slice_ = compress(load_graph("schema-graph.json"), "active users this week", max_tables=8)
163
+
164
+ # 3) Full NL->SQL (compress -> LLM -> validate -> retry -> execute), provider via env
165
+ result = ask("how many active users this week", settings=s) # needs an LLM provider configured
166
+ ```
167
+
168
+ Everything is config-injectable (pass `Settings` / `DatabaseEntry` / a custom `SqlGenerator`) —
169
+ no checked-out repo required. For pip-installed use, set `T2S_HOME` (config/data root) and/or
170
+ `T2S_DATA_DIR` (writable dir for the audit log + caches) so nothing is written next to the
171
+ installed package. Packaging/publishing details: [SHIPPING.md](SHIPPING.md).
172
+
173
+ ## Repository map
174
+
175
+ | Path | What it is |
176
+ |------|------------|
177
+ | [src/askql/](src/askql/) | The library: validator, compressor, executor, drivers, scraper |
178
+ | [scripts/](scripts/) | Thin CLI wrappers (the commands the agent/humans call) |
179
+ | [config/](config/) | `settings.yaml`, `sensitive-columns.yaml`, `databases.yaml` |
180
+ | [docs/domain-rules.md](docs/domain-rules.md) | Business logic hints the LLM reads |
181
+ | [tests/](tests/) | Offline unit tests (validator / compressor / graph) |
182
+ | [.claude/](.claude/) | Claude Code skill + permission settings |
183
+ | [CLAUDE.md](CLAUDE.md) | Operating instructions for the AI agent |
184
+ | [QUICKSTART.md](QUICKSTART.md) | Install + first run in your org (≈15 min) |
185
+ | [SHIPPING.md](SHIPPING.md) | How to package, ship, and install (wheel / index / Docker) |
186
+ | [PB/](PB/) | The original source playbook (reference) |
187
+
188
+ ---
189
+
190
+ ## Safety rules (non-negotiable)
191
+
192
+ - Only `SELECT` (incl. `UNION`/`INTERSECT`/`EXCEPT`). No DDL/DML, no procedural code.
193
+ - No `SELECT *` — explicit columns only (`COUNT(*)` is fine).
194
+ - A row limit is **required** and capped at `max_rows`.
195
+ - No system schemas, no sensitive columns (SSN, PASSWORD, …), no dangerous functions.
196
+ - Every execution rolls back, times out, and is audited.
197
+
198
+ See [CLAUDE.md](CLAUDE.md) for the agent workflow and [tests/](tests/) for the enforced behavior.
askql-0.2.0/pyproject.toml ADDED
@@ -0,0 +1,82 @@
1
+ [project]
2
+ name = "askql"
3
+ version = "0.2.0"
4
+ description = "Safe natural-language-to-SQL: validated, read-only queries across many SQL engines."
5
+ requires-python = ">=3.11"
6
+ readme = "README.md"
7
+ dependencies = ["sqlglot>=23.0", "pyyaml>=6.0"]
8
+ keywords = ["sql", "nl2sql", "text-to-sql", "llm", "guardrails", "read-only", "database"]
9
+ authors = [{ name = "askql maintainers" }]
10
+ license = { text = "MIT" }
11
+ classifiers = [
12
+ "Development Status :: 4 - Beta",
13
+ "Intended Audience :: Developers",
14
+ "License :: OSI Approved :: MIT License",
15
+ "Programming Language :: Python :: 3",
16
+ "Programming Language :: Python :: 3.11",
17
+ "Programming Language :: Python :: 3.12",
18
+ "Topic :: Database",
19
+ "Topic :: Software Development :: Libraries",
20
+ "Typing :: Typed",
21
+ ]
22
+
23
+ [project.urls]
24
+ # TODO: point these at your repo/docs when published.
25
+ Homepage = "https://github.com/TestAutomationArchitect/askql"
26
+ Source = "https://github.com/TestAutomationArchitect/askql"
27
+ Changelog = "https://github.com/TestAutomationArchitect/askql/blob/main/CHANGELOG.md"
28
+
29
+ [project.optional-dependencies]
30
+ postgres = ["psycopg[binary]>=3.1"]
31
+ oracle = ["python-oracledb>=2.0"]
32
+ mssql = ["pymssql>=2.2"]
33
+ jdbc = ["JayDeBeApi>=1.2", "JPype1>=1.5"] # universal transport (needs a JVM on host)
34
+ # NL->SQL generation providers (pick what your org uses; the pipeline is identical):
35
+ llm = ["anthropic>=0.40"] # Anthropic API
36
+ llm-aws = ["anthropic[bedrock]>=0.40"] # Claude on Amazon Bedrock
37
+ llm-vertex = ["anthropic[vertex]>=0.40"] # Claude on Google Vertex
38
+ llm-openai = ["openai>=1.40"] # OpenAI / Azure OpenAI / any OpenAI-compatible (BYOM)
39
+ embeddings = ["sentence-transformers>=2.2"] # local semantic table retrieval (or use openai embeddings)
40
+ api = ["fastapi>=0.110", "uvicorn[standard]>=0.27"]
41
+ dev = ["pytest>=8.0", "ruff>=0.5", "mypy>=1.10", "httpx>=0.27"]
42
+
43
+ [project.scripts]
44
+ # Single orchestrator entrypoint (compress -> generate -> validate -> execute).
45
+ askql = "askql.cli:main"
46
+ askql-api = "askql.api:main"
47
+
48
+ [build-system]
49
+ requires = ["setuptools>=68"]
50
+ build-backend = "setuptools.build_meta"
51
+
52
+ [tool.setuptools.packages.find]
53
+ where = ["src"]
54
+
55
+ [tool.setuptools.package-data]
56
+ askql = ["py.typed"] # ship type-hint marker so consumers get types (PEP 561)
57
+
58
+ [tool.pytest.ini_options]
59
+ testpaths = ["tests"]
60
+ markers = [
61
+ "integration: requires a live database connection (skipped by default)",
62
+ ]
63
+
64
+ [tool.ruff]
65
+ line-length = 100
66
+ src = ["src", "scripts", "tests"]
67
+
68
+ [tool.ruff.lint]
69
+ select = ["E", "F", "I", "B", "UP", "S"] # incl. flake8-bandit (S) for security smells
70
+ # S101: asserts are fine. S110: try/except/pass is a deliberate, documented pattern in the
71
+ # audit/advisory/cleanup paths (those must never raise and break a query).
72
+ ignore = ["S101", "S110"]
73
+
74
+ [tool.ruff.lint.per-file-ignores]
75
+ "tests/*" = ["S", "E501"] # tests may embed long CSV/SQL fixtures
76
+
77
+ [tool.mypy]
78
+ python_version = "3.11"
79
+ mypy_path = "src"
80
+ packages = ["askql"]
81
+ ignore_missing_imports = true
82
+ explicit_package_bases = true
askql-0.2.0/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
askql-0.2.0/src/askql/__init__.py ADDED
@@ -0,0 +1,103 @@
1
+ """askql — safe natural-language-to-SQL with defense-in-depth guardrails.
2
+
3
+ A library teams can build on: turn questions into validated, read-only SELECTs and run them
4
+ safely against many SQL engines. Read-only by design; the validator is the single source of
5
+ truth for what is safe to execute.
6
+
7
+ Public API (stable surface — internals may change between minor versions):
8
+
9
+ from askql import validate, compress, ask, execute_sql_text, Settings
10
+
11
+ Quick start (library use)::
12
+
13
+ from askql import validate, Settings
14
+ r = validate("SELECT id FROM s.t LIMIT 10", settings=Settings(dialect="postgres"))
15
+ assert r.ok
16
+
17
+ Optional features install via extras: `askql[postgres]`, `[jdbc]`, `[llm]`, `[llm-openai]`,
18
+ `[api]`. Heavy/optional deps (DB drivers, LLM SDKs, FastAPI, JPype) are imported lazily, so
19
+ `import askql` is light and never fails on a missing optional dependency.
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ __version__ = "0.2.0"
25
+
26
+ # Config / models
27
+ # Schema graph + per-question compression
28
+ from .compressor import compress
29
+ from .config import (
30
+ DatabaseEntry,
31
+ Settings,
32
+ data_dir,
33
+ load_database,
34
+ load_env_file,
35
+ load_sensitive_patterns,
36
+ load_settings,
37
+ )
38
+
39
+ # Connectivity (transport-agnostic)
40
+ from .drivers import DatabaseDriver, get_driver
41
+
42
+ # Execution (read-only)
43
+ from .executor import (
44
+ ExecutionResult,
45
+ execute_sql,
46
+ execute_sql_text,
47
+ format_csv,
48
+ format_markdown,
49
+ sanitize_error,
50
+ )
51
+
52
+ # NL->SQL generation (provider-agnostic / BYOM)
53
+ from .generate import GeneratorUnavailable, SqlGenerator, get_generator
54
+
55
+ # Orchestration
56
+ from .orchestrator import ask
57
+
58
+ # RBAC (optional, opt-in by identity)
59
+ from .policy import AccessDenied, Policy, get_policy, resolve_current_user
60
+ from .schema_graph import build_graph, load_graph, write_graph
61
+
62
+ # Validation (the safety core)
63
+ from .validator import ValidationResult, validate
64
+
65
+ __all__ = [
66
+ "__version__",
67
+ # config
68
+ "Settings",
69
+ "DatabaseEntry",
70
+ "load_settings",
71
+ "load_database",
72
+ "load_env_file",
73
+ "load_sensitive_patterns",
74
+ "data_dir",
75
+ # validation
76
+ "validate",
77
+ "ValidationResult",
78
+ # schema
79
+ "compress",
80
+ "build_graph",
81
+ "load_graph",
82
+ "write_graph",
83
+ # execution
84
+ "execute_sql",
85
+ "execute_sql_text",
86
+ "ExecutionResult",
87
+ "format_markdown",
88
+ "format_csv",
89
+ "sanitize_error",
90
+ # connectivity
91
+ "get_driver",
92
+ "DatabaseDriver",
93
+ # rbac
94
+ "get_policy",
95
+ "Policy",
96
+ "AccessDenied",
97
+ "resolve_current_user",
98
+ # generation
99
+ "ask",
100
+ "get_generator",
101
+ "SqlGenerator",
102
+ "GeneratorUnavailable",
103
+ ]