PyPI - askql - Versions diffs - 0.2.0__py3-none-any.whl - Mend

askql 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

askql/__init__.py +103 -0
askql/api.py +215 -0
askql/audit.py +121 -0
askql/capability.py +120 -0
askql/cli.py +243 -0
askql/compressor.py +106 -0
askql/config.py +227 -0
askql/dialects/__init__.py +26 -0
askql/dialects/mssql.py +58 -0
askql/dialects/oracle.py +50 -0
askql/dialects/postgres.py +59 -0
askql/doctor.py +262 -0
askql/drivers/__init__.py +97 -0
askql/drivers/base.py +22 -0
askql/drivers/jdbc.py +223 -0
askql/drivers/mssql.py +37 -0
askql/drivers/oracle.py +49 -0
askql/drivers/postgres.py +37 -0
askql/embeddings.py +86 -0
askql/executor.py +337 -0
askql/generate.py +292 -0
askql/orchestrator.py +171 -0
askql/policy.py +92 -0
askql/py.typed +0 -0
askql/ratelimit.py +53 -0
askql/retriever.py +168 -0
askql/schema_graph.py +163 -0
askql/scraper.py +152 -0
askql/tokenize.py +116 -0
askql/validator.py +361 -0
askql-0.2.0.dist-info/METADATA +251 -0
askql-0.2.0.dist-info/RECORD +36 -0
askql-0.2.0.dist-info/WHEEL +5 -0
askql-0.2.0.dist-info/entry_points.txt +3 -0
askql-0.2.0.dist-info/licenses/LICENSE +21 -0
askql-0.2.0.dist-info/top_level.txt +1 -0

askql/__init__.py ADDED Viewed

@@ -0,0 +1,103 @@
+"""askql — safe natural-language-to-SQL with defense-in-depth guardrails.
+A library teams can build on: turn questions into validated, read-only SELECTs and run them
+safely against many SQL engines. Read-only by design; the validator is the single source of
+truth for what is safe to execute.
+Public API (stable surface — internals may change between minor versions):
+    from askql import validate, compress, ask, execute_sql_text, Settings
+Quick start (library use)::
+    from askql import validate, Settings
+    r = validate("SELECT id FROM s.t LIMIT 10", settings=Settings(dialect="postgres"))
+    assert r.ok
+Optional features install via extras: `askql[postgres]`, `[jdbc]`, `[llm]`, `[llm-openai]`,
+`[api]`. Heavy/optional deps (DB drivers, LLM SDKs, FastAPI, JPype) are imported lazily, so
+`import askql` is light and never fails on a missing optional dependency.
+"""
+from __future__ import annotations
+__version__ = "0.2.0"
+# Config / models
+# Schema graph + per-question compression
+from .compressor import compress
+from .config import (
+    DatabaseEntry,
+    Settings,
+    data_dir,
+    load_database,
+    load_env_file,
+    load_sensitive_patterns,
+    load_settings,
+)
+# Connectivity (transport-agnostic)
+from .drivers import DatabaseDriver, get_driver
+# Execution (read-only)
+from .executor import (
+    ExecutionResult,
+    execute_sql,
+    execute_sql_text,
+    format_csv,
+    format_markdown,
+    sanitize_error,
+)
+# NL->SQL generation (provider-agnostic / BYOM)
+from .generate import GeneratorUnavailable, SqlGenerator, get_generator
+# Orchestration
+from .orchestrator import ask
+# RBAC (optional, opt-in by identity)
+from .policy import AccessDenied, Policy, get_policy, resolve_current_user
+from .schema_graph import build_graph, load_graph, write_graph
+# Validation (the safety core)
+from .validator import ValidationResult, validate
+# Names re-exported as the package's public API, grouped by area.
+__all__ = [
+    "__version__",
+    # config
+    "Settings",
+    "DatabaseEntry",
+    "load_settings",
+    "load_database",
+    "load_env_file",
+    "load_sensitive_patterns",
+    "data_dir",
+    # validation
+    "validate",
+    "ValidationResult",
+    # schema
+    "compress",
+    "build_graph",
+    "load_graph",
+    "write_graph",
+    # execution
+    "execute_sql",
+    "execute_sql_text",
+    "ExecutionResult",
+    "format_markdown",
+    "format_csv",
+    "sanitize_error",
+    # connectivity
+    "get_driver",
+    "DatabaseDriver",
+    # rbac
+    "get_policy",
+    "Policy",
+    "AccessDenied",
+    "resolve_current_user",
+    # orchestration
+    "ask",
+    # generation
+    "get_generator",
+    "SqlGenerator",
+    "GeneratorUnavailable",
+]

askql/api.py ADDED Viewed

@@ -0,0 +1,215 @@
+"""REST API — connectivity-as-a-service.
+The "usable by all" rung: the service holds the drivers + credentials; clients (QA, support,
+devs, a web UI, a Slack bot) just call HTTP and hold nothing. Every request still flows through
+the same validator / RBAC / read-only execution / audit, so the safety guarantees are identical
+to the CLI.
+Auth: set T2S_API_KEYS="key1:alice@corp,key2:bob@corp". The matched identity drives RBAC
+(config/access-control.yaml). For local dev only, T2S_API_ALLOW_OPEN=true permits unauthenticated
+calls (identity from an X-T2S-User header). Writing endpoints refuse to run if neither is set.
+Run:  uvicorn askql.api:app --host 0.0.0.0 --port 8000
+  or  python -m askql.api
+"""
+from __future__ import annotations
+import os
+from dataclasses import replace
+from fastapi import Depends, FastAPI, Header, HTTPException
+from fastapi.responses import PlainTextResponse
+from pydantic import BaseModel
+from . import __version__
+from .compressor import compress
+from .config import (
+    CONFIG_DIR,
+    ROOT,
+    _read_yaml,
+    load_database,
+    load_sensitive_patterns,
+    load_settings,
+)
+from .executor import execute_sql_text, format_csv, format_markdown
+from .schema_graph import load_graph
+from .validator import validate
+# ASGI application object; referenced as "askql.api:app" by main() and the uvicorn command line.
+app = FastAPI(
+    title="askql API",
+    version=__version__,
+    description="Safe, read-only natural-language-to-SQL as a service.",
+)
+# ── Auth ──────────────────────────────────────────────────────
+def _api_keys() -> dict[str, str]:
+    keys: dict[str, str] = {}
+    for pair in os.environ.get("T2S_API_KEYS", "").split(","):
+        if ":" in pair:
+            k, ident = pair.split(":", 1)
+            keys[k.strip()] = ident.strip()
+    return keys
+def _open_allowed() -> bool:
+    return os.environ.get("T2S_API_ALLOW_OPEN", "").lower() == "true"
+def auth(x_api_key: str | None = Header(None), x_t2s_user: str | None = Header(None)) -> str | None:
+    """Resolve the caller identity (drives RBAC). Required for non-public endpoints."""
+    keys = _api_keys()
+    if keys:
+        if not x_api_key or x_api_key not in keys:
+            raise HTTPException(status_code=401, detail="invalid or missing X-API-Key")
+        return keys[x_api_key]
+    if _open_allowed():
+        return x_t2s_user  # trusted-gateway / dev mode; may be None -> pilot
+    raise HTTPException(
+        status_code=503,
+        detail="API auth not configured: set T2S_API_KEYS (or T2S_API_ALLOW_OPEN=true for dev)",
+    )
+# ── Models ────────────────────────────────────────────────────
+# Request body for POST /api/v1/validate.
+class ValidateReq(BaseModel):
+    sql: str  # SQL text to validate
+    database: str | None = None  # optional registry name; when it resolves, its dialect is used
+# Request body for POST /api/v1/compress.
+class CompressReq(BaseModel):
+    question: str  # natural-language question passed to the compressor
+    max_tables: int | None = None  # falls back to settings.max_tables when None or 0
+    max_columns: int | None = None  # falls back to settings.max_columns when None or 0
+# Request body for POST /api/v1/query.
+class QueryReq(BaseModel):
+    sql: str  # SQL text handed to execute_sql_text
+    database: str | None = None  # registry name passed to load_database
+    max_rows: int | None = None  # forwarded to execute_sql_text as `limit`
+    format: str = "json"  # json | csv | markdown
+# Request body for POST /api/v1/ask.
+class AskReq(BaseModel):
+    question: str  # natural-language question passed to ask()
+    database: str | None = None  # registry name passed to load_database
+    max_rows: int | None = None  # forwarded to ask() as max_rows
+# ── Public endpoints ──────────────────────────────────────────
+@app.get("/health")
+def health() -> dict:
+    # Unauthenticated status endpoint (no auth dependency); reports the package version.
+    return {"status": "ok", "version": __version__}
+@app.get("/api/v1/databases")
+def databases() -> dict:
+    """Registry names only — never connection strings or credentials."""
+    # No auth dependency: this listing is served to unauthenticated callers.
+    data = _read_yaml(CONFIG_DIR / "databases.yaml")
+    reg = data.get("databases") or {}
+    return {
+        "default": data.get("default"),
+        "databases": [
+            # Only name, dialect and environment are copied out of each registry entry;
+            # "environment" defaults to "dev" when the entry omits it.
+            {"name": n, "dialect": e.get("dialect"), "environment": e.get("environment", "dev")}
+            for n, e in reg.items()
+        ],
+    }
+# ── Authenticated endpoints ───────────────────────────────────
+@app.post("/api/v1/validate")
+def api_validate(req: ValidateReq, identity: str | None = Depends(auth)) -> dict:
+    # Validate `req.sql` and return the validator result as a dict.
+    # `identity` is not used in the body; the dependency only enforces authentication.
+    settings = load_settings()
+    if req.database:
+        db = load_database(req.database)
+        # NOTE(review): when the name does not resolve (db is falsy) the request is still
+        # validated with the default settings' dialect rather than rejected — confirm intended.
+        if db:
+            settings = replace(settings, dialect=db.dialect)
+    return validate(req.sql, settings=settings).to_dict()
+@app.post("/api/v1/compress")
+def api_compress(req: CompressReq, identity: str | None = Depends(auth)) -> dict:
+    # Run the compressor over the stored schema graph for `req.question`.
+    # `identity` is not used in the body; the dependency only enforces authentication.
+    settings = load_settings()
+    graph_path = ROOT / "docs" / "schema-graph.json"
+    if not graph_path.exists():
+        # 409: the request is fine, but the graph file has not been generated yet.
+        raise HTTPException(
+            status_code=409, detail="schema graph not built (run scrape_schema --build-graph)"
+        )
+    # Sensitive patterns are only loaded and passed when the setting enables stripping.
+    pats = load_sensitive_patterns() if settings.strip_sensitive_in_compressor else None
+    return compress(
+        load_graph(graph_path),
+        req.question,
+        # `or` falls back to the configured default when the request value is None or 0.
+        max_tables=req.max_tables or settings.max_tables,
+        max_columns=req.max_columns or settings.max_columns,
+        seed_count=settings.seed_count,
+        sensitive_patterns=pats,
+    )
+@app.post("/api/v1/query")
+def api_query(req: QueryReq, identity: str | None = Depends(auth)):
+    # Execute `req.sql` through execute_sql_text and shape the response by `req.format`.
+    # The authenticated identity is forwarded as `user`.
+    db = load_database(req.database)
+    if db is None:
+        raise HTTPException(status_code=400, detail="no database configured")
+    res = execute_sql_text(req.sql, sql_label="api", database=db, limit=req.max_rows, user=identity)
+    # Result metadata shared by the JSON and markdown responses.
+    payload = {
+        "ok": res.ok,
+        "error": res.error,
+        "warning": res.warning,
+        "row_count": len(res.rows),
+        "truncated": res.truncated,
+        "latency_ms": res.latency_ms,
+        "columns": res.columns,
+    }
+    if not res.ok:
+        # On failure only the error text is returned; `payload` is discarded.
+        raise HTTPException(status_code=400, detail=res.error)
+    if req.format == "csv":
+        # CSV is returned as a bare text body, without the metadata in `payload`.
+        return PlainTextResponse(format_csv(res), media_type="text/csv")
+    if req.format == "markdown":
+        # Metadata plus the rendered markdown; raw `rows` are not included.
+        return {**payload, "markdown": format_markdown(res)}
+    # Any other `format` value (including the default "json") gets the JSON rows.
+    payload["rows"] = res.rows
+    return payload
+@app.post("/api/v1/ask")
+def api_ask(req: AskReq, identity: str | None = Depends(auth)):
+    """Full NL->SQL: compress -> generate -> validate -> retry -> execute. Needs an LLM."""
+    # Imported inside the handler rather than at module import time
+    # (presumably so the LLM-related modules load only when this endpoint is used — confirm).
+    from .generate import GeneratorUnavailable
+    from .orchestrator import ask
+    db = load_database(req.database)
+    if db is None:
+        raise HTTPException(status_code=400, detail="no database configured")
+    try:
+        result = ask(req.question, database=db, user=identity, max_rows=req.max_rows)
+    except GeneratorUnavailable as exc:
+        # No usable generator -> 503, with the exception text as the detail.
+        raise HTTPException(status_code=503, detail=str(exc)) from exc
+    if not result["ok"]:
+        raise HTTPException(status_code=400, detail=result.get("error", "ask failed"))
+    return result
+def main() -> None:
+    import uvicorn
+    from .config import load_env_file
+    load_env_file()  # pick up .env credentials for the server process
+    uvicorn.run(
+        "askql.api:app",
+        host=os.environ.get("T2S_API_HOST", "127.0.0.1"),
+        port=int(os.environ.get("T2S_API_PORT", "8000")),
+    )
+if __name__ == "__main__":
+    main()

askql/audit.py ADDED Viewed

@@ -0,0 +1,121 @@
+"""Audit sink — every execution (success or failure) is recorded.
+`AuditSink` is an interface so the JSONL pilot implementation can be swapped for a
+DB-backed sink later without changing callers (ARCHITECTURE.md §4). Audit writes must
+NEVER block execution — failures here are swallowed.
+"""
+from __future__ import annotations
+import getpass
+import json
+import socket
+from datetime import UTC, datetime
+from pathlib import Path
+from typing import Any, Protocol
+from .config import data_dir
+def _now_iso() -> str:
+    return datetime.now(UTC).isoformat()
+class AuditSink(Protocol):
+    """Structural interface for audit destinations: any object with a compatible `record`."""
+    def record(self, entry: dict[str, Any]) -> None: ...
+class JsonlAuditSink:
+    """Append one JSON object per line to build/query-audit.jsonl."""
+    def __init__(self, path: Path | None = None) -> None:
+        self.path = path or data_dir() / "query-audit.jsonl"
+    def record(self, entry: dict[str, Any]) -> None:
+        try:
+            self.path.parent.mkdir(parents=True, exist_ok=True)
+            with self.path.open("a", encoding="utf-8") as fh:
+                fh.write(json.dumps(entry) + "\n")
+        except Exception:  # noqa: BLE001 - audit must never break execution
+            pass
+def audit_path() -> Path:
+    return data_dir() / "query-audit.jsonl"
+def is_failure(entry: dict) -> bool:
+    """A failure = blocked by the validator, or an execution error."""
+    return (not entry.get("validationOk")) or bool(entry.get("error"))
+def read_audit(
+    limit: int = 20,
+    *,
+    failures_only: bool = False,
+    database: str | None = None,
+    path: Path | None = None,
+) -> list[dict]:
+    """Return the most recent audit entries (newest last). Tolerates partial/corrupt lines."""
+    path = path or audit_path()
+    if not path.exists():
+        return []
+    entries: list[dict] = []
+    for line in path.read_text(encoding="utf-8").splitlines():
+        line = line.strip()
+        if not line:
+            continue
+        try:
+            entries.append(json.loads(line))
+        except json.JSONDecodeError:
+            continue
+    if database:
+        entries = [e for e in entries if e.get("database") == database]
+    if failures_only:
+        entries = [e for e in entries if is_failure(e)]
+    return entries[-limit:]
+def build_entry(
+    *,
+    sql: str,
+    sql_file: str,
+    validation_ok: bool,
+    row_count: int | None = None,
+    truncated: bool = False,
+    error: str | None = None,
+    latency_ms: int | None = None,
+    environment: str = "qa",
+    database: str | None = None,
+    identity: str | None = None,
+    role: str | None = None,
+    pii_mode: str | None = None,
+) -> dict[str, Any]:
+    """Assemble an audit record as a plain dict with camelCase keys.
+    The SQL text is cut to its first 2000 characters; the record has no credential field
+    (the SQL text itself is not scrubbed here). `identity`/`role` capture the authenticated
+    user and resolved role (RBAC); when `identity` is falsy the OS user is recorded instead
+    (pilot mode). The timestamp is taken at call time via `_now_iso()`.
+    """
+    return {
+        "timestamp": _now_iso(),
+        # OS-user lookup only happens when no identity was supplied; "unknown" if it fails.
+        "user": identity or _safe(getpass.getuser),
+        "role": role,
+        "host": _safe(socket.gethostname),
+        "environment": environment,
+        "database": database,
+        "piiMode": pii_mode,
+        "sqlFile": sql_file,
+        "sql": sql[:2000],
+        "validationOk": validation_ok,
+        "rowCount": row_count,
+        "truncated": truncated,
+        "error": error,
+        "latencyMs": latency_ms,
+    }
+def _safe(fn) -> str:
+    try:
+        return fn()
+    except Exception:  # noqa: BLE001
+        return "unknown"

askql/capability.py ADDED Viewed

@@ -0,0 +1,120 @@
+"""Non-blocking write-capability advisory.
+Primary mode is "use the credentials you're given." A read-only DB user is a recommended
+nice-to-have, not a requirement. This module probes (best-effort, read-only) whether the
+connected user can write and, if so, returns a one-line warning — it NEVER blocks execution.
+The probe runs at most once/day per (database, user) via a small cache so it adds no per-query
+overhead or noise. Any probe error -> unknown -> silent (advisory must not be fragile).
+"""
+from __future__ import annotations
+import json
+from datetime import UTC, datetime
+from pathlib import Path
+from .config import data_dir
+# Best-effort per-dialect scalar probe: returns a count > 0 when the user holds write/DDL privs.
+# Run directly through the driver (not the validator), so referencing catalog views is fine.
+# Keyed by dialect name; a dialect with no entry here is reported as "unknown" by the probe.
+WRITE_PROBES: dict[str, str] = {
+    "postgres": (
+        "SELECT "
+        "(SELECT count(*) FROM information_schema.role_table_grants "
+        "  WHERE grantee = current_user "
+        "    AND privilege_type IN ('INSERT','UPDATE','DELETE','TRUNCATE')) "
+        "+ (SELECT count(*) FROM pg_roles "
+        "   WHERE rolname = current_user AND (rolsuper OR rolcreatedb)) AS w"
+    ),
+    "redshift": (
+        "SELECT count(*) AS w FROM information_schema.role_table_grants "
+        "WHERE grantee = current_user AND privilege_type IN ('INSERT','UPDATE','DELETE')"
+    ),
+    "oracle": (
+        "SELECT ("
+        "(SELECT COUNT(*) FROM session_privs WHERE privilege IN "
+        "('INSERT ANY TABLE','UPDATE ANY TABLE','DELETE ANY TABLE','CREATE TABLE',"
+        "'CREATE ANY TABLE','DROP ANY TABLE','ALTER ANY TABLE'))"
+        "+ (SELECT COUNT(*) FROM user_tab_privs WHERE grantee = USER "
+        "   AND privilege IN ('INSERT','UPDATE','DELETE'))) AS w FROM dual"
+    ),
+    "tsql": (
+        "SELECT COUNT(*) AS w FROM fn_my_permissions(NULL,'DATABASE') "
+        "WHERE permission_name IN ('INSERT','UPDATE','DELETE','ALTER','CONTROL','CREATE TABLE')"
+    ),
+}
+def probe_write_capability(driver, dialect: str) -> bool | None:
+    """True=can write, False=read-only, None=unknown. Best-effort; never raises."""
+    sql = WRITE_PROBES.get(dialect)
+    if not sql:
+        return None
+    try:
+        _cols, rows = driver.execute(sql, 1, 10)
+        if rows and rows[0] and rows[0][0] is not None:
+            return int(rows[0][0]) > 0
+    except Exception:  # noqa: BLE001 - advisory must not be fragile
+        return None
+    return None
+def _today() -> int:
+    return int(datetime.now(UTC).timestamp() // 86400)
+def probe_and_advise(
+    probe,
+    db_name: str,
+    environment: str,
+    identity: str | None,
+    *,
+    cache_dir: Path | None = None,
+    today: int | None = None,
+) -> str | None:
+    """Return a one-line advisory if the user can write (else None). `probe` is a zero-arg
+    callable returning bool|None (run on its own connection). Caches per (db,user)/day so the
+    probe runs at most once daily and we warn at most once daily."""
+    cache_dir = cache_dir or (data_dir() / ".write-check")
+    today = today if today is not None else _today()
+    who = identity or "default"
+    key = "".join(c if c.isalnum() else "_" for c in f"{db_name}_{who}")
+    path = cache_dir / f"{key}.json"
+    try:
+        state = json.loads(path.read_text(encoding="utf-8")) if path.exists() else {}
+    except (json.JSONDecodeError, OSError):
+        state = {}
+    if state.get("day") == today:
+        capable = state.get("capable")
+    else:
+        try:
+            capable = probe()
+        except Exception:  # noqa: BLE001 - advisory must not be fragile
+            capable = None
+        state = {"day": today, "capable": capable, "warned_day": state.get("warned_day")}
+        _save(path, state)
+    if not capable:
+        return None
+    if state.get("warned_day") == today:
+        return None  # already warned today
+    state["warned_day"] = today
+    _save(path, state)
+    return (
+        f"advisory: connected user '{who}' appears to have WRITE privileges on "
+        f"'{db_name}' ({environment}). askql blocks writes in software, but a read-only DB "
+        f"user is recommended for prod/sensitive data (see PB11). "
+        f"Set warn_if_writable: false to silence."
+    )
+def _save(path: Path, state: dict) -> None:
+    try:
+        path.parent.mkdir(parents=True, exist_ok=True)
+        path.write_text(json.dumps(state), encoding="utf-8")
+    except OSError:
+        pass