kontra-0.5.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kontra/__init__.py +1871 -0
- kontra/api/__init__.py +22 -0
- kontra/api/compare.py +340 -0
- kontra/api/decorators.py +153 -0
- kontra/api/results.py +2121 -0
- kontra/api/rules.py +681 -0
- kontra/cli/__init__.py +0 -0
- kontra/cli/commands/__init__.py +1 -0
- kontra/cli/commands/config.py +153 -0
- kontra/cli/commands/diff.py +450 -0
- kontra/cli/commands/history.py +196 -0
- kontra/cli/commands/profile.py +289 -0
- kontra/cli/commands/validate.py +468 -0
- kontra/cli/constants.py +6 -0
- kontra/cli/main.py +48 -0
- kontra/cli/renderers.py +304 -0
- kontra/cli/utils.py +28 -0
- kontra/config/__init__.py +34 -0
- kontra/config/loader.py +127 -0
- kontra/config/models.py +49 -0
- kontra/config/settings.py +797 -0
- kontra/connectors/__init__.py +0 -0
- kontra/connectors/db_utils.py +251 -0
- kontra/connectors/detection.py +323 -0
- kontra/connectors/handle.py +368 -0
- kontra/connectors/postgres.py +127 -0
- kontra/connectors/sqlserver.py +226 -0
- kontra/engine/__init__.py +0 -0
- kontra/engine/backends/duckdb_session.py +227 -0
- kontra/engine/backends/duckdb_utils.py +18 -0
- kontra/engine/backends/polars_backend.py +47 -0
- kontra/engine/engine.py +1205 -0
- kontra/engine/executors/__init__.py +15 -0
- kontra/engine/executors/base.py +50 -0
- kontra/engine/executors/database_base.py +528 -0
- kontra/engine/executors/duckdb_sql.py +607 -0
- kontra/engine/executors/postgres_sql.py +162 -0
- kontra/engine/executors/registry.py +69 -0
- kontra/engine/executors/sqlserver_sql.py +163 -0
- kontra/engine/materializers/__init__.py +14 -0
- kontra/engine/materializers/base.py +42 -0
- kontra/engine/materializers/duckdb.py +110 -0
- kontra/engine/materializers/factory.py +22 -0
- kontra/engine/materializers/polars_connector.py +131 -0
- kontra/engine/materializers/postgres.py +157 -0
- kontra/engine/materializers/registry.py +138 -0
- kontra/engine/materializers/sqlserver.py +160 -0
- kontra/engine/result.py +15 -0
- kontra/engine/sql_utils.py +611 -0
- kontra/engine/sql_validator.py +609 -0
- kontra/engine/stats.py +194 -0
- kontra/engine/types.py +138 -0
- kontra/errors.py +533 -0
- kontra/logging.py +85 -0
- kontra/preplan/__init__.py +5 -0
- kontra/preplan/planner.py +253 -0
- kontra/preplan/postgres.py +179 -0
- kontra/preplan/sqlserver.py +191 -0
- kontra/preplan/types.py +24 -0
- kontra/probes/__init__.py +20 -0
- kontra/probes/compare.py +400 -0
- kontra/probes/relationship.py +283 -0
- kontra/reporters/__init__.py +0 -0
- kontra/reporters/json_reporter.py +190 -0
- kontra/reporters/rich_reporter.py +11 -0
- kontra/rules/__init__.py +35 -0
- kontra/rules/base.py +186 -0
- kontra/rules/builtin/__init__.py +40 -0
- kontra/rules/builtin/allowed_values.py +156 -0
- kontra/rules/builtin/compare.py +188 -0
- kontra/rules/builtin/conditional_not_null.py +213 -0
- kontra/rules/builtin/conditional_range.py +310 -0
- kontra/rules/builtin/contains.py +138 -0
- kontra/rules/builtin/custom_sql_check.py +182 -0
- kontra/rules/builtin/disallowed_values.py +140 -0
- kontra/rules/builtin/dtype.py +203 -0
- kontra/rules/builtin/ends_with.py +129 -0
- kontra/rules/builtin/freshness.py +240 -0
- kontra/rules/builtin/length.py +193 -0
- kontra/rules/builtin/max_rows.py +35 -0
- kontra/rules/builtin/min_rows.py +46 -0
- kontra/rules/builtin/not_null.py +121 -0
- kontra/rules/builtin/range.py +222 -0
- kontra/rules/builtin/regex.py +143 -0
- kontra/rules/builtin/starts_with.py +129 -0
- kontra/rules/builtin/unique.py +124 -0
- kontra/rules/condition_parser.py +203 -0
- kontra/rules/execution_plan.py +455 -0
- kontra/rules/factory.py +103 -0
- kontra/rules/predicates.py +25 -0
- kontra/rules/registry.py +24 -0
- kontra/rules/static_predicates.py +120 -0
- kontra/scout/__init__.py +9 -0
- kontra/scout/backends/__init__.py +17 -0
- kontra/scout/backends/base.py +111 -0
- kontra/scout/backends/duckdb_backend.py +359 -0
- kontra/scout/backends/postgres_backend.py +519 -0
- kontra/scout/backends/sqlserver_backend.py +577 -0
- kontra/scout/dtype_mapping.py +150 -0
- kontra/scout/patterns.py +69 -0
- kontra/scout/profiler.py +801 -0
- kontra/scout/reporters/__init__.py +39 -0
- kontra/scout/reporters/json_reporter.py +165 -0
- kontra/scout/reporters/markdown_reporter.py +152 -0
- kontra/scout/reporters/rich_reporter.py +144 -0
- kontra/scout/store.py +208 -0
- kontra/scout/suggest.py +200 -0
- kontra/scout/types.py +652 -0
- kontra/state/__init__.py +29 -0
- kontra/state/backends/__init__.py +79 -0
- kontra/state/backends/base.py +348 -0
- kontra/state/backends/local.py +480 -0
- kontra/state/backends/postgres.py +1010 -0
- kontra/state/backends/s3.py +543 -0
- kontra/state/backends/sqlserver.py +969 -0
- kontra/state/fingerprint.py +166 -0
- kontra/state/types.py +1061 -0
- kontra/version.py +1 -0
- kontra-0.5.2.dist-info/METADATA +122 -0
- kontra-0.5.2.dist-info/RECORD +124 -0
- kontra-0.5.2.dist-info/WHEEL +5 -0
- kontra-0.5.2.dist-info/entry_points.txt +2 -0
- kontra-0.5.2.dist-info/licenses/LICENSE +17 -0
- kontra-0.5.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,368 @@
# src/kontra/connectors/handle.py
from __future__ import annotations

"""
DatasetHandle — a normalized, engine-agnostic view of a dataset location.

Why this exists
---------------
Materializers (DuckDB/Polars) and SQL executors shouldn't have to parse URIs
or chase environment variables. This small value object centralizes that logic:

- `uri`: the original string you passed (e.g., "s3://bucket/key.parquet")
- `scheme`: parsed scheme: "s3", "file", "https", "" (bare local), "byoc", etc.
- `path`: the path we should hand to the backend (typically the original URI)
- `format`: best-effort file format: "parquet" | "csv" | "postgres" | "sqlserver" | "unknown"
- `fs_opts`: normalized filesystem options pulled from env (e.g., S3 creds,
  region, endpoint, URL style). These are safe to pass to a DuckDB
  httpfs session or other backends.

BYOC (Bring Your Own Connection) support:
- `external_conn`: User-provided database connection object
- `dialect`: Database dialect ("postgresql", "sqlserver")
- `table_ref`: Table reference ("schema.table" or "db.schema.table")
- `owned`: If True, Kontra closes the connection. If False (BYOC), user closes it.

This object is intentionally tiny and immutable. If a connector later wants to
enrich it (e.g., SAS tokens for ADLS), we can extend `fs_opts` without touching
the engine or materializers.
"""

from dataclasses import dataclass, field
from typing import Any, Dict, Optional
import os
from urllib.parse import urlparse


@dataclass(frozen=True)
class DatasetHandle:
    uri: str
    scheme: str
    path: str
    format: str
    fs_opts: Dict[str, str]
    # Database connection parameters (for URI-based connections)
    db_params: Optional[Any] = field(default=None)

    # BYOC (Bring Your Own Connection) fields
    external_conn: Optional[Any] = field(default=None)  # User's connection object
    dialect: Optional[str] = field(default=None)  # "postgresql" | "sqlserver"
    table_ref: Optional[str] = field(default=None)  # "schema.table" or "db.schema.table"
    owned: bool = field(default=True)  # True = we close, False = user closes

    # ------------------------------ Constructors ------------------------------

    @staticmethod
    def from_connection(conn: Any, table: str) -> "DatasetHandle":
        """
        Create a DatasetHandle from a BYOC (Bring Your Own Connection) database connection.

        This allows users to pass their own database connection objects (psycopg2,
        pyodbc, SQLAlchemy, etc.) while Kontra still performs SQL pushdown and preplan.

        Args:
            conn: A database connection object (psycopg2, pyodbc, SQLAlchemy engine, etc.)
            table: Table reference: "table", "schema.table", or "database.schema.table"

        Returns:
            DatasetHandle configured for BYOC mode

        Examples:
            >>> import psycopg2
            >>> conn = psycopg2.connect(host="localhost", dbname="mydb")
            >>> handle = DatasetHandle.from_connection(conn, "public.users")

            >>> import pyodbc
            >>> conn = pyodbc.connect("DRIVER={ODBC Driver 17};SERVER=...")
            >>> handle = DatasetHandle.from_connection(conn, "dbo.orders")

        Notes:
            - Kontra does NOT close the connection (owned=False). User manages lifecycle.
            - SQL pushdown and preplan still work using the provided connection.
            - The `dialect` is auto-detected from the connection type.
            - SQLAlchemy engines/connections are automatically unwrapped to raw DBAPI.
        """
        from kontra.connectors.detection import (
            detect_connection_dialect,
            unwrap_sqlalchemy_connection,
        )

        # Detect dialect before unwrapping (SQLAlchemy has better dialect info)
        dialect = detect_connection_dialect(conn)

        # Unwrap SQLAlchemy to raw DBAPI connection (has .cursor() method)
        raw_conn = unwrap_sqlalchemy_connection(conn)

        return DatasetHandle(
            uri=f"byoc://{dialect}/{table}",
            scheme="byoc",
            path=table,
            format=dialect,
            fs_opts={},
            db_params=None,
            external_conn=raw_conn,
            dialect=dialect,
            table_ref=table,
            owned=False,  # User owns the connection, not Kontra
        )

    @staticmethod
    def from_uri(
        uri: str,
        storage_options: Optional[Dict[str, Any]] = None,
    ) -> "DatasetHandle":
        """
        Create a DatasetHandle from a user-provided URI or path.

        Examples:
          - "s3://my-bucket/data/users.parquet"
          - "/data/users.parquet" (scheme = "")
          - "file:///data/users.csv" (scheme = "file")
          - "https://example.com/x.parquet"

        Args:
            uri: Path or URI to the dataset
            storage_options: Optional dict of cloud storage credentials.
                For S3/MinIO:
                  - aws_access_key_id, aws_secret_access_key
                  - aws_region (required for Polars)
                  - endpoint_url (for MinIO/S3-compatible)
                For Azure:
                  - account_name, account_key, sas_token, etc.
                These override environment variables when provided.

        Notes:
          - We keep `path` equal to the original `uri` so engines that accept
            URIs directly (DuckDB: read_parquet) can use it verbatim.
          - `fs_opts` is populated from environment variables, then merged with
            storage_options (storage_options take precedence).
        """
        parsed = urlparse(uri)
        scheme = (parsed.scheme or "").lower()
        lower = uri.lower()

        # Very light format inference (enough for materializer selection)
        if lower.endswith(".parquet"):
            fmt = "parquet"
        elif lower.endswith(".csv") or lower.endswith(".tsv"):
            fmt = "csv"  # TSV is CSV with tab separator (auto-detected by Polars)
        else:
            fmt = "unknown"

        # Defaults: pass the original URI through to backends that accept URIs
        path = uri

        # Filesystem options (extensible). For now we focus on S3-compatible settings;
        # other filesystems can add their own keys without breaking callers.
        fs_opts: Dict[str, str] = {}

        if scheme == "s3":
            _inject_s3_env(fs_opts)
            # Merge user-provided storage_options (takes precedence over env vars)
            if storage_options:
                _merge_s3_storage_options(fs_opts, storage_options)

        # Azure Data Lake Storage / Azure Blob Storage
        if scheme in ("abfs", "abfss", "az"):
            _inject_azure_env(fs_opts)
            # Merge user-provided storage_options (takes precedence over env vars)
            if storage_options:
                _merge_azure_storage_options(fs_opts, storage_options)

        # HTTP(S): typically public or signed URLs. No defaults needed here.
        # Local `""`/`file` schemes: no fs_opts.

        # PostgreSQL: resolve connection parameters from URI + environment
        db_params = None
        if scheme in ("postgres", "postgresql"):
            from kontra.connectors.postgres import resolve_connection_params

            db_params = resolve_connection_params(uri)
            fmt = "postgres"

        # SQL Server: resolve connection parameters from URI + environment
        if scheme in ("mssql", "sqlserver"):
            from kontra.connectors.sqlserver import resolve_connection_params as resolve_sqlserver_params

            db_params = resolve_sqlserver_params(uri)
            fmt = "sqlserver"

        return DatasetHandle(
            uri=uri, scheme=scheme, path=path, format=fmt, fs_opts=fs_opts, db_params=db_params
        )

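# Illustrative usage (a sketch for orientation, not part of this module):
# the bucket, database, and table names below are hypothetical, and `psycopg`
# is assumed to be installed for the BYOC path.
#
#   handle = DatasetHandle.from_uri(
#       "s3://my-bucket/data/users.parquet",
#       storage_options={"aws_region": "us-east-1"},
#   )
#   assert handle.scheme == "s3" and handle.format == "parquet"
#
#   import psycopg
#   conn = psycopg.connect(host="localhost", dbname="mydb")
#   byoc = DatasetHandle.from_connection(conn, "public.users")
#   assert byoc.scheme == "byoc" and byoc.owned is False  # caller closes conn
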
# ------------------------------ Helpers ---------------------------------------


def _inject_s3_env(opts: Dict[str, str]) -> None:
    """
    Read S3/MinIO-related environment variables and copy them into `opts` using
    the normalized keys that our DuckDB session factory/materializer expect.

    We *don't* log or print these values anywhere; the caller just passes them to
    the backend session config. All keys are optional.
    """
    # Credentials
    ak = os.getenv("AWS_ACCESS_KEY_ID")
    sk = os.getenv("AWS_SECRET_ACCESS_KEY")
    st = os.getenv("AWS_SESSION_TOKEN")

    # Region (prefer DUCKDB_S3_REGION when provided, else AWS_REGION, else default)
    region = os.getenv("DUCKDB_S3_REGION") or os.getenv("AWS_REGION") or "us-east-1"

    # Endpoint / style (MinIO/custom endpoints)
    endpoint = os.getenv("DUCKDB_S3_ENDPOINT") or os.getenv("AWS_ENDPOINT_URL")
    url_style = os.getenv("DUCKDB_S3_URL_STYLE")  # 'path' | 'host'
    use_ssl = os.getenv("DUCKDB_S3_USE_SSL")  # 'true' | 'false'
    max_conns = os.getenv("DUCKDB_S3_MAX_CONNECTIONS") or "64"

    if ak:
        opts["s3_access_key_id"] = ak
    if sk:
        opts["s3_secret_access_key"] = sk
    if st:
        opts["s3_session_token"] = st
    if region:
        opts["s3_region"] = region
    if endpoint:
        # Keep the full endpoint string; the DuckDB session factory will parse it.
        opts["s3_endpoint"] = endpoint
    if url_style:
        opts["s3_url_style"] = url_style
    if use_ssl:
        opts["s3_use_ssl"] = use_ssl
    if max_conns:
        opts["s3_max_connections"] = str(max_conns)


def _inject_azure_env(opts: Dict[str, str]) -> None:
    """
    Read Azure Storage environment variables and copy them into `opts` using
    normalized keys that our DuckDB session factory expects.

    Supports multiple auth methods:
    - Account key: AZURE_STORAGE_ACCOUNT_NAME + AZURE_STORAGE_ACCOUNT_KEY
    - SAS token: AZURE_STORAGE_ACCOUNT_NAME + AZURE_STORAGE_SAS_TOKEN
    - Connection string: AZURE_STORAGE_CONNECTION_STRING
    - Service principal (OAuth): AZURE_TENANT_ID + AZURE_CLIENT_ID + AZURE_CLIENT_SECRET

    All keys are optional. DuckDB's azure extension will use what's available.
    """
    # Account name (required for most auth methods)
    account_name = os.getenv("AZURE_STORAGE_ACCOUNT_NAME")
    if account_name:
        opts["azure_account_name"] = account_name

    # Account key auth
    account_key = os.getenv("AZURE_STORAGE_ACCOUNT_KEY")
    if account_key:
        opts["azure_account_key"] = account_key

    # SAS token auth (alternative to account key)
    sas_token = os.getenv("AZURE_STORAGE_SAS_TOKEN")
    if sas_token:
        opts["azure_sas_token"] = sas_token

    # Connection string auth (contains account name + key/SAS)
    conn_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
    if conn_string:
        opts["azure_connection_string"] = conn_string

    # OAuth / Service Principal auth
    tenant_id = os.getenv("AZURE_TENANT_ID")
    client_id = os.getenv("AZURE_CLIENT_ID")
    client_secret = os.getenv("AZURE_CLIENT_SECRET")
    if tenant_id:
        opts["azure_tenant_id"] = tenant_id
    if client_id:
        opts["azure_client_id"] = client_id
    if client_secret:
        opts["azure_client_secret"] = client_secret

    # Custom endpoint (for Databricks, sovereign clouds, Azurite emulator)
    endpoint = os.getenv("AZURE_STORAGE_ENDPOINT")
    if endpoint:
        opts["azure_endpoint"] = endpoint


def _merge_s3_storage_options(opts: Dict[str, str], storage_options: Dict[str, Any]) -> None:
    """
    Merge user-provided storage_options into fs_opts for S3.

    Maps Polars-style keys to our internal normalized keys.
    User values take precedence over env-var derived values.

    Polars storage_options keys:
    - aws_access_key_id -> s3_access_key_id
    - aws_secret_access_key -> s3_secret_access_key
    - aws_session_token -> s3_session_token
    - aws_region -> s3_region
    - endpoint_url -> s3_endpoint
    """
    # Mapping from Polars/user keys to our internal keys
    key_map = {
        "aws_access_key_id": "s3_access_key_id",
        "aws_secret_access_key": "s3_secret_access_key",
        "aws_session_token": "s3_session_token",
        "aws_region": "s3_region",
        "region": "s3_region",  # Alternative key
        "endpoint_url": "s3_endpoint",
    }

    for user_key, internal_key in key_map.items():
        if user_key in storage_options and storage_options[user_key] is not None:
            opts[internal_key] = str(storage_options[user_key])

    # Also accept our internal keys directly (pass-through)
    internal_keys = [
        "s3_access_key_id",
        "s3_secret_access_key",
        "s3_session_token",
        "s3_region",
        "s3_endpoint",
        "s3_url_style",
        "s3_use_ssl",
    ]
    for key in internal_keys:
        if key in storage_options and storage_options[key] is not None:
            opts[key] = str(storage_options[key])


def _merge_azure_storage_options(opts: Dict[str, str], storage_options: Dict[str, Any]) -> None:
    """
    Merge user-provided storage_options into fs_opts for Azure.

    Maps common Azure keys to our internal normalized keys.
    User values take precedence over env-var derived values.
    """
    # Mapping from user keys to our internal keys
    key_map = {
        "account_name": "azure_account_name",
        "account_key": "azure_account_key",
        "sas_token": "azure_sas_token",
        "connection_string": "azure_connection_string",
        "tenant_id": "azure_tenant_id",
        "client_id": "azure_client_id",
        "client_secret": "azure_client_secret",
        "endpoint": "azure_endpoint",
    }

    for user_key, internal_key in key_map.items():
        if user_key in storage_options and storage_options[user_key] is not None:
            opts[internal_key] = str(storage_options[user_key])

    # Also accept our internal keys directly (pass-through)
    internal_keys = [
        "azure_account_name",
        "azure_account_key",
        "azure_sas_token",
        "azure_connection_string",
        "azure_tenant_id",
        "azure_client_id",
        "azure_client_secret",
        "azure_endpoint",
    ]
    for key in internal_keys:
        if key in storage_options and storage_options[key] is not None:
            opts[key] = str(storage_options[key])
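
The precedence in `from_uri` is worth seeing end to end: explicit `storage_options` win over environment-derived values. A minimal sketch, not part of the packaged sources, assuming hypothetical credentials and a MinIO-style endpoint:

# illustrative_s3_options.py (hypothetical, for orientation only)
import os
from kontra.connectors.handle import DatasetHandle

os.environ["AWS_ACCESS_KEY_ID"] = "env-key"  # picked up by _inject_s3_env
os.environ["AWS_REGION"] = "eu-west-1"

handle = DatasetHandle.from_uri(
    "s3://my-bucket/events.parquet",
    storage_options={
        "aws_region": "us-east-1",                 # overrides AWS_REGION
        "endpoint_url": "http://localhost:9000",   # MinIO-style endpoint
    },
)
assert handle.fs_opts["s3_access_key_id"] == "env-key"
assert handle.fs_opts["s3_region"] == "us-east-1"  # user value wins
assert handle.fs_opts["s3_endpoint"] == "http://localhost:9000"
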
@@ -0,0 +1,127 @@
# src/kontra/connectors/postgres.py
"""
PostgreSQL connection utilities for Kontra.

Supports multiple authentication methods:
1. Full URI: postgres://user:pass@host:port/database/schema.table
2. Environment variables (libpq standard): PGHOST, PGPORT, PGUSER, PGPASSWORD, PGDATABASE
3. DATABASE_URL (common in PaaS like Heroku, Railway)

Priority: URI values > DATABASE_URL > PGXXX env vars > defaults
"""

from __future__ import annotations

import os
from dataclasses import dataclass
from typing import Any, Dict, Optional

from .db_utils import (
    DbConnectionConfig,
    resolve_connection_params as _resolve_params,
)


# PostgreSQL-specific configuration for parameter resolution
_PG_CONFIG = DbConnectionConfig(
    default_host="localhost",
    default_port=5432,
    default_user=os.getenv("USER", "postgres"),
    default_schema="public",
    env_host="PGHOST",
    env_port="PGPORT",
    env_user="PGUSER",
    env_password="PGPASSWORD",
    env_database="PGDATABASE",
    env_url="DATABASE_URL",
    db_name="PostgreSQL",
    uri_example="postgres://user:pass@host:5432/database/schema.table",
    env_example="PGDATABASE",
)


@dataclass
class PostgresConnectionParams:
    """Resolved PostgreSQL connection parameters."""

    host: str
    port: int
    user: str
    password: Optional[str]
    database: str
    schema: str
    table: str

    def to_dict(self) -> Dict[str, Any]:
        """Return connection kwargs for psycopg.connect()."""
        return {
            "host": self.host,
            "port": self.port,
            "user": self.user,
            "password": self.password,
            "dbname": self.database,
        }

    @property
    def qualified_table(self) -> str:
        """Return schema.table identifier."""
        return f"{self.schema}.{self.table}"


def resolve_connection_params(uri: str) -> PostgresConnectionParams:
    """
    Resolve PostgreSQL connection parameters from URI + environment.

    URI format:
        postgres://user:pass@host:port/database/schema.table
        postgres:///public.users (uses env vars for connection)

    Priority: URI values > DATABASE_URL > PGXXX env vars > defaults

    Raises:
        ValueError: If required parameters (database, table) cannot be resolved.
    """
    resolved = _resolve_params(uri, _PG_CONFIG)

    return PostgresConnectionParams(
        host=resolved.host,
        port=resolved.port,
        user=resolved.user,
        password=resolved.password,
        database=resolved.database,  # type: ignore (validated in _resolve_params)
        schema=resolved.schema,
        table=resolved.table,  # type: ignore (validated in _resolve_params)
    )


def get_connection(params: PostgresConnectionParams):
    """
    Create a psycopg connection from resolved parameters.

    Returns:
        psycopg.Connection
    """
    try:
        import psycopg
    except ImportError as e:
        raise ImportError(
            "psycopg is required for PostgreSQL support.\n"
            "Install with: pip install 'psycopg[binary]'"
        ) from e

    try:
        return psycopg.connect(**params.to_dict())
    except psycopg.OperationalError as e:
        raise ConnectionError(
            f"PostgreSQL connection failed: {e}\n\n"
            f"Connection details:\n"
            f"  Host: {params.host}:{params.port}\n"
            f"  Database: {params.database}\n"
            f"  User: {params.user}\n\n"
            "Check your connection settings or set environment variables:\n"
            "  export PGHOST=localhost\n"
            "  export PGPORT=5432\n"
            "  export PGUSER=your_user\n"
            "  export PGPASSWORD=your_password\n"
            "  export PGDATABASE=your_database"
        ) from e
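
Rounding out the PostgreSQL path, a minimal sketch of resolving parameters and opening a connection. The host, database, and table names are hypothetical, and a reachable server plus the optional `psycopg` dependency are assumed:

# illustrative_pg_usage.py (hypothetical, for orientation only)
from kontra.connectors.postgres import get_connection, resolve_connection_params

params = resolve_connection_params(
    "postgres://alice:secret@db.example.com:5432/shop/public.orders"
)
assert params.qualified_table == "public.orders"
assert params.to_dict()["dbname"] == "shop"

# A bare table reference also works; connection details then come from
# DATABASE_URL or the PGHOST/PGUSER/... environment variables:
#   params = resolve_connection_params("postgres:///public.orders")

conn = get_connection(params)  # raises ConnectionError with setup hints on failure
try:
    with conn.cursor() as cur:
        cur.execute("SELECT 1")
finally:
    conn.close()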