kontra-0.5.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. kontra/__init__.py +1871 -0
  2. kontra/api/__init__.py +22 -0
  3. kontra/api/compare.py +340 -0
  4. kontra/api/decorators.py +153 -0
  5. kontra/api/results.py +2121 -0
  6. kontra/api/rules.py +681 -0
  7. kontra/cli/__init__.py +0 -0
  8. kontra/cli/commands/__init__.py +1 -0
  9. kontra/cli/commands/config.py +153 -0
  10. kontra/cli/commands/diff.py +450 -0
  11. kontra/cli/commands/history.py +196 -0
  12. kontra/cli/commands/profile.py +289 -0
  13. kontra/cli/commands/validate.py +468 -0
  14. kontra/cli/constants.py +6 -0
  15. kontra/cli/main.py +48 -0
  16. kontra/cli/renderers.py +304 -0
  17. kontra/cli/utils.py +28 -0
  18. kontra/config/__init__.py +34 -0
  19. kontra/config/loader.py +127 -0
  20. kontra/config/models.py +49 -0
  21. kontra/config/settings.py +797 -0
  22. kontra/connectors/__init__.py +0 -0
  23. kontra/connectors/db_utils.py +251 -0
  24. kontra/connectors/detection.py +323 -0
  25. kontra/connectors/handle.py +368 -0
  26. kontra/connectors/postgres.py +127 -0
  27. kontra/connectors/sqlserver.py +226 -0
  28. kontra/engine/__init__.py +0 -0
  29. kontra/engine/backends/duckdb_session.py +227 -0
  30. kontra/engine/backends/duckdb_utils.py +18 -0
  31. kontra/engine/backends/polars_backend.py +47 -0
  32. kontra/engine/engine.py +1205 -0
  33. kontra/engine/executors/__init__.py +15 -0
  34. kontra/engine/executors/base.py +50 -0
  35. kontra/engine/executors/database_base.py +528 -0
  36. kontra/engine/executors/duckdb_sql.py +607 -0
  37. kontra/engine/executors/postgres_sql.py +162 -0
  38. kontra/engine/executors/registry.py +69 -0
  39. kontra/engine/executors/sqlserver_sql.py +163 -0
  40. kontra/engine/materializers/__init__.py +14 -0
  41. kontra/engine/materializers/base.py +42 -0
  42. kontra/engine/materializers/duckdb.py +110 -0
  43. kontra/engine/materializers/factory.py +22 -0
  44. kontra/engine/materializers/polars_connector.py +131 -0
  45. kontra/engine/materializers/postgres.py +157 -0
  46. kontra/engine/materializers/registry.py +138 -0
  47. kontra/engine/materializers/sqlserver.py +160 -0
  48. kontra/engine/result.py +15 -0
  49. kontra/engine/sql_utils.py +611 -0
  50. kontra/engine/sql_validator.py +609 -0
  51. kontra/engine/stats.py +194 -0
  52. kontra/engine/types.py +138 -0
  53. kontra/errors.py +533 -0
  54. kontra/logging.py +85 -0
  55. kontra/preplan/__init__.py +5 -0
  56. kontra/preplan/planner.py +253 -0
  57. kontra/preplan/postgres.py +179 -0
  58. kontra/preplan/sqlserver.py +191 -0
  59. kontra/preplan/types.py +24 -0
  60. kontra/probes/__init__.py +20 -0
  61. kontra/probes/compare.py +400 -0
  62. kontra/probes/relationship.py +283 -0
  63. kontra/reporters/__init__.py +0 -0
  64. kontra/reporters/json_reporter.py +190 -0
  65. kontra/reporters/rich_reporter.py +11 -0
  66. kontra/rules/__init__.py +35 -0
  67. kontra/rules/base.py +186 -0
  68. kontra/rules/builtin/__init__.py +40 -0
  69. kontra/rules/builtin/allowed_values.py +156 -0
  70. kontra/rules/builtin/compare.py +188 -0
  71. kontra/rules/builtin/conditional_not_null.py +213 -0
  72. kontra/rules/builtin/conditional_range.py +310 -0
  73. kontra/rules/builtin/contains.py +138 -0
  74. kontra/rules/builtin/custom_sql_check.py +182 -0
  75. kontra/rules/builtin/disallowed_values.py +140 -0
  76. kontra/rules/builtin/dtype.py +203 -0
  77. kontra/rules/builtin/ends_with.py +129 -0
  78. kontra/rules/builtin/freshness.py +240 -0
  79. kontra/rules/builtin/length.py +193 -0
  80. kontra/rules/builtin/max_rows.py +35 -0
  81. kontra/rules/builtin/min_rows.py +46 -0
  82. kontra/rules/builtin/not_null.py +121 -0
  83. kontra/rules/builtin/range.py +222 -0
  84. kontra/rules/builtin/regex.py +143 -0
  85. kontra/rules/builtin/starts_with.py +129 -0
  86. kontra/rules/builtin/unique.py +124 -0
  87. kontra/rules/condition_parser.py +203 -0
  88. kontra/rules/execution_plan.py +455 -0
  89. kontra/rules/factory.py +103 -0
  90. kontra/rules/predicates.py +25 -0
  91. kontra/rules/registry.py +24 -0
  92. kontra/rules/static_predicates.py +120 -0
  93. kontra/scout/__init__.py +9 -0
  94. kontra/scout/backends/__init__.py +17 -0
  95. kontra/scout/backends/base.py +111 -0
  96. kontra/scout/backends/duckdb_backend.py +359 -0
  97. kontra/scout/backends/postgres_backend.py +519 -0
  98. kontra/scout/backends/sqlserver_backend.py +577 -0
  99. kontra/scout/dtype_mapping.py +150 -0
  100. kontra/scout/patterns.py +69 -0
  101. kontra/scout/profiler.py +801 -0
  102. kontra/scout/reporters/__init__.py +39 -0
  103. kontra/scout/reporters/json_reporter.py +165 -0
  104. kontra/scout/reporters/markdown_reporter.py +152 -0
  105. kontra/scout/reporters/rich_reporter.py +144 -0
  106. kontra/scout/store.py +208 -0
  107. kontra/scout/suggest.py +200 -0
  108. kontra/scout/types.py +652 -0
  109. kontra/state/__init__.py +29 -0
  110. kontra/state/backends/__init__.py +79 -0
  111. kontra/state/backends/base.py +348 -0
  112. kontra/state/backends/local.py +480 -0
  113. kontra/state/backends/postgres.py +1010 -0
  114. kontra/state/backends/s3.py +543 -0
  115. kontra/state/backends/sqlserver.py +969 -0
  116. kontra/state/fingerprint.py +166 -0
  117. kontra/state/types.py +1061 -0
  118. kontra/version.py +1 -0
  119. kontra-0.5.2.dist-info/METADATA +122 -0
  120. kontra-0.5.2.dist-info/RECORD +124 -0
  121. kontra-0.5.2.dist-info/WHEEL +5 -0
  122. kontra-0.5.2.dist-info/entry_points.txt +2 -0
  123. kontra-0.5.2.dist-info/licenses/LICENSE +17 -0
  124. kontra-0.5.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,368 @@
+ # src/kontra/connectors/handle.py
+ """
+ DatasetHandle — a normalized, engine-agnostic view of a dataset location.
+
+ Why this exists
+ ---------------
+ Materializers (DuckDB/Polars) and SQL executors shouldn't have to parse URIs
+ or chase environment variables. This small value object centralizes that logic:
+
+ - `uri`: the original string you passed (e.g., "s3://bucket/key.parquet")
+ - `scheme`: parsed scheme: "s3", "file", "https", "" (bare local), "byoc", etc.
+ - `path`: the path we should hand to the backend (typically the original URI)
+ - `format`: best-effort file format: "parquet" | "csv" | "postgres" | "sqlserver" | "unknown"
+ - `fs_opts`: normalized filesystem options pulled from env (e.g., S3 creds,
+   region, endpoint, URL style). These are safe to pass to a DuckDB
+   httpfs session or other backends.
+
+ BYOC (Bring Your Own Connection) support:
+ - `external_conn`: user-provided database connection object
+ - `dialect`: database dialect ("postgresql", "sqlserver")
+ - `table_ref`: table reference ("schema.table" or "db.schema.table")
+ - `owned`: if True, Kontra closes the connection; if False (BYOC), the user closes it.
+
+ This object is intentionally tiny and immutable. If a connector later wants to
+ enrich it (e.g., SAS tokens for ADLS), we can extend `fs_opts` without touching
+ the engine or materializers.
+ """
+
+ from __future__ import annotations
+
+ from dataclasses import dataclass, field
+ from typing import Any, Dict, Optional
+ import os
+ from urllib.parse import urlparse
+
+
+ @dataclass(frozen=True)
+ class DatasetHandle:
+     uri: str
+     scheme: str
+     path: str
+     format: str
+     fs_opts: Dict[str, str]
+     # Database connection parameters (for URI-based connections)
+     db_params: Optional[Any] = field(default=None)
+
+     # BYOC (Bring Your Own Connection) fields
+     external_conn: Optional[Any] = field(default=None)  # User's connection object
+     dialect: Optional[str] = field(default=None)  # "postgresql" | "sqlserver"
+     table_ref: Optional[str] = field(default=None)  # "schema.table" or "db.schema.table"
+     owned: bool = field(default=True)  # True = we close, False = user closes
+
+     # ------------------------------ Constructors ------------------------------
+
+     @staticmethod
+     def from_connection(conn: Any, table: str) -> "DatasetHandle":
+         """
+         Create a DatasetHandle from a BYOC (Bring Your Own Connection) database connection.
+
+         This allows users to pass their own database connection objects (psycopg2,
+         pyodbc, SQLAlchemy, etc.) while Kontra still performs SQL pushdown and preplan.
+
+         Args:
+             conn: A database connection object (psycopg2, pyodbc, SQLAlchemy engine, etc.)
+             table: Table reference: "table", "schema.table", or "database.schema.table"
+
+         Returns:
+             DatasetHandle configured for BYOC mode
+
+         Examples:
+             >>> import psycopg2
+             >>> conn = psycopg2.connect(host="localhost", dbname="mydb")
+             >>> handle = DatasetHandle.from_connection(conn, "public.users")
+
+             >>> import pyodbc
+             >>> conn = pyodbc.connect("DRIVER={ODBC Driver 17};SERVER=...")
+             >>> handle = DatasetHandle.from_connection(conn, "dbo.orders")
+
+         Notes:
+             - Kontra does NOT close the connection (owned=False); the user manages its lifecycle.
+             - SQL pushdown and preplan still work using the provided connection.
+             - The `dialect` is auto-detected from the connection type.
+             - SQLAlchemy engines/connections are automatically unwrapped to raw DBAPI.
+         """
+         from kontra.connectors.detection import (
+             detect_connection_dialect,
+             unwrap_sqlalchemy_connection,
+         )
+
+         # Detect dialect before unwrapping (SQLAlchemy has better dialect info)
+         dialect = detect_connection_dialect(conn)
+
+         # Unwrap SQLAlchemy to a raw DBAPI connection (has a .cursor() method)
+         raw_conn = unwrap_sqlalchemy_connection(conn)
+
+         return DatasetHandle(
+             uri=f"byoc://{dialect}/{table}",
+             scheme="byoc",
+             path=table,
+             format=dialect,
+             fs_opts={},
+             db_params=None,
+             external_conn=raw_conn,
+             dialect=dialect,
+             table_ref=table,
+             owned=False,  # The user owns the connection, not Kontra
+         )
+
+     @staticmethod
+     def from_uri(
+         uri: str,
+         storage_options: Optional[Dict[str, Any]] = None,
+     ) -> "DatasetHandle":
+         """
+         Create a DatasetHandle from a user-provided URI or path.
+
+         Examples:
+             - "s3://my-bucket/data/users.parquet"
+             - "/data/users.parquet" (scheme = "")
+             - "file:///data/users.csv" (scheme = "file")
+             - "https://example.com/x.parquet"
+
+         Args:
+             uri: Path or URI to the dataset
+             storage_options: Optional dict of cloud storage credentials.
+                 For S3/MinIO:
+                 - aws_access_key_id, aws_secret_access_key
+                 - aws_region (required for Polars)
+                 - endpoint_url (for MinIO/S3-compatible)
+                 For Azure:
+                 - account_name, account_key, sas_token, etc.
+                 These override environment variables when provided.
+
+         Notes:
+             - We keep `path` equal to the original `uri` so engines that accept
+               URIs directly (DuckDB: read_parquet) can use it verbatim.
+             - `fs_opts` is populated from environment variables, then merged with
+               storage_options (storage_options take precedence).
+         """
+         parsed = urlparse(uri)
+         scheme = (parsed.scheme or "").lower()
+         lower = uri.lower()
+
+         # Very light format inference (enough for materializer selection)
+         if lower.endswith(".parquet"):
+             fmt = "parquet"
+         elif lower.endswith(".csv") or lower.endswith(".tsv"):
+             fmt = "csv"  # TSV is CSV with a tab separator (auto-detected by Polars)
+         else:
+             fmt = "unknown"
+
+         # Defaults: pass the original URI through to backends that accept URIs
+         path = uri
+
+         # Filesystem options (extensible). For now we focus on S3-compatible settings;
+         # other filesystems can add their own keys without breaking callers.
+         fs_opts: Dict[str, str] = {}
+
+         if scheme == "s3":
+             _inject_s3_env(fs_opts)
+             # Merge user-provided storage_options (they take precedence over env vars)
+             if storage_options:
+                 _merge_s3_storage_options(fs_opts, storage_options)
+
+         # Azure Data Lake Storage / Azure Blob Storage
+         if scheme in ("abfs", "abfss", "az"):
+             _inject_azure_env(fs_opts)
+             # Merge user-provided storage_options (they take precedence over env vars)
+             if storage_options:
+                 _merge_azure_storage_options(fs_opts, storage_options)
+
+         # HTTP(S): typically public or signed URLs. No defaults needed here.
+         # Local ""/"file" schemes: no fs_opts.
+
+         # PostgreSQL: resolve connection parameters from URI + environment
+         db_params = None
+         if scheme in ("postgres", "postgresql"):
+             from kontra.connectors.postgres import resolve_connection_params
+
+             db_params = resolve_connection_params(uri)
+             fmt = "postgres"
+
+         # SQL Server: resolve connection parameters from URI + environment
+         if scheme in ("mssql", "sqlserver"):
+             from kontra.connectors.sqlserver import (
+                 resolve_connection_params as resolve_sqlserver_params,
+             )
+
+             db_params = resolve_sqlserver_params(uri)
+             fmt = "sqlserver"
+
+         return DatasetHandle(
+             uri=uri, scheme=scheme, path=path, format=fmt, fs_opts=fs_opts, db_params=db_params
+         )
+
+
+ # ------------------------------ Helpers ---------------------------------------
+
+
+ def _inject_s3_env(opts: Dict[str, str]) -> None:
+     """
+     Read S3/MinIO-related environment variables and copy them into `opts` using
+     the normalized keys that our DuckDB session factory/materializer expects.
+
+     We *don't* log or print these values anywhere; the caller just passes them to
+     the backend session config. All keys are optional.
+     """
+     # Credentials
+     ak = os.getenv("AWS_ACCESS_KEY_ID")
+     sk = os.getenv("AWS_SECRET_ACCESS_KEY")
+     st = os.getenv("AWS_SESSION_TOKEN")
+
+     # Region (prefer DUCKDB_S3_REGION when provided, else AWS_REGION, else default)
+     region = os.getenv("DUCKDB_S3_REGION") or os.getenv("AWS_REGION") or "us-east-1"
+
+     # Endpoint / URL style (MinIO/custom endpoints)
+     endpoint = os.getenv("DUCKDB_S3_ENDPOINT") or os.getenv("AWS_ENDPOINT_URL")
+     url_style = os.getenv("DUCKDB_S3_URL_STYLE")  # 'path' | 'host'
+     use_ssl = os.getenv("DUCKDB_S3_USE_SSL")  # 'true' | 'false'
+     max_conns = os.getenv("DUCKDB_S3_MAX_CONNECTIONS") or "64"
+
+     if ak:
+         opts["s3_access_key_id"] = ak
+     if sk:
+         opts["s3_secret_access_key"] = sk
+     if st:
+         opts["s3_session_token"] = st
+     if region:
+         opts["s3_region"] = region
+     if endpoint:
+         # Keep the full endpoint string; the DuckDB session factory will parse it.
+         opts["s3_endpoint"] = endpoint
+     if url_style:
+         opts["s3_url_style"] = url_style
+     if use_ssl:
+         opts["s3_use_ssl"] = use_ssl
+     if max_conns:
+         opts["s3_max_connections"] = str(max_conns)
+
+
+ def _inject_azure_env(opts: Dict[str, str]) -> None:
+     """
+     Read Azure Storage environment variables and copy them into `opts` using
+     normalized keys that our DuckDB session factory expects.
+
+     Supports multiple auth methods:
+     - Account key: AZURE_STORAGE_ACCOUNT_NAME + AZURE_STORAGE_ACCOUNT_KEY
+     - SAS token: AZURE_STORAGE_ACCOUNT_NAME + AZURE_STORAGE_SAS_TOKEN
+     - Connection string: AZURE_STORAGE_CONNECTION_STRING
+     - Service principal (OAuth): AZURE_TENANT_ID + AZURE_CLIENT_ID + AZURE_CLIENT_SECRET
+
+     All keys are optional. DuckDB's azure extension will use what's available.
+     """
+     # Account name (required for most auth methods)
+     account_name = os.getenv("AZURE_STORAGE_ACCOUNT_NAME")
+     if account_name:
+         opts["azure_account_name"] = account_name
+
+     # Account key auth
+     account_key = os.getenv("AZURE_STORAGE_ACCOUNT_KEY")
+     if account_key:
+         opts["azure_account_key"] = account_key
+
+     # SAS token auth (alternative to account key)
+     sas_token = os.getenv("AZURE_STORAGE_SAS_TOKEN")
+     if sas_token:
+         opts["azure_sas_token"] = sas_token
+
+     # Connection string auth (contains account name + key/SAS)
+     conn_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
+     if conn_string:
+         opts["azure_connection_string"] = conn_string
+
+     # OAuth / service principal auth
+     tenant_id = os.getenv("AZURE_TENANT_ID")
+     client_id = os.getenv("AZURE_CLIENT_ID")
+     client_secret = os.getenv("AZURE_CLIENT_SECRET")
+     if tenant_id:
+         opts["azure_tenant_id"] = tenant_id
+     if client_id:
+         opts["azure_client_id"] = client_id
+     if client_secret:
+         opts["azure_client_secret"] = client_secret
+
+     # Custom endpoint (for Databricks, sovereign clouds, Azurite emulator)
+     endpoint = os.getenv("AZURE_STORAGE_ENDPOINT")
+     if endpoint:
+         opts["azure_endpoint"] = endpoint
+
+
+ def _merge_s3_storage_options(opts: Dict[str, str], storage_options: Dict[str, Any]) -> None:
+     """
+     Merge user-provided storage_options into fs_opts for S3.
+
+     Maps Polars-style keys to our internal normalized keys.
+     User values take precedence over env-var-derived values.
+
+     Polars storage_options keys:
+     - aws_access_key_id -> s3_access_key_id
+     - aws_secret_access_key -> s3_secret_access_key
+     - aws_session_token -> s3_session_token
+     - aws_region -> s3_region
+     - endpoint_url -> s3_endpoint
+     """
+     # Mapping from Polars/user keys to our internal keys
+     key_map = {
+         "aws_access_key_id": "s3_access_key_id",
+         "aws_secret_access_key": "s3_secret_access_key",
+         "aws_session_token": "s3_session_token",
+         "aws_region": "s3_region",
+         "region": "s3_region",  # Alternative key
+         "endpoint_url": "s3_endpoint",
+     }
+
+     for user_key, internal_key in key_map.items():
+         if user_key in storage_options and storage_options[user_key] is not None:
+             opts[internal_key] = str(storage_options[user_key])
+
+     # Also accept our internal keys directly (pass-through)
+     internal_keys = [
+         "s3_access_key_id",
+         "s3_secret_access_key",
+         "s3_session_token",
+         "s3_region",
+         "s3_endpoint",
+         "s3_url_style",
+         "s3_use_ssl",
+     ]
+     for key in internal_keys:
+         if key in storage_options and storage_options[key] is not None:
+             opts[key] = str(storage_options[key])
+
+
+ def _merge_azure_storage_options(opts: Dict[str, str], storage_options: Dict[str, Any]) -> None:
+     """
+     Merge user-provided storage_options into fs_opts for Azure.
+
+     Maps common Azure keys to our internal normalized keys.
+     User values take precedence over env-var-derived values.
+     """
+     # Mapping from user keys to our internal keys
+     key_map = {
+         "account_name": "azure_account_name",
+         "account_key": "azure_account_key",
+         "sas_token": "azure_sas_token",
+         "connection_string": "azure_connection_string",
+         "tenant_id": "azure_tenant_id",
+         "client_id": "azure_client_id",
+         "client_secret": "azure_client_secret",
+         "endpoint": "azure_endpoint",
+     }
+
+     for user_key, internal_key in key_map.items():
+         if user_key in storage_options and storage_options[user_key] is not None:
+             opts[internal_key] = str(storage_options[user_key])
+
+     # Also accept our internal keys directly (pass-through)
+     internal_keys = [
+         "azure_account_name",
+         "azure_account_key",
+         "azure_sas_token",
+         "azure_connection_string",
+         "azure_tenant_id",
+         "azure_client_id",
+         "azure_client_secret",
+         "azure_endpoint",
+     ]
+     for key in internal_keys:
+         if key in storage_options and storage_options[key] is not None:
+             opts[key] = str(storage_options[key])
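Before the next file, a minimal usage sketch of `DatasetHandle.from_uri`, assembled from the docstrings above. The bucket, credentials, and MinIO endpoint are placeholder values, and the asserted `fs_opts` keys follow the `_merge_s3_storage_options` mapping in this hunk.

from kontra.connectors.handle import DatasetHandle

handle = DatasetHandle.from_uri(
    "s3://my-bucket/events.parquet",
    storage_options={
        "aws_access_key_id": "minio",             # placeholder; overrides AWS_ACCESS_KEY_ID
        "aws_secret_access_key": "minio123",      # placeholder; overrides AWS_SECRET_ACCESS_KEY
        "aws_region": "us-east-1",
        "endpoint_url": "http://localhost:9000",  # MinIO-style endpoint (placeholder)
    },
)

assert handle.scheme == "s3" and handle.format == "parquet"
assert handle.fs_opts["s3_endpoint"] == "http://localhost:9000"
assert handle.path == handle.uri  # URI is passed verbatim to backends like DuckDB's read_parquet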
@@ -0,0 +1,127 @@
+ # src/kontra/connectors/postgres.py
+ """
+ PostgreSQL connection utilities for Kontra.
+
+ Supports multiple authentication methods:
+ 1. Full URI: postgres://user:pass@host:port/database/schema.table
+ 2. Environment variables (libpq standard): PGHOST, PGPORT, PGUSER, PGPASSWORD, PGDATABASE
+ 3. DATABASE_URL (common in PaaS like Heroku, Railway)
+
+ Priority: URI values > DATABASE_URL > PGXXX env vars > defaults
+ """
+
+ from __future__ import annotations
+
+ import os
+ from dataclasses import dataclass
+ from typing import Any, Dict, Optional
+
+ from .db_utils import (
+     DbConnectionConfig,
+     resolve_connection_params as _resolve_params,
+ )
+
+
+ # PostgreSQL-specific configuration for parameter resolution
+ _PG_CONFIG = DbConnectionConfig(
+     default_host="localhost",
+     default_port=5432,
+     default_user=os.getenv("USER", "postgres"),
+     default_schema="public",
+     env_host="PGHOST",
+     env_port="PGPORT",
+     env_user="PGUSER",
+     env_password="PGPASSWORD",
+     env_database="PGDATABASE",
+     env_url="DATABASE_URL",
+     db_name="PostgreSQL",
+     uri_example="postgres://user:pass@host:5432/database/schema.table",
+     env_example="PGDATABASE",
+ )
+
+
+ @dataclass
+ class PostgresConnectionParams:
+     """Resolved PostgreSQL connection parameters."""
+
+     host: str
+     port: int
+     user: str
+     password: Optional[str]
+     database: str
+     schema: str
+     table: str
+
+     def to_dict(self) -> Dict[str, Any]:
+         """Return connection kwargs for psycopg.connect()."""
+         return {
+             "host": self.host,
+             "port": self.port,
+             "user": self.user,
+             "password": self.password,
+             "dbname": self.database,
+         }
+
+     @property
+     def qualified_table(self) -> str:
+         """Return the schema.table identifier."""
+         return f"{self.schema}.{self.table}"
+
+
+ def resolve_connection_params(uri: str) -> PostgresConnectionParams:
+     """
+     Resolve PostgreSQL connection parameters from URI + environment.
+
+     URI format:
+         postgres://user:pass@host:port/database/schema.table
+         postgres:///public.users (uses env vars for connection)
+
+     Priority: URI values > DATABASE_URL > PGXXX env vars > defaults
+
+     Raises:
+         ValueError: If required parameters (database, table) cannot be resolved.
+     """
+     resolved = _resolve_params(uri, _PG_CONFIG)
+
+     return PostgresConnectionParams(
+         host=resolved.host,
+         port=resolved.port,
+         user=resolved.user,
+         password=resolved.password,
+         database=resolved.database,  # type: ignore (validated in _resolve_params)
+         schema=resolved.schema,
+         table=resolved.table,  # type: ignore (validated in _resolve_params)
+     )
+
+
+ def get_connection(params: PostgresConnectionParams):
+     """
+     Create a psycopg connection from resolved parameters.
+
+     Returns:
+         psycopg.Connection
+     """
+     try:
+         import psycopg
+     except ImportError as e:
+         raise ImportError(
+             "psycopg is required for PostgreSQL support.\n"
+             "Install with: pip install 'psycopg[binary]'"
+         ) from e
+
+     try:
+         return psycopg.connect(**params.to_dict())
+     except psycopg.OperationalError as e:
+         raise ConnectionError(
+             f"PostgreSQL connection failed: {e}\n\n"
+             f"Connection details:\n"
+             f"  Host: {params.host}:{params.port}\n"
+             f"  Database: {params.database}\n"
+             f"  User: {params.user}\n\n"
+             "Check your connection settings or set environment variables:\n"
+             "  export PGHOST=localhost\n"
+             "  export PGPORT=5432\n"
+             "  export PGUSER=your_user\n"
+             "  export PGPASSWORD=your_password\n"
+             "  export PGDATABASE=your_database"
+         ) from e
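To make the resolution order concrete, here is a small sketch of the intended behavior, assuming `_resolve_params` fills missing connection fields from the environment as the docstring describes; the host, database, and credential values are placeholders.

import os

from kontra.connectors.postgres import resolve_connection_params

# Connection comes from env vars; the URI carries only the table reference.
os.environ["PGHOST"] = "db.internal"    # placeholder host
os.environ["PGDATABASE"] = "analytics"  # placeholder database
params = resolve_connection_params("postgres:///public.users")
assert params.host == "db.internal" and params.database == "analytics"
assert params.qualified_table == "public.users"

# URI values take precedence over DATABASE_URL and PGXXX env vars.
params = resolve_connection_params("postgres://svc:secret@pg.prod:6432/warehouse/sales.orders")
assert params.host == "pg.prod" and params.port == 6432
assert params.to_dict()["dbname"] == "warehouse"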