kontra-0.5.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. kontra/__init__.py +1871 -0
  2. kontra/api/__init__.py +22 -0
  3. kontra/api/compare.py +340 -0
  4. kontra/api/decorators.py +153 -0
  5. kontra/api/results.py +2121 -0
  6. kontra/api/rules.py +681 -0
  7. kontra/cli/__init__.py +0 -0
  8. kontra/cli/commands/__init__.py +1 -0
  9. kontra/cli/commands/config.py +153 -0
  10. kontra/cli/commands/diff.py +450 -0
  11. kontra/cli/commands/history.py +196 -0
  12. kontra/cli/commands/profile.py +289 -0
  13. kontra/cli/commands/validate.py +468 -0
  14. kontra/cli/constants.py +6 -0
  15. kontra/cli/main.py +48 -0
  16. kontra/cli/renderers.py +304 -0
  17. kontra/cli/utils.py +28 -0
  18. kontra/config/__init__.py +34 -0
  19. kontra/config/loader.py +127 -0
  20. kontra/config/models.py +49 -0
  21. kontra/config/settings.py +797 -0
  22. kontra/connectors/__init__.py +0 -0
  23. kontra/connectors/db_utils.py +251 -0
  24. kontra/connectors/detection.py +323 -0
  25. kontra/connectors/handle.py +368 -0
  26. kontra/connectors/postgres.py +127 -0
  27. kontra/connectors/sqlserver.py +226 -0
  28. kontra/engine/__init__.py +0 -0
  29. kontra/engine/backends/duckdb_session.py +227 -0
  30. kontra/engine/backends/duckdb_utils.py +18 -0
  31. kontra/engine/backends/polars_backend.py +47 -0
  32. kontra/engine/engine.py +1205 -0
  33. kontra/engine/executors/__init__.py +15 -0
  34. kontra/engine/executors/base.py +50 -0
  35. kontra/engine/executors/database_base.py +528 -0
  36. kontra/engine/executors/duckdb_sql.py +607 -0
  37. kontra/engine/executors/postgres_sql.py +162 -0
  38. kontra/engine/executors/registry.py +69 -0
  39. kontra/engine/executors/sqlserver_sql.py +163 -0
  40. kontra/engine/materializers/__init__.py +14 -0
  41. kontra/engine/materializers/base.py +42 -0
  42. kontra/engine/materializers/duckdb.py +110 -0
  43. kontra/engine/materializers/factory.py +22 -0
  44. kontra/engine/materializers/polars_connector.py +131 -0
  45. kontra/engine/materializers/postgres.py +157 -0
  46. kontra/engine/materializers/registry.py +138 -0
  47. kontra/engine/materializers/sqlserver.py +160 -0
  48. kontra/engine/result.py +15 -0
  49. kontra/engine/sql_utils.py +611 -0
  50. kontra/engine/sql_validator.py +609 -0
  51. kontra/engine/stats.py +194 -0
  52. kontra/engine/types.py +138 -0
  53. kontra/errors.py +533 -0
  54. kontra/logging.py +85 -0
  55. kontra/preplan/__init__.py +5 -0
  56. kontra/preplan/planner.py +253 -0
  57. kontra/preplan/postgres.py +179 -0
  58. kontra/preplan/sqlserver.py +191 -0
  59. kontra/preplan/types.py +24 -0
  60. kontra/probes/__init__.py +20 -0
  61. kontra/probes/compare.py +400 -0
  62. kontra/probes/relationship.py +283 -0
  63. kontra/reporters/__init__.py +0 -0
  64. kontra/reporters/json_reporter.py +190 -0
  65. kontra/reporters/rich_reporter.py +11 -0
  66. kontra/rules/__init__.py +35 -0
  67. kontra/rules/base.py +186 -0
  68. kontra/rules/builtin/__init__.py +40 -0
  69. kontra/rules/builtin/allowed_values.py +156 -0
  70. kontra/rules/builtin/compare.py +188 -0
  71. kontra/rules/builtin/conditional_not_null.py +213 -0
  72. kontra/rules/builtin/conditional_range.py +310 -0
  73. kontra/rules/builtin/contains.py +138 -0
  74. kontra/rules/builtin/custom_sql_check.py +182 -0
  75. kontra/rules/builtin/disallowed_values.py +140 -0
  76. kontra/rules/builtin/dtype.py +203 -0
  77. kontra/rules/builtin/ends_with.py +129 -0
  78. kontra/rules/builtin/freshness.py +240 -0
  79. kontra/rules/builtin/length.py +193 -0
  80. kontra/rules/builtin/max_rows.py +35 -0
  81. kontra/rules/builtin/min_rows.py +46 -0
  82. kontra/rules/builtin/not_null.py +121 -0
  83. kontra/rules/builtin/range.py +222 -0
  84. kontra/rules/builtin/regex.py +143 -0
  85. kontra/rules/builtin/starts_with.py +129 -0
  86. kontra/rules/builtin/unique.py +124 -0
  87. kontra/rules/condition_parser.py +203 -0
  88. kontra/rules/execution_plan.py +455 -0
  89. kontra/rules/factory.py +103 -0
  90. kontra/rules/predicates.py +25 -0
  91. kontra/rules/registry.py +24 -0
  92. kontra/rules/static_predicates.py +120 -0
  93. kontra/scout/__init__.py +9 -0
  94. kontra/scout/backends/__init__.py +17 -0
  95. kontra/scout/backends/base.py +111 -0
  96. kontra/scout/backends/duckdb_backend.py +359 -0
  97. kontra/scout/backends/postgres_backend.py +519 -0
  98. kontra/scout/backends/sqlserver_backend.py +577 -0
  99. kontra/scout/dtype_mapping.py +150 -0
  100. kontra/scout/patterns.py +69 -0
  101. kontra/scout/profiler.py +801 -0
  102. kontra/scout/reporters/__init__.py +39 -0
  103. kontra/scout/reporters/json_reporter.py +165 -0
  104. kontra/scout/reporters/markdown_reporter.py +152 -0
  105. kontra/scout/reporters/rich_reporter.py +144 -0
  106. kontra/scout/store.py +208 -0
  107. kontra/scout/suggest.py +200 -0
  108. kontra/scout/types.py +652 -0
  109. kontra/state/__init__.py +29 -0
  110. kontra/state/backends/__init__.py +79 -0
  111. kontra/state/backends/base.py +348 -0
  112. kontra/state/backends/local.py +480 -0
  113. kontra/state/backends/postgres.py +1010 -0
  114. kontra/state/backends/s3.py +543 -0
  115. kontra/state/backends/sqlserver.py +969 -0
  116. kontra/state/fingerprint.py +166 -0
  117. kontra/state/types.py +1061 -0
  118. kontra/version.py +1 -0
  119. kontra-0.5.2.dist-info/METADATA +122 -0
  120. kontra-0.5.2.dist-info/RECORD +124 -0
  121. kontra-0.5.2.dist-info/WHEEL +5 -0
  122. kontra-0.5.2.dist-info/entry_points.txt +2 -0
  123. kontra-0.5.2.dist-info/licenses/LICENSE +17 -0
  124. kontra-0.5.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,226 @@
+ # src/kontra/connectors/sqlserver.py
+ """
+ SQL Server connection utilities for Kontra.
+
+ Supports multiple authentication methods:
+ 1. Full URI: mssql://user:pass@host:port/database/schema.table
+ 2. Environment variables: MSSQL_HOST, MSSQL_PORT, MSSQL_USER, MSSQL_PASSWORD, MSSQL_DATABASE
+ 3. SQLSERVER_URL (similar to the DATABASE_URL pattern)
+
+ Priority: URI values > SQLSERVER_URL > MSSQL_XXX env vars > defaults
+ """
+
+ from __future__ import annotations
+
+ from dataclasses import dataclass
+ from typing import Any, Dict, Optional
+
+ from .db_utils import (
+     DbConnectionConfig,
+     resolve_connection_params as _resolve_params,
+ )
+
+
+ # SQL Server-specific configuration for parameter resolution
+ _MSSQL_CONFIG = DbConnectionConfig(
+     default_host="localhost",
+     default_port=1433,
+     default_user="sa",
+     default_schema="dbo",
+     env_host="MSSQL_HOST",
+     env_port="MSSQL_PORT",
+     env_user="MSSQL_USER",
+     env_password="MSSQL_PASSWORD",
+     env_database="MSSQL_DATABASE",
+     env_url="SQLSERVER_URL",
+     db_name="SQL Server",
+     uri_example="mssql://user:pass@host:1433/database/schema.table",
+     env_example="MSSQL_DATABASE",
+ )
+
+
+ @dataclass
+ class SqlServerConnectionParams:
+     """Resolved SQL Server connection parameters."""
+
+     host: str
+     port: int
+     user: str
+     password: Optional[str]
+     database: str
+     schema: str
+     table: str
+
+     def to_dict(self) -> Dict[str, Any]:
+         """Return connection kwargs for pymssql.connect()."""
+         return {
+             "server": self.host,
+             "port": self.port,
+             "user": self.user,
+             "password": self.password,
+             "database": self.database,
+         }
+
+     @property
+     def qualified_table(self) -> str:
+         """Return the schema.table identifier."""
+         return f"{self.schema}.{self.table}"
+
+
+ def resolve_connection_params(uri: str) -> SqlServerConnectionParams:
+     """
+     Resolve SQL Server connection parameters from URI + environment.
+
+     URI formats:
+         mssql://user:pass@host:port/database/schema.table
+         mssql:///dbo.users (uses env vars for connection)
+         sqlserver://... (alias for mssql://)
+
+     Priority: URI values > SQLSERVER_URL > MSSQL_XXX env vars > defaults
+
+     Raises:
+         ValueError: If required parameters (database, table) cannot be resolved.
+     """
+     resolved = _resolve_params(uri, _MSSQL_CONFIG)
+
+     return SqlServerConnectionParams(
+         host=resolved.host,
+         port=resolved.port,
+         user=resolved.user,
+         password=resolved.password,
+         database=resolved.database,  # type: ignore  (validated in _resolve_params)
+         schema=resolved.schema,
+         table=resolved.table,  # type: ignore  (validated in _resolve_params)
+     )
+
+
+ def get_connection(params: SqlServerConnectionParams):
+     """
+     Create a pymssql connection from resolved parameters.
+
+     Returns:
+         pymssql.Connection
+     """
+     try:
+         import pymssql
+     except ImportError as e:
+         raise ImportError(
+             "pymssql is required for SQL Server support.\n"
+             "Install with: pip install pymssql"
+         ) from e
+
+     try:
+         return pymssql.connect(**params.to_dict())
+     except pymssql.OperationalError as e:
+         raise ConnectionError(
+             f"SQL Server connection failed: {e}\n\n"
+             f"Connection details:\n"
+             f"  Host: {params.host}:{params.port}\n"
+             f"  Database: {params.database}\n"
+             f"  User: {params.user}\n\n"
+             "Check your connection settings or set environment variables:\n"
+             "  export MSSQL_HOST=localhost\n"
+             "  export MSSQL_PORT=1433\n"
+             "  export MSSQL_USER=your_user\n"
+             "  export MSSQL_PASSWORD=your_password\n"
+             "  export MSSQL_DATABASE=your_database"
+         ) from e
+
+
+ def fetch_sqlserver_stats(params: SqlServerConnectionParams) -> Dict[str, Dict[str, Any]]:
+     """
+     Fetch SQL Server statistics from sys.dm_db_stats_properties and related DMVs.
+
+     Returns a dict keyed by column name with stats:
+         {
+             "column_name": {
+                 "null_frac": 0.02,   # Estimated fraction of nulls
+                 "n_distinct": 1000,  # Estimated distinct values (-1 = unique)
+                 "rows": 10000,       # Rows when stats were computed
+             },
+             "__table__": {
+                 "row_estimate": 10000,
+                 "page_count": 100,
+             },
+         }
+     """
+     with get_connection(params) as conn:
+         with conn.cursor() as cursor:
+             # Table-level stats from sys.dm_db_partition_stats
+             cursor.execute(
+                 """
+                 SELECT SUM(row_count) AS row_estimate,
+                        SUM(used_page_count) AS page_count
+                 FROM sys.dm_db_partition_stats ps
+                 JOIN sys.objects o ON ps.object_id = o.object_id
+                 JOIN sys.schemas s ON o.schema_id = s.schema_id
+                 WHERE s.name = %s AND o.name = %s AND ps.index_id IN (0, 1)
+                 """,
+                 (params.schema, params.table),
+             )
+             row = cursor.fetchone()
+             table_stats = {
+                 "row_estimate": row[0] if row and row[0] else 0,
+                 "page_count": row[1] if row and row[1] else 0,
+             }
+
+             # Column-level stats metadata from sys.dm_db_stats_properties
+             cursor.execute(
+                 """
+                 SELECT
+                     c.name AS column_name,
+                     s.name AS stat_name,
+                     sp.rows,
+                     sp.modification_counter
+                 FROM sys.stats s
+                 JOIN sys.stats_columns sc ON s.stats_id = sc.stats_id AND s.object_id = sc.object_id
+                 JOIN sys.columns c ON sc.column_id = c.column_id AND sc.object_id = c.object_id
+                 CROSS APPLY sys.dm_db_stats_properties(s.object_id, s.stats_id) sp
+                 WHERE s.object_id = OBJECT_ID(%s)
+                 """,
+                 (f"{params.schema}.{params.table}",),
+             )
+
+             result: Dict[str, Dict[str, Any]] = {"__table__": table_stats}
+
+             for row in cursor.fetchall():
+                 col_name, stat_name, rows, mod_counter = row
+                 if col_name not in result:
+                     result[col_name] = {
+                         "rows": rows,
+                         "modification_counter": mod_counter,
+                         "stat_name": stat_name,
+                     }
+
+             # Density (1/distinct) could be read from DBCC SHOW_STATISTICS, but that
+             # requires fragile parsing. Instead, query actual null/distinct counts for
+             # the columns that have stats objects (more reliable for the preplanner).
+             for col_name in list(result.keys()):
+                 if col_name == "__table__":
+                     continue
+                 try:
+                     cursor.execute(
+                         f"""
+                         SELECT
+                             CAST(SUM(CASE WHEN [{col_name}] IS NULL THEN 1 ELSE 0 END) AS FLOAT)
+                                 / NULLIF(COUNT(*), 0) AS null_frac,
+                             COUNT(DISTINCT [{col_name}]) AS n_distinct
+                         FROM [{params.schema}].[{params.table}]
+                         """
+                     )
+                     stats_row = cursor.fetchone()
+                     if stats_row:
+                         result[col_name]["null_frac"] = stats_row[0] or 0.0
+                         result[col_name]["n_distinct"] = stats_row[1] or 0
+                         # Convention: -1 means all values are distinct (unique column)
+                         if result[col_name]["n_distinct"] == table_stats["row_estimate"]:
+                             result[col_name]["n_distinct"] = -1
+                 except Exception:
+                     # Stats query failed; leave partial data for this column
+                     pass
+
+             return result
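For orientation, a minimal sketch of how this connector is meant to be driven, using the functions shown above; the environment values and the dbo.users table are placeholders:

    import os

    from kontra.connectors.sqlserver import get_connection, resolve_connection_params

    # Connection details come from the environment; the URI names only the table.
    os.environ["MSSQL_HOST"] = "localhost"       # placeholder values
    os.environ["MSSQL_DATABASE"] = "warehouse"
    os.environ["MSSQL_USER"] = "sa"
    os.environ["MSSQL_PASSWORD"] = "secret"

    params = resolve_connection_params("mssql:///dbo.users")
    print(params.qualified_table)  # -> dbo.users

    conn = get_connection(params)  # raises ConnectionError with setup hints on failure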
@@ -0,0 +1,227 @@
+ # src/kontra/engine/backends/duckdb_session.py
+ from __future__ import annotations
+
+ import os
+ from typing import Any, Dict
+ from urllib.parse import urlparse
+
+ import duckdb
+
+ from kontra.connectors.handle import DatasetHandle
+
+ # --- Public API ---
+
+
+ def create_duckdb_connection(handle: DatasetHandle) -> duckdb.DuckDBPyConnection:
+     """
+     Create a DuckDB connection configured specifically for the given DatasetHandle.
+
+     This is the centralized factory for all DuckDB instances in Kontra.
+     It inspects the handle's scheme and fs_opts to load the correct
+     extensions (httpfs) and apply the necessary configuration
+     (e.g., S3 endpoints, credentials, region) for I/O.
+
+     Args:
+         handle: The DatasetHandle containing the URI and filesystem options.
+
+     Returns:
+         A configured duckdb.DuckDBPyConnection.
+     """
+     con = duckdb.connect()
+
+     # Apply performance/threading tweaks (reads env vars; runtime tuning, not I/O)
+     _configure_threads(con)
+
+     # Apply I/O and credential configuration based on the data source
+     match handle.scheme:
+         case "s3":
+             _configure_s3(con, handle.fs_opts)
+         case "abfs" | "abfss" | "az":
+             _configure_azure(con, handle.fs_opts)
+         case "http" | "https":
+             _configure_http(con, handle.fs_opts)
+         case "file" | "":
+             # Local files need no special I/O config
+             pass
+         case _:
+             # Best-effort for unknown schemes: load httpfs just in case
+             try:
+                 _configure_http(con, handle.fs_opts)
+             except Exception:
+                 pass  # Ignore if httpfs fails to load
+
+     return con
+
+
+ # --- Internal Helpers ---
+
+
+ def _safe_set(con: duckdb.DuckDBPyConnection, key: str, value: Any) -> None:
+     """Safely execute a DuckDB SET command, ignoring errors."""
+     try:
+         con.execute(f"SET {key} = ?", [str(value)])
+     except Exception:
+         # Fail gracefully if the setting doesn't exist (e.g., wrong DuckDB version)
+         pass
+
+
+ def _configure_threads(con: duckdb.DuckDBPyConnection) -> None:
+     """
+     Configure the DuckDB thread count from DUCKDB_THREADS or the CPU count.
+     This is a performance tweak, not an I/O secret.
+     """
+     env_threads = os.getenv("DUCKDB_THREADS")
+     try:
+         nthreads = int(env_threads) if env_threads else (os.cpu_count() or 4)
+     except Exception:
+         nthreads = os.cpu_count() or 4
+
+     # Try both PRAGMA (older) and SET (newer) syntax for compatibility
+     for sql in (f"PRAGMA threads={int(nthreads)};", f"SET threads = {int(nthreads)};"):
+         try:
+             con.execute(sql)
+             break
+         except Exception:
+             continue
+
+
+ def _configure_http(con: duckdb.DuckDBPyConnection, fs_opts: Dict[str, str]) -> None:
+     """Install and load the httpfs extension for reading http(s):// files."""
+     con.execute("INSTALL httpfs;")
+     con.execute("LOAD httpfs;")
+     _safe_set(con, "enable_object_cache", "true")
+
+
+ def _configure_s3(con: duckdb.DuckDBPyConnection, fs_opts: Dict[str, str]) -> None:
+     """
+     Configure the httpfs extension for S3-compatible storage (AWS, MinIO, R2).
+
+     Expected fs_opts keys:
+         - s3_endpoint
+         - s3_region
+         - s3_url_style ('path' | 'host')
+         - s3_use_ssl ('true' | 'false')
+         - s3_access_key_id
+         - s3_secret_access_key
+         - s3_session_token
+         - s3_max_connections
+     """
+     _configure_http(con, fs_opts)  # S3 support is built on httpfs
+
+     # Credentials
+     if ak := fs_opts.get("s3_access_key_id"):
+         _safe_set(con, "s3_access_key_id", ak)
+     if sk := fs_opts.get("s3_secret_access_key"):
+         _safe_set(con, "s3_secret_access_key", sk)
+     if st := fs_opts.get("s3_session_token"):
+         _safe_set(con, "s3_session_token", st)
+
+     # Region
+     if region := fs_opts.get("s3_region"):
+         _safe_set(con, "s3_region", region)
+
+     # Endpoint (MinIO/S3-compatible)
+     endpoint = fs_opts.get("s3_endpoint")
+     url_style = fs_opts.get("s3_url_style")
+     use_ssl = fs_opts.get("s3_use_ssl")
+
+     if endpoint:
+         # Parse "http://host:port" or just "host:port"
+         parsed = urlparse(endpoint)
+         hostport = parsed.netloc or parsed.path or endpoint
+         _safe_set(con, "s3_endpoint", hostport)
+
+         # Infer SSL from the endpoint scheme if not explicitly set
+         if use_ssl is None:
+             use_ssl = "true" if parsed.scheme == "https" else "false"
+         _safe_set(con, "s3_use_ssl", use_ssl)
+
+         # Default to path-style URLs for custom endpoints (MinIO-friendly)
+         if url_style is None:
+             url_style = "path"
+
+     if url_style:
+         _safe_set(con, "s3_url_style", url_style)
+
+     # Performance and reliability for large files over S3/HTTP.
+     # http_timeout is in seconds (default 30s); increase it for large files.
+     _safe_set(con, "http_timeout", "600")         # 10 minutes for large files
+     _safe_set(con, "http_retries", "5")           # More retries for reliability
+     _safe_set(con, "http_retry_wait_ms", "2000")  # 2s between retries
+     # Disable keep-alive for MinIO/S3-compatible stores; connection pooling can cause issues
+     _safe_set(con, "http_keep_alive", "false")
+
+
+ def _configure_azure(con: duckdb.DuckDBPyConnection, fs_opts: Dict[str, str]) -> None:
+     """
+     Configure the Azure extension for ADLS Gen2 (abfs://, abfss://) and Azure Blob (az://).
+
+     DuckDB 0.10+ has native Azure support via the 'azure' extension.
+     This handles authentication and endpoint configuration.
+
+     Expected fs_opts keys:
+         - azure_account_name: Storage account name
+         - azure_account_key: Storage account key
+         - azure_sas_token: SAS token (alternative to the key)
+         - azure_connection_string: Full connection string (alternative)
+         - azure_tenant_id: For OAuth/service principal
+         - azure_client_id: For OAuth/service principal
+         - azure_client_secret: For OAuth/service principal
+         - azure_endpoint: Custom endpoint (Databricks, sovereign clouds, Azurite)
+
+     Raises:
+         RuntimeError: If the Azure extension is not available (DuckDB < 0.10.0).
+     """
+     # Install and load the Azure extension
+     try:
+         con.execute("INSTALL azure;")
+         con.execute("LOAD azure;")
+     except Exception as e:
+         raise RuntimeError(
+             "Azure extension not available. DuckDB >= 0.10.0 is required for Azure "
+             f"support. Error: {e}"
+         ) from e
+
+     # Account name (required for key/SAS auth)
+     if account_name := fs_opts.get("azure_account_name"):
+         _safe_set(con, "azure_storage_account_name", account_name)
+
+     # Account key auth
+     if account_key := fs_opts.get("azure_account_key"):
+         _safe_set(con, "azure_account_key", account_key)
+
+     # SAS token auth (alternative to the account key).
+     # Note: DuckDB expects the token without a leading '?'
+     if sas_token := fs_opts.get("azure_sas_token"):
+         if sas_token.startswith("?"):
+             sas_token = sas_token[1:]
+         _safe_set(con, "azure_sas_token", sas_token)
+
+     # Connection string auth
+     if conn_string := fs_opts.get("azure_connection_string"):
+         _safe_set(con, "azure_storage_connection_string", conn_string)
+
+     # OAuth / service principal auth
+     if tenant_id := fs_opts.get("azure_tenant_id"):
+         _safe_set(con, "azure_tenant_id", tenant_id)
+     if client_id := fs_opts.get("azure_client_id"):
+         _safe_set(con, "azure_client_id", client_id)
+     if client_secret := fs_opts.get("azure_client_secret"):
+         _safe_set(con, "azure_client_secret", client_secret)
+
+     # Custom endpoint (Databricks, sovereign clouds, Azurite emulator)
+     if endpoint := fs_opts.get("azure_endpoint"):
+         _safe_set(con, "azure_endpoint", endpoint)
+
+     # Performance settings (same as S3)
+     _safe_set(con, "http_timeout", "600")  # 10 minutes for large files
+     _safe_set(con, "http_retries", "5")
+     _safe_set(con, "http_retry_wait_ms", "2000")
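To make the S3 path concrete, this sketch shows roughly what the factory issues for a MinIO-style handle (scheme s3, custom endpoint); the endpoint, bucket, and credentials are placeholder values, and each SET is attempted the way _safe_set does, so options a given DuckDB build lacks are silently skipped:

    import duckdb

    con = duckdb.connect()
    con.execute("INSTALL httpfs;")
    con.execute("LOAD httpfs;")

    # Equivalent of _configure_s3 for a local MinIO endpoint (all values placeholders):
    for key, value in {
        "s3_access_key_id": "minioadmin",
        "s3_secret_access_key": "minioadmin",
        "s3_region": "us-east-1",
        "s3_endpoint": "localhost:9000",  # host:port; the factory strips the scheme
        "s3_use_ssl": "false",            # inferred from an http:// endpoint
        "s3_url_style": "path",           # MinIO-friendly default
        "http_timeout": "600",
        "http_retries": "5",
    }.items():
        try:
            con.execute(f"SET {key} = ?", [value])
        except Exception:
            pass  # mirrors _safe_set: ignore unsupported settings

    rows = con.execute(
        "SELECT COUNT(*) FROM read_parquet('s3://bucket/data.parquet')"
    ).fetchall()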
@@ -0,0 +1,18 @@
+ # src/kontra/engine/backends/duckdb_utils.py
+ from __future__ import annotations
+
+
+ def esc_ident(name: str) -> str:
+     """
+     Quote an identifier for DuckDB (double quotes, escape internal quotes).
+     This is a centralized helper used by executors and materializers.
+     """
+     return '"' + name.replace('"', '""') + '"'
+
+
+ def lit_str(s: str) -> str:
+     """
+     Return a single-quoted SQL string literal with internal quotes escaped.
+     This is a centralized helper used by executors and materializers.
+     """
+     return "'" + s.replace("'", "''") + "'"
@@ -0,0 +1,47 @@
+ # src/kontra/engine/backends/polars_backend.py
+ """
+ Polars Backend (Adapter)
+
+ Thin adapter that defers execution to the RuleExecutionPlan's compiled executor.
+ Keeps the backend boundary explicit and behavior deterministic.
+ """
+
+ from __future__ import annotations
+
+ from typing import Any, Callable, Dict, List
+
+ import polars as pl
+
+
+ class PolarsBackend:
+     name = "polars"
+
+     def __init__(self, executor: Callable[[pl.DataFrame, Any], List[Dict[str, Any]]]):
+         """
+         Parameters
+         ----------
+         executor : callable
+             Function that evaluates the compiled plan against a materialized
+             Polars DataFrame (typically RuleExecutionPlan.execute_compiled).
+         """
+         self._executor = executor
+
+     def supports(self, connector_caps: int) -> bool:
+         """Capability hook reserved for the future; always True for local DataFrames."""
+         return True
+
+     def compile(self, compiled_plan: Any) -> Any:
+         """No-op for Polars: pass the compiled plan through."""
+         return compiled_plan
+
+     def execute(self, df: pl.DataFrame, compiled_artifact: Any) -> Dict[str, Any]:
+         """Execute the compiled artifact against `df` and wrap the results."""
+         results = self._executor(df, compiled_artifact)
+         return {"results": results}
+
+     def introspect(self, df: pl.DataFrame) -> Dict[str, Any]:
+         """Basic observability: row count and available columns."""
+         return {
+             "row_count": int(df.height),
+             "available_cols": list(df.columns),
+         }
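A minimal sketch of the adapter contract, using a hypothetical stub_executor in place of RuleExecutionPlan.execute_compiled (defined elsewhere in the package):

    from typing import Any, Dict, List

    import polars as pl

    from kontra.engine.backends.polars_backend import PolarsBackend


    def stub_executor(df: pl.DataFrame, plan: Any) -> List[Dict[str, Any]]:
        # Stand-in for the real compiled-plan executor: emits one fake rule result.
        return [{"rule": "not_null(id)", "passed": df["id"].null_count() == 0}]


    backend = PolarsBackend(stub_executor)
    df = pl.DataFrame({"id": [1, 2, 3]})

    print(backend.introspect(df))         # {'row_count': 3, 'available_cols': ['id']}
    print(backend.execute(df, object()))  # {'results': [{'rule': 'not_null(id)', 'passed': True}]}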