kontra 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. kontra/__init__.py +1871 -0
  2. kontra/api/__init__.py +22 -0
  3. kontra/api/compare.py +340 -0
  4. kontra/api/decorators.py +153 -0
  5. kontra/api/results.py +2121 -0
  6. kontra/api/rules.py +681 -0
  7. kontra/cli/__init__.py +0 -0
  8. kontra/cli/commands/__init__.py +1 -0
  9. kontra/cli/commands/config.py +153 -0
  10. kontra/cli/commands/diff.py +450 -0
  11. kontra/cli/commands/history.py +196 -0
  12. kontra/cli/commands/profile.py +289 -0
  13. kontra/cli/commands/validate.py +468 -0
  14. kontra/cli/constants.py +6 -0
  15. kontra/cli/main.py +48 -0
  16. kontra/cli/renderers.py +304 -0
  17. kontra/cli/utils.py +28 -0
  18. kontra/config/__init__.py +34 -0
  19. kontra/config/loader.py +127 -0
  20. kontra/config/models.py +49 -0
  21. kontra/config/settings.py +797 -0
  22. kontra/connectors/__init__.py +0 -0
  23. kontra/connectors/db_utils.py +251 -0
  24. kontra/connectors/detection.py +323 -0
  25. kontra/connectors/handle.py +368 -0
  26. kontra/connectors/postgres.py +127 -0
  27. kontra/connectors/sqlserver.py +226 -0
  28. kontra/engine/__init__.py +0 -0
  29. kontra/engine/backends/duckdb_session.py +227 -0
  30. kontra/engine/backends/duckdb_utils.py +18 -0
  31. kontra/engine/backends/polars_backend.py +47 -0
  32. kontra/engine/engine.py +1205 -0
  33. kontra/engine/executors/__init__.py +15 -0
  34. kontra/engine/executors/base.py +50 -0
  35. kontra/engine/executors/database_base.py +528 -0
  36. kontra/engine/executors/duckdb_sql.py +607 -0
  37. kontra/engine/executors/postgres_sql.py +162 -0
  38. kontra/engine/executors/registry.py +69 -0
  39. kontra/engine/executors/sqlserver_sql.py +163 -0
  40. kontra/engine/materializers/__init__.py +14 -0
  41. kontra/engine/materializers/base.py +42 -0
  42. kontra/engine/materializers/duckdb.py +110 -0
  43. kontra/engine/materializers/factory.py +22 -0
  44. kontra/engine/materializers/polars_connector.py +131 -0
  45. kontra/engine/materializers/postgres.py +157 -0
  46. kontra/engine/materializers/registry.py +138 -0
  47. kontra/engine/materializers/sqlserver.py +160 -0
  48. kontra/engine/result.py +15 -0
  49. kontra/engine/sql_utils.py +611 -0
  50. kontra/engine/sql_validator.py +609 -0
  51. kontra/engine/stats.py +194 -0
  52. kontra/engine/types.py +138 -0
  53. kontra/errors.py +533 -0
  54. kontra/logging.py +85 -0
  55. kontra/preplan/__init__.py +5 -0
  56. kontra/preplan/planner.py +253 -0
  57. kontra/preplan/postgres.py +179 -0
  58. kontra/preplan/sqlserver.py +191 -0
  59. kontra/preplan/types.py +24 -0
  60. kontra/probes/__init__.py +20 -0
  61. kontra/probes/compare.py +400 -0
  62. kontra/probes/relationship.py +283 -0
  63. kontra/reporters/__init__.py +0 -0
  64. kontra/reporters/json_reporter.py +190 -0
  65. kontra/reporters/rich_reporter.py +11 -0
  66. kontra/rules/__init__.py +35 -0
  67. kontra/rules/base.py +186 -0
  68. kontra/rules/builtin/__init__.py +40 -0
  69. kontra/rules/builtin/allowed_values.py +156 -0
  70. kontra/rules/builtin/compare.py +188 -0
  71. kontra/rules/builtin/conditional_not_null.py +213 -0
  72. kontra/rules/builtin/conditional_range.py +310 -0
  73. kontra/rules/builtin/contains.py +138 -0
  74. kontra/rules/builtin/custom_sql_check.py +182 -0
  75. kontra/rules/builtin/disallowed_values.py +140 -0
  76. kontra/rules/builtin/dtype.py +203 -0
  77. kontra/rules/builtin/ends_with.py +129 -0
  78. kontra/rules/builtin/freshness.py +240 -0
  79. kontra/rules/builtin/length.py +193 -0
  80. kontra/rules/builtin/max_rows.py +35 -0
  81. kontra/rules/builtin/min_rows.py +46 -0
  82. kontra/rules/builtin/not_null.py +121 -0
  83. kontra/rules/builtin/range.py +222 -0
  84. kontra/rules/builtin/regex.py +143 -0
  85. kontra/rules/builtin/starts_with.py +129 -0
  86. kontra/rules/builtin/unique.py +124 -0
  87. kontra/rules/condition_parser.py +203 -0
  88. kontra/rules/execution_plan.py +455 -0
  89. kontra/rules/factory.py +103 -0
  90. kontra/rules/predicates.py +25 -0
  91. kontra/rules/registry.py +24 -0
  92. kontra/rules/static_predicates.py +120 -0
  93. kontra/scout/__init__.py +9 -0
  94. kontra/scout/backends/__init__.py +17 -0
  95. kontra/scout/backends/base.py +111 -0
  96. kontra/scout/backends/duckdb_backend.py +359 -0
  97. kontra/scout/backends/postgres_backend.py +519 -0
  98. kontra/scout/backends/sqlserver_backend.py +577 -0
  99. kontra/scout/dtype_mapping.py +150 -0
  100. kontra/scout/patterns.py +69 -0
  101. kontra/scout/profiler.py +801 -0
  102. kontra/scout/reporters/__init__.py +39 -0
  103. kontra/scout/reporters/json_reporter.py +165 -0
  104. kontra/scout/reporters/markdown_reporter.py +152 -0
  105. kontra/scout/reporters/rich_reporter.py +144 -0
  106. kontra/scout/store.py +208 -0
  107. kontra/scout/suggest.py +200 -0
  108. kontra/scout/types.py +652 -0
  109. kontra/state/__init__.py +29 -0
  110. kontra/state/backends/__init__.py +79 -0
  111. kontra/state/backends/base.py +348 -0
  112. kontra/state/backends/local.py +480 -0
  113. kontra/state/backends/postgres.py +1010 -0
  114. kontra/state/backends/s3.py +543 -0
  115. kontra/state/backends/sqlserver.py +969 -0
  116. kontra/state/fingerprint.py +166 -0
  117. kontra/state/types.py +1061 -0
  118. kontra/version.py +1 -0
  119. kontra-0.5.2.dist-info/METADATA +122 -0
  120. kontra-0.5.2.dist-info/RECORD +124 -0
  121. kontra-0.5.2.dist-info/WHEEL +5 -0
  122. kontra-0.5.2.dist-info/entry_points.txt +2 -0
  123. kontra-0.5.2.dist-info/licenses/LICENSE +17 -0
  124. kontra-0.5.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,131 @@
1
+ # src/kontra/engine/materializers/polars_connector.py
2
+ from __future__ import annotations
3
+
4
+ """
5
+ PolarsConnectorMaterializer
6
+
7
+ Purpose
8
+ -------
9
+ Local, dependency-light materializer that produces a Polars DataFrame from
10
+ file-based datasets (Parquet/CSV). Supports column projection. Does *not*
11
+ require the legacy connectors package.
12
+
13
+ Design
14
+ ------
15
+ - First tries legacy `ConnectorFactory` (for back-compat if present).
16
+ - Otherwise uses native Polars lazy scans:
17
+ - scan_* → optional .select(projection) → collect()
18
+
19
+ Notes
20
+ -----
21
+ Polars ≥ 1.34 removed `columns=` from scan_*; apply projection via `.select()`.
22
+ """
23
+
24
+ from typing import Any, Dict, List, Optional
25
+
26
+ import polars as pl
27
+
28
+ from kontra.connectors.handle import DatasetHandle
29
+ from .base import BaseMaterializer
30
+ from .registry import register_materializer
31
+
32
+
33
+ def _infer_format(uri: str, explicit: Optional[str]) -> str:
34
+ """Resolve file format from explicit handle.format or file extension."""
35
+ if explicit:
36
+ return explicit.lower()
37
+ low = uri.lower()
38
+ if low.endswith(".parquet"):
39
+ return "parquet"
40
+ if low.endswith(".csv"):
41
+ return "csv"
42
+ return ""
43
+
44
+
45
@register_materializer("polars-connector")
class PolarsConnectorMaterializer(BaseMaterializer):
    """
    Minimal, deterministic materializer for local Parquet/CSV files.

    Responsibilities
    ----------------
    - Cheap schema peek (column names only)
    - DataFrame materialization with optional column projection
    - No side effects; no hidden state
    """

    name = "polars-connector"

    def __init__(self, handle: DatasetHandle):
        super().__init__(handle)
        # Retained for interface parity with the DuckDB materializer; this
        # materializer never populates it.
        self._io_debug: Optional[Dict[str, Any]] = None

    # ------------------------------------------------------------------ #
    # Introspection
    # ------------------------------------------------------------------ #

    def schema(self) -> List[str]:
        """
        Return column names using a lazy scan. Never raises — returns an
        empty list on any failure or unknown format.
        """
        uri = self.handle.uri
        fmt = _infer_format(uri, getattr(self.handle, "format", None))

        try:
            if fmt == "parquet":
                return list(pl.scan_parquet(uri).collect_schema().names())
            if fmt == "csv":
                return list(pl.scan_csv(uri).collect_schema().names())
        except Exception:
            # Best-effort peek: schema discovery must never break validation.
            pass
        return []

    # ------------------------------------------------------------------ #
    # Materialization
    # ------------------------------------------------------------------ #

    def to_polars(self, columns: Optional[List[str]]) -> pl.DataFrame:
        """
        Materialize the dataset into a Polars DataFrame.

        Strategy
        --------
        1) Attempt legacy connectors path (if installed) to preserve behavior.
        2) Otherwise, native Polars scan with projection via `.select()`
           (Polars >= 1.34 removed `columns=` from `scan_*`).

        Args:
            columns: Optional projection; when given, only these columns load.

        Returns:
            The materialized DataFrame.

        Raises:
            IOError: If the format cannot be inferred from the handle or URI.
        """
        # --- Legacy path (optional/back-compat) --------------------------------
        # NOTE: ModuleNotFoundError is a subclass of ImportError, so catching
        # ImportError alone fully covers the package-absent case.
        try:
            from kontra.connectors.factory import ConnectorFactory  # type: ignore

            connector = ConnectorFactory.from_source(self.handle.uri)
            # The legacy API accepts `columns=` (best-effort).
            return connector.load(self.handle.uri, columns=columns)
        except ImportError:
            pass  # Legacy connectors not installed; fall back to native Polars.

        # --- Native Polars path -------------------------------------------------
        uri = self.handle.uri
        fmt = _infer_format(uri, getattr(self.handle, "format", None))

        if fmt == "parquet":
            lf = pl.scan_parquet(uri)
        elif fmt == "csv":
            # Add CSV options here if your data requires (delimiter, nulls, dtypes).
            lf = pl.scan_csv(uri)
        else:
            raise IOError(f"Unsupported format for PolarsConnectorMaterializer: {uri}")

        if columns:
            # `.select()` accepts column names directly; no pl.col wrapping needed.
            lf = lf.select(columns)

        # NOTE: streaming=True is deprecated; default engine suffices for tests and CI.
        return lf.collect()

    # ------------------------------------------------------------------ #
    # Diagnostics
    # ------------------------------------------------------------------ #

    def io_debug(self) -> Optional[Dict[str, Any]]:
        """Reserved hook for I/O diagnostics (none for this materializer)."""
        return None
@@ -0,0 +1,157 @@
1
+ # src/kontra/engine/materializers/postgres.py
2
+ """
3
+ PostgreSQL Materializer - loads PostgreSQL tables to Polars DataFrames.
4
+
5
+ Uses psycopg3's efficient binary COPY protocol for streaming data.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import os
11
+ import time
12
+ from typing import Any, Dict, List, Optional
13
+
14
+ import polars as pl
15
+
16
+ from kontra.connectors.handle import DatasetHandle
17
+ from kontra.connectors.postgres import PostgresConnectionParams, get_connection
18
+ from kontra.connectors.detection import parse_table_reference, get_default_schema, POSTGRESQL
19
+ from contextlib import contextmanager
20
+
21
+ from .base import BaseMaterializer
22
+ from .registry import register_materializer
23
+
24
+
25
+ @contextmanager
26
+ def _get_connection_ctx(handle: DatasetHandle):
27
+ """
28
+ Get a connection context for either BYOC or URI-based handles.
29
+
30
+ For BYOC, yields the external connection directly (not owned by us).
31
+ For URI-based, yields a new connection (owned by context manager).
32
+ """
33
+ if handle.scheme == "byoc" and handle.external_conn is not None:
34
+ # BYOC: yield external connection directly, don't close it
35
+ yield handle.external_conn
36
+ elif handle.db_params:
37
+ # URI-based: use our connection manager
38
+ with get_connection(handle.db_params) as conn:
39
+ yield conn
40
+ else:
41
+ raise ValueError("Handle has neither external_conn nor db_params")
42
+
43
+
44
@register_materializer("postgres")
class PostgresMaterializer(BaseMaterializer):
    """
    Materialize PostgreSQL tables as Polars DataFrames with column projection.

    Features:
    - Efficient data loading via psycopg3
    - Column projection at source (SELECT only needed columns)
    - Binary protocol for faster transfers (when available)
    - BYOC (Bring Your Own Connection) support
    """

    def __init__(self, handle: DatasetHandle):
        super().__init__(handle)

        self._is_byoc = handle.scheme == "byoc" and handle.external_conn is not None

        if self._is_byoc:
            # BYOC: the table reference travels on the handle itself.
            if not handle.table_ref:
                raise ValueError("BYOC handle missing table_ref")
            _db, schema_name, table_name = parse_table_reference(handle.table_ref)
            self._schema_name = schema_name or get_default_schema(POSTGRESQL)
            self._table_name = table_name
            self._qualified_table = (
                f"{_esc_ident(self._schema_name)}.{_esc_ident(self._table_name)}"
            )
        elif handle.db_params:
            # URI-based: connection parameters carry the table identity.
            self.params: PostgresConnectionParams = handle.db_params
            self._schema_name = self.params.schema
            self._table_name = self.params.table
            self._qualified_table = self.params.qualified_table
        else:
            raise ValueError("PostgreSQL handle missing db_params or external_conn")

        self._io_debug_enabled = bool(os.getenv("KONTRA_IO_DEBUG"))
        self._last_io_debug: Optional[Dict[str, Any]] = None

    def schema(self) -> List[str]:
        """Return column names without loading any rows."""
        with _get_connection_ctx(self.handle) as conn:
            with conn.cursor() as cur:
                cur.execute(
                    """
                    SELECT column_name
                    FROM information_schema.columns
                    WHERE table_schema = %s AND table_name = %s
                    ORDER BY ordinal_position
                    """,
                    (self._schema_name, self._table_name),
                )
                return [row[0] for row in cur.fetchall()]

    def to_polars(self, columns: Optional[List[str]]) -> pl.DataFrame:
        """
        Load table data as a Polars DataFrame with optional column projection.

        Works for both URI-based connections (handle.db_params) and BYOC
        connections (handle.external_conn).

        Args:
            columns: Columns to load; None loads every column.

        Returns:
            Polars DataFrame holding the requested columns.
        """
        started = time.perf_counter()

        # Projection is applied at the source so only needed bytes transfer.
        projection = ", ".join(_esc_ident(c) for c in columns) if columns else "*"
        query = f"SELECT {projection} FROM {self._qualified_table}"

        with _get_connection_ctx(self.handle) as conn:
            with conn.cursor() as cur:
                cur.execute(query)
                # Full fetch; chunked loading may be preferable for huge tables.
                records = cur.fetchall()
                names = [d[0] for d in cur.description] if cur.description else []

        finished = time.perf_counter()

        if records:
            frame = pl.DataFrame(records, schema=names, orient="row")
        else:
            # No rows: emit an empty frame with string-typed placeholder columns.
            frame = pl.DataFrame(schema={name: pl.Utf8 for name in names})

        if self._io_debug_enabled:
            self._last_io_debug = {
                "materializer": "postgres",
                "mode": "psycopg_fetch" if not self._is_byoc else "byoc_fetch",
                "table": self._qualified_table,
                "columns_requested": list(columns or []),
                "column_count": len(columns or names),
                "row_count": len(records) if records else 0,
                "elapsed_ms": int((finished - started) * 1000),
            }
        else:
            self._last_io_debug = None

        return frame

    def io_debug(self) -> Optional[Dict[str, Any]]:
        """Return diagnostics from the last load (None unless KONTRA_IO_DEBUG is set)."""
        return self._last_io_debug
152
+
153
+
154
+ def _esc_ident(name: str) -> str:
155
+ """Escape a PostgreSQL identifier (column/table name)."""
156
+ # Double any internal quotes and wrap in quotes
157
+ return '"' + name.replace('"', '""') + '"'
@@ -0,0 +1,138 @@
1
+ # src/kontra/engine/materializers/registry.py
2
+ from __future__ import annotations
3
+
4
+ from typing import TYPE_CHECKING, Callable, Dict, List
5
+
6
+ from kontra.connectors.handle import DatasetHandle
7
+
8
+ if TYPE_CHECKING:
9
+ # Import from the new base file
10
+ from .base import BaseMaterializer as Materializer
11
+ from .duckdb import DuckDBMaterializer # noqa: F401
12
+ from .polars_connector import PolarsConnectorMaterializer # noqa: F401
13
+ from .postgres import PostgresMaterializer # noqa: F401
14
+ from .sqlserver import SqlServerMaterializer # noqa: F401
15
+
16
+
17
+ # Registry: materializer_name -> ctor(handle) function
18
+ _MATS: Dict[str, Callable[[DatasetHandle], Materializer]] = {}
19
+ # Simple order for picking when multiple can handle a handle
20
+ _ORDER: List[str] = []
21
+
22
+
23
+ def register_materializer(name: str):
24
+ """
25
+ Decorator to register a materializer class under a stable name.
26
+ The class must implement the Materializer protocol.
27
+ """
28
+
29
+ def deco(cls: Callable[[DatasetHandle], Materializer]) -> Callable[
30
+ [DatasetHandle], Materializer
31
+ ]:
32
+ if name in _MATS:
33
+ raise ValueError(f"Materializer '{name}' is already registered.")
34
+ _MATS[name] = cls
35
+ if name not in _ORDER:
36
+ _ORDER.append(name)
37
+ cls.materializer_name = name # friendly label for stats.io
38
+ return cls
39
+
40
+ return deco
41
+
42
+
43
def pick_materializer(handle: "DatasetHandle") -> "Materializer":
    """
    Choose the best materializer for the given dataset handle.

    Policy (v1.4 - BYOC support):
    - BYOC handles use the materializer matching their dialect.
    - PostgreSQL URIs use the PostgreSQL materializer.
    - SQL Server URIs use the SQL Server materializer.
    - Remote files (s3, http) with known formats use the DuckDB materializer.
    - Otherwise, fall back to the PolarsConnector materializer.

    This logic is INDEPENDENT of the projection flag.
    """

    def _build(key: str, missing_msg: str) -> "Materializer":
        # Shared lookup-or-raise for backends that are required in context.
        ctor = _MATS.get(key)
        if ctor is None:
            raise RuntimeError(missing_msg)
        return ctor(handle)

    pg_missing = (
        "PostgreSQL materializer not registered. "
        "Ensure psycopg is installed: pip install 'psycopg[binary]'"
    )
    mssql_missing = (
        "SQL Server materializer not registered. "
        "Ensure pymssql is installed: pip install pymssql"
    )

    # BYOC: route based on dialect
    if handle.scheme == "byoc":
        if handle.dialect == "postgresql":
            return _build("postgres", pg_missing)
        if handle.dialect == "sqlserver":
            return _build("sqlserver", mssql_missing)
        raise RuntimeError(
            f"Unsupported BYOC dialect: {handle.dialect}. "
            "Supported: postgresql, sqlserver"
        )

    # Dedicated database materializers for URI schemes.
    if handle.scheme in ("postgres", "postgresql"):
        return _build("postgres", pg_missing)
    if handle.scheme in ("mssql", "sqlserver"):
        return _build("sqlserver", mssql_missing)

    # Remote files with known formats: use DuckDB for efficient I/O.
    # Includes S3, HTTP(S), and Azure (ADLS Gen2, Azure Blob).
    remote = handle.scheme in ("s3", "http", "https", "abfs", "abfss", "az")
    known = handle.format in ("parquet", "csv")
    if remote and known:
        duck = _MATS.get("duckdb")
        if duck:
            return duck(handle)

    # Fallback for local files or unknown formats
    fallback = _MATS.get("polars-connector")
    if fallback is None:
        raise RuntimeError(
            "No default materializer registered (polars-connector missing)"
        )
    return fallback(handle)
117
+
118
+
119
def register_default_materializers() -> None:
    """
    Eagerly import built-in materializers so their @register_materializer
    decorators run and populate the registry.
    """
    from contextlib import suppress

    # Always-available builtins: importing them runs the decorator side-effects.
    from . import duckdb  # noqa: F401
    from . import polars_connector  # noqa: F401

    # Database materializers depend on optional drivers (psycopg / pymssql);
    # skip quietly when the driver is not installed.
    with suppress(ImportError):
        from . import postgres  # noqa: F401
    with suppress(ImportError):
        from . import sqlserver  # noqa: F401
@@ -0,0 +1,160 @@
1
+ # src/kontra/engine/materializers/sqlserver.py
2
+ """
3
+ SQL Server Materializer - loads SQL Server tables to Polars DataFrames.
4
+
5
+ Uses pymssql for database connectivity.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import os
11
+ import time
12
+ from typing import Any, Dict, List, Optional
13
+
14
+ import polars as pl
15
+
16
+ from kontra.connectors.handle import DatasetHandle
17
+ from kontra.connectors.sqlserver import SqlServerConnectionParams, get_connection
18
+ from kontra.connectors.detection import parse_table_reference, get_default_schema, SQLSERVER
19
+ from contextlib import contextmanager
20
+
21
+ from .base import BaseMaterializer
22
+ from .registry import register_materializer
23
+
24
+
25
+ @contextmanager
26
+ def _get_connection_ctx(handle: DatasetHandle):
27
+ """
28
+ Get a connection context for either BYOC or URI-based handles.
29
+
30
+ For BYOC, yields the external connection directly (not owned by us).
31
+ For URI-based, yields a new connection (owned by context manager).
32
+ """
33
+ if handle.scheme == "byoc" and handle.external_conn is not None:
34
+ # BYOC: yield external connection directly, don't close it
35
+ yield handle.external_conn
36
+ elif handle.db_params:
37
+ # URI-based: use our connection manager
38
+ with get_connection(handle.db_params) as conn:
39
+ yield conn
40
+ else:
41
+ raise ValueError("Handle has neither external_conn nor db_params")
42
+
43
+
44
@register_materializer("sqlserver")
class SqlServerMaterializer(BaseMaterializer):
    """
    Materialize SQL Server tables as Polars DataFrames with column projection.

    Features:
    - Efficient data loading via pymssql
    - Column projection at source (SELECT only needed columns)
    - BYOC (Bring Your Own Connection) support
    """

    materializer_name = "sqlserver"

    def __init__(self, handle: DatasetHandle):
        super().__init__(handle)

        self._is_byoc = handle.scheme == "byoc" and handle.external_conn is not None

        if self._is_byoc:
            # BYOC: get table info from handle
            if not handle.table_ref:
                raise ValueError("BYOC handle missing table_ref")
            _db, schema, table = parse_table_reference(handle.table_ref)
            self._schema_name = schema or get_default_schema(SQLSERVER)
            self._table_name = table
        elif handle.db_params:
            # URI-based: use params
            self.params: SqlServerConnectionParams = handle.db_params
            self._schema_name = self.params.schema
            self._table_name = self.params.table
        else:
            raise ValueError("SQL Server handle missing db_params or external_conn")

        # Build the qualified name with _esc_ident (doubles any ']') instead of
        # raw f-string brackets, matching both the column escaping in to_polars
        # and the PostgreSQL materializer's behavior. Raw brackets would break
        # (or allow injection through) identifiers containing ']'.
        self._qualified_table = (
            f"{_esc_ident(self._schema_name)}.{_esc_ident(self._table_name)}"
        )

        self._io_debug_enabled = bool(os.getenv("KONTRA_IO_DEBUG"))
        self._last_io_debug: Optional[Dict[str, Any]] = None

    def schema(self) -> List[str]:
        """Return column names without loading data."""
        with _get_connection_ctx(self.handle) as conn:
            cursor = conn.cursor()
            # pymssql uses %s as placeholder (pyodbc uses ?)
            cursor.execute(
                """
                SELECT column_name
                FROM information_schema.columns
                WHERE table_schema = %s AND table_name = %s
                ORDER BY ordinal_position
                """,
                (self._schema_name, self._table_name),
            )
            return [row[0] for row in cursor.fetchall()]

    def to_polars(self, columns: Optional[List[str]]) -> pl.DataFrame:
        """
        Load table data as a Polars DataFrame with optional column projection.

        Supports both URI-based connections (handle.db_params) and
        BYOC connections (handle.external_conn).

        Args:
            columns: List of columns to load. If None, loads all columns.

        Returns:
            Polars DataFrame with the requested columns.
        """
        t0 = time.perf_counter()

        # Build column list for SELECT (projection happens at the source).
        if columns:
            cols_sql = ", ".join(_esc_ident(c) for c in columns)
        else:
            cols_sql = "*"

        query = f"SELECT {cols_sql} FROM {self._qualified_table}"

        with _get_connection_ctx(self.handle) as conn:
            cursor = conn.cursor()
            cursor.execute(query)
            # Fetch all rows - for large tables, consider chunked loading
            rows = cursor.fetchall()
            col_names = [desc[0] for desc in cursor.description] if cursor.description else []

        t1 = time.perf_counter()

        # Convert to Polars DataFrame
        if rows:
            df = pl.DataFrame(rows, schema=col_names, orient="row")
        else:
            # Empty DataFrame with string-typed placeholder columns.
            df = pl.DataFrame(schema={name: pl.Utf8 for name in col_names})

        if self._io_debug_enabled:
            self._last_io_debug = {
                "materializer": "sqlserver",
                "mode": "pymssql_fetch" if not self._is_byoc else "byoc_fetch",
                "table": self._qualified_table,
                "columns_requested": list(columns or []),
                "column_count": len(columns or col_names),
                "row_count": len(rows) if rows else 0,
                "elapsed_ms": int((t1 - t0) * 1000),
            }
        else:
            self._last_io_debug = None

        return df

    def io_debug(self) -> Optional[Dict[str, Any]]:
        """Return diagnostics from the last load (None unless KONTRA_IO_DEBUG is set)."""
        return self._last_io_debug
154
+
155
+
156
+ def _esc_ident(name: str) -> str:
157
+ """Escape a SQL Server identifier (column/table name)."""
158
+ # SQL Server uses [brackets] for quoting identifiers
159
+ # Double any internal brackets
160
+ return "[" + name.replace("]", "]]") + "]"
@@ -0,0 +1,15 @@
1
+ # src/contra/engine/result.py
2
+ from typing import List, Dict, Any
3
+ from dataclasses import dataclass, asdict
4
+
5
@dataclass
class ValidationResult:
    """Outcome of validating one dataset.

    Attributes:
        dataset: Name/identifier of the validated dataset.
        results: Per-rule result dictionaries.
        summary: Aggregate statistics; the "passed" key carries overall success.
    """

    dataset: str
    results: List[Dict[str, Any]]
    summary: Dict[str, Any]

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict (JSON-friendly) representation."""
        return asdict(self)

    def passed(self) -> bool:
        """Whether validation passed overall (False when the key is absent).

        Coerced with bool() so the annotated return type holds even when the
        summary stores a truthy non-bool value.
        """
        return bool(self.summary.get("passed", False))