kontra 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kontra/__init__.py +1871 -0
- kontra/api/__init__.py +22 -0
- kontra/api/compare.py +340 -0
- kontra/api/decorators.py +153 -0
- kontra/api/results.py +2121 -0
- kontra/api/rules.py +681 -0
- kontra/cli/__init__.py +0 -0
- kontra/cli/commands/__init__.py +1 -0
- kontra/cli/commands/config.py +153 -0
- kontra/cli/commands/diff.py +450 -0
- kontra/cli/commands/history.py +196 -0
- kontra/cli/commands/profile.py +289 -0
- kontra/cli/commands/validate.py +468 -0
- kontra/cli/constants.py +6 -0
- kontra/cli/main.py +48 -0
- kontra/cli/renderers.py +304 -0
- kontra/cli/utils.py +28 -0
- kontra/config/__init__.py +34 -0
- kontra/config/loader.py +127 -0
- kontra/config/models.py +49 -0
- kontra/config/settings.py +797 -0
- kontra/connectors/__init__.py +0 -0
- kontra/connectors/db_utils.py +251 -0
- kontra/connectors/detection.py +323 -0
- kontra/connectors/handle.py +368 -0
- kontra/connectors/postgres.py +127 -0
- kontra/connectors/sqlserver.py +226 -0
- kontra/engine/__init__.py +0 -0
- kontra/engine/backends/duckdb_session.py +227 -0
- kontra/engine/backends/duckdb_utils.py +18 -0
- kontra/engine/backends/polars_backend.py +47 -0
- kontra/engine/engine.py +1205 -0
- kontra/engine/executors/__init__.py +15 -0
- kontra/engine/executors/base.py +50 -0
- kontra/engine/executors/database_base.py +528 -0
- kontra/engine/executors/duckdb_sql.py +607 -0
- kontra/engine/executors/postgres_sql.py +162 -0
- kontra/engine/executors/registry.py +69 -0
- kontra/engine/executors/sqlserver_sql.py +163 -0
- kontra/engine/materializers/__init__.py +14 -0
- kontra/engine/materializers/base.py +42 -0
- kontra/engine/materializers/duckdb.py +110 -0
- kontra/engine/materializers/factory.py +22 -0
- kontra/engine/materializers/polars_connector.py +131 -0
- kontra/engine/materializers/postgres.py +157 -0
- kontra/engine/materializers/registry.py +138 -0
- kontra/engine/materializers/sqlserver.py +160 -0
- kontra/engine/result.py +15 -0
- kontra/engine/sql_utils.py +611 -0
- kontra/engine/sql_validator.py +609 -0
- kontra/engine/stats.py +194 -0
- kontra/engine/types.py +138 -0
- kontra/errors.py +533 -0
- kontra/logging.py +85 -0
- kontra/preplan/__init__.py +5 -0
- kontra/preplan/planner.py +253 -0
- kontra/preplan/postgres.py +179 -0
- kontra/preplan/sqlserver.py +191 -0
- kontra/preplan/types.py +24 -0
- kontra/probes/__init__.py +20 -0
- kontra/probes/compare.py +400 -0
- kontra/probes/relationship.py +283 -0
- kontra/reporters/__init__.py +0 -0
- kontra/reporters/json_reporter.py +190 -0
- kontra/reporters/rich_reporter.py +11 -0
- kontra/rules/__init__.py +35 -0
- kontra/rules/base.py +186 -0
- kontra/rules/builtin/__init__.py +40 -0
- kontra/rules/builtin/allowed_values.py +156 -0
- kontra/rules/builtin/compare.py +188 -0
- kontra/rules/builtin/conditional_not_null.py +213 -0
- kontra/rules/builtin/conditional_range.py +310 -0
- kontra/rules/builtin/contains.py +138 -0
- kontra/rules/builtin/custom_sql_check.py +182 -0
- kontra/rules/builtin/disallowed_values.py +140 -0
- kontra/rules/builtin/dtype.py +203 -0
- kontra/rules/builtin/ends_with.py +129 -0
- kontra/rules/builtin/freshness.py +240 -0
- kontra/rules/builtin/length.py +193 -0
- kontra/rules/builtin/max_rows.py +35 -0
- kontra/rules/builtin/min_rows.py +46 -0
- kontra/rules/builtin/not_null.py +121 -0
- kontra/rules/builtin/range.py +222 -0
- kontra/rules/builtin/regex.py +143 -0
- kontra/rules/builtin/starts_with.py +129 -0
- kontra/rules/builtin/unique.py +124 -0
- kontra/rules/condition_parser.py +203 -0
- kontra/rules/execution_plan.py +455 -0
- kontra/rules/factory.py +103 -0
- kontra/rules/predicates.py +25 -0
- kontra/rules/registry.py +24 -0
- kontra/rules/static_predicates.py +120 -0
- kontra/scout/__init__.py +9 -0
- kontra/scout/backends/__init__.py +17 -0
- kontra/scout/backends/base.py +111 -0
- kontra/scout/backends/duckdb_backend.py +359 -0
- kontra/scout/backends/postgres_backend.py +519 -0
- kontra/scout/backends/sqlserver_backend.py +577 -0
- kontra/scout/dtype_mapping.py +150 -0
- kontra/scout/patterns.py +69 -0
- kontra/scout/profiler.py +801 -0
- kontra/scout/reporters/__init__.py +39 -0
- kontra/scout/reporters/json_reporter.py +165 -0
- kontra/scout/reporters/markdown_reporter.py +152 -0
- kontra/scout/reporters/rich_reporter.py +144 -0
- kontra/scout/store.py +208 -0
- kontra/scout/suggest.py +200 -0
- kontra/scout/types.py +652 -0
- kontra/state/__init__.py +29 -0
- kontra/state/backends/__init__.py +79 -0
- kontra/state/backends/base.py +348 -0
- kontra/state/backends/local.py +480 -0
- kontra/state/backends/postgres.py +1010 -0
- kontra/state/backends/s3.py +543 -0
- kontra/state/backends/sqlserver.py +969 -0
- kontra/state/fingerprint.py +166 -0
- kontra/state/types.py +1061 -0
- kontra/version.py +1 -0
- kontra-0.5.2.dist-info/METADATA +122 -0
- kontra-0.5.2.dist-info/RECORD +124 -0
- kontra-0.5.2.dist-info/WHEEL +5 -0
- kontra-0.5.2.dist-info/entry_points.txt +2 -0
- kontra-0.5.2.dist-info/licenses/LICENSE +17 -0
- kontra-0.5.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
# src/kontra/scout/dtype_mapping.py
|
|
2
|
+
"""
|
|
3
|
+
Unified dtype mapping for Scout profiler.
|
|
4
|
+
|
|
5
|
+
Provides consistent type normalization across all backends (DuckDB, PostgreSQL, SQL Server).
|
|
6
|
+
This module consolidates dtype mappings that were previously duplicated across backend files.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from typing import Dict
|
|
12
|
+
|
|
13
|
+
# Normalized type categories.
# Every value used in DTYPE_MAP below is a member of exactly one of these sets.
NUMERIC_TYPES = {"int", "float"}
TEMPORAL_TYPES = {"date", "time", "datetime", "interval"}
STRING_TYPES = {"string"}
BOOLEAN_TYPES = {"bool"}
BINARY_TYPES = {"binary"}

# Master dtype mapping (lowercase keys for case-insensitive lookup).
# Maps raw database types to normalized Kontra types.
# Keys are bare type names: no length/precision parameters, single spaces
# between the words of multi-word names.
DTYPE_MAP: Dict[str, str] = {
    # Integer types (common)
    "tinyint": "int",
    "smallint": "int",
    "integer": "int",
    "int": "int",
    "bigint": "int",
    "hugeint": "int",
    "int1": "int",  # DuckDB alias for TINYINT
    "int2": "int",
    "int4": "int",
    "int8": "int",
    "int16": "int",
    "int32": "int",
    "int64": "int",
    "int128": "int",
    "smallserial": "int",  # PostgreSQL
    "serial": "int",
    "bigserial": "int",
    # Unsigned integers (DuckDB)
    "utinyint": "int",
    "usmallint": "int",
    "uinteger": "int",
    "ubigint": "int",
    "uhugeint": "int",
    "uint8": "int",
    "uint16": "int",
    "uint32": "int",
    "uint64": "int",
    "uint128": "int",
    # Float types (common)
    "float": "float",
    "float4": "float",
    "float8": "float",
    "real": "float",
    "double": "float",
    "double precision": "float",
    "decimal": "float",
    "numeric": "float",
    # Float types (SQL Server)
    "money": "float",
    "smallmoney": "float",
    # Boolean types
    "boolean": "bool",
    "bool": "bool",
    "bit": "bool",  # SQL Server
    # String types (common)
    "varchar": "string",
    "char": "string",
    "bpchar": "string",  # PostgreSQL blank-padded char
    "text": "string",
    "string": "string",
    "character varying": "string",
    "character": "string",
    # String types (SQL Server)
    "nvarchar": "string",
    "nchar": "string",
    "ntext": "string",
    # Date types
    "date": "date",
    # Time types
    "time": "time",
    "time without time zone": "time",
    "time with time zone": "time",
    "timetz": "time",  # short name for "time with time zone"
    # Datetime types (common)
    "timestamp": "datetime",
    "timestamp with time zone": "datetime",
    "timestamp without time zone": "datetime",
    "timestamptz": "datetime",
    # Datetime types (DuckDB fixed-precision timestamps)
    "timestamp_s": "datetime",
    "timestamp_ms": "datetime",
    "timestamp_ns": "datetime",
    # Datetime types (SQL Server)
    "datetime": "datetime",
    "datetime2": "datetime",
    "smalldatetime": "datetime",
    "datetimeoffset": "datetime",
    # Interval
    "interval": "interval",
    # Binary types (common)
    "blob": "binary",
    "bytea": "binary",  # PostgreSQL
    # Binary types (SQL Server)
    "binary": "binary",
    "varbinary": "binary",
    "image": "binary",
    # UUID / special string types
    "uuid": "string",
    "json": "string",
    "jsonb": "string",
    "uniqueidentifier": "string",  # SQL Server UUID
    "xml": "string",  # SQL Server
}
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def normalize_dtype(raw_type: str) -> str:
    """
    Normalize a raw database type to a simplified Kontra type name.

    The lookup is case-insensitive, ignores everything from the first "("
    onwards (type parameters such as the "(10,2)" in "DECIMAL(10,2)"), and
    treats any run of whitespace inside a multi-word type name as a single
    space.

    Args:
        raw_type: Raw type string from database (e.g., "VARCHAR(255)", "BIGINT").
            A value that is not a string (such as None) is reported as
            "unknown" instead of raising.

    Returns:
        Normalized type: "int", "float", "string", "bool", "date", "datetime",
        "time", "interval", "binary", or "unknown"

    Examples:
        >>> normalize_dtype("VARCHAR(255)")
        'string'
        >>> normalize_dtype("DECIMAL(10,2)")
        'float'
        >>> normalize_dtype("bigint")
        'int'
        >>> normalize_dtype("DOUBLE   PRECISION")
        'float'
    """
    # A missing or non-text type cannot be looked up; report it as unknown
    # rather than failing on the string methods below.
    if not isinstance(raw_type, str):
        return "unknown"

    # Handle parameterized types like DECIMAL(10,2) or VARCHAR(255):
    # keep only the text before the first "(".
    base = raw_type.lower().split("(", 1)[0]

    # split()/join trims both ends and collapses internal whitespace runs so
    # the result lines up with the single-space keys of DTYPE_MAP.
    base = " ".join(base.split())

    return DTYPE_MAP.get(base, "unknown")
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def is_numeric_type(normalized_type: str) -> bool:
    """
    Report whether a normalized type name is one of NUMERIC_TYPES.

    Args:
        normalized_type: Normalized type name to test

    Returns:
        True when the name is a member of NUMERIC_TYPES, False otherwise.
    """
    belongs = normalized_type in NUMERIC_TYPES
    return belongs
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def is_temporal_type(normalized_type: str) -> bool:
    """
    Report whether a normalized type name is one of TEMPORAL_TYPES.

    Args:
        normalized_type: Normalized type name to test

    Returns:
        True when the name is a member of TEMPORAL_TYPES, False otherwise.
    """
    belongs = normalized_type in TEMPORAL_TYPES
    return belongs
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def is_string_type(normalized_type: str) -> bool:
    """
    Report whether a normalized type name is one of STRING_TYPES.

    Args:
        normalized_type: Normalized type name to test

    Returns:
        True when the name is a member of STRING_TYPES, False otherwise.
    """
    belongs = normalized_type in STRING_TYPES
    return belongs
|
kontra/scout/patterns.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# src/kontra/scout/patterns.py
|
|
2
|
+
"""
|
|
3
|
+
Pattern detection for common data formats.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import re
|
|
9
|
+
from typing import List
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# Common patterns to detect.
# Maps a pattern name to a compiled regex that is applied with .match(),
# i.e. anchored at the start of the value. These are shape checks only:
# e.g. "credit_card" accepts any 16 digits and "iso_date" does not validate
# month/day ranges.
PATTERNS = {
    "email": re.compile(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$"),
    "uuid": re.compile(
        r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$"
    ),
    "phone_us": re.compile(
        r"^\+?1?[-.\s]?\(?[0-9]{3}\)?[-.\s]?[0-9]{3}[-.\s]?[0-9]{4}$"
    ),
    "phone_intl": re.compile(r"^\+[1-9]\d{6,14}$"),
    "url": re.compile(r"^https?://[^\s]+$"),
    # Each octet is 1-3 digits with a value of at most 255 (leading zeros are
    # accepted), so strings such as "999.999.999.999" are not reported as IPv4.
    "ipv4": re.compile(
        r"^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9]{1,2})\.){3}"
        r"(?:25[0-5]|2[0-4][0-9]|[01]?[0-9]{1,2})$"
    ),
    # Full eight-group form only; "::" compression is not matched.
    "ipv6": re.compile(r"^([0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}$"),
    "iso_date": re.compile(r"^\d{4}-\d{2}-\d{2}$"),
    # No end anchor: anything after the seconds field is accepted.
    "iso_datetime": re.compile(r"^\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}"),
    "hex_color": re.compile(r"^#[0-9A-Fa-f]{6}$"),
    "credit_card": re.compile(r"^[0-9]{4}[- ]?[0-9]{4}[- ]?[0-9]{4}[- ]?[0-9]{4}$"),
    "ssn": re.compile(r"^\d{3}-\d{2}-\d{4}$"),
    "zip_us": re.compile(r"^\d{5}(-\d{4})?$"),
    "slug": re.compile(r"^[a-z0-9]+(?:-[a-z0-9]+)*$"),
    # Only checks the opening and closing bracket/brace characters.
    "json": re.compile(r"^[\[\{].*[\]\}]$", re.DOTALL),
}
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def detect_patterns(sample_values: List[str], threshold: float = 0.8) -> List[str]:
    """
    Detect common patterns in a sample of string values.

    Args:
        sample_values: List of values to analyze. None entries and entries
            whose text is empty or whitespace-only are ignored. Entries that
            are not strings are converted with str() before matching.
        threshold: Minimum fraction of values that must match (default: 80%)

    Returns:
        List of pattern names, in PATTERNS order, where >= threshold of the
        values that were not ignored match. Empty when nothing is left to
        analyze.
    """
    if not sample_values:
        return []

    # Convert once up front so the blank filter and the regex matching both
    # operate on the same text, whatever type the sample value had.
    texts = [str(value) for value in sample_values if value is not None]

    # Filter out empty strings for pattern matching
    candidates = [text for text in texts if text.strip()]
    if not candidates:
        return []

    total = len(candidates)
    return [
        pattern_name
        for pattern_name, regex in PATTERNS.items()
        if sum(1 for text in candidates if regex.match(text)) / total >= threshold
    ]
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def get_pattern_regex(pattern_name: str) -> str:
    """
    Look up the regex source text registered under a pattern name.

    Args:
        pattern_name: Key to look up in PATTERNS

    Returns:
        The pattern string of the compiled regex stored under that key, or
        an empty string when PATTERNS has no such key.
    """
    compiled = PATTERNS.get(pattern_name)
    return compiled.pattern if compiled else ""
|