kontra 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. kontra/__init__.py +1871 -0
  2. kontra/api/__init__.py +22 -0
  3. kontra/api/compare.py +340 -0
  4. kontra/api/decorators.py +153 -0
  5. kontra/api/results.py +2121 -0
  6. kontra/api/rules.py +681 -0
  7. kontra/cli/__init__.py +0 -0
  8. kontra/cli/commands/__init__.py +1 -0
  9. kontra/cli/commands/config.py +153 -0
  10. kontra/cli/commands/diff.py +450 -0
  11. kontra/cli/commands/history.py +196 -0
  12. kontra/cli/commands/profile.py +289 -0
  13. kontra/cli/commands/validate.py +468 -0
  14. kontra/cli/constants.py +6 -0
  15. kontra/cli/main.py +48 -0
  16. kontra/cli/renderers.py +304 -0
  17. kontra/cli/utils.py +28 -0
  18. kontra/config/__init__.py +34 -0
  19. kontra/config/loader.py +127 -0
  20. kontra/config/models.py +49 -0
  21. kontra/config/settings.py +797 -0
  22. kontra/connectors/__init__.py +0 -0
  23. kontra/connectors/db_utils.py +251 -0
  24. kontra/connectors/detection.py +323 -0
  25. kontra/connectors/handle.py +368 -0
  26. kontra/connectors/postgres.py +127 -0
  27. kontra/connectors/sqlserver.py +226 -0
  28. kontra/engine/__init__.py +0 -0
  29. kontra/engine/backends/duckdb_session.py +227 -0
  30. kontra/engine/backends/duckdb_utils.py +18 -0
  31. kontra/engine/backends/polars_backend.py +47 -0
  32. kontra/engine/engine.py +1205 -0
  33. kontra/engine/executors/__init__.py +15 -0
  34. kontra/engine/executors/base.py +50 -0
  35. kontra/engine/executors/database_base.py +528 -0
  36. kontra/engine/executors/duckdb_sql.py +607 -0
  37. kontra/engine/executors/postgres_sql.py +162 -0
  38. kontra/engine/executors/registry.py +69 -0
  39. kontra/engine/executors/sqlserver_sql.py +163 -0
  40. kontra/engine/materializers/__init__.py +14 -0
  41. kontra/engine/materializers/base.py +42 -0
  42. kontra/engine/materializers/duckdb.py +110 -0
  43. kontra/engine/materializers/factory.py +22 -0
  44. kontra/engine/materializers/polars_connector.py +131 -0
  45. kontra/engine/materializers/postgres.py +157 -0
  46. kontra/engine/materializers/registry.py +138 -0
  47. kontra/engine/materializers/sqlserver.py +160 -0
  48. kontra/engine/result.py +15 -0
  49. kontra/engine/sql_utils.py +611 -0
  50. kontra/engine/sql_validator.py +609 -0
  51. kontra/engine/stats.py +194 -0
  52. kontra/engine/types.py +138 -0
  53. kontra/errors.py +533 -0
  54. kontra/logging.py +85 -0
  55. kontra/preplan/__init__.py +5 -0
  56. kontra/preplan/planner.py +253 -0
  57. kontra/preplan/postgres.py +179 -0
  58. kontra/preplan/sqlserver.py +191 -0
  59. kontra/preplan/types.py +24 -0
  60. kontra/probes/__init__.py +20 -0
  61. kontra/probes/compare.py +400 -0
  62. kontra/probes/relationship.py +283 -0
  63. kontra/reporters/__init__.py +0 -0
  64. kontra/reporters/json_reporter.py +190 -0
  65. kontra/reporters/rich_reporter.py +11 -0
  66. kontra/rules/__init__.py +35 -0
  67. kontra/rules/base.py +186 -0
  68. kontra/rules/builtin/__init__.py +40 -0
  69. kontra/rules/builtin/allowed_values.py +156 -0
  70. kontra/rules/builtin/compare.py +188 -0
  71. kontra/rules/builtin/conditional_not_null.py +213 -0
  72. kontra/rules/builtin/conditional_range.py +310 -0
  73. kontra/rules/builtin/contains.py +138 -0
  74. kontra/rules/builtin/custom_sql_check.py +182 -0
  75. kontra/rules/builtin/disallowed_values.py +140 -0
  76. kontra/rules/builtin/dtype.py +203 -0
  77. kontra/rules/builtin/ends_with.py +129 -0
  78. kontra/rules/builtin/freshness.py +240 -0
  79. kontra/rules/builtin/length.py +193 -0
  80. kontra/rules/builtin/max_rows.py +35 -0
  81. kontra/rules/builtin/min_rows.py +46 -0
  82. kontra/rules/builtin/not_null.py +121 -0
  83. kontra/rules/builtin/range.py +222 -0
  84. kontra/rules/builtin/regex.py +143 -0
  85. kontra/rules/builtin/starts_with.py +129 -0
  86. kontra/rules/builtin/unique.py +124 -0
  87. kontra/rules/condition_parser.py +203 -0
  88. kontra/rules/execution_plan.py +455 -0
  89. kontra/rules/factory.py +103 -0
  90. kontra/rules/predicates.py +25 -0
  91. kontra/rules/registry.py +24 -0
  92. kontra/rules/static_predicates.py +120 -0
  93. kontra/scout/__init__.py +9 -0
  94. kontra/scout/backends/__init__.py +17 -0
  95. kontra/scout/backends/base.py +111 -0
  96. kontra/scout/backends/duckdb_backend.py +359 -0
  97. kontra/scout/backends/postgres_backend.py +519 -0
  98. kontra/scout/backends/sqlserver_backend.py +577 -0
  99. kontra/scout/dtype_mapping.py +150 -0
  100. kontra/scout/patterns.py +69 -0
  101. kontra/scout/profiler.py +801 -0
  102. kontra/scout/reporters/__init__.py +39 -0
  103. kontra/scout/reporters/json_reporter.py +165 -0
  104. kontra/scout/reporters/markdown_reporter.py +152 -0
  105. kontra/scout/reporters/rich_reporter.py +144 -0
  106. kontra/scout/store.py +208 -0
  107. kontra/scout/suggest.py +200 -0
  108. kontra/scout/types.py +652 -0
  109. kontra/state/__init__.py +29 -0
  110. kontra/state/backends/__init__.py +79 -0
  111. kontra/state/backends/base.py +348 -0
  112. kontra/state/backends/local.py +480 -0
  113. kontra/state/backends/postgres.py +1010 -0
  114. kontra/state/backends/s3.py +543 -0
  115. kontra/state/backends/sqlserver.py +969 -0
  116. kontra/state/fingerprint.py +166 -0
  117. kontra/state/types.py +1061 -0
  118. kontra/version.py +1 -0
  119. kontra-0.5.2.dist-info/METADATA +122 -0
  120. kontra-0.5.2.dist-info/RECORD +124 -0
  121. kontra-0.5.2.dist-info/WHEEL +5 -0
  122. kontra-0.5.2.dist-info/entry_points.txt +2 -0
  123. kontra-0.5.2.dist-info/licenses/LICENSE +17 -0
  124. kontra-0.5.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,150 @@
1
+ # src/kontra/scout/dtype_mapping.py
2
+ """
3
+ Unified dtype mapping for Scout profiler.
4
+
5
+ Provides consistent type normalization across all backends (DuckDB, PostgreSQL, SQL Server).
6
+ This module consolidates dtype mappings that were previously duplicated across backend files.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from typing import Dict
12
+
13
# Category sets over the normalized Kontra type names.
# Each set lists the normalized names that belong to one broad category.
NUMERIC_TYPES = {
    "int",
    "float",
}
TEMPORAL_TYPES = {
    "date",
    "time",
    "datetime",
    "interval",
}
STRING_TYPES = {
    "string",
}
BOOLEAN_TYPES = {
    "bool",
}
BINARY_TYPES = {
    "binary",
}
19
+
20
# Master lookup from raw database type name to normalized Kontra type.
# Keys are lowercase so callers can do case-insensitive lookups by lowering
# their input first. The table is built from (normalized, raw names) groups;
# a normalized type may appear in several groups so that key order follows
# the grouping below.
DTYPE_MAP: Dict[str, str] = {
    raw_name: normalized
    for normalized, raw_names in (
        # Integer types (common)
        (
            "int",
            (
                "tinyint",
                "smallint",
                "integer",
                "int",
                "bigint",
                "hugeint",
                "int2",
                "int4",
                "int8",
                "int16",
                "int32",
                "int64",
                "int128",
                "serial",
                "bigserial",
            ),
        ),
        # Unsigned integers (DuckDB)
        (
            "int",
            (
                "utinyint",
                "usmallint",
                "uinteger",
                "ubigint",
                "uint8",
                "uint16",
                "uint32",
                "uint64",
            ),
        ),
        # Float types (common)
        (
            "float",
            (
                "float",
                "float4",
                "float8",
                "real",
                "double",
                "double precision",
                "decimal",
                "numeric",
            ),
        ),
        # Float types (SQL Server)
        ("float", ("money", "smallmoney")),
        # Boolean types ("bit" is the SQL Server spelling)
        ("bool", ("boolean", "bool", "bit")),
        # String types (common; "bpchar" is PostgreSQL blank-padded char)
        (
            "string",
            (
                "varchar",
                "char",
                "bpchar",
                "text",
                "string",
                "character varying",
                "character",
            ),
        ),
        # String types (SQL Server)
        ("string", ("nvarchar", "nchar", "ntext")),
        # Date types
        ("date", ("date",)),
        # Time types
        ("time", ("time", "time without time zone", "time with time zone")),
        # Datetime types (common)
        (
            "datetime",
            (
                "timestamp",
                "timestamp with time zone",
                "timestamp without time zone",
                "timestamptz",
            ),
        ),
        # Datetime types (SQL Server)
        ("datetime", ("datetime", "datetime2", "smalldatetime", "datetimeoffset")),
        # Interval
        ("interval", ("interval",)),
        # Binary types (common; "bytea" is PostgreSQL)
        ("binary", ("blob", "bytea")),
        # Binary types (SQL Server)
        ("binary", ("binary", "varbinary", "image")),
        # UUID / special string types
        # ("uniqueidentifier" and "xml" are SQL Server)
        ("string", ("uuid", "json", "jsonb", "uniqueidentifier", "xml")),
    )
    for raw_name in raw_names
}
108
+
109
+
110
def normalize_dtype(raw_type: str) -> str:
    """
    Map a raw database type string onto a normalized Kontra type name.

    The input is lowercased and stripped, anything from the first "(" onward
    is discarded, and the remaining base name is looked up in DTYPE_MAP.

    Args:
        raw_type: Raw type string from database (e.g., "VARCHAR(255)", "BIGINT")

    Returns:
        The DTYPE_MAP value for the base type name, or "unknown" when the
        base name has no entry.

    Examples:
        >>> normalize_dtype("VARCHAR(255)")
        'string'
        >>> normalize_dtype("DECIMAL(10,2)")
        'float'
        >>> normalize_dtype("bigint")
        'int'
    """
    cleaned = raw_type.lower().strip()

    # Parameterized types such as DECIMAL(10,2): keep only the text before
    # the first "(".
    type_name, _, _ = cleaned.partition("(")
    type_name = type_name.strip()

    if type_name in DTYPE_MAP:
        return DTYPE_MAP[type_name]
    return "unknown"
136
+
137
+
138
def is_numeric_type(normalized_type: str) -> bool:
    """Return True when the normalized type name is a member of NUMERIC_TYPES."""
    is_member = normalized_type in NUMERIC_TYPES
    return is_member
141
+
142
+
143
def is_temporal_type(normalized_type: str) -> bool:
    """Return True when the normalized type name is a member of TEMPORAL_TYPES."""
    is_member = normalized_type in TEMPORAL_TYPES
    return is_member
146
+
147
+
148
def is_string_type(normalized_type: str) -> bool:
    """Return True when the normalized type name is a member of STRING_TYPES."""
    is_member = normalized_type in STRING_TYPES
    return is_member
@@ -0,0 +1,69 @@
1
+ # src/kontra/scout/patterns.py
2
+ """
3
+ Pattern detection for common data formats.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import re
9
+ from typing import List
10
+
11
+
12
# Compiled regexes for common value formats, keyed by pattern name.
# Each entry below is (name, regex source, compile flags); the table is
# compiled once at import time and keeps the order listed here.
PATTERNS = {
    name: re.compile(source, flags)
    for name, source, flags in (
        ("email", r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$", 0),
        (
            "uuid",
            r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$",
            0,
        ),
        (
            "phone_us",
            r"^\+?1?[-.\s]?\(?[0-9]{3}\)?[-.\s]?[0-9]{3}[-.\s]?[0-9]{4}$",
            0,
        ),
        ("phone_intl", r"^\+[1-9]\d{6,14}$", 0),
        ("url", r"^https?://[^\s]+$", 0),
        ("ipv4", r"^(?:[0-9]{1,3}\.){3}[0-9]{1,3}$", 0),
        ("ipv6", r"^([0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}$", 0),
        ("iso_date", r"^\d{4}-\d{2}-\d{2}$", 0),
        # No end anchor: anything may follow the seconds field.
        ("iso_datetime", r"^\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}", 0),
        ("hex_color", r"^#[0-9A-Fa-f]{6}$", 0),
        ("credit_card", r"^[0-9]{4}[- ]?[0-9]{4}[- ]?[0-9]{4}[- ]?[0-9]{4}$", 0),
        ("ssn", r"^\d{3}-\d{2}-\d{4}$", 0),
        ("zip_us", r"^\d{5}(-\d{4})?$", 0),
        ("slug", r"^[a-z0-9]+(?:-[a-z0-9]+)*$", 0),
        # DOTALL lets "." span newlines inside the bracketed body.
        ("json", r"^[\[\{].*[\]\}]$", re.DOTALL),
    )
}
34
+
35
+
36
def detect_patterns(sample_values: List[str], threshold: float = 0.8) -> List[str]:
    """
    Detect common patterns in a sample of values.

    None, empty, and whitespace-only values are ignored: they count neither
    as matches nor toward the total. Any other non-string value is converted
    with str() before it is filtered and matched.

    Args:
        sample_values: Values to analyze.
        threshold: Minimum fraction of the remaining values that must match
            a pattern for it to be reported (default: 0.8).

    Returns:
        Names of the patterns in PATTERNS matched by at least `threshold` of
        the remaining values, in PATTERNS iteration order. An empty list is
        returned when there are no usable values.
    """
    if not sample_values:
        return []

    # Coerce once, up front, so the blank filter and the regex match operate
    # on the same text and a non-string value cannot fail on .strip().
    as_text = [str(value) for value in sample_values if value is not None]
    non_empty = [text for text in as_text if text.strip()]
    if not non_empty:
        return []

    total = len(non_empty)
    matches = []
    for pattern_name, regex in PATTERNS.items():
        match_count = sum(1 for text in non_empty if regex.match(text))
        if match_count / total >= threshold:
            matches.append(pattern_name)

    return matches
62
+
63
+
64
def get_pattern_regex(pattern_name: str) -> str:
    """Return the regex source registered in PATTERNS for a name, or "" if there is none."""
    compiled = PATTERNS.get(pattern_name)
    if not compiled:
        return ""
    return compiled.pattern