kontra 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kontra/__init__.py +1871 -0
- kontra/api/__init__.py +22 -0
- kontra/api/compare.py +340 -0
- kontra/api/decorators.py +153 -0
- kontra/api/results.py +2121 -0
- kontra/api/rules.py +681 -0
- kontra/cli/__init__.py +0 -0
- kontra/cli/commands/__init__.py +1 -0
- kontra/cli/commands/config.py +153 -0
- kontra/cli/commands/diff.py +450 -0
- kontra/cli/commands/history.py +196 -0
- kontra/cli/commands/profile.py +289 -0
- kontra/cli/commands/validate.py +468 -0
- kontra/cli/constants.py +6 -0
- kontra/cli/main.py +48 -0
- kontra/cli/renderers.py +304 -0
- kontra/cli/utils.py +28 -0
- kontra/config/__init__.py +34 -0
- kontra/config/loader.py +127 -0
- kontra/config/models.py +49 -0
- kontra/config/settings.py +797 -0
- kontra/connectors/__init__.py +0 -0
- kontra/connectors/db_utils.py +251 -0
- kontra/connectors/detection.py +323 -0
- kontra/connectors/handle.py +368 -0
- kontra/connectors/postgres.py +127 -0
- kontra/connectors/sqlserver.py +226 -0
- kontra/engine/__init__.py +0 -0
- kontra/engine/backends/duckdb_session.py +227 -0
- kontra/engine/backends/duckdb_utils.py +18 -0
- kontra/engine/backends/polars_backend.py +47 -0
- kontra/engine/engine.py +1205 -0
- kontra/engine/executors/__init__.py +15 -0
- kontra/engine/executors/base.py +50 -0
- kontra/engine/executors/database_base.py +528 -0
- kontra/engine/executors/duckdb_sql.py +607 -0
- kontra/engine/executors/postgres_sql.py +162 -0
- kontra/engine/executors/registry.py +69 -0
- kontra/engine/executors/sqlserver_sql.py +163 -0
- kontra/engine/materializers/__init__.py +14 -0
- kontra/engine/materializers/base.py +42 -0
- kontra/engine/materializers/duckdb.py +110 -0
- kontra/engine/materializers/factory.py +22 -0
- kontra/engine/materializers/polars_connector.py +131 -0
- kontra/engine/materializers/postgres.py +157 -0
- kontra/engine/materializers/registry.py +138 -0
- kontra/engine/materializers/sqlserver.py +160 -0
- kontra/engine/result.py +15 -0
- kontra/engine/sql_utils.py +611 -0
- kontra/engine/sql_validator.py +609 -0
- kontra/engine/stats.py +194 -0
- kontra/engine/types.py +138 -0
- kontra/errors.py +533 -0
- kontra/logging.py +85 -0
- kontra/preplan/__init__.py +5 -0
- kontra/preplan/planner.py +253 -0
- kontra/preplan/postgres.py +179 -0
- kontra/preplan/sqlserver.py +191 -0
- kontra/preplan/types.py +24 -0
- kontra/probes/__init__.py +20 -0
- kontra/probes/compare.py +400 -0
- kontra/probes/relationship.py +283 -0
- kontra/reporters/__init__.py +0 -0
- kontra/reporters/json_reporter.py +190 -0
- kontra/reporters/rich_reporter.py +11 -0
- kontra/rules/__init__.py +35 -0
- kontra/rules/base.py +186 -0
- kontra/rules/builtin/__init__.py +40 -0
- kontra/rules/builtin/allowed_values.py +156 -0
- kontra/rules/builtin/compare.py +188 -0
- kontra/rules/builtin/conditional_not_null.py +213 -0
- kontra/rules/builtin/conditional_range.py +310 -0
- kontra/rules/builtin/contains.py +138 -0
- kontra/rules/builtin/custom_sql_check.py +182 -0
- kontra/rules/builtin/disallowed_values.py +140 -0
- kontra/rules/builtin/dtype.py +203 -0
- kontra/rules/builtin/ends_with.py +129 -0
- kontra/rules/builtin/freshness.py +240 -0
- kontra/rules/builtin/length.py +193 -0
- kontra/rules/builtin/max_rows.py +35 -0
- kontra/rules/builtin/min_rows.py +46 -0
- kontra/rules/builtin/not_null.py +121 -0
- kontra/rules/builtin/range.py +222 -0
- kontra/rules/builtin/regex.py +143 -0
- kontra/rules/builtin/starts_with.py +129 -0
- kontra/rules/builtin/unique.py +124 -0
- kontra/rules/condition_parser.py +203 -0
- kontra/rules/execution_plan.py +455 -0
- kontra/rules/factory.py +103 -0
- kontra/rules/predicates.py +25 -0
- kontra/rules/registry.py +24 -0
- kontra/rules/static_predicates.py +120 -0
- kontra/scout/__init__.py +9 -0
- kontra/scout/backends/__init__.py +17 -0
- kontra/scout/backends/base.py +111 -0
- kontra/scout/backends/duckdb_backend.py +359 -0
- kontra/scout/backends/postgres_backend.py +519 -0
- kontra/scout/backends/sqlserver_backend.py +577 -0
- kontra/scout/dtype_mapping.py +150 -0
- kontra/scout/patterns.py +69 -0
- kontra/scout/profiler.py +801 -0
- kontra/scout/reporters/__init__.py +39 -0
- kontra/scout/reporters/json_reporter.py +165 -0
- kontra/scout/reporters/markdown_reporter.py +152 -0
- kontra/scout/reporters/rich_reporter.py +144 -0
- kontra/scout/store.py +208 -0
- kontra/scout/suggest.py +200 -0
- kontra/scout/types.py +652 -0
- kontra/state/__init__.py +29 -0
- kontra/state/backends/__init__.py +79 -0
- kontra/state/backends/base.py +348 -0
- kontra/state/backends/local.py +480 -0
- kontra/state/backends/postgres.py +1010 -0
- kontra/state/backends/s3.py +543 -0
- kontra/state/backends/sqlserver.py +969 -0
- kontra/state/fingerprint.py +166 -0
- kontra/state/types.py +1061 -0
- kontra/version.py +1 -0
- kontra-0.5.2.dist-info/METADATA +122 -0
- kontra-0.5.2.dist-info/RECORD +124 -0
- kontra-0.5.2.dist-info/WHEEL +5 -0
- kontra-0.5.2.dist-info/entry_points.txt +2 -0
- kontra-0.5.2.dist-info/licenses/LICENSE +17 -0
- kontra-0.5.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
# src/kontra/state/fingerprint.py
|
|
2
|
+
"""
|
|
3
|
+
Fingerprinting utilities for contracts and datasets.
|
|
4
|
+
|
|
5
|
+
Fingerprints are stable hashes that identify a contract or dataset
|
|
6
|
+
across runs, enabling state comparison and history lookup.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import hashlib
|
|
12
|
+
import json
|
|
13
|
+
from datetime import date, datetime
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Any, Dict, List, Optional, TYPE_CHECKING
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _json_default(obj: Any) -> Any:
    """Fallback encoder for values ``json.dumps`` cannot serialize natively.

    Intended to be passed as ``default=`` when building the canonical JSON
    that fingerprints are hashed from.

    Args:
        obj: The value the JSON encoder could not handle on its own.

    Returns:
        An ISO-8601 string for ``date``/``datetime`` values, or a sorted
        list for ``set``/``frozenset`` values.

    Raises:
        TypeError: If ``obj`` is of any other type.
    """
    if isinstance(obj, (date, datetime)):
        return obj.isoformat()
    if isinstance(obj, (set, frozenset)):
        # Set iteration order is not guaranteed to be the same between
        # interpreter runs (string hashing is randomized), so emit a sorted
        # list to keep the resulting fingerprint reproducible.
        try:
            return sorted(obj)
        except TypeError:
            # Elements of mutually unorderable types: fall back to a total
            # order on (type name, repr).
            return sorted(obj, key=lambda item: (type(item).__name__, repr(item)))
    raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")
|
|
23
|
+
|
|
24
|
+
if TYPE_CHECKING:
|
|
25
|
+
from kontra.config.models import Contract
|
|
26
|
+
from kontra.connectors.handle import DatasetHandle
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _stable_hash(data: str) -> str:
    """Return the first 16 hex characters of the SHA-256 digest of ``data``.

    Args:
        data: Text to hash; it is encoded as UTF-8 before hashing.

    Returns:
        A 16-character lowercase hex string.
    """
    hasher = hashlib.sha256()
    hasher.update(data.encode("utf-8"))
    full_digest = hasher.hexdigest()
    return full_digest[:16]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def fingerprint_contract(
    contract: "Contract",
    *,
    include_dataset: bool = False,
) -> str:
    """
    Compute a stable fingerprint for a contract.

    The hashed representation contains the contract name and, for every
    rule, its name and params. Rules are ordered by (name, JSON-serialized
    params) before hashing, so the order in which they are listed on the
    contract does not affect the result. The contract's datasource is
    included only on request.

    Args:
        contract: The Contract object
        include_dataset: If True, include ``contract.datasource`` in the
            fingerprint

    Returns:
        A 16-character hex string (sha256 prefix)
    """

    def _ordering_key(rule: Any) -> Any:
        # Params are serialized with sorted keys so that two rules with the
        # same name are ordered by their content deterministically.
        serialized_params = json.dumps(rule.params, sort_keys=True, default=_json_default)
        return (rule.name, serialized_params)

    payload: Dict[str, Any] = {
        "name": contract.name,
        "rules": [
            {"name": entry.name, "params": entry.params}
            for entry in sorted(contract.rules, key=_ordering_key)
        ],
    }

    if include_dataset:
        payload["datasource"] = contract.datasource

    # Compact separators and sorted keys give one canonical string per payload.
    encoded = json.dumps(
        payload,
        sort_keys=True,
        separators=(",", ":"),
        default=_json_default,
    )
    return _stable_hash(encoded)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def fingerprint_contract_file(path: str) -> str:
    """
    Fingerprint a contract file by hashing its text content.

    The file is not parsed: the fingerprint is derived from the raw text,
    so any edit to the file (including formatting-only edits) produces a
    different value. Prefer fingerprint_contract() when a parsed Contract
    is available.

    Args:
        path: Path to the contract YAML file

    Returns:
        A 16-character hex string
    """
    contract_path = Path(path)
    with contract_path.open("r", encoding="utf-8") as handle:
        text = handle.read()
    return _stable_hash(text)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def fingerprint_dataset(
    handle: "DatasetHandle",
    *,
    include_stats: bool = False,
    row_count: Optional[int] = None,
    schema: Optional[List[str]] = None,
) -> Optional[str]:
    """
    Compute a fingerprint for a dataset.

    The hashed representation always contains the handle's ``uri`` and
    ``scheme``. When the handle carries ``db_params``, their ``host``,
    ``database``, ``schema`` and ``table`` attributes are added (missing
    attributes are recorded as None). Row count and column names are added
    only when ``include_stats`` is True and the value was supplied.

    Args:
        handle: The DatasetHandle
        include_stats: If True, include row count and schema in fingerprint
        row_count: Row count (if known)
        schema: List of column names (if known); sorted before hashing

    Returns:
        A 16-character hex string, or None if fingerprinting fails
    """
    try:
        payload: Dict[str, Any] = {"uri": handle.uri, "scheme": handle.scheme}

        # Database-backed handles contribute their connection identifiers.
        params = handle.db_params
        if params:
            payload["db"] = {
                field: getattr(params, field, None)
                for field in ("host", "database", "schema", "table")
            }

        if include_stats and row_count is not None:
            payload["row_count"] = row_count
        if include_stats and schema is not None:
            payload["schema"] = sorted(schema)

        return _stable_hash(
            json.dumps(payload, sort_keys=True, separators=(",", ":"))
        )
    except Exception:
        # Best effort: a fingerprinting failure must not fail validation.
        return None
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def fingerprint_from_name_and_uri(name: str, uri: str) -> str:
    """
    Fingerprint derived only from a contract name and a dataset URI.

    Intended for callers that do not have the full Contract object.

    Args:
        name: Contract name
        uri: Dataset URI

    Returns:
        A 16-character hex string
    """
    identity = {"name": name, "uri": uri}
    encoded = json.dumps(identity, sort_keys=True, separators=(",", ":"))
    return _stable_hash(encoded)
|