kontra 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. kontra/__init__.py +1871 -0
  2. kontra/api/__init__.py +22 -0
  3. kontra/api/compare.py +340 -0
  4. kontra/api/decorators.py +153 -0
  5. kontra/api/results.py +2121 -0
  6. kontra/api/rules.py +681 -0
  7. kontra/cli/__init__.py +0 -0
  8. kontra/cli/commands/__init__.py +1 -0
  9. kontra/cli/commands/config.py +153 -0
  10. kontra/cli/commands/diff.py +450 -0
  11. kontra/cli/commands/history.py +196 -0
  12. kontra/cli/commands/profile.py +289 -0
  13. kontra/cli/commands/validate.py +468 -0
  14. kontra/cli/constants.py +6 -0
  15. kontra/cli/main.py +48 -0
  16. kontra/cli/renderers.py +304 -0
  17. kontra/cli/utils.py +28 -0
  18. kontra/config/__init__.py +34 -0
  19. kontra/config/loader.py +127 -0
  20. kontra/config/models.py +49 -0
  21. kontra/config/settings.py +797 -0
  22. kontra/connectors/__init__.py +0 -0
  23. kontra/connectors/db_utils.py +251 -0
  24. kontra/connectors/detection.py +323 -0
  25. kontra/connectors/handle.py +368 -0
  26. kontra/connectors/postgres.py +127 -0
  27. kontra/connectors/sqlserver.py +226 -0
  28. kontra/engine/__init__.py +0 -0
  29. kontra/engine/backends/duckdb_session.py +227 -0
  30. kontra/engine/backends/duckdb_utils.py +18 -0
  31. kontra/engine/backends/polars_backend.py +47 -0
  32. kontra/engine/engine.py +1205 -0
  33. kontra/engine/executors/__init__.py +15 -0
  34. kontra/engine/executors/base.py +50 -0
  35. kontra/engine/executors/database_base.py +528 -0
  36. kontra/engine/executors/duckdb_sql.py +607 -0
  37. kontra/engine/executors/postgres_sql.py +162 -0
  38. kontra/engine/executors/registry.py +69 -0
  39. kontra/engine/executors/sqlserver_sql.py +163 -0
  40. kontra/engine/materializers/__init__.py +14 -0
  41. kontra/engine/materializers/base.py +42 -0
  42. kontra/engine/materializers/duckdb.py +110 -0
  43. kontra/engine/materializers/factory.py +22 -0
  44. kontra/engine/materializers/polars_connector.py +131 -0
  45. kontra/engine/materializers/postgres.py +157 -0
  46. kontra/engine/materializers/registry.py +138 -0
  47. kontra/engine/materializers/sqlserver.py +160 -0
  48. kontra/engine/result.py +15 -0
  49. kontra/engine/sql_utils.py +611 -0
  50. kontra/engine/sql_validator.py +609 -0
  51. kontra/engine/stats.py +194 -0
  52. kontra/engine/types.py +138 -0
  53. kontra/errors.py +533 -0
  54. kontra/logging.py +85 -0
  55. kontra/preplan/__init__.py +5 -0
  56. kontra/preplan/planner.py +253 -0
  57. kontra/preplan/postgres.py +179 -0
  58. kontra/preplan/sqlserver.py +191 -0
  59. kontra/preplan/types.py +24 -0
  60. kontra/probes/__init__.py +20 -0
  61. kontra/probes/compare.py +400 -0
  62. kontra/probes/relationship.py +283 -0
  63. kontra/reporters/__init__.py +0 -0
  64. kontra/reporters/json_reporter.py +190 -0
  65. kontra/reporters/rich_reporter.py +11 -0
  66. kontra/rules/__init__.py +35 -0
  67. kontra/rules/base.py +186 -0
  68. kontra/rules/builtin/__init__.py +40 -0
  69. kontra/rules/builtin/allowed_values.py +156 -0
  70. kontra/rules/builtin/compare.py +188 -0
  71. kontra/rules/builtin/conditional_not_null.py +213 -0
  72. kontra/rules/builtin/conditional_range.py +310 -0
  73. kontra/rules/builtin/contains.py +138 -0
  74. kontra/rules/builtin/custom_sql_check.py +182 -0
  75. kontra/rules/builtin/disallowed_values.py +140 -0
  76. kontra/rules/builtin/dtype.py +203 -0
  77. kontra/rules/builtin/ends_with.py +129 -0
  78. kontra/rules/builtin/freshness.py +240 -0
  79. kontra/rules/builtin/length.py +193 -0
  80. kontra/rules/builtin/max_rows.py +35 -0
  81. kontra/rules/builtin/min_rows.py +46 -0
  82. kontra/rules/builtin/not_null.py +121 -0
  83. kontra/rules/builtin/range.py +222 -0
  84. kontra/rules/builtin/regex.py +143 -0
  85. kontra/rules/builtin/starts_with.py +129 -0
  86. kontra/rules/builtin/unique.py +124 -0
  87. kontra/rules/condition_parser.py +203 -0
  88. kontra/rules/execution_plan.py +455 -0
  89. kontra/rules/factory.py +103 -0
  90. kontra/rules/predicates.py +25 -0
  91. kontra/rules/registry.py +24 -0
  92. kontra/rules/static_predicates.py +120 -0
  93. kontra/scout/__init__.py +9 -0
  94. kontra/scout/backends/__init__.py +17 -0
  95. kontra/scout/backends/base.py +111 -0
  96. kontra/scout/backends/duckdb_backend.py +359 -0
  97. kontra/scout/backends/postgres_backend.py +519 -0
  98. kontra/scout/backends/sqlserver_backend.py +577 -0
  99. kontra/scout/dtype_mapping.py +150 -0
  100. kontra/scout/patterns.py +69 -0
  101. kontra/scout/profiler.py +801 -0
  102. kontra/scout/reporters/__init__.py +39 -0
  103. kontra/scout/reporters/json_reporter.py +165 -0
  104. kontra/scout/reporters/markdown_reporter.py +152 -0
  105. kontra/scout/reporters/rich_reporter.py +144 -0
  106. kontra/scout/store.py +208 -0
  107. kontra/scout/suggest.py +200 -0
  108. kontra/scout/types.py +652 -0
  109. kontra/state/__init__.py +29 -0
  110. kontra/state/backends/__init__.py +79 -0
  111. kontra/state/backends/base.py +348 -0
  112. kontra/state/backends/local.py +480 -0
  113. kontra/state/backends/postgres.py +1010 -0
  114. kontra/state/backends/s3.py +543 -0
  115. kontra/state/backends/sqlserver.py +969 -0
  116. kontra/state/fingerprint.py +166 -0
  117. kontra/state/types.py +1061 -0
  118. kontra/version.py +1 -0
  119. kontra-0.5.2.dist-info/METADATA +122 -0
  120. kontra-0.5.2.dist-info/RECORD +124 -0
  121. kontra-0.5.2.dist-info/WHEEL +5 -0
  122. kontra-0.5.2.dist-info/entry_points.txt +2 -0
  123. kontra-0.5.2.dist-info/licenses/LICENSE +17 -0
  124. kontra-0.5.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,166 @@
1
+ # src/kontra/state/fingerprint.py
2
+ """
3
+ Fingerprinting utilities for contracts and datasets.
4
+
5
+ Fingerprints are stable hashes that identify a contract or dataset
6
+ across runs, enabling state comparison and history lookup.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import hashlib
12
+ import json
13
+ from datetime import date, datetime
14
+ from pathlib import Path
15
+ from typing import Any, Dict, List, Optional, TYPE_CHECKING
16
+
17
+
18
+ def _json_default(obj: Any) -> Any:
19
+ """JSON encoder for non-serializable types (dates, etc.)."""
20
+ if isinstance(obj, (date, datetime)):
21
+ return obj.isoformat()
22
+ raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")
23
+
24
+ if TYPE_CHECKING:
25
+ from kontra.config.models import Contract
26
+ from kontra.connectors.handle import DatasetHandle
27
+
28
+
29
+ def _stable_hash(data: str) -> str:
30
+ """Generate a stable SHA-256 hash prefix."""
31
+ return hashlib.sha256(data.encode("utf-8")).hexdigest()[:16]
32
+
33
+
34
def fingerprint_contract(
    contract: "Contract",
    *,
    include_dataset: bool = False,
) -> str:
    """
    Compute a stable fingerprint for a contract.

    The hashed payload contains:
    - the contract name
    - each rule's name and params, in a deterministic order
    - optionally, the contract's ``datasource``

    Because the file path is not part of the payload, the same contract
    yields the same fingerprint regardless of where its file lives.

    Args:
        contract: The Contract object
        include_dataset: If True, add ``contract.datasource`` to the payload

    Returns:
        A 16-character hex string (sha256 prefix)
    """

    def _rule_order(rule: Any):
        # Order by name first, then by the canonical JSON form of the params,
        # so rules that share a name still sort deterministically.
        return (rule.name, json.dumps(rule.params, sort_keys=True, default=_json_default))

    payload: Dict[str, Any] = {
        "name": contract.name,
        "rules": [],
    }

    ordered_rules = sorted(contract.rules, key=_rule_order)
    payload["rules"].extend(
        {"name": rule.name, "params": rule.params} for rule in ordered_rules
    )

    if include_dataset:
        payload["datasource"] = contract.datasource

    # Compact separators and sorted keys keep the serialized text stable.
    serialized = json.dumps(
        payload,
        sort_keys=True,
        separators=(",", ":"),
        default=_json_default,
    )
    return _stable_hash(serialized)
76
+
77
+
78
def fingerprint_contract_file(path: str) -> str:
    """
    Compute a fingerprint directly from a contract file's text.

    The file is read as UTF-8 and its content is hashed as-is, without
    parsing. Any formatting change in the file therefore changes the
    fingerprint, which makes this less stable than fingerprint_contract().

    Args:
        path: Path to the contract YAML file

    Returns:
        A 16-character hex string
    """
    with Path(path).open("r", encoding="utf-8") as contract_file:
        text = contract_file.read()
    return _stable_hash(text)
94
+
95
+
96
def fingerprint_dataset(
    handle: "DatasetHandle",
    *,
    include_stats: bool = False,
    row_count: Optional[int] = None,
    schema: Optional[List[str]] = None,
) -> Optional[str]:
    """
    Compute a fingerprint for a dataset.

    The hashed payload always contains the handle's ``uri`` and ``scheme``.
    When the handle carries ``db_params``, its host, database, schema and
    table are added (missing attributes are recorded as None).

    Args:
        handle: The DatasetHandle
        include_stats: If True, also hash ``row_count`` and ``schema`` when given
        row_count: Row count (if known)
        schema: List of column names (if known); sorted before hashing

    Returns:
        A 16-character hex string, or None if fingerprinting fails
    """
    try:
        payload: Dict[str, Any] = {
            "uri": handle.uri,
            "scheme": handle.scheme,
        }

        # Database-backed handles contribute their connection identifiers.
        if handle.db_params:
            params = handle.db_params
            payload["db"] = {
                field: getattr(params, field, None)
                for field in ("host", "database", "schema", "table")
            }

        if include_stats and row_count is not None:
            payload["row_count"] = row_count
        if include_stats and schema is not None:
            # Sorted so that column order does not affect the fingerprint.
            payload["schema"] = sorted(schema)

        serialized = json.dumps(payload, sort_keys=True, separators=(",", ":"))
        return _stable_hash(serialized)

    except Exception:
        # Best effort: a fingerprinting failure must not fail validation.
        return None
146
+
147
+
148
def fingerprint_from_name_and_uri(name: str, uri: str) -> str:
    """
    Compute a lightweight fingerprint from a contract name and dataset URI.

    Intended for callers that do not have a full Contract object available.

    Args:
        name: Contract name
        uri: Dataset URI

    Returns:
        A 16-character hex string
    """
    payload = {"name": name, "uri": uri}
    # Sorted keys and compact separators give a canonical serialization.
    return _stable_hash(json.dumps(payload, sort_keys=True, separators=(",", ":")))