kontra-0.5.2-py3-none-any.whl
This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their public registries.
- kontra/__init__.py +1871 -0
- kontra/api/__init__.py +22 -0
- kontra/api/compare.py +340 -0
- kontra/api/decorators.py +153 -0
- kontra/api/results.py +2121 -0
- kontra/api/rules.py +681 -0
- kontra/cli/__init__.py +0 -0
- kontra/cli/commands/__init__.py +1 -0
- kontra/cli/commands/config.py +153 -0
- kontra/cli/commands/diff.py +450 -0
- kontra/cli/commands/history.py +196 -0
- kontra/cli/commands/profile.py +289 -0
- kontra/cli/commands/validate.py +468 -0
- kontra/cli/constants.py +6 -0
- kontra/cli/main.py +48 -0
- kontra/cli/renderers.py +304 -0
- kontra/cli/utils.py +28 -0
- kontra/config/__init__.py +34 -0
- kontra/config/loader.py +127 -0
- kontra/config/models.py +49 -0
- kontra/config/settings.py +797 -0
- kontra/connectors/__init__.py +0 -0
- kontra/connectors/db_utils.py +251 -0
- kontra/connectors/detection.py +323 -0
- kontra/connectors/handle.py +368 -0
- kontra/connectors/postgres.py +127 -0
- kontra/connectors/sqlserver.py +226 -0
- kontra/engine/__init__.py +0 -0
- kontra/engine/backends/duckdb_session.py +227 -0
- kontra/engine/backends/duckdb_utils.py +18 -0
- kontra/engine/backends/polars_backend.py +47 -0
- kontra/engine/engine.py +1205 -0
- kontra/engine/executors/__init__.py +15 -0
- kontra/engine/executors/base.py +50 -0
- kontra/engine/executors/database_base.py +528 -0
- kontra/engine/executors/duckdb_sql.py +607 -0
- kontra/engine/executors/postgres_sql.py +162 -0
- kontra/engine/executors/registry.py +69 -0
- kontra/engine/executors/sqlserver_sql.py +163 -0
- kontra/engine/materializers/__init__.py +14 -0
- kontra/engine/materializers/base.py +42 -0
- kontra/engine/materializers/duckdb.py +110 -0
- kontra/engine/materializers/factory.py +22 -0
- kontra/engine/materializers/polars_connector.py +131 -0
- kontra/engine/materializers/postgres.py +157 -0
- kontra/engine/materializers/registry.py +138 -0
- kontra/engine/materializers/sqlserver.py +160 -0
- kontra/engine/result.py +15 -0
- kontra/engine/sql_utils.py +611 -0
- kontra/engine/sql_validator.py +609 -0
- kontra/engine/stats.py +194 -0
- kontra/engine/types.py +138 -0
- kontra/errors.py +533 -0
- kontra/logging.py +85 -0
- kontra/preplan/__init__.py +5 -0
- kontra/preplan/planner.py +253 -0
- kontra/preplan/postgres.py +179 -0
- kontra/preplan/sqlserver.py +191 -0
- kontra/preplan/types.py +24 -0
- kontra/probes/__init__.py +20 -0
- kontra/probes/compare.py +400 -0
- kontra/probes/relationship.py +283 -0
- kontra/reporters/__init__.py +0 -0
- kontra/reporters/json_reporter.py +190 -0
- kontra/reporters/rich_reporter.py +11 -0
- kontra/rules/__init__.py +35 -0
- kontra/rules/base.py +186 -0
- kontra/rules/builtin/__init__.py +40 -0
- kontra/rules/builtin/allowed_values.py +156 -0
- kontra/rules/builtin/compare.py +188 -0
- kontra/rules/builtin/conditional_not_null.py +213 -0
- kontra/rules/builtin/conditional_range.py +310 -0
- kontra/rules/builtin/contains.py +138 -0
- kontra/rules/builtin/custom_sql_check.py +182 -0
- kontra/rules/builtin/disallowed_values.py +140 -0
- kontra/rules/builtin/dtype.py +203 -0
- kontra/rules/builtin/ends_with.py +129 -0
- kontra/rules/builtin/freshness.py +240 -0
- kontra/rules/builtin/length.py +193 -0
- kontra/rules/builtin/max_rows.py +35 -0
- kontra/rules/builtin/min_rows.py +46 -0
- kontra/rules/builtin/not_null.py +121 -0
- kontra/rules/builtin/range.py +222 -0
- kontra/rules/builtin/regex.py +143 -0
- kontra/rules/builtin/starts_with.py +129 -0
- kontra/rules/builtin/unique.py +124 -0
- kontra/rules/condition_parser.py +203 -0
- kontra/rules/execution_plan.py +455 -0
- kontra/rules/factory.py +103 -0
- kontra/rules/predicates.py +25 -0
- kontra/rules/registry.py +24 -0
- kontra/rules/static_predicates.py +120 -0
- kontra/scout/__init__.py +9 -0
- kontra/scout/backends/__init__.py +17 -0
- kontra/scout/backends/base.py +111 -0
- kontra/scout/backends/duckdb_backend.py +359 -0
- kontra/scout/backends/postgres_backend.py +519 -0
- kontra/scout/backends/sqlserver_backend.py +577 -0
- kontra/scout/dtype_mapping.py +150 -0
- kontra/scout/patterns.py +69 -0
- kontra/scout/profiler.py +801 -0
- kontra/scout/reporters/__init__.py +39 -0
- kontra/scout/reporters/json_reporter.py +165 -0
- kontra/scout/reporters/markdown_reporter.py +152 -0
- kontra/scout/reporters/rich_reporter.py +144 -0
- kontra/scout/store.py +208 -0
- kontra/scout/suggest.py +200 -0
- kontra/scout/types.py +652 -0
- kontra/state/__init__.py +29 -0
- kontra/state/backends/__init__.py +79 -0
- kontra/state/backends/base.py +348 -0
- kontra/state/backends/local.py +480 -0
- kontra/state/backends/postgres.py +1010 -0
- kontra/state/backends/s3.py +543 -0
- kontra/state/backends/sqlserver.py +969 -0
- kontra/state/fingerprint.py +166 -0
- kontra/state/types.py +1061 -0
- kontra/version.py +1 -0
- kontra-0.5.2.dist-info/METADATA +122 -0
- kontra-0.5.2.dist-info/RECORD +124 -0
- kontra-0.5.2.dist-info/WHEEL +5 -0
- kontra-0.5.2.dist-info/entry_points.txt +2 -0
- kontra-0.5.2.dist-info/licenses/LICENSE +17 -0
- kontra-0.5.2.dist-info/top_level.txt +1 -0
kontra/scout/store.py
ADDED
@@ -0,0 +1,208 @@

# src/kontra/scout/store.py
"""
Profile storage for Kontra Scout.

Stores scout profiles using the same backend infrastructure as validation state.
Profiles are stored separately from validation states but can use the same
storage backend (local, S3, PostgreSQL).
"""

from __future__ import annotations

import hashlib
import json
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional

from kontra.version import VERSION
from .types import DatasetProfile, ProfileState


def fingerprint_source(source_uri: str) -> str:
    """
    Generate a stable fingerprint for a data source URI.

    Args:
        source_uri: The data source URI

    Returns:
        16-character hex fingerprint
    """
    # Normalize the URI
    normalized = source_uri.strip()

    # Hash it
    return hashlib.sha256(normalized.encode("utf-8")).hexdigest()[:16]


class LocalProfileStore:
    """
    Filesystem-based profile storage.

    Stores profiles in .kontra/profiles/ directory:
        .kontra/profiles/<source_fingerprint>/<timestamp>.json
    """

    def __init__(self, base_path: Optional[str] = None):
        """
        Initialize the local profile store.

        Args:
            base_path: Base directory for profile storage.
                Defaults to .kontra/profiles/ in cwd.
        """
        if base_path:
            self.base_path = Path(base_path)
        else:
            self.base_path = Path.cwd() / ".kontra" / "profiles"

    def _source_dir(self, source_fingerprint: str) -> Path:
        """Get the directory for a source's profiles."""
        return self.base_path / source_fingerprint

    def _profile_filename(self, profiled_at: str) -> str:
        """Generate filename from timestamp."""
        # Use ISO format but replace : with - for filesystem compatibility
        ts = profiled_at.replace(":", "-").replace("+", "_")
        return f"{ts}.json"

    def save(self, state: ProfileState) -> None:
        """Save a profile state to the filesystem."""
        source_dir = self._source_dir(state.source_fingerprint)
        source_dir.mkdir(parents=True, exist_ok=True)

        filename = self._profile_filename(state.profiled_at)
        filepath = source_dir / filename

        # Write atomically
        temp_path = filepath.with_suffix(".tmp")
        try:
            temp_path.write_text(state.to_json(), encoding="utf-8")
            temp_path.rename(filepath)
        except Exception:
            if temp_path.exists():
                temp_path.unlink()
            raise

    def get_latest(self, source_fingerprint: str) -> Optional[ProfileState]:
        """Get the most recent profile for a source."""
        history = self.get_history(source_fingerprint, limit=1)
        return history[0] if history else None

    def get_history(
        self,
        source_fingerprint: str,
        limit: int = 10,
    ) -> List[ProfileState]:
        """Get recent profile history for a source, newest first."""
        source_dir = self._source_dir(source_fingerprint)

        if not source_dir.exists():
            return []

        # List all JSON files
        profile_files = sorted(
            source_dir.glob("*.json"),
            key=lambda p: p.name,
            reverse=True,
        )

        states = []
        for filepath in profile_files[:limit]:
            try:
                content = filepath.read_text(encoding="utf-8")
                state = ProfileState.from_json(content)
                states.append(state)
            except Exception:
                continue

        return states

    def list_sources(self) -> List[str]:
        """List all source fingerprints with stored profiles."""
        if not self.base_path.exists():
            return []

        sources = []
        for item in self.base_path.iterdir():
            if item.is_dir() and len(item.name) == 16:
                sources.append(item.name)

        return sorted(sources)

    def clear(self, source_fingerprint: Optional[str] = None) -> int:
        """Clear stored profiles."""
        deleted = 0

        if source_fingerprint:
            source_dir = self._source_dir(source_fingerprint)
            if source_dir.exists():
                for filepath in source_dir.glob("*.json"):
                    filepath.unlink()
                    deleted += 1
                try:
                    source_dir.rmdir()
                except OSError:
                    pass
        else:
            if self.base_path.exists():
                for source_dir in self.base_path.iterdir():
                    if source_dir.is_dir():
                        for filepath in source_dir.glob("*.json"):
                            filepath.unlink()
                            deleted += 1
                        try:
                            source_dir.rmdir()
                        except OSError:
                            pass

        return deleted

    def __repr__(self) -> str:
        return f"LocalProfileStore(base_path={self.base_path})"


def create_profile_state(profile: DatasetProfile) -> ProfileState:
    """
    Create a ProfileState from a DatasetProfile.

    Args:
        profile: The profiled dataset

    Returns:
        ProfileState ready for storage
    """
    return ProfileState(
        source_fingerprint=fingerprint_source(profile.source_uri),
        source_uri=profile.source_uri,
        profiled_at=profile.profiled_at,
        profile=profile,
        engine_version=VERSION,
    )


# Default store
_default_profile_store: Optional[LocalProfileStore] = None


def get_default_profile_store() -> LocalProfileStore:
    """Get the default profile store."""
    global _default_profile_store
    if _default_profile_store is None:
        _default_profile_store = LocalProfileStore()
    return _default_profile_store


def get_profile_store(backend: str = "local") -> LocalProfileStore:
    """
    Get a profile store by backend identifier.

    Currently only supports local storage. Future: S3, PostgreSQL.
    """
    if not backend or backend == "local":
        return get_default_profile_store()

    # For now, all backends use local profile storage
    # Future: implement S3ProfileStore, PostgresProfileStore
    return get_default_profile_store()
kontra/scout/suggest.py
ADDED
@@ -0,0 +1,200 @@

# src/kontra/scout/suggest.py
"""
Generate suggested validation rules based on profile analysis.
"""

from __future__ import annotations

from typing import Any, Dict, List

from .patterns import get_pattern_regex
from .types import DatasetProfile, ColumnProfile


def generate_rules(profile: DatasetProfile) -> List[Dict[str, Any]]:
    """
    Generate suggested validation rules based on profile.

    Returns a list of rule dictionaries in Kontra contract format.
    """
    rules: List[Dict[str, Any]] = []

    for col in profile.columns:
        col_rules = _suggest_column_rules(col)
        rules.extend(col_rules)

    return rules


def generate_rules_yaml(profile: DatasetProfile) -> str:
    """
    Generate YAML-formatted contract with suggested rules.

    Returns a complete contract YAML that can be used directly with `kontra validate`.
    """
    rules = generate_rules(profile)

    if not rules:
        return "# No rules suggested - dataset may be empty or all columns have high null rates\nrules: []"

    lines = [
        f"# Auto-generated contract from Kontra Scout",
        f"# Source: {profile.source_uri}",
        f"# Rows analyzed: {profile.row_count:,}",
        f"# Generated by Kontra Scout v{profile.engine_version}",
        "",
        "name: suggested_contract",
        f"description: Auto-generated from Scout profile",
        "",
        f'dataset: "{profile.source_uri}"',
        "",
        "rules:",
    ]

    # Add min_rows suggestion (90% of current count)
    min_threshold = int(profile.row_count * 0.9)
    if min_threshold > 0:
        lines.append(f"  # Dataset: expecting at least {min_threshold:,} rows (90% of current)")
        lines.append("  - name: min_rows")
        lines.append(f"    params: {{ threshold: {min_threshold} }}")
        lines.append("")

    for rule in rules:
        name = rule["name"]
        params = rule.get("params", {})
        comment = rule.get("_comment", "")

        if comment:
            lines.append(f"  # {comment}")

        if params:
            # Format params inline for simple cases, multi-line for complex
            if len(params) <= 2 and all(
                not isinstance(v, (list, dict)) or (isinstance(v, list) and len(v) <= 5)
                for v in params.values()
            ):
                params_str = ", ".join(_format_param(k, v) for k, v in params.items())
                lines.append(f"  - name: {name}")
                lines.append(f"    params: {{ {params_str} }}")
            else:
                lines.append(f"  - name: {name}")
                lines.append("    params:")
                for k, v in params.items():
                    lines.append(f"      {k}: {_format_value(v)}")
        else:
            lines.append(f"  - name: {name}")

        lines.append("")

    return "\n".join(lines)


def _suggest_column_rules(col: ColumnProfile) -> List[Dict[str, Any]]:
    """Generate rules for a single column based on its profile."""
    rules: List[Dict[str, Any]] = []

    # 1. not_null: Suggest if column has 0% nulls
    if col.null_rate == 0 and col.row_count > 0:
        rules.append({
            "name": "not_null",
            "params": {"column": col.name},
            "_comment": f"{col.name}: 100% non-null",
        })

    # 2. unique: Suggest only for columns that look like identifiers
    is_identifier = col.semantic_type == "identifier"
    name_hints = any(hint in col.name.lower() for hint in ["_id", "id", "key", "uuid", "guid", "pk", "code"])
    if col.uniqueness_ratio >= 0.999 and col.null_rate == 0 and (is_identifier or name_hints):
        rules.append({
            "name": "unique",
            "params": {"column": col.name},
            "_comment": f"{col.name}: 100% unique (primary key candidate)",
        })

    # 3. allowed_values: Suggest for low-cardinality columns with known values
    if col.is_low_cardinality and col.values and len(col.values) <= 20:
        rules.append({
            "name": "allowed_values",
            "params": {
                "column": col.name,
                "values": col.values,
            },
            "_comment": f"{col.name}: low cardinality ({len(col.values)} values)",
        })

    # 4. dtype: Suggest type validation
    if col.dtype in ("int", "float", "bool", "date", "datetime", "string"):
        # Map to Kontra/Polars type names
        type_map = {
            "int": "int64",
            "float": "float64",
            "bool": "bool",
            "date": "date",
            "datetime": "datetime",
            "string": "utf8",
        }
        kontra_type = type_map.get(col.dtype, col.dtype)
        rules.append({
            "name": "dtype",
            "params": {
                "column": col.name,
                "type": kontra_type,
            },
            "_comment": f"{col.name}: detected type {col.dtype_raw}",
        })

    # 5. regex: Suggest for detected patterns
    for pattern in col.detected_patterns:
        regex = get_pattern_regex(pattern)
        if regex:
            rules.append({
                "name": "regex",
                "params": {
                    "column": col.name,
                    "pattern": regex,
                },
                "_comment": f"{col.name}: detected {pattern} pattern",
            })

    # 6. freshness: Suggest for timestamp columns that look like update times
    if col.dtype in ("datetime", "timestamp") or col.temporal is not None:
        # Check if column name suggests it's an update/modified timestamp
        name_lower = col.name.lower()
        freshness_hints = ["updated", "modified", "timestamp", "created_at", "updated_at", "last_"]
        if any(hint in name_lower for hint in freshness_hints):
            rules.append({
                "name": "freshness",
                "params": {
                    "column": col.name,
                    "max_age": "7d",
                },
                "_comment": f"{col.name}: timestamp column, adjust max_age as needed",
            })

    return rules


def _format_param(key: str, value: Any) -> str:
    """Format a single parameter for inline YAML."""
    return f"{key}: {_format_value(value)}"


def _format_value(value: Any) -> str:
    """Format a value for YAML output."""
    if isinstance(value, str):
        # Escape quotes and use quotes if needed
        if any(c in value for c in ['"', "'", ":", "{", "}", "[", "]", ",", "\n"]):
            escaped = value.replace("\\", "\\\\").replace('"', '\\"')
            return f'"{escaped}"'
        return f'"{value}"'
    elif isinstance(value, bool):
        return "true" if value else "false"
    elif isinstance(value, (int, float)):
        return str(value)
    elif isinstance(value, list):
        items = ", ".join(_format_value(v) for v in value)
        return f"[{items}]"
    elif value is None:
        return "null"
    else:
        return str(value)