kontra 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kontra/__init__.py +1871 -0
- kontra/api/__init__.py +22 -0
- kontra/api/compare.py +340 -0
- kontra/api/decorators.py +153 -0
- kontra/api/results.py +2121 -0
- kontra/api/rules.py +681 -0
- kontra/cli/__init__.py +0 -0
- kontra/cli/commands/__init__.py +1 -0
- kontra/cli/commands/config.py +153 -0
- kontra/cli/commands/diff.py +450 -0
- kontra/cli/commands/history.py +196 -0
- kontra/cli/commands/profile.py +289 -0
- kontra/cli/commands/validate.py +468 -0
- kontra/cli/constants.py +6 -0
- kontra/cli/main.py +48 -0
- kontra/cli/renderers.py +304 -0
- kontra/cli/utils.py +28 -0
- kontra/config/__init__.py +34 -0
- kontra/config/loader.py +127 -0
- kontra/config/models.py +49 -0
- kontra/config/settings.py +797 -0
- kontra/connectors/__init__.py +0 -0
- kontra/connectors/db_utils.py +251 -0
- kontra/connectors/detection.py +323 -0
- kontra/connectors/handle.py +368 -0
- kontra/connectors/postgres.py +127 -0
- kontra/connectors/sqlserver.py +226 -0
- kontra/engine/__init__.py +0 -0
- kontra/engine/backends/duckdb_session.py +227 -0
- kontra/engine/backends/duckdb_utils.py +18 -0
- kontra/engine/backends/polars_backend.py +47 -0
- kontra/engine/engine.py +1205 -0
- kontra/engine/executors/__init__.py +15 -0
- kontra/engine/executors/base.py +50 -0
- kontra/engine/executors/database_base.py +528 -0
- kontra/engine/executors/duckdb_sql.py +607 -0
- kontra/engine/executors/postgres_sql.py +162 -0
- kontra/engine/executors/registry.py +69 -0
- kontra/engine/executors/sqlserver_sql.py +163 -0
- kontra/engine/materializers/__init__.py +14 -0
- kontra/engine/materializers/base.py +42 -0
- kontra/engine/materializers/duckdb.py +110 -0
- kontra/engine/materializers/factory.py +22 -0
- kontra/engine/materializers/polars_connector.py +131 -0
- kontra/engine/materializers/postgres.py +157 -0
- kontra/engine/materializers/registry.py +138 -0
- kontra/engine/materializers/sqlserver.py +160 -0
- kontra/engine/result.py +15 -0
- kontra/engine/sql_utils.py +611 -0
- kontra/engine/sql_validator.py +609 -0
- kontra/engine/stats.py +194 -0
- kontra/engine/types.py +138 -0
- kontra/errors.py +533 -0
- kontra/logging.py +85 -0
- kontra/preplan/__init__.py +5 -0
- kontra/preplan/planner.py +253 -0
- kontra/preplan/postgres.py +179 -0
- kontra/preplan/sqlserver.py +191 -0
- kontra/preplan/types.py +24 -0
- kontra/probes/__init__.py +20 -0
- kontra/probes/compare.py +400 -0
- kontra/probes/relationship.py +283 -0
- kontra/reporters/__init__.py +0 -0
- kontra/reporters/json_reporter.py +190 -0
- kontra/reporters/rich_reporter.py +11 -0
- kontra/rules/__init__.py +35 -0
- kontra/rules/base.py +186 -0
- kontra/rules/builtin/__init__.py +40 -0
- kontra/rules/builtin/allowed_values.py +156 -0
- kontra/rules/builtin/compare.py +188 -0
- kontra/rules/builtin/conditional_not_null.py +213 -0
- kontra/rules/builtin/conditional_range.py +310 -0
- kontra/rules/builtin/contains.py +138 -0
- kontra/rules/builtin/custom_sql_check.py +182 -0
- kontra/rules/builtin/disallowed_values.py +140 -0
- kontra/rules/builtin/dtype.py +203 -0
- kontra/rules/builtin/ends_with.py +129 -0
- kontra/rules/builtin/freshness.py +240 -0
- kontra/rules/builtin/length.py +193 -0
- kontra/rules/builtin/max_rows.py +35 -0
- kontra/rules/builtin/min_rows.py +46 -0
- kontra/rules/builtin/not_null.py +121 -0
- kontra/rules/builtin/range.py +222 -0
- kontra/rules/builtin/regex.py +143 -0
- kontra/rules/builtin/starts_with.py +129 -0
- kontra/rules/builtin/unique.py +124 -0
- kontra/rules/condition_parser.py +203 -0
- kontra/rules/execution_plan.py +455 -0
- kontra/rules/factory.py +103 -0
- kontra/rules/predicates.py +25 -0
- kontra/rules/registry.py +24 -0
- kontra/rules/static_predicates.py +120 -0
- kontra/scout/__init__.py +9 -0
- kontra/scout/backends/__init__.py +17 -0
- kontra/scout/backends/base.py +111 -0
- kontra/scout/backends/duckdb_backend.py +359 -0
- kontra/scout/backends/postgres_backend.py +519 -0
- kontra/scout/backends/sqlserver_backend.py +577 -0
- kontra/scout/dtype_mapping.py +150 -0
- kontra/scout/patterns.py +69 -0
- kontra/scout/profiler.py +801 -0
- kontra/scout/reporters/__init__.py +39 -0
- kontra/scout/reporters/json_reporter.py +165 -0
- kontra/scout/reporters/markdown_reporter.py +152 -0
- kontra/scout/reporters/rich_reporter.py +144 -0
- kontra/scout/store.py +208 -0
- kontra/scout/suggest.py +200 -0
- kontra/scout/types.py +652 -0
- kontra/state/__init__.py +29 -0
- kontra/state/backends/__init__.py +79 -0
- kontra/state/backends/base.py +348 -0
- kontra/state/backends/local.py +480 -0
- kontra/state/backends/postgres.py +1010 -0
- kontra/state/backends/s3.py +543 -0
- kontra/state/backends/sqlserver.py +969 -0
- kontra/state/fingerprint.py +166 -0
- kontra/state/types.py +1061 -0
- kontra/version.py +1 -0
- kontra-0.5.2.dist-info/METADATA +122 -0
- kontra-0.5.2.dist-info/RECORD +124 -0
- kontra-0.5.2.dist-info/WHEEL +5 -0
- kontra-0.5.2.dist-info/entry_points.txt +2 -0
- kontra-0.5.2.dist-info/licenses/LICENSE +17 -0
- kontra-0.5.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,797 @@
|
|
|
1
|
+
# src/kontra/config/settings.py
|
|
2
|
+
"""
|
|
3
|
+
Kontra configuration file system.
|
|
4
|
+
|
|
5
|
+
Loads project-level config from .kontra/config.yml with:
|
|
6
|
+
- Environment variable substitution (${VAR} syntax)
|
|
7
|
+
- Named environments (--env production)
|
|
8
|
+
- Precedence: CLI flags > named environment (--env) > config file defaults > built-in defaults
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import os
|
|
14
|
+
import re
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import Any, Dict, List, Literal, Optional, Union
|
|
18
|
+
|
|
19
|
+
import yaml
|
|
20
|
+
from pydantic import BaseModel, Field, field_validator
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# =============================================================================
|
|
24
|
+
# Environment Variable Substitution
|
|
25
|
+
# =============================================================================
|
|
26
|
+
|
|
27
|
+
# Matches a ${NAME} placeholder; group 1 captures NAME (one or more non-"}" chars).
ENV_VAR_PATTERN = re.compile(r"\$\{([^}]+)\}")


def substitute_env_vars(value: str) -> str:
    """
    Expand ``${VAR}`` placeholders using the process environment.

    Args:
        value: Text that may contain one or more ``${VAR}`` placeholders

    Returns:
        The text with every placeholder replaced by the variable's value.
        A variable that is not set expands to the empty string.
    """
    # Group 1 of ENV_VAR_PATTERN is the variable name between the braces.
    return ENV_VAR_PATTERN.sub(lambda m: os.environ.get(m.group(1), ""), value)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def substitute_env_vars_recursive(obj: Any) -> Any:
    """
    Walk a nested structure and expand ``${VAR}`` in every string value.

    Dicts and lists are rebuilt (dict keys are left untouched, only values
    are visited); strings go through ``substitute_env_vars``; any other
    object is returned unchanged.

    Args:
        obj: Any Python object (dict, list, str, etc.)

    Returns:
        A structure of the same shape with env vars substituted in strings
    """
    if isinstance(obj, dict):
        return {key: substitute_env_vars_recursive(val) for key, val in obj.items()}
    if isinstance(obj, list):
        return [substitute_env_vars_recursive(entry) for entry in obj]
    if isinstance(obj, str):
        return substitute_env_vars(obj)
    # Numbers, booleans, None, ... carry no placeholders.
    return obj
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
# =============================================================================
|
|
67
|
+
# Pydantic Models
|
|
68
|
+
# =============================================================================
|
|
69
|
+
|
|
70
|
+
# =============================================================================
|
|
71
|
+
# Datasource Models
|
|
72
|
+
# =============================================================================
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class PostgresDatasourceConfig(BaseModel):
    """
    Connection settings for a PostgreSQL datasource.

    Omitted connection fields fall back to ``${PG*}`` placeholder strings.
    """

    type: Literal["postgres"] = "postgres"

    # NOTE(review): load_config_file() expands ${VAR} in the raw YAML *before*
    # validation, so these placeholder defaults are not expanded by that step.
    # Confirm that a downstream consumer resolves them.
    host: str = "${PGHOST}"
    port: int = 5432
    user: str = "${PGUSER}"
    password: str = "${PGPASSWORD}"
    database: str = "${PGDATABASE}"

    # Tables: map alias -> schema.table
    tables: Dict[str, str] = Field(default_factory=dict)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class FilesDatasourceConfig(BaseModel):
    """
    File-based datasource configuration (Parquet, CSV).

    ``path`` and ``datasets`` are accepted as alternative spellings of
    ``base_path`` and ``tables``. After validation they are folded into the
    canonical fields, so code that reads ``base_path`` / ``tables`` sees the
    values no matter which spelling the config file used.
    """

    type: Literal["files", "file"] = "files"
    base_path: str = "./"
    path: str = ""  # Alias for base_path
    # Tables: map alias -> relative path
    tables: Dict[str, str] = Field(default_factory=dict)
    datasets: Dict[str, str] = Field(default_factory=dict)  # Alias for tables

    def model_post_init(self, __context: Any) -> None:
        """Fold the alias fields into their canonical counterparts."""
        # An explicitly configured base_path wins; `path` only fills in when
        # base_path was left at its default.
        if self.path and "base_path" not in self.model_fields_set:
            self.base_path = self.path
        # Entries under `tables` take precedence over same-named `datasets` entries.
        if self.datasets:
            self.tables = {**self.datasets, **self.tables}
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
class S3DatasourceConfig(BaseModel):
    """
    Settings for an S3 datasource.

    ``bucket`` is the only field without a default.
    """

    type: Literal["s3"] = "s3"
    bucket: str
    # Prefix placed between the bucket and each table's key
    prefix: str = ""
    # Tables: map alias -> relative key
    tables: Dict[str, str] = Field(default_factory=dict)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
class MSSQLDatasourceConfig(BaseModel):
    """
    Connection settings for a SQL Server datasource.

    Unlike the PostgreSQL model, the defaults here are plain values rather
    than ``${VAR}`` placeholders.
    """

    type: Literal["mssql"] = "mssql"

    host: str = "localhost"
    port: int = 1433
    user: str = "sa"
    password: str = ""
    database: str = ""

    # Tables: map alias -> schema.table
    tables: Dict[str, str] = Field(default_factory=dict)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
# Union type for datasource configs.
# Spelled with typing.Union (already imported) because this is a runtime
# expression: `A | B` between classes raises TypeError on Python < 3.10, and
# `from __future__ import annotations` only defers annotations, not assignments.
DatasourceConfig = Union[
    PostgresDatasourceConfig,
    FilesDatasourceConfig,
    S3DatasourceConfig,
    MSSQLDatasourceConfig,
]
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
class DefaultsConfig(BaseModel):
    """
    Project-wide default values for CLI options.

    Every field has a default, so an empty or missing section validates.
    """

    # Execution controls
    preplan: Literal["on", "off", "auto"] = "auto"
    pushdown: Literal["on", "off", "auto"] = "auto"
    projection: Literal["on", "off"] = "on"

    # Output
    output_format: Literal["rich", "json"] = "rich"
    stats: Literal["none", "summary", "profile"] = "none"

    # State (free-form string; not restricted to a fixed set here)
    state_backend: str = "local"

    # CSV handling
    csv_mode: Literal["auto", "duckdb", "parquet"] = "auto"
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
class ScoutConfig(BaseModel):
    """
    Settings for the profile command (named "Scout" internally).
    """

    # Both the new names (scout/scan/interrogate) and the old ones
    # (lite/standard/deep) are accepted, plus "llm".
    preset: Literal["scout", "scan", "interrogate", "lite", "standard", "deep", "llm"] = "scan"
    save_profile: bool = False
    # None means "not configured"
    list_values_threshold: Optional[int] = None
    top_n: Optional[int] = None
    include_patterns: bool = False
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
class EnvironmentConfig(BaseModel):
    """
    Per-environment overrides of the core options.

    The fields mirror DefaultsConfig, but each one is optional: a field left
    as None means "do not override", so only the values that are actually
    set replace the defaults.
    """

    # Execution controls
    preplan: Optional[Literal["on", "off", "auto"]] = None
    pushdown: Optional[Literal["on", "off", "auto"]] = None
    projection: Optional[Literal["on", "off"]] = None

    # Output
    output_format: Optional[Literal["rich", "json"]] = None
    stats: Optional[Literal["none", "summary", "profile"]] = None

    # State
    state_backend: Optional[str] = None

    # CSV handling
    csv_mode: Optional[Literal["auto", "duckdb", "parquet"]] = None
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
class KontraConfig(BaseModel):
    """
    Root configuration model for .kontra/config.yml
    """

    version: str = "1"
    defaults: DefaultsConfig = Field(default_factory=DefaultsConfig)
    # Accept both "profile" and "scout" as the config key (profile is preferred)
    scout: ScoutConfig = Field(default_factory=ScoutConfig, alias="profile")
    # Raw datasource entries keyed by name; get_datasource() validates them on demand
    datasources: Dict[str, Any] = Field(default_factory=dict)  # Flexible for different types
    environments: Dict[str, EnvironmentConfig] = Field(default_factory=dict)

    model_config = {"populate_by_name": True}  # Allow both 'scout' and 'profile'

    # LLM juice: user-defined severity weights (Kontra carries but never acts on these)
    severity_weights: Optional[Dict[str, float]] = Field(
        default=None,
        description="User-defined numeric weights for severity levels. Kontra carries these but never uses them internally."
    )

    @field_validator("version")
    @classmethod
    def validate_version(cls, v: str) -> str:
        """Reject any config version other than "1"."""
        if v != "1":
            raise ValueError(f"Unsupported config version: {v}. Expected '1'.")
        return v

    def get_datasource(self, name: str) -> Optional[DatasourceConfig]:
        """
        Get a validated datasource config by name.

        The entry's ``type`` key selects the model; an entry without a
        ``type`` is treated as a files datasource.

        Args:
            name: Key of the datasource in the ``datasources`` section

        Returns:
            The validated datasource model, or None if ``name`` is not configured

        Raises:
            ValueError: If the entry is not a mapping, has an unsupported
                ``type``, or fails validation against the selected model
        """
        if name not in self.datasources:
            return None

        ds_data = self.datasources[name]
        if not isinstance(ds_data, dict):
            # e.g. `name:` with no body in the YAML parses to None
            raise ValueError(
                f"Datasource '{name}' must be a mapping, got {type(ds_data).__name__}."
            )

        models = {
            "postgres": PostgresDatasourceConfig,
            "mssql": MSSQLDatasourceConfig,
            "s3": S3DatasourceConfig,
            "files": FilesDatasourceConfig,
            "file": FilesDatasourceConfig,
        }
        ds_type = ds_data.get("type", "files")
        # The isinstance guard keeps an unhashable value (e.g. a list) out of the lookup.
        model = models.get(ds_type) if isinstance(ds_type, str) else None
        if model is None:
            # Falling back to the files model cannot work: its `type` field
            # only admits "files"/"file", so validation would fail anyway.
            supported = ", ".join(models)
            raise ValueError(
                f"Unknown datasource type {ds_type!r} for datasource '{name}'. "
                f"Supported types: {supported}"
            )

        return model.model_validate(ds_data)
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
# =============================================================================
|
|
218
|
+
# Effective Config (resolved values)
|
|
219
|
+
# =============================================================================
|
|
220
|
+
|
|
221
|
+
@dataclass
class EffectiveConfig:
    """
    The configuration that results from merging every source.

    resolve_effective_config() starts from the defaults declared here and
    overlays the config file, the selected environment and the CLI values.
    """

    # Execution controls
    preplan: str = "auto"
    pushdown: str = "auto"
    projection: str = "on"

    # Output
    output_format: str = "rich"
    stats: str = "none"

    # State
    state_backend: str = "local"

    # CSV
    csv_mode: str = "auto"

    # Scout
    # NOTE(review): ScoutConfig.preset defaults to "scan" while this defaults
    # to "standard" - confirm the two are meant to differ.
    scout_preset: str = "standard"
    scout_save_profile: bool = False
    scout_list_values_threshold: Optional[int] = None
    scout_top_n: Optional[int] = None
    scout_include_patterns: bool = False

    # Metadata (not included in to_dict())
    config_file_path: Optional[Path] = None
    environment: Optional[str] = None

    # LLM juice: user-defined severity weights (None if unconfigured)
    severity_weights: Optional[Dict[str, float]] = None

    def to_dict(self) -> Dict[str, Any]:
        """
        Build a plain-dict view of the settings for display.

        The scout options are nested under a "scout" key without their
        ``scout_`` prefix. "severity_weights" appears only when configured.
        """
        scout_section = {
            "preset": self.scout_preset,
            "save_profile": self.scout_save_profile,
            "list_values_threshold": self.scout_list_values_threshold,
            "top_n": self.scout_top_n,
            "include_patterns": self.scout_include_patterns,
        }
        summary: Dict[str, Any] = {
            "preplan": self.preplan,
            "pushdown": self.pushdown,
            "projection": self.projection,
            "output_format": self.output_format,
            "stats": self.stats,
            "state_backend": self.state_backend,
            "csv_mode": self.csv_mode,
            "scout": scout_section,
        }
        if self.severity_weights is None:
            return summary
        summary["severity_weights"] = self.severity_weights
        return summary
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
# =============================================================================
|
|
282
|
+
# Config Loading
|
|
283
|
+
# =============================================================================
|
|
284
|
+
|
|
285
|
+
def find_config_file(start_path: Optional[Path] = None) -> Optional[Path]:
    """
    Locate ``.kontra/config.yml`` directly under a directory.

    Only the given directory is checked; parent directories are not searched.

    Args:
        start_path: Directory to look in (default: current working directory)

    Returns:
        Path to the config file if it exists, None otherwise
    """
    search_dir = start_path or Path.cwd()
    candidate = search_dir / ".kontra" / "config.yml"
    return candidate if candidate.exists() else None
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
def load_config_file(path: Path) -> KontraConfig:
    """
    Load and parse a config file.

    The file is read as UTF-8, parsed with ``yaml.safe_load``, has ``${VAR}``
    placeholders expanded in every string value, and is then validated
    against KontraConfig. An empty file is treated as an empty mapping.

    Args:
        path: Path to config.yml

    Returns:
        Parsed KontraConfig

    Raises:
        ConfigParseError: If the file cannot be read or decoded, or the YAML is invalid
        ConfigValidationError: If structure is invalid
    """
    from kontra.errors import ConfigParseError, ConfigValidationError

    try:
        content = path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError is a ValueError, not an OSError: without it a
        # non-UTF-8 file would escape as a raw decoding error.
        raise ConfigParseError(str(path), f"Cannot read file: {e}") from e

    # Parse YAML
    try:
        raw = yaml.safe_load(content)
    except yaml.YAMLError as e:
        raise ConfigParseError(str(path), f"Invalid YAML: {e}") from e

    # An empty (or comment-only) document parses to None.
    if raw is None:
        raw = {}

    # Substitute environment variables. This runs on the parsed structure, so
    # only values present in the file are expanded, not model defaults.
    raw = substitute_env_vars_recursive(raw)

    # Validate with Pydantic
    try:
        return KontraConfig.model_validate(raw)
    except Exception as e:
        raise ConfigValidationError([str(e)], str(path)) from e
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
# --- Config overlay helpers ---
|
|
345
|
+
|
|
346
|
+
# Options that carry the same name in every layer
# (DefaultsConfig, EnvironmentConfig, CLI overrides, EffectiveConfig).
_CORE_OVERLAY_FIELDS = [
    "preplan", "pushdown", "projection",
    "output_format", "stats",
    "state_backend",
    "csv_mode",
]

# Scout options: CLI override name -> EffectiveConfig attribute name.
_CLI_FIELD_MAPPINGS = {
    "preset": "scout_preset",
    "save_profile": "scout_save_profile",
    "list_values_threshold": "scout_list_values_threshold",
    "top_n": "scout_top_n",
    "include_patterns": "scout_include_patterns",
}
|
|
365
|
+
|
|
366
|
+
|
|
367
|
+
def _apply_optional_overrides(
    effective: "EffectiveConfig",
    source: Any,
    fields: List[str],
) -> None:
    """
    Copy every set (non-None) attribute from ``source`` onto ``effective``.

    An attribute that is missing on ``source`` is treated like None and
    skipped. Falsy values other than None (0, "", False) are copied.

    Args:
        effective: Target EffectiveConfig, updated in place
        source: Object exposing attributes with the same names
        fields: Names of the attributes to copy
    """
    for name in fields:
        candidate = getattr(source, name, None)
        if candidate is None:
            continue
        setattr(effective, name, candidate)
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
def _apply_cli_overrides(
    effective: "EffectiveConfig",
    cli_overrides: Dict[str, Any],
    core_fields: List[str],
    field_mappings: Dict[str, str],
) -> None:
    """
    Overlay CLI-supplied values onto the effective config.

    A key that is absent from ``cli_overrides`` or mapped to None leaves the
    corresponding attribute untouched.

    Args:
        effective: Target EffectiveConfig, updated in place
        cli_overrides: Dict of CLI argument values
        core_fields: Names used identically on the CLI and on ``effective``
        field_mappings: CLI name -> attribute name on ``effective``
    """
    # Same-named options first ...
    for name in core_fields:
        chosen = cli_overrides.get(name)
        if chosen is not None:
            setattr(effective, name, chosen)

    # ... then the options stored under a different attribute name.
    for cli_name, attr_name in field_mappings.items():
        chosen = cli_overrides.get(cli_name)
        if chosen is not None:
            setattr(effective, attr_name, chosen)
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
# --- End config overlay helpers ---
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
def resolve_effective_config(
    env_name: Optional[str] = None,
    cli_overrides: Optional[Dict[str, Any]] = None,
    config_path: Optional[Path] = None,
) -> EffectiveConfig:
    """
    Merge all configuration sources into one EffectiveConfig.

    Layers are applied from lowest to highest precedence, so a later layer
    overwrites an earlier one:
        1. Hardcoded defaults
        2. Config file defaults
        3. Environment-specific config (if an environment is given)
        4. CLI overrides (explicit flags)

    A config file that fails to load is reported with a UserWarning and
    otherwise ignored.

    Args:
        env_name: Environment to activate (e.g., "production")
        cli_overrides: Values explicitly set on CLI (not Typer defaults)
        config_path: Explicit config file path (default: auto-discover)

    Returns:
        EffectiveConfig with resolved values

    Raises:
        UnknownEnvironmentError: If a config file was loaded but does not
            define ``env_name``
    """
    import warnings

    from kontra.errors import UnknownEnvironmentError

    cli_overrides = cli_overrides or {}

    # Hardcoded defaults are the dataclass defaults.
    effective = EffectiveConfig()

    if config_path is None:
        config_path = find_config_file()

    file_config: Optional[KontraConfig] = None
    if config_path and config_path.exists():
        try:
            file_config = load_config_file(config_path)
            effective.config_file_path = config_path
        except Exception as e:
            # Fail-safe: continue with defaults if config is broken
            # Always warn when config fails to load (BUG-011)
            warnings.warn(
                f"Config file '{config_path}' failed to load: {e}. Using defaults.",
                UserWarning,
                stacklevel=2,
            )
            if os.getenv("KONTRA_VERBOSE"):
                import traceback
                traceback.print_exc()

    # Config file layer
    if file_config:
        file_defaults = file_config.defaults
        for name in _CORE_OVERLAY_FIELDS:
            setattr(effective, name, getattr(file_defaults, name))

        scout = file_config.scout
        effective.scout_preset = scout.preset
        effective.scout_save_profile = scout.save_profile
        effective.scout_list_values_threshold = scout.list_values_threshold
        effective.scout_top_n = scout.top_n
        effective.scout_include_patterns = scout.include_patterns

        # LLM juice: severity weights (user-defined, Kontra carries but never acts)
        effective.severity_weights = file_config.severity_weights

    # Environment layer
    if env_name:
        effective.environment = env_name

        if not file_config:
            # No config file, warn about ignored --env (BUG-012)
            warnings.warn(
                f"Environment '{env_name}' specified but no config file found. "
                "Create .kontra/config.yml with environments section.",
                UserWarning,
                stacklevel=2,
            )
        elif env_name in file_config.environments:
            _apply_optional_overrides(
                effective, file_config.environments[env_name], _CORE_OVERLAY_FIELDS
            )
        else:
            # Environment specified but not defined in the file
            raise UnknownEnvironmentError(env_name, list(file_config.environments.keys()))

    # CLI layer (core fields + scout fields with mappings)
    _apply_cli_overrides(effective, cli_overrides, _CORE_OVERLAY_FIELDS, _CLI_FIELD_MAPPINGS)

    return effective
|
|
512
|
+
|
|
513
|
+
|
|
514
|
+
# =============================================================================
|
|
515
|
+
# Datasource Resolution
|
|
516
|
+
# =============================================================================
|
|
517
|
+
|
|
518
|
+
|
|
519
|
+
def _declared_tables(ds_data: Any) -> Dict[str, Any]:
    """Return the raw ``tables`` mapping of a datasource entry, or ``{}`` if it has none."""
    if not isinstance(ds_data, dict):
        return {}
    tables = ds_data.get("tables")
    # `tables:` with no body parses to None; treat that as "no tables".
    return tables if isinstance(tables, dict) else {}


def _find_datasource_for_table(table_name: str, config: KontraConfig) -> str:
    """Return the name of the single datasource declaring ``table_name``, else raise ValueError."""
    owners = [
        ds_key
        for ds_key, ds_data in config.datasources.items()
        if table_name in _declared_tables(ds_data)
    ]
    if len(owners) == 1:
        return owners[0]

    if owners:
        matches_str = ", ".join(f"{m}.{table_name}" for m in owners)
        raise ValueError(
            f"Ambiguous table '{table_name}' found in multiple datasources: {matches_str}. "
            f"Use explicit 'datasource.table' format."
        )

    # No match: list everything that is available to help the user.
    all_tables = [
        f"{ds_key}.{t}"
        for ds_key, ds_data in config.datasources.items()
        for t in _declared_tables(ds_data)
    ]
    tables_str = ", ".join(all_tables) if all_tables else "(none)"
    raise ValueError(
        f"Unknown table: '{table_name}'. "
        f"Available tables: {tables_str}"
    )


def _build_datasource_uri(ds: DatasourceConfig, ds_name: str, table_ref: str) -> str:
    """Build the full URI (or local path) for ``table_ref`` from a validated datasource."""
    if isinstance(ds, PostgresDatasourceConfig):
        # postgres://user:pass@host:port/database/schema.table
        if ds.user and ds.password:
            auth = f"{ds.user}:{ds.password}@"
        elif ds.user:
            auth = f"{ds.user}@"
        else:
            auth = ""
        return f"postgres://{auth}{ds.host}:{ds.port}/{ds.database}/{table_ref}"

    if isinstance(ds, S3DatasourceConfig):
        # s3://bucket/prefix/key
        prefix = ds.prefix.rstrip("/")
        if prefix:
            return f"s3://{ds.bucket}/{prefix}/{table_ref}"
        return f"s3://{ds.bucket}/{table_ref}"

    if isinstance(ds, FilesDatasourceConfig):
        # Local file path
        return str(Path(ds.base_path) / table_ref)

    # NOTE(review): MSSQLDatasourceConfig has no branch here, so named
    # references to "mssql" datasources end up in this error - confirm
    # whether that is intentional.
    raise ValueError(f"Unknown datasource type for '{ds_name}'")


def resolve_datasource(
    reference: str,
    config: Optional[KontraConfig] = None,
) -> str:
    """
    Resolve a datasource reference to a full URI.

    Supports both:
    - Named references: "prod_db.users" -> "postgres://user:pass@host/db/public.users"
    - Direct URIs: "postgres://..." -> returned as-is

    Anything containing "/" or ending in ".parquet"/".csv" is treated as a
    URI or file path and returned unchanged. A reference without a "." is a
    bare table name and is looked up across all configured datasources.

    Args:
        reference: "datasource_name.table_name", a bare table name, or a direct URI/path
        config: KontraConfig with datasources (auto-loaded if None)

    Returns:
        Full URI string

    Raises:
        ValueError: If no config is available, or the datasource or table is
            not found, ambiguous, or of a type that cannot be resolved
    """
    # "/" covers scheme URIs ("://"), absolute paths and relative paths alike.
    if "/" in reference or reference.endswith((".parquet", ".csv")):
        return reference

    # Load config if not provided
    if config is None:
        config_path = find_config_file()
        if config_path:
            config = load_config_file(config_path)

    # Split on the first "." only: the table alias may itself contain dots.
    ds_name, dot, table_name = reference.partition(".")
    qualified = bool(dot)

    if config is None:
        missing = f"Datasource '{ds_name}'" if qualified else f"Table '{reference}'"
        raise ValueError(
            f"{missing} not found. "
            "No config file exists. Run 'kontra init' to create one."
        )

    if not qualified:
        # Just a table name - search all datasources
        table_name = reference
        ds_name = _find_datasource_for_table(table_name, config)

    # Get datasource
    ds = config.get_datasource(ds_name)
    if ds is None:
        available = list(config.datasources.keys())
        available_str = ", ".join(available) if available else "(none)"
        raise ValueError(
            f"Unknown datasource: '{ds_name}'. "
            f"Available datasources: {available_str}"
        )

    # Resolve table reference
    if table_name not in ds.tables:
        available_tables = list(ds.tables.keys())
        tables_str = ", ".join(available_tables) if available_tables else "(none)"
        raise ValueError(
            f"Unknown table '{table_name}' in datasource '{ds_name}'. "
            f"Available tables: {tables_str}"
        )

    return _build_datasource_uri(ds, ds_name, ds.tables[table_name])
|
|
662
|
+
|
|
663
|
+
|
|
664
|
+
def list_datasources(config: Optional[KontraConfig] = None) -> Dict[str, List[str]]:
    """
    Enumerate every configured datasource alongside its table names.

    Args:
        config: Configuration to inspect. When None, a config file is looked
            up with find_config_file() and loaded with load_config_file().

    Returns:
        Dict mapping each datasource name to the list of table names declared
        under it. An empty dict when no config is passed and no config file
        is found.
    """
    if config is None:
        located = find_config_file()
        if not located:
            return {}
        config = load_config_file(located)

    # Pair each name with its resolved datasource; falsy lookups are dropped.
    resolved = ((name, config.get_datasource(name)) for name in config.datasources)
    return {name: list(source.tables.keys()) for name, source in resolved if source}
|
|
685
|
+
|
|
686
|
+
|
|
687
|
+
# =============================================================================
|
|
688
|
+
# Config Template
|
|
689
|
+
# =============================================================================
|
|
690
|
+
|
|
691
|
+
DEFAULT_CONFIG_TEMPLATE = '''# Kontra Configuration
|
|
692
|
+
# Generated by: kontra init
|
|
693
|
+
# Documentation: https://github.com/kontra-data/kontra
|
|
694
|
+
#
|
|
695
|
+
# CLI flags always take precedence over these settings.
|
|
696
|
+
# Environment variable substitution: ${VAR_NAME}
|
|
697
|
+
|
|
698
|
+
version: "1"
|
|
699
|
+
|
|
700
|
+
# ─────────────────────────────────────────────────────────────
|
|
701
|
+
# Default Settings
|
|
702
|
+
# ─────────────────────────────────────────────────────────────
|
|
703
|
+
|
|
704
|
+
defaults:
|
|
705
|
+
# Execution controls
|
|
706
|
+
preplan: "auto" # on | off | auto - Parquet metadata preflight
|
|
707
|
+
pushdown: "auto" # on | off | auto - SQL pushdown to DuckDB
|
|
708
|
+
projection: "on" # on | off - Column pruning at source
|
|
709
|
+
|
|
710
|
+
# Output
|
|
711
|
+
output_format: "rich" # rich | json - Output format
|
|
712
|
+
stats: "none" # none | summary | profile - Statistics detail
|
|
713
|
+
|
|
714
|
+
# State management
|
|
715
|
+
state_backend: "local" # local | s3://bucket/prefix | postgres://...
|
|
716
|
+
|
|
717
|
+
# CSV handling
|
|
718
|
+
csv_mode: "auto" # auto | duckdb | parquet
|
|
719
|
+
|
|
720
|
+
# ─────────────────────────────────────────────────────────────
|
|
721
|
+
# Profile Settings
|
|
722
|
+
# ─────────────────────────────────────────────────────────────
|
|
723
|
+
|
|
724
|
+
profile:
|
|
725
|
+
preset: "scan" # scout | scan | interrogate
|
|
726
|
+
save_profile: false # Save profile to state storage
|
|
727
|
+
# list_values_threshold: 10 # List all values if distinct <= N
|
|
728
|
+
# top_n: 5 # Show top N frequent values
|
|
729
|
+
# include_patterns: false # Detect patterns (email, uuid, etc.)
|
|
730
|
+
|
|
731
|
+
# ─────────────────────────────────────────────────────────────
|
|
732
|
+
# Datasources
|
|
733
|
+
# ─────────────────────────────────────────────────────────────
|
|
734
|
+
# Named data sources referenced as: datasource_name.table_name
|
|
735
|
+
# Credentials stay in config, contracts stay clean and portable.
|
|
736
|
+
#
|
|
737
|
+
# Usage:
|
|
738
|
+
# kontra validate contract.yml --data prod_db.users
|
|
739
|
+
# kontra profile prod_db.orders
|
|
740
|
+
#
|
|
741
|
+
# Or in contract YAML:
|
|
742
|
+
# dataset: prod_db.users
|
|
743
|
+
|
|
744
|
+
datasources: {}
|
|
745
|
+
# PostgreSQL example:
|
|
746
|
+
# prod_db:
|
|
747
|
+
# type: postgres
|
|
748
|
+
# host: ${PGHOST}
|
|
749
|
+
# port: 5432
|
|
750
|
+
# user: ${PGUSER}
|
|
751
|
+
# password: ${PGPASSWORD}
|
|
752
|
+
# database: ${PGDATABASE}
|
|
753
|
+
# tables:
|
|
754
|
+
# users: public.users
|
|
755
|
+
# orders: public.orders
|
|
756
|
+
|
|
757
|
+
# Local files example:
|
|
758
|
+
# local_data:
|
|
759
|
+
# type: files
|
|
760
|
+
# base_path: ./data
|
|
761
|
+
# tables:
|
|
762
|
+
# users: users.parquet
|
|
763
|
+
# orders: orders.csv
|
|
764
|
+
|
|
765
|
+
# S3 example:
|
|
766
|
+
# data_lake:
|
|
767
|
+
# type: s3
|
|
768
|
+
# bucket: ${S3_BUCKET}
|
|
769
|
+
# prefix: warehouse/
|
|
770
|
+
# tables:
|
|
771
|
+
# events: events.parquet
|
|
772
|
+
# metrics: metrics.parquet
|
|
773
|
+
|
|
774
|
+
# ─────────────────────────────────────────────────────────────
|
|
775
|
+
# Environments
|
|
776
|
+
# ─────────────────────────────────────────────────────────────
|
|
777
|
+
# Named configurations activated with --env <name>
|
|
778
|
+
# Only specified fields override defaults.
|
|
779
|
+
|
|
780
|
+
environments: {}
|
|
781
|
+
# Example: Production environment
|
|
782
|
+
# production:
|
|
783
|
+
# state_backend: postgres://${PGHOST}/${PGDATABASE}
|
|
784
|
+
# preplan: "on"
|
|
785
|
+
# pushdown: "on"
|
|
786
|
+
# output_format: "json"
|
|
787
|
+
|
|
788
|
+
# Example: Staging environment
|
|
789
|
+
# staging:
|
|
790
|
+
# state_backend: s3://${S3_BUCKET}/kontra-state/
|
|
791
|
+
# stats: "summary"
|
|
792
|
+
|
|
793
|
+
# Example: Local development
|
|
794
|
+
# local:
|
|
795
|
+
# state_backend: "local"
|
|
796
|
+
# stats: "profile"
|
|
797
|
+
'''
|