kontra 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. kontra/__init__.py +1871 -0
  2. kontra/api/__init__.py +22 -0
  3. kontra/api/compare.py +340 -0
  4. kontra/api/decorators.py +153 -0
  5. kontra/api/results.py +2121 -0
  6. kontra/api/rules.py +681 -0
  7. kontra/cli/__init__.py +0 -0
  8. kontra/cli/commands/__init__.py +1 -0
  9. kontra/cli/commands/config.py +153 -0
  10. kontra/cli/commands/diff.py +450 -0
  11. kontra/cli/commands/history.py +196 -0
  12. kontra/cli/commands/profile.py +289 -0
  13. kontra/cli/commands/validate.py +468 -0
  14. kontra/cli/constants.py +6 -0
  15. kontra/cli/main.py +48 -0
  16. kontra/cli/renderers.py +304 -0
  17. kontra/cli/utils.py +28 -0
  18. kontra/config/__init__.py +34 -0
  19. kontra/config/loader.py +127 -0
  20. kontra/config/models.py +49 -0
  21. kontra/config/settings.py +797 -0
  22. kontra/connectors/__init__.py +0 -0
  23. kontra/connectors/db_utils.py +251 -0
  24. kontra/connectors/detection.py +323 -0
  25. kontra/connectors/handle.py +368 -0
  26. kontra/connectors/postgres.py +127 -0
  27. kontra/connectors/sqlserver.py +226 -0
  28. kontra/engine/__init__.py +0 -0
  29. kontra/engine/backends/duckdb_session.py +227 -0
  30. kontra/engine/backends/duckdb_utils.py +18 -0
  31. kontra/engine/backends/polars_backend.py +47 -0
  32. kontra/engine/engine.py +1205 -0
  33. kontra/engine/executors/__init__.py +15 -0
  34. kontra/engine/executors/base.py +50 -0
  35. kontra/engine/executors/database_base.py +528 -0
  36. kontra/engine/executors/duckdb_sql.py +607 -0
  37. kontra/engine/executors/postgres_sql.py +162 -0
  38. kontra/engine/executors/registry.py +69 -0
  39. kontra/engine/executors/sqlserver_sql.py +163 -0
  40. kontra/engine/materializers/__init__.py +14 -0
  41. kontra/engine/materializers/base.py +42 -0
  42. kontra/engine/materializers/duckdb.py +110 -0
  43. kontra/engine/materializers/factory.py +22 -0
  44. kontra/engine/materializers/polars_connector.py +131 -0
  45. kontra/engine/materializers/postgres.py +157 -0
  46. kontra/engine/materializers/registry.py +138 -0
  47. kontra/engine/materializers/sqlserver.py +160 -0
  48. kontra/engine/result.py +15 -0
  49. kontra/engine/sql_utils.py +611 -0
  50. kontra/engine/sql_validator.py +609 -0
  51. kontra/engine/stats.py +194 -0
  52. kontra/engine/types.py +138 -0
  53. kontra/errors.py +533 -0
  54. kontra/logging.py +85 -0
  55. kontra/preplan/__init__.py +5 -0
  56. kontra/preplan/planner.py +253 -0
  57. kontra/preplan/postgres.py +179 -0
  58. kontra/preplan/sqlserver.py +191 -0
  59. kontra/preplan/types.py +24 -0
  60. kontra/probes/__init__.py +20 -0
  61. kontra/probes/compare.py +400 -0
  62. kontra/probes/relationship.py +283 -0
  63. kontra/reporters/__init__.py +0 -0
  64. kontra/reporters/json_reporter.py +190 -0
  65. kontra/reporters/rich_reporter.py +11 -0
  66. kontra/rules/__init__.py +35 -0
  67. kontra/rules/base.py +186 -0
  68. kontra/rules/builtin/__init__.py +40 -0
  69. kontra/rules/builtin/allowed_values.py +156 -0
  70. kontra/rules/builtin/compare.py +188 -0
  71. kontra/rules/builtin/conditional_not_null.py +213 -0
  72. kontra/rules/builtin/conditional_range.py +310 -0
  73. kontra/rules/builtin/contains.py +138 -0
  74. kontra/rules/builtin/custom_sql_check.py +182 -0
  75. kontra/rules/builtin/disallowed_values.py +140 -0
  76. kontra/rules/builtin/dtype.py +203 -0
  77. kontra/rules/builtin/ends_with.py +129 -0
  78. kontra/rules/builtin/freshness.py +240 -0
  79. kontra/rules/builtin/length.py +193 -0
  80. kontra/rules/builtin/max_rows.py +35 -0
  81. kontra/rules/builtin/min_rows.py +46 -0
  82. kontra/rules/builtin/not_null.py +121 -0
  83. kontra/rules/builtin/range.py +222 -0
  84. kontra/rules/builtin/regex.py +143 -0
  85. kontra/rules/builtin/starts_with.py +129 -0
  86. kontra/rules/builtin/unique.py +124 -0
  87. kontra/rules/condition_parser.py +203 -0
  88. kontra/rules/execution_plan.py +455 -0
  89. kontra/rules/factory.py +103 -0
  90. kontra/rules/predicates.py +25 -0
  91. kontra/rules/registry.py +24 -0
  92. kontra/rules/static_predicates.py +120 -0
  93. kontra/scout/__init__.py +9 -0
  94. kontra/scout/backends/__init__.py +17 -0
  95. kontra/scout/backends/base.py +111 -0
  96. kontra/scout/backends/duckdb_backend.py +359 -0
  97. kontra/scout/backends/postgres_backend.py +519 -0
  98. kontra/scout/backends/sqlserver_backend.py +577 -0
  99. kontra/scout/dtype_mapping.py +150 -0
  100. kontra/scout/patterns.py +69 -0
  101. kontra/scout/profiler.py +801 -0
  102. kontra/scout/reporters/__init__.py +39 -0
  103. kontra/scout/reporters/json_reporter.py +165 -0
  104. kontra/scout/reporters/markdown_reporter.py +152 -0
  105. kontra/scout/reporters/rich_reporter.py +144 -0
  106. kontra/scout/store.py +208 -0
  107. kontra/scout/suggest.py +200 -0
  108. kontra/scout/types.py +652 -0
  109. kontra/state/__init__.py +29 -0
  110. kontra/state/backends/__init__.py +79 -0
  111. kontra/state/backends/base.py +348 -0
  112. kontra/state/backends/local.py +480 -0
  113. kontra/state/backends/postgres.py +1010 -0
  114. kontra/state/backends/s3.py +543 -0
  115. kontra/state/backends/sqlserver.py +969 -0
  116. kontra/state/fingerprint.py +166 -0
  117. kontra/state/types.py +1061 -0
  118. kontra/version.py +1 -0
  119. kontra-0.5.2.dist-info/METADATA +122 -0
  120. kontra-0.5.2.dist-info/RECORD +124 -0
  121. kontra-0.5.2.dist-info/WHEEL +5 -0
  122. kontra-0.5.2.dist-info/entry_points.txt +2 -0
  123. kontra-0.5.2.dist-info/licenses/LICENSE +17 -0
  124. kontra-0.5.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,797 @@
1
+ # src/kontra/config/settings.py
2
+ """
3
+ Kontra configuration file system.
4
+
5
+ Loads project-level config from .kontra/config.yml with:
6
+ - Environment variable substitution (${VAR} syntax)
7
+ - Named environments (--env production)
8
+ - Precedence: CLI > env vars > config file > defaults
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import os
14
+ import re
15
+ from dataclasses import dataclass, field
16
+ from pathlib import Path
17
+ from typing import Any, Dict, List, Literal, Optional, Union
18
+
19
+ import yaml
20
+ from pydantic import BaseModel, Field, field_validator
21
+
22
+
23
+ # =============================================================================
24
+ # Environment Variable Substitution
25
+ # =============================================================================
26
+
27
# Matches ``${NAME}`` and captures NAME (any run of characters except ``}``).
ENV_VAR_PATTERN = re.compile(r"\$\{([^}]+)\}")


def substitute_env_vars(value: str) -> str:
    """
    Expand every ``${VAR}`` placeholder in *value* from the process environment.

    Args:
        value: Text that may contain ``${VAR}`` placeholders.

    Returns:
        The text with each placeholder replaced by the variable's value.
        A variable that is not set expands to the empty string.
    """
    return ENV_VAR_PATTERN.sub(
        lambda found: os.environ.get(found.group(1), ""),
        value,
    )
45
+
46
+
47
def substitute_env_vars_recursive(obj: Any) -> Any:
    """
    Walk a nested structure and expand ``${VAR}`` in every string value.

    Dicts and lists are rebuilt (dict keys are left as they are); strings are
    passed through ``substitute_env_vars``; any other object is returned
    unchanged.

    Args:
        obj: Any Python object (dict, list, str, etc.)

    Returns:
        A structure of the same shape with env vars substituted in strings.
    """
    if isinstance(obj, dict):
        return {
            key: substitute_env_vars_recursive(val) for key, val in obj.items()
        }
    if isinstance(obj, list):
        return list(map(substitute_env_vars_recursive, obj))
    if isinstance(obj, str):
        return substitute_env_vars(obj)
    return obj
64
+
65
+
66
+ # =============================================================================
67
+ # Pydantic Models
68
+ # =============================================================================
69
+
70
+ # =============================================================================
71
+ # Datasource Models
72
+ # =============================================================================
73
+
74
+
75
class PostgresDatasourceConfig(BaseModel):
    """
    PostgreSQL datasource configuration.

    Connection fields omitted from the config fall back to the standard
    ``PG*`` environment variables (empty string when the variable is unset).

    The fallback is read from the environment directly: ``${VAR}`` expansion
    is applied to the raw YAML before validation, so a ``"${PGHOST}"`` default
    on the model itself would never be expanded and would reach the
    connection URI verbatim.
    """

    type: Literal["postgres"] = "postgres"
    host: str = Field(default_factory=lambda: os.environ.get("PGHOST", ""))
    port: int = 5432
    user: str = Field(default_factory=lambda: os.environ.get("PGUSER", ""))
    password: str = Field(default_factory=lambda: os.environ.get("PGPASSWORD", ""))
    database: str = Field(default_factory=lambda: os.environ.get("PGDATABASE", ""))
    # Tables: map alias -> schema.table
    tables: Dict[str, str] = Field(default_factory=dict)
86
+
87
+
88
class FilesDatasourceConfig(BaseModel):
    """
    File-based datasource configuration (Parquet, CSV).

    ``path`` and ``datasets`` are accepted as alternative spellings of
    ``base_path`` and ``tables``. After validation they are folded into the
    canonical fields, so consumers only need to read ``base_path`` and
    ``tables``. When both spellings are given, the canonical one wins.
    """

    type: Literal["files", "file"] = "files"
    base_path: str = "./"
    path: str = ""  # Alias for base_path
    # Tables: map alias -> relative path
    tables: Dict[str, str] = Field(default_factory=dict)
    datasets: Dict[str, str] = Field(default_factory=dict)  # Alias for tables

    def model_post_init(self, __context: Any) -> None:
        """Fold the alias fields into their canonical counterparts."""
        # Only adopt `path` when `base_path` was not explicitly provided.
        if self.path and "base_path" not in self.model_fields_set:
            self.base_path = self.path
        # Entries declared under `tables` take precedence over `datasets`.
        for alias, location in self.datasets.items():
            self.tables.setdefault(alias, location)
97
+
98
+
99
class S3DatasourceConfig(BaseModel):
    """
    S3 datasource configuration.

    ``bucket`` is the only required field. ``prefix`` is an optional key
    prefix placed between the bucket and each table's key.
    """

    type: Literal["s3"] = "s3"
    bucket: str
    prefix: str = ""
    # Tables: map alias -> relative key
    tables: Dict[str, str] = Field(default_factory=dict)
107
+
108
+
109
class MSSQLDatasourceConfig(BaseModel):
    """
    SQL Server datasource configuration.

    Every field has a literal default (localhost:1433, user ``sa``, empty
    password and database), so an entry only needs to set what differs.
    """

    type: Literal["mssql"] = "mssql"
    host: str = "localhost"
    port: int = 1433
    user: str = "sa"
    password: str = ""
    database: str = ""
    # Tables: map alias -> schema.table
    tables: Dict[str, str] = Field(default_factory=dict)
120
+
121
+
122
# Union type for datasource configs.
# Spelled with typing.Union because this is a runtime expression (not an
# annotation), so `A | B` on classes would raise TypeError before Python 3.10.
DatasourceConfig = Union[
    PostgresDatasourceConfig,
    FilesDatasourceConfig,
    S3DatasourceConfig,
    MSSQLDatasourceConfig,
]
124
+
125
+
126
class DefaultsConfig(BaseModel):
    """
    Default values for CLI options.

    Populated from the ``defaults:`` section of the config file; each field
    falls back to the literal default below when the key is absent.
    """

    # Execution controls
    preplan: Literal["on", "off", "auto"] = "auto"
    pushdown: Literal["on", "off", "auto"] = "auto"
    projection: Literal["on", "off"] = "on"
    # Output
    output_format: Literal["rich", "json"] = "rich"
    stats: Literal["none", "summary", "profile"] = "none"
    # Free-form string (not a Literal): the config template shows "local",
    # an s3:// URI or a postgres:// URI as possible values.
    state_backend: str = "local"
    csv_mode: Literal["auto", "duckdb", "parquet"] = "auto"
136
+
137
+
138
class ScoutConfig(BaseModel):
    """
    Profile-specific settings (also known as Scout internally).

    Read from the ``profile:`` (or ``scout:``) section of the config file.
    """

    # Accept both new (scout/scan/interrogate) and old (lite/standard/deep) preset names
    preset: Literal["scout", "scan", "interrogate", "lite", "standard", "deep", "llm"] = "scan"
    save_profile: bool = False
    # None means "not configured"; the value is carried through unchanged.
    list_values_threshold: Optional[int] = None
    top_n: Optional[int] = None
    include_patterns: bool = False
147
+
148
+
149
class EnvironmentConfig(BaseModel):
    """
    Environment-specific overrides.

    All fields are optional - only specified fields override defaults.
    The field names mirror DefaultsConfig; a value of None means
    "do not override".
    """

    preplan: Optional[Literal["on", "off", "auto"]] = None
    pushdown: Optional[Literal["on", "off", "auto"]] = None
    projection: Optional[Literal["on", "off"]] = None
    output_format: Optional[Literal["rich", "json"]] = None
    stats: Optional[Literal["none", "summary", "profile"]] = None
    state_backend: Optional[str] = None
    csv_mode: Optional[Literal["auto", "duckdb", "parquet"]] = None
163
+
164
+
165
class KontraConfig(BaseModel):
    """
    Root configuration model for .kontra/config.yml

    Datasource entries are stored as raw mappings and only validated into a
    typed model on demand by ``get_datasource``.
    """

    version: str = "1"
    defaults: DefaultsConfig = Field(default_factory=DefaultsConfig)
    # Accept both "profile" and "scout" as the config key (profile is preferred)
    scout: ScoutConfig = Field(default_factory=ScoutConfig, alias="profile")
    datasources: Dict[str, Any] = Field(default_factory=dict)  # Flexible for different types
    environments: Dict[str, EnvironmentConfig] = Field(default_factory=dict)

    model_config = {"populate_by_name": True}  # Allow both 'scout' and 'profile'

    # LLM juice: user-defined severity weights (Kontra carries but never acts on these)
    severity_weights: Optional[Dict[str, float]] = Field(
        default=None,
        description="User-defined numeric weights for severity levels. Kontra carries these but never uses them internally."
    )

    @field_validator("version")
    @classmethod
    def validate_version(cls, v: str) -> str:
        """Reject any config schema version other than "1"."""
        if v != "1":
            raise ValueError(f"Unsupported config version: {v}. Expected '1'.")
        return v

    def get_datasource(self, name: str) -> Optional[DatasourceConfig]:
        """
        Get a datasource config by name.

        The entry's ``type`` key (default: "files") selects the model used to
        validate it.

        Args:
            name: Key of the datasource in the ``datasources`` section.

        Returns:
            The validated datasource model, or None if *name* is not configured.

        Raises:
            ValueError: If the entry is not a mapping, declares an unsupported
                ``type``, or fails validation against its model.
        """
        if name not in self.datasources:
            return None

        ds_data = self.datasources[name]
        if not isinstance(ds_data, dict):
            # e.g. a key with no body in YAML parses to None
            raise ValueError(
                f"Datasource '{name}' must be a mapping, "
                f"got {type(ds_data).__name__}."
            )

        models = {
            "postgres": PostgresDatasourceConfig,
            "mssql": MSSQLDatasourceConfig,
            "s3": S3DatasourceConfig,
            "files": FilesDatasourceConfig,
            "file": FilesDatasourceConfig,
        }
        ds_type = ds_data.get("type", "files")
        model = models.get(ds_type) if isinstance(ds_type, str) else None
        if model is None:
            # Validating an unknown type against the files model can never
            # succeed (its `type` field is a Literal), so fail with a clear
            # message instead of an opaque validation error.
            supported = ", ".join(sorted(models))
            raise ValueError(
                f"Unknown datasource type {ds_type!r} for datasource '{name}'. "
                f"Supported types: {supported}"
            )

        return model.model_validate(ds_data)
215
+
216
+
217
+ # =============================================================================
218
+ # Effective Config (resolved values)
219
+ # =============================================================================
220
+
221
@dataclass
class EffectiveConfig:
    """
    The configuration after every source has been merged.

    Instances start out with the hardcoded defaults below and are then
    overwritten layer by layer; CLI commands read the final values from here.
    """

    # --- Execution controls ---
    preplan: str = "auto"
    pushdown: str = "auto"
    projection: str = "on"

    # --- Output ---
    output_format: str = "rich"
    stats: str = "none"

    # --- State ---
    state_backend: str = "local"

    # --- CSV ---
    csv_mode: str = "auto"

    # --- Scout (profile) ---
    # NOTE(review): this default is the legacy name "standard" while
    # ScoutConfig.preset defaults to "scan" - confirm they are meant to differ.
    scout_preset: str = "standard"
    scout_save_profile: bool = False
    scout_list_values_threshold: Optional[int] = None
    scout_top_n: Optional[int] = None
    scout_include_patterns: bool = False

    # --- Metadata ---
    config_file_path: Optional[Path] = None
    environment: Optional[str] = None

    # LLM juice: user-defined severity weights (None if unconfigured)
    severity_weights: Optional[Dict[str, float]] = None

    def to_dict(self) -> Dict[str, Any]:
        """
        Return a display-oriented dictionary of the resolved settings.

        Scout settings are nested under "scout" without their ``scout_``
        prefix. The metadata fields are not included, and
        "severity_weights" appears only when it has been configured.
        """
        top_level = (
            "preplan",
            "pushdown",
            "projection",
            "output_format",
            "stats",
            "state_backend",
            "csv_mode",
        )
        scout_keys = (
            "preset",
            "save_profile",
            "list_values_threshold",
            "top_n",
            "include_patterns",
        )

        rendered: Dict[str, Any] = {key: getattr(self, key) for key in top_level}
        rendered["scout"] = {
            key: getattr(self, f"scout_{key}") for key in scout_keys
        }
        if self.severity_weights is not None:
            rendered["severity_weights"] = self.severity_weights
        return rendered
279
+
280
+
281
+ # =============================================================================
282
+ # Config Loading
283
+ # =============================================================================
284
+
285
def find_config_file(start_path: Optional[Path] = None) -> Optional[Path]:
    """
    Look for ``.kontra/config.yml`` directly inside a directory.

    Only the given directory is checked; parent directories are not searched.

    Args:
        start_path: Directory to look in (default: current working directory)

    Returns:
        Path to the config file when it exists, otherwise None
    """
    root = start_path if start_path else Path.cwd()
    candidate = root / ".kontra" / "config.yml"
    return candidate if candidate.exists() else None
302
+
303
+
304
def load_config_file(path: Path) -> KontraConfig:
    """
    Load and parse a config file.

    The file is read as UTF-8, parsed with ``yaml.safe_load``, has ``${VAR}``
    placeholders expanded in all string values, and is then validated against
    ``KontraConfig``. An empty file is treated as an empty mapping.

    Args:
        path: Path to config.yml

    Returns:
        Parsed KontraConfig

    Raises:
        ConfigParseError: If the file cannot be read or the YAML is invalid
        ConfigValidationError: If structure is invalid
    """
    from kontra.errors import ConfigParseError, ConfigValidationError

    try:
        content = path.read_text(encoding="utf-8")
    except OSError as e:
        raise ConfigParseError(str(path), f"Cannot read file: {e}") from e

    # Parse YAML
    try:
        raw = yaml.safe_load(content)
    except yaml.YAMLError as e:
        raise ConfigParseError(str(path), f"Invalid YAML: {e}") from e

    # An empty document parses to None
    if raw is None:
        raw = {}

    # Substitute environment variables (after parsing, so substituted values
    # cannot alter the YAML structure)
    raw = substitute_env_vars_recursive(raw)

    # Validate with Pydantic
    try:
        return KontraConfig.model_validate(raw)
    except Exception as e:
        # Keep the original error attached as the explicit cause.
        raise ConfigValidationError([str(e)], str(path)) from e
342
+
343
+
344
+ # --- Config overlay helpers ---
345
+
346
+ # Core validation fields (same name in all config layers)
347
_CORE_OVERLAY_FIELDS = [
    # execution controls
    "preplan", "pushdown", "projection",
    # output
    "output_format", "stats",
    # state / CSV
    "state_backend", "csv_mode",
]

# CLI override name -> EffectiveConfig attribute (scout fields gain a
# "scout_" prefix on the effective config).
_CLI_FIELD_MAPPINGS = {
    cli_name: f"scout_{cli_name}"
    for cli_name in (
        "preset",
        "save_profile",
        "list_values_threshold",
        "top_n",
        "include_patterns",
    )
}
365
+
366
+
367
def _apply_optional_overrides(
    effective: "EffectiveConfig",
    source: Any,
    fields: List[str],
) -> None:
    """
    Copy the listed attributes from *source* onto *effective*, skipping unset ones.

    An attribute that is missing on *source*, or whose value is None, leaves
    the corresponding attribute of *effective* untouched.

    Args:
        effective: Target EffectiveConfig to update (mutated in place)
        source: Source object with same-named attributes
        fields: Attribute names to copy
    """
    for name in fields:
        candidate = getattr(source, name, None)
        if candidate is None:
            continue
        setattr(effective, name, candidate)
384
+
385
+
386
def _apply_cli_overrides(
    effective: "EffectiveConfig",
    cli_overrides: Dict[str, Any],
    core_fields: List[str],
    field_mappings: Dict[str, str],
) -> None:
    """
    Write explicitly provided CLI values onto the effective config.

    A CLI key that is absent, or present with a None value, is ignored.

    Args:
        effective: Target EffectiveConfig to update (mutated in place)
        cli_overrides: Dict of CLI argument values
        core_fields: Names that are identical in the CLI dict and on the
            effective config
        field_mappings: CLI name -> effective config attribute name
    """
    # Same-name fields are applied first, then the renamed ones.
    targets = [(name, name) for name in core_fields]
    targets += list(field_mappings.items())

    for cli_name, attr_name in targets:
        supplied = cli_overrides.get(cli_name)
        if supplied is not None:
            setattr(effective, attr_name, supplied)
410
+
411
+
412
+ # --- End config overlay helpers ---
413
+
414
+
415
def resolve_effective_config(
    env_name: Optional[str] = None,
    cli_overrides: Optional[Dict[str, Any]] = None,
    config_path: Optional[Path] = None,
) -> EffectiveConfig:
    """
    Merge every configuration source into one EffectiveConfig.

    Layers are applied from lowest to highest precedence, so later layers win:
        1. Hardcoded defaults
        2. Config file defaults
        3. Environment-specific config (if an environment is given)
        4. CLI overrides (explicit flags)

    A config file that exists but fails to load is not fatal: a UserWarning
    is emitted and resolution continues with the hardcoded defaults.

    Args:
        env_name: Environment to activate (e.g., "production")
        cli_overrides: Values explicitly set on CLI (not Typer defaults)
        config_path: Explicit config file path (default: auto-discover)

    Returns:
        EffectiveConfig with resolved values

    Raises:
        UnknownEnvironmentError: If a config file was loaded but does not
            define *env_name*.
    """
    from kontra.errors import UnknownEnvironmentError

    cli_overrides = cli_overrides or {}

    # Hardcoded defaults are the base layer.
    effective = EffectiveConfig()

    # Auto-discover the config file unless one was given explicitly.
    if config_path is None:
        config_path = find_config_file()

    file_config: Optional[KontraConfig] = None
    if config_path and config_path.exists():
        try:
            file_config = load_config_file(config_path)
            effective.config_file_path = config_path
        except Exception as e:
            # Fail-safe: a broken config must not abort the run, but the user
            # is always told that it was ignored (BUG-011).
            import warnings
            warnings.warn(
                f"Config file '{config_path}' failed to load: {e}. Using defaults.",
                UserWarning,
                stacklevel=2,
            )
            if os.getenv("KONTRA_VERBOSE"):
                import traceback
                traceback.print_exc()

    # Layer 1: config file defaults
    if file_config:
        file_defaults = file_config.defaults
        for name in (
            "preplan",
            "pushdown",
            "projection",
            "output_format",
            "stats",
            "state_backend",
            "csv_mode",
        ):
            setattr(effective, name, getattr(file_defaults, name))

        # Scout settings live under a "scout_" prefix on the effective config.
        scout_section = file_config.scout
        for name in (
            "preset",
            "save_profile",
            "list_values_threshold",
            "top_n",
            "include_patterns",
        ):
            setattr(effective, f"scout_{name}", getattr(scout_section, name))

        # LLM juice: severity weights (user-defined, Kontra carries but never acts)
        effective.severity_weights = file_config.severity_weights

    # Layer 2: environment overlay
    if env_name:
        effective.environment = env_name

        if not file_config:
            # No config file, warn about ignored --env (BUG-012)
            import warnings
            warnings.warn(
                f"Environment '{env_name}' specified but no config file found. "
                "Create .kontra/config.yml with environments section.",
                UserWarning,
                stacklevel=2,
            )
        elif env_name in file_config.environments:
            _apply_optional_overrides(
                effective,
                file_config.environments[env_name],
                _CORE_OVERLAY_FIELDS,
            )
        else:
            # A config exists but does not define the requested environment.
            raise UnknownEnvironmentError(
                env_name, list(file_config.environments.keys())
            )

    # Layer 3: CLI overrides (core fields + scout fields with mappings)
    _apply_cli_overrides(
        effective, cli_overrides, _CORE_OVERLAY_FIELDS, _CLI_FIELD_MAPPINGS
    )

    return effective
512
+
513
+
514
+ # =============================================================================
515
+ # Datasource Resolution
516
+ # =============================================================================
517
+
518
+
519
def _declared_tables(ds_data: Any) -> Dict[str, Any]:
    """Return the raw ``tables`` mapping of a datasource entry, or {} if unusable."""
    if not isinstance(ds_data, dict):
        return {}
    tables = ds_data.get("tables")
    return tables if isinstance(tables, dict) else {}


def resolve_datasource(
    reference: str,
    config: Optional[KontraConfig] = None,
) -> str:
    """
    Resolve a datasource reference to a full URI.

    Supports:
    - Named references: "prod_db.users" -> "postgres://user:pass@host/db/public.users"
    - Bare table names: "users" -> looked up across all datasources (must be unique)
    - Direct URIs / file paths: "postgres://...", "data/x.parquet" -> returned as-is

    Args:
        reference: "datasource_name.table_name", a bare table name, or a
            direct URI / file path
        config: KontraConfig with datasources (auto-loaded if None)

    Returns:
        Full URI string

    Raises:
        ValueError: If no config is available, or the datasource or table is
            not found, or a bare table name is ambiguous
    """
    # Anything containing "/" (which includes every "scheme://" URI and every
    # absolute path) or ending in a known data-file extension is already a
    # concrete location.
    if "/" in reference or reference.endswith((".parquet", ".csv")):
        return reference

    # Load config if not provided
    if config is None:
        config_path = find_config_file()
        if config_path:
            config = load_config_file(config_path)

    if "." in reference:
        # Explicit datasource.table format
        ds_name, table_name = reference.split(".", 1)
    else:
        # Just a table name - search all datasources
        table_name = reference

        if config is None:
            raise ValueError(
                f"Table '{reference}' not found. "
                "No config file exists. Run 'kontra init' to create one."
            )

        # Malformed entries (non-mapping datasource, `tables: null`) are
        # treated as declaring no tables instead of crashing the search.
        declared = {
            ds_key: _declared_tables(ds_data)
            for ds_key, ds_data in config.datasources.items()
        }
        matches = [ds_key for ds_key, tables in declared.items() if table_name in tables]

        if not matches:
            all_tables = [
                f"{ds_key}.{t}" for ds_key, tables in declared.items() for t in tables
            ]
            tables_str = ", ".join(all_tables) if all_tables else "(none)"
            raise ValueError(
                f"Unknown table: '{reference}'. "
                f"Available tables: {tables_str}"
            )
        if len(matches) > 1:
            matches_str = ", ".join(f"{m}.{table_name}" for m in matches)
            raise ValueError(
                f"Ambiguous table '{reference}' found in multiple datasources: {matches_str}. "
                f"Use explicit 'datasource.table' format."
            )
        ds_name = matches[0]

    # At this point we have ds_name and table_name
    if config is None:
        raise ValueError(
            f"Datasource '{ds_name}' not found. "
            "No config file exists. Run 'kontra init' to create one."
        )

    # Get datasource
    ds = config.get_datasource(ds_name)
    if ds is None:
        available = list(config.datasources.keys())
        available_str = ", ".join(available) if available else "(none)"
        raise ValueError(
            f"Unknown datasource: '{ds_name}'. "
            f"Available datasources: {available_str}"
        )

    # Resolve table reference
    if table_name not in ds.tables:
        available_tables = list(ds.tables.keys())
        tables_str = ", ".join(available_tables) if available_tables else "(none)"
        raise ValueError(
            f"Unknown table '{table_name}' in datasource '{ds_name}'. "
            f"Available tables: {tables_str}"
        )

    table_ref = ds.tables[table_name]

    # Build full URI based on datasource type
    if isinstance(ds, (PostgresDatasourceConfig, MSSQLDatasourceConfig)):
        # <scheme>://user:pass@host:port/database/schema.table
        # NOTE(review): the mssql:// layout mirrors the postgres one; confirm
        # it matches what the SQL Server connector parses.
        # NOTE(review): credentials are inserted verbatim (not URL-encoded);
        # confirm how the connectors parse special characters.
        scheme = "postgres" if isinstance(ds, PostgresDatasourceConfig) else "mssql"
        if ds.user and ds.password:
            auth = f"{ds.user}:{ds.password}@"
        elif ds.user:
            auth = f"{ds.user}@"
        else:
            auth = ""
        return f"{scheme}://{auth}{ds.host}:{ds.port}/{ds.database}/{table_ref}"

    if isinstance(ds, S3DatasourceConfig):
        # s3://bucket/prefix/key
        prefix = ds.prefix.rstrip("/")
        if prefix:
            return f"s3://{ds.bucket}/{prefix}/{table_ref}"
        return f"s3://{ds.bucket}/{table_ref}"

    if isinstance(ds, FilesDatasourceConfig):
        # Local file path
        return str(Path(ds.base_path) / table_ref)

    raise ValueError(f"Unknown datasource type for '{ds_name}'")
663
+
664
def list_datasources(config: Optional[KontraConfig] = None) -> Dict[str, List[str]]:
    """
    Enumerate the configured datasources with the table aliases each declares.

    Args:
        config: Config to inspect. When None, the config file is
            auto-discovered and loaded; if none is found the result is empty.

    Returns:
        Dict mapping datasource names to list of table names
    """
    if config is None:
        discovered = find_config_file()
        if not discovered:
            return {}
        config = load_config_file(discovered)

    listing = {}
    for name in config.datasources:
        datasource = config.get_datasource(name)
        if not datasource:
            continue
        listing[name] = list(datasource.tables.keys())
    return listing
685
+
686
+
687
+ # =============================================================================
688
+ # Config Template
689
+ # =============================================================================
690
+
691
+ DEFAULT_CONFIG_TEMPLATE = '''# Kontra Configuration
692
+ # Generated by: kontra init
693
+ # Documentation: https://github.com/kontra-data/kontra
694
+ #
695
+ # CLI flags always take precedence over these settings.
696
+ # Environment variable substitution: ${VAR_NAME}
697
+
698
+ version: "1"
699
+
700
+ # ─────────────────────────────────────────────────────────────
701
+ # Default Settings
702
+ # ─────────────────────────────────────────────────────────────
703
+
704
+ defaults:
705
+ # Execution controls
706
+ preplan: "auto" # on | off | auto - Parquet metadata preflight
707
+ pushdown: "auto" # on | off | auto - SQL pushdown to DuckDB
708
+ projection: "on" # on | off - Column pruning at source
709
+
710
+ # Output
711
+ output_format: "rich" # rich | json - Output format
712
+ stats: "none" # none | summary | profile - Statistics detail
713
+
714
+ # State management
715
+ state_backend: "local" # local | s3://bucket/prefix | postgres://...
716
+
717
+ # CSV handling
718
+ csv_mode: "auto" # auto | duckdb | parquet
719
+
720
+ # ─────────────────────────────────────────────────────────────
721
+ # Profile Settings
722
+ # ─────────────────────────────────────────────────────────────
723
+
724
+ profile:
725
+ preset: "scan" # scout | scan | interrogate
726
+ save_profile: false # Save profile to state storage
727
+ # list_values_threshold: 10 # List all values if distinct <= N
728
+ # top_n: 5 # Show top N frequent values
729
+ # include_patterns: false # Detect patterns (email, uuid, etc.)
730
+
731
+ # ─────────────────────────────────────────────────────────────
732
+ # Datasources
733
+ # ─────────────────────────────────────────────────────────────
734
+ # Named data sources referenced as: datasource_name.table_name
735
+ # Credentials stay in config, contracts stay clean and portable.
736
+ #
737
+ # Usage:
738
+ # kontra validate contract.yml --data prod_db.users
739
+ # kontra profile prod_db.orders
740
+ #
741
+ # Or in contract YAML:
742
+ # dataset: prod_db.users
743
+
744
+ datasources: {}
745
+ # PostgreSQL example:
746
+ # prod_db:
747
+ # type: postgres
748
+ # host: ${PGHOST}
749
+ # port: 5432
750
+ # user: ${PGUSER}
751
+ # password: ${PGPASSWORD}
752
+ # database: ${PGDATABASE}
753
+ # tables:
754
+ # users: public.users
755
+ # orders: public.orders
756
+
757
+ # Local files example:
758
+ # local_data:
759
+ # type: files
760
+ # base_path: ./data
761
+ # tables:
762
+ # users: users.parquet
763
+ # orders: orders.csv
764
+
765
+ # S3 example:
766
+ # data_lake:
767
+ # type: s3
768
+ # bucket: ${S3_BUCKET}
769
+ # prefix: warehouse/
770
+ # tables:
771
+ # events: events.parquet
772
+ # metrics: metrics.parquet
773
+
774
+ # ─────────────────────────────────────────────────────────────
775
+ # Environments
776
+ # ─────────────────────────────────────────────────────────────
777
+ # Named configurations activated with --env <name>
778
+ # Only specified fields override defaults.
779
+
780
+ environments: {}
781
+ # Example: Production environment
782
+ # production:
783
+ # state_backend: postgres://${PGHOST}/${PGDATABASE}
784
+ # preplan: "on"
785
+ # pushdown: "on"
786
+ # output_format: "json"
787
+
788
+ # Example: Staging environment
789
+ # staging:
790
+ # state_backend: s3://${S3_BUCKET}/kontra-state/
791
+ # stats: "summary"
792
+
793
+ # Example: Local development
794
+ # local:
795
+ # state_backend: "local"
796
+ # stats: "profile"
797
+ '''