kontra 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. kontra/__init__.py +1871 -0
  2. kontra/api/__init__.py +22 -0
  3. kontra/api/compare.py +340 -0
  4. kontra/api/decorators.py +153 -0
  5. kontra/api/results.py +2121 -0
  6. kontra/api/rules.py +681 -0
  7. kontra/cli/__init__.py +0 -0
  8. kontra/cli/commands/__init__.py +1 -0
  9. kontra/cli/commands/config.py +153 -0
  10. kontra/cli/commands/diff.py +450 -0
  11. kontra/cli/commands/history.py +196 -0
  12. kontra/cli/commands/profile.py +289 -0
  13. kontra/cli/commands/validate.py +468 -0
  14. kontra/cli/constants.py +6 -0
  15. kontra/cli/main.py +48 -0
  16. kontra/cli/renderers.py +304 -0
  17. kontra/cli/utils.py +28 -0
  18. kontra/config/__init__.py +34 -0
  19. kontra/config/loader.py +127 -0
  20. kontra/config/models.py +49 -0
  21. kontra/config/settings.py +797 -0
  22. kontra/connectors/__init__.py +0 -0
  23. kontra/connectors/db_utils.py +251 -0
  24. kontra/connectors/detection.py +323 -0
  25. kontra/connectors/handle.py +368 -0
  26. kontra/connectors/postgres.py +127 -0
  27. kontra/connectors/sqlserver.py +226 -0
  28. kontra/engine/__init__.py +0 -0
  29. kontra/engine/backends/duckdb_session.py +227 -0
  30. kontra/engine/backends/duckdb_utils.py +18 -0
  31. kontra/engine/backends/polars_backend.py +47 -0
  32. kontra/engine/engine.py +1205 -0
  33. kontra/engine/executors/__init__.py +15 -0
  34. kontra/engine/executors/base.py +50 -0
  35. kontra/engine/executors/database_base.py +528 -0
  36. kontra/engine/executors/duckdb_sql.py +607 -0
  37. kontra/engine/executors/postgres_sql.py +162 -0
  38. kontra/engine/executors/registry.py +69 -0
  39. kontra/engine/executors/sqlserver_sql.py +163 -0
  40. kontra/engine/materializers/__init__.py +14 -0
  41. kontra/engine/materializers/base.py +42 -0
  42. kontra/engine/materializers/duckdb.py +110 -0
  43. kontra/engine/materializers/factory.py +22 -0
  44. kontra/engine/materializers/polars_connector.py +131 -0
  45. kontra/engine/materializers/postgres.py +157 -0
  46. kontra/engine/materializers/registry.py +138 -0
  47. kontra/engine/materializers/sqlserver.py +160 -0
  48. kontra/engine/result.py +15 -0
  49. kontra/engine/sql_utils.py +611 -0
  50. kontra/engine/sql_validator.py +609 -0
  51. kontra/engine/stats.py +194 -0
  52. kontra/engine/types.py +138 -0
  53. kontra/errors.py +533 -0
  54. kontra/logging.py +85 -0
  55. kontra/preplan/__init__.py +5 -0
  56. kontra/preplan/planner.py +253 -0
  57. kontra/preplan/postgres.py +179 -0
  58. kontra/preplan/sqlserver.py +191 -0
  59. kontra/preplan/types.py +24 -0
  60. kontra/probes/__init__.py +20 -0
  61. kontra/probes/compare.py +400 -0
  62. kontra/probes/relationship.py +283 -0
  63. kontra/reporters/__init__.py +0 -0
  64. kontra/reporters/json_reporter.py +190 -0
  65. kontra/reporters/rich_reporter.py +11 -0
  66. kontra/rules/__init__.py +35 -0
  67. kontra/rules/base.py +186 -0
  68. kontra/rules/builtin/__init__.py +40 -0
  69. kontra/rules/builtin/allowed_values.py +156 -0
  70. kontra/rules/builtin/compare.py +188 -0
  71. kontra/rules/builtin/conditional_not_null.py +213 -0
  72. kontra/rules/builtin/conditional_range.py +310 -0
  73. kontra/rules/builtin/contains.py +138 -0
  74. kontra/rules/builtin/custom_sql_check.py +182 -0
  75. kontra/rules/builtin/disallowed_values.py +140 -0
  76. kontra/rules/builtin/dtype.py +203 -0
  77. kontra/rules/builtin/ends_with.py +129 -0
  78. kontra/rules/builtin/freshness.py +240 -0
  79. kontra/rules/builtin/length.py +193 -0
  80. kontra/rules/builtin/max_rows.py +35 -0
  81. kontra/rules/builtin/min_rows.py +46 -0
  82. kontra/rules/builtin/not_null.py +121 -0
  83. kontra/rules/builtin/range.py +222 -0
  84. kontra/rules/builtin/regex.py +143 -0
  85. kontra/rules/builtin/starts_with.py +129 -0
  86. kontra/rules/builtin/unique.py +124 -0
  87. kontra/rules/condition_parser.py +203 -0
  88. kontra/rules/execution_plan.py +455 -0
  89. kontra/rules/factory.py +103 -0
  90. kontra/rules/predicates.py +25 -0
  91. kontra/rules/registry.py +24 -0
  92. kontra/rules/static_predicates.py +120 -0
  93. kontra/scout/__init__.py +9 -0
  94. kontra/scout/backends/__init__.py +17 -0
  95. kontra/scout/backends/base.py +111 -0
  96. kontra/scout/backends/duckdb_backend.py +359 -0
  97. kontra/scout/backends/postgres_backend.py +519 -0
  98. kontra/scout/backends/sqlserver_backend.py +577 -0
  99. kontra/scout/dtype_mapping.py +150 -0
  100. kontra/scout/patterns.py +69 -0
  101. kontra/scout/profiler.py +801 -0
  102. kontra/scout/reporters/__init__.py +39 -0
  103. kontra/scout/reporters/json_reporter.py +165 -0
  104. kontra/scout/reporters/markdown_reporter.py +152 -0
  105. kontra/scout/reporters/rich_reporter.py +144 -0
  106. kontra/scout/store.py +208 -0
  107. kontra/scout/suggest.py +200 -0
  108. kontra/scout/types.py +652 -0
  109. kontra/state/__init__.py +29 -0
  110. kontra/state/backends/__init__.py +79 -0
  111. kontra/state/backends/base.py +348 -0
  112. kontra/state/backends/local.py +480 -0
  113. kontra/state/backends/postgres.py +1010 -0
  114. kontra/state/backends/s3.py +543 -0
  115. kontra/state/backends/sqlserver.py +969 -0
  116. kontra/state/fingerprint.py +166 -0
  117. kontra/state/types.py +1061 -0
  118. kontra/version.py +1 -0
  119. kontra-0.5.2.dist-info/METADATA +122 -0
  120. kontra-0.5.2.dist-info/RECORD +124 -0
  121. kontra-0.5.2.dist-info/WHEEL +5 -0
  122. kontra-0.5.2.dist-info/entry_points.txt +2 -0
  123. kontra-0.5.2.dist-info/licenses/LICENSE +17 -0
  124. kontra-0.5.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,196 @@
1
+ # src/kontra/cli/commands/history.py
2
+ """History command for Kontra CLI."""
3
+
4
+ from __future__ import annotations
5
+
6
+ from typing import Literal, Optional
7
+
8
+ import typer
9
+
10
+ from kontra.cli.constants import (
11
+ EXIT_CONFIG_ERROR,
12
+ EXIT_RUNTIME_ERROR,
13
+ EXIT_SUCCESS,
14
+ )
15
+
16
+
17
def register(app: typer.Typer) -> None:
    """Register the ``history`` command on the given Typer application.

    Args:
        app: Typer application the ``history`` command is attached to.
    """

    @app.command("history")
    def history(
        contract: str = typer.Argument(
            ..., help="Path to the contract.yml file"
        ),
        since: Optional[str] = typer.Option(
            None,
            "--since",
            "-s",
            help="Time filter: '24h', '7d', or date like '2026-01-15' (default: all)",
        ),
        limit: int = typer.Option(
            20,
            "--limit",
            "-n",
            help="Maximum runs to show (default: 20)",
        ),
        failed_only: bool = typer.Option(
            False,
            "--failed-only",
            "-f",
            help="Only show failed runs",
        ),
        output_format: Literal["table", "json"] = typer.Option(
            "table",
            "--output-format",
            "-o",
            help="Output format (default: table)",
        ),
        verbose: bool = typer.Option(
            False, "--verbose", "-v", help="Show additional details"
        ),
    ) -> None:
        """
        Show validation history for a contract.

        Displays past validation runs with timestamps, pass/fail status,
        and violation counts. Useful for tracking data quality over time.

        Examples:
            kontra history contract.yml
            kontra history contract.yml --since 7d
            kontra history contract.yml --failed-only
            kontra history contract.yml -o json
        """
        import json
        import os

        if verbose:
            os.environ["KONTRA_VERBOSE"] = "1"

        try:
            import kontra

            runs = kontra.get_history(
                contract,
                limit=limit,
                since=since,
                failed_only=failed_only,
            )

            if output_format == "json":
                # Always emit a JSON document in JSON mode: an empty history
                # is "[]", not a human-readable sentence, so that consumers
                # piping the output into a JSON parser never fail on it.
                typer.echo(json.dumps(runs or [], indent=2, default=str))
            elif not runs:
                typer.echo("No validation history found for this contract.")
            else:
                _render_table(runs, contract, verbose)

            raise typer.Exit(code=EXIT_SUCCESS)

        except typer.Exit:
            # Exit is raised inside the try block on the success path; let it
            # through so the generic handler below does not report it.
            raise

        except FileNotFoundError as e:
            from kontra.errors import format_error_for_cli

            msg = format_error_for_cli(e)
            typer.secho(f"Error: {msg}", fg=typer.colors.RED)
            raise typer.Exit(code=EXIT_CONFIG_ERROR)

        except ValueError as e:
            typer.secho(f"Error: {e}", fg=typer.colors.RED)
            raise typer.Exit(code=EXIT_CONFIG_ERROR)

        except Exception as e:
            # Top-level CLI boundary: report any other failure and map it to
            # the runtime-error exit code instead of surfacing a raw traceback.
            from kontra.errors import format_error_for_cli

            msg = format_error_for_cli(e)
            if verbose:
                import traceback

                typer.secho(
                    f"Error: {msg}\n\n{traceback.format_exc()}", fg=typer.colors.RED
                )
            else:
                typer.secho(f"Error: {msg}", fg=typer.colors.RED)
                typer.secho("Use --verbose for full traceback.", fg=typer.colors.YELLOW)
            raise typer.Exit(code=EXIT_RUNTIME_ERROR)
120
+
121
+
122
def _render_table(runs: list, contract: str, verbose: bool) -> None:
    """Render validation history as a Rich table, or plain text without Rich.

    Both output paths are driven by the same per-run display values, so they
    agree on how missing or oddly typed fields are shown.

    Args:
        runs: Run records; each is read with ``.get`` for the keys
            ``timestamp``, ``passed``, ``failed_count``, ``total_rows``,
            ``run_id`` and (first record only) ``contract_name``.
        contract: Contract path given on the command line; used as the title
            when the first record carries no ``contract_name``.
        verbose: When true, an extra "Run ID" column is shown.
    """
    from datetime import datetime

    def _display_cells(run: dict) -> tuple:
        """Return (timestamp, passed, failed, rows, run_id) display values."""
        ts = run.get("timestamp")
        if ts is None:
            ts = ""
        if isinstance(ts, str):
            try:
                # fromisoformat() on older Pythons rejects a trailing "Z".
                parsed = datetime.fromisoformat(ts.replace("Z", "+00:00"))
                ts_display = parsed.strftime("%Y-%m-%d %H:%M")
            except ValueError:
                ts_display = ts[:16]
        else:
            ts_display = str(ts)[:16]

        # "or 0" also covers a key that is present but set to None.
        failed_count = run.get("failed_count") or 0
        failed_display = str(failed_count) if failed_count > 0 else "-"

        total_rows = run.get("total_rows")
        if total_rows is None:
            rows_display = "-"
        elif isinstance(total_rows, (int, float)):
            rows_display = f"{total_rows:,}"
        else:
            # The "," format spec is only valid for numbers.
            rows_display = str(total_rows)

        run_id = str(run.get("run_id") or "-")
        return ts_display, bool(run.get("passed", False)), failed_display, rows_display, run_id

    first_name = runs[0].get("contract_name") if runs else None
    contract_name = str(first_name or contract)
    rows = [_display_cells(run) for run in runs]

    # Keep the try body to the imports only, so an unrelated error raised
    # while rendering is never mistaken for "Rich is not installed".
    try:
        from rich.console import Console
        from rich.markup import escape
        from rich.table import Table
    except ImportError:
        # Fallback to plain text if Rich not available
        typer.echo(f"\nValidation History: {contract_name}")
        typer.echo(f"Showing {len(runs)} most recent runs\n")
        header = f"{'Timestamp':<20} {'Status':<8} {'Failed':<8} {'Rows':<12}"
        if verbose:
            header += " Run ID"
        typer.echo(header)
        typer.echo("-" * 50)

        for ts_display, passed, failed_display, rows_display, run_id in rows:
            status = "PASS" if passed else "FAIL"
            line = f"{ts_display:<20} {status:<8} {failed_display:<8} {rows_display:<12}"
            if verbose:
                line += f" {run_id}"
            typer.echo(line)
        return

    console = Console()

    # Header. User-controlled text is escaped so that square brackets in a
    # contract name or path are not interpreted as Rich markup tags.
    console.print(f"\n[bold]Validation History: {escape(contract_name)}[/bold]")
    console.print(f"Showing {len(runs)} most recent runs\n")

    # Table
    table = Table(show_header=True, header_style="bold")
    table.add_column("Timestamp", style="dim")
    table.add_column("Status", justify="center")
    table.add_column("Failed", justify="right")
    table.add_column("Rows", justify="right")
    if verbose:
        table.add_column("Run ID", style="dim")

    for ts_display, passed, failed_display, rows_display, run_id in rows:
        status = "[green]PASS[/green]" if passed else "[red]FAIL[/red]"
        cells = [escape(ts_display), status, failed_display, rows_display]
        if verbose:
            cells.append(escape(run_id))
        table.add_row(*cells)

    console.print(table)
@@ -0,0 +1,289 @@
1
+ """Profile command for Kontra CLI."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Literal, Optional
6
+
7
+ import typer
8
+
9
+ from kontra.cli.constants import (
10
+ EXIT_CONFIG_ERROR,
11
+ EXIT_RUNTIME_ERROR,
12
+ EXIT_SUCCESS,
13
+ )
14
+
15
+
16
def register(app: typer.Typer) -> None:
    """Register the ``profile`` command on the given Typer application.

    The command itself only declares the CLI options and forwards them,
    unchanged, to ``_run_profile``.

    Args:
        app: Typer application the ``profile`` command is attached to.
    """

    @app.command("profile")
    def profile(
        source: str = typer.Argument(
            ..., help="Path or URI to the dataset (local file, s3://..., https://...)"
        ),
        output_format: Optional[Literal["rich", "json", "markdown", "llm"]] = typer.Option(
            None, "--output-format", "-o", help="Output format (default: 'rich')."
        ),
        # Config-aware options
        preset: Optional[Literal["scout", "scan", "interrogate"]] = typer.Option(
            None,
            "--preset",
            "-p",
            help="Profiling depth (default: from config or 'scan').",
        ),
        list_values_threshold: Optional[int] = typer.Option(
            None,
            "--list-values-threshold",
            "-l",
            help="List all values if distinct count <= threshold.",
        ),
        top_n: Optional[int] = typer.Option(
            None,
            "--top-n",
            "-t",
            help="Show top N most frequent values per column.",
        ),
        sample: Optional[int] = typer.Option(
            None,
            "--sample",
            "-s",
            help="Sample N rows for profiling (default: all rows).",
        ),
        # Tri-state flag: None means "not given on the command line". The
        # explicit --no-... form lets the CLI pass False as an override;
        # with only the positive name, False could never be expressed.
        include_patterns: Optional[bool] = typer.Option(
            None,
            "--include-patterns/--no-include-patterns",
            help="Detect common patterns (default: from config or False).",
        ),
        columns: Optional[str] = typer.Option(
            None,
            "--columns",
            "-c",
            help="Comma-separated list of columns to profile (default: all).",
        ),
        draft: bool = typer.Option(
            False,
            "--draft",
            help="Generate draft validation rules based on profile.",
        ),
        # Tri-state flag, same reasoning as --include-patterns above.
        save_profile: Optional[bool] = typer.Option(
            None,
            "--save-profile/--no-save-profile",
            help="Save profile to state storage (default: from config or False).",
        ),
        # Environment selection
        env: Optional[str] = typer.Option(
            None,
            "--env",
            "-e",
            help="Environment profile from .kontra/config.yml.",
            envvar="KONTRA_ENV",
        ),
        storage_options: Optional[str] = typer.Option(
            None,
            "--storage-options",
            help='Cloud storage credentials as JSON, e.g. \'{"aws_access_key_id": "...", "aws_region": "us-east-1"}\'',
        ),
        verbose: bool = typer.Option(
            False, "--verbose", "-v", help="Enable verbose output."
        ),
    ) -> None:
        """
        Profile a dataset (Kontra Profile).

        Generates comprehensive column-level statistics optimized for
        developer exploration and LLM context compression.

        Presets control profiling depth:
          - scout: Quick recon. Metadata only (schema, row count, null/distinct counts).
          - scan: Systematic pass. Full stats with moderate top values. [default]
          - interrogate: Deep investigation. Everything including percentiles.

        Examples:
            kontra profile data.parquet
            kontra profile s3://bucket/data.csv --sample 10000
            kontra profile data.parquet -o json --preset interrogate
            kontra profile data.parquet --draft > rules.yml
            kontra profile data.parquet --save-profile  # Save for diffing
        """
        _run_profile(
            source=source,
            output_format=output_format,
            preset=preset,
            list_values_threshold=list_values_threshold,
            top_n=top_n,
            sample=sample,
            include_patterns=include_patterns,
            columns=columns,
            draft=draft,
            save_profile=save_profile,
            env=env,
            storage_options=storage_options,
            verbose=verbose,
        )
123
+
124
+
125
+
126
def _run_profile(
    source: str,
    output_format: Optional[str],
    preset: Optional[str],
    list_values_threshold: Optional[int],
    top_n: Optional[int],
    sample: Optional[int],
    include_patterns: Optional[bool],
    columns: Optional[str],
    draft: bool,
    save_profile: Optional[bool],
    env: Optional[str],
    storage_options: Optional[str],
    verbose: bool,
) -> None:
    """Shared implementation for profile and scout commands.

    Resolves the effective configuration and datasource, runs
    ``ScoutProfiler`` and writes either the rendered profile or, with
    ``draft``, the generated rules YAML to stdout.

    Args:
        source: Dataset path/URI or datasource name, passed to
            ``resolve_datasource``.
        output_format: Renderer name; ``None`` falls back to ``"rich"``.
            Not used when ``draft`` is true.
        preset: CLI override for the profiling preset.
        list_values_threshold: CLI override forwarded to the config resolver.
        top_n: CLI override forwarded to the config resolver.
        sample: Passed to the profiler as ``sample_size``.
        include_patterns: CLI override forwarded to the config resolver.
        columns: Comma-separated column names; blank entries are ignored.
        draft: Emit draft validation rules instead of the profile report.
        save_profile: CLI override forwarded to the config resolver.
        env: Environment name passed to the config resolver.
        storage_options: JSON object (as text) with storage credentials.
        verbose: Sets ``KONTRA_VERBOSE=1`` and enables tracebacks on error.

    Raises:
        typer.Exit: Always. The code is ``EXIT_SUCCESS`` on success,
            ``EXIT_CONFIG_ERROR`` for config, datasource, ``--storage-options``
            and file-not-found problems, and ``EXIT_RUNTIME_ERROR`` otherwise.
    """
    import os

    if verbose:
        os.environ["KONTRA_VERBOSE"] = "1"

    try:
        from kontra.config.settings import resolve_effective_config

        # --- LOAD CONFIG ---
        cli_overrides = {
            "preset": preset,
            "save_profile": save_profile,
            "list_values_threshold": list_values_threshold,
            "top_n": top_n,
            "include_patterns": include_patterns,
        }

        try:
            config = resolve_effective_config(
                env_name=env, cli_overrides=cli_overrides
            )
        except Exception as e:
            from kontra.errors import format_error_for_cli

            typer.secho(
                f"Config error: {format_error_for_cli(e)}", fg=typer.colors.RED
            )
            raise typer.Exit(code=EXIT_CONFIG_ERROR)

        # Resolve effective values from config
        effective_preset = config.scout_preset
        effective_save_profile = config.scout_save_profile
        effective_list_values_threshold = config.scout_list_values_threshold
        effective_top_n = config.scout_top_n
        effective_include_patterns = config.scout_include_patterns

        # --- RESOLVE DATASOURCE ---
        from kontra.config.settings import resolve_datasource

        try:
            resolved_source = resolve_datasource(source)
        except ValueError as e:
            typer.secho(f"Datasource error: {e}", fg=typer.colors.RED)
            raise typer.Exit(code=EXIT_CONFIG_ERROR)

        # Parse columns filter
        cols_filter = None
        if columns:
            cols_filter = [c.strip() for c in columns.split(",") if c.strip()]

        # Output format defaults
        effective_output_format = output_format or "rich"

        from kontra.scout.profiler import ScoutProfiler

        # Parse storage_options JSON if provided
        parsed_storage_options = None
        if storage_options:
            import json

            try:
                parsed_storage_options = json.loads(storage_options)
            except json.JSONDecodeError as e:
                typer.secho(
                    f"Invalid --storage-options JSON: {e}",
                    fg=typer.colors.RED,
                )
                raise typer.Exit(code=EXIT_CONFIG_ERROR)

            # Valid JSON is not necessarily an object ('[1, 2]' and '"abc"'
            # parse fine). The option is documented as a JSON object of
            # credentials, so reject anything else here with a clear message.
            if not isinstance(parsed_storage_options, dict):
                typer.secho(
                    "Invalid --storage-options JSON: expected an object, got "
                    f"{type(parsed_storage_options).__name__}",
                    fg=typer.colors.RED,
                )
                raise typer.Exit(code=EXIT_CONFIG_ERROR)

        profiler = ScoutProfiler(
            resolved_source,
            preset=effective_preset,
            list_values_threshold=effective_list_values_threshold,
            top_n=effective_top_n,
            sample_size=sample,
            include_patterns=effective_include_patterns,
            columns=cols_filter,
            storage_options=parsed_storage_options,
        )

        profile_result = profiler.profile()

        # Save profile if requested
        if effective_save_profile:
            from kontra.scout.store import (
                create_profile_state,
                get_default_profile_store,
            )

            state = create_profile_state(profile_result)
            store = get_default_profile_store()
            store.save(state)
            # Status goes to stderr: stdout carries the report/rules and is
            # meant to be redirected (e.g. "--draft > rules.yml", "-o json"),
            # so a status line there would corrupt a successful run's output.
            typer.secho(
                f"Profile saved (fingerprint: {state.source_fingerprint})",
                fg=typer.colors.GREEN,
                err=True,
            )

        # Handle rule draft/suggestions
        if draft:
            from kontra.scout.suggest import generate_rules_yaml

            output = generate_rules_yaml(profile_result)
        else:
            from kontra.scout.reporters import render_profile

            output = render_profile(profile_result, format=effective_output_format)

        typer.echo(output)
        raise typer.Exit(code=EXIT_SUCCESS)

    except typer.Exit:
        # Exit is raised inside the try block for both success and handled
        # errors; let it through so the generic handler does not report it.
        raise

    except (FileNotFoundError, ConnectionError) as e:
        from kontra.errors import format_error_for_cli

        msg = format_error_for_cli(e)
        typer.secho(f"Error: {msg}", fg=typer.colors.RED)
        if verbose:
            import traceback

            typer.secho(f"\n{traceback.format_exc()}", fg=typer.colors.YELLOW)
        # A missing file is treated as a configuration problem; a failed
        # connection is a runtime problem.
        if isinstance(e, FileNotFoundError):
            raise typer.Exit(code=EXIT_CONFIG_ERROR)
        raise typer.Exit(code=EXIT_RUNTIME_ERROR)

    except Exception as e:
        # Top-level CLI boundary: report any other failure and map it to the
        # runtime-error exit code instead of surfacing a raw traceback.
        from kontra.errors import format_error_for_cli

        msg = format_error_for_cli(e)
        if verbose:
            import traceback

            typer.secho(
                f"Error: {msg}\n\n{traceback.format_exc()}", fg=typer.colors.RED
            )
        else:
            typer.secho(f"Error: {msg}", fg=typer.colors.RED)
            typer.secho("Use --verbose for full traceback.", fg=typer.colors.YELLOW)
        raise typer.Exit(code=EXIT_RUNTIME_ERROR)