kontra 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. kontra/__init__.py +1871 -0
  2. kontra/api/__init__.py +22 -0
  3. kontra/api/compare.py +340 -0
  4. kontra/api/decorators.py +153 -0
  5. kontra/api/results.py +2121 -0
  6. kontra/api/rules.py +681 -0
  7. kontra/cli/__init__.py +0 -0
  8. kontra/cli/commands/__init__.py +1 -0
  9. kontra/cli/commands/config.py +153 -0
  10. kontra/cli/commands/diff.py +450 -0
  11. kontra/cli/commands/history.py +196 -0
  12. kontra/cli/commands/profile.py +289 -0
  13. kontra/cli/commands/validate.py +468 -0
  14. kontra/cli/constants.py +6 -0
  15. kontra/cli/main.py +48 -0
  16. kontra/cli/renderers.py +304 -0
  17. kontra/cli/utils.py +28 -0
  18. kontra/config/__init__.py +34 -0
  19. kontra/config/loader.py +127 -0
  20. kontra/config/models.py +49 -0
  21. kontra/config/settings.py +797 -0
  22. kontra/connectors/__init__.py +0 -0
  23. kontra/connectors/db_utils.py +251 -0
  24. kontra/connectors/detection.py +323 -0
  25. kontra/connectors/handle.py +368 -0
  26. kontra/connectors/postgres.py +127 -0
  27. kontra/connectors/sqlserver.py +226 -0
  28. kontra/engine/__init__.py +0 -0
  29. kontra/engine/backends/duckdb_session.py +227 -0
  30. kontra/engine/backends/duckdb_utils.py +18 -0
  31. kontra/engine/backends/polars_backend.py +47 -0
  32. kontra/engine/engine.py +1205 -0
  33. kontra/engine/executors/__init__.py +15 -0
  34. kontra/engine/executors/base.py +50 -0
  35. kontra/engine/executors/database_base.py +528 -0
  36. kontra/engine/executors/duckdb_sql.py +607 -0
  37. kontra/engine/executors/postgres_sql.py +162 -0
  38. kontra/engine/executors/registry.py +69 -0
  39. kontra/engine/executors/sqlserver_sql.py +163 -0
  40. kontra/engine/materializers/__init__.py +14 -0
  41. kontra/engine/materializers/base.py +42 -0
  42. kontra/engine/materializers/duckdb.py +110 -0
  43. kontra/engine/materializers/factory.py +22 -0
  44. kontra/engine/materializers/polars_connector.py +131 -0
  45. kontra/engine/materializers/postgres.py +157 -0
  46. kontra/engine/materializers/registry.py +138 -0
  47. kontra/engine/materializers/sqlserver.py +160 -0
  48. kontra/engine/result.py +15 -0
  49. kontra/engine/sql_utils.py +611 -0
  50. kontra/engine/sql_validator.py +609 -0
  51. kontra/engine/stats.py +194 -0
  52. kontra/engine/types.py +138 -0
  53. kontra/errors.py +533 -0
  54. kontra/logging.py +85 -0
  55. kontra/preplan/__init__.py +5 -0
  56. kontra/preplan/planner.py +253 -0
  57. kontra/preplan/postgres.py +179 -0
  58. kontra/preplan/sqlserver.py +191 -0
  59. kontra/preplan/types.py +24 -0
  60. kontra/probes/__init__.py +20 -0
  61. kontra/probes/compare.py +400 -0
  62. kontra/probes/relationship.py +283 -0
  63. kontra/reporters/__init__.py +0 -0
  64. kontra/reporters/json_reporter.py +190 -0
  65. kontra/reporters/rich_reporter.py +11 -0
  66. kontra/rules/__init__.py +35 -0
  67. kontra/rules/base.py +186 -0
  68. kontra/rules/builtin/__init__.py +40 -0
  69. kontra/rules/builtin/allowed_values.py +156 -0
  70. kontra/rules/builtin/compare.py +188 -0
  71. kontra/rules/builtin/conditional_not_null.py +213 -0
  72. kontra/rules/builtin/conditional_range.py +310 -0
  73. kontra/rules/builtin/contains.py +138 -0
  74. kontra/rules/builtin/custom_sql_check.py +182 -0
  75. kontra/rules/builtin/disallowed_values.py +140 -0
  76. kontra/rules/builtin/dtype.py +203 -0
  77. kontra/rules/builtin/ends_with.py +129 -0
  78. kontra/rules/builtin/freshness.py +240 -0
  79. kontra/rules/builtin/length.py +193 -0
  80. kontra/rules/builtin/max_rows.py +35 -0
  81. kontra/rules/builtin/min_rows.py +46 -0
  82. kontra/rules/builtin/not_null.py +121 -0
  83. kontra/rules/builtin/range.py +222 -0
  84. kontra/rules/builtin/regex.py +143 -0
  85. kontra/rules/builtin/starts_with.py +129 -0
  86. kontra/rules/builtin/unique.py +124 -0
  87. kontra/rules/condition_parser.py +203 -0
  88. kontra/rules/execution_plan.py +455 -0
  89. kontra/rules/factory.py +103 -0
  90. kontra/rules/predicates.py +25 -0
  91. kontra/rules/registry.py +24 -0
  92. kontra/rules/static_predicates.py +120 -0
  93. kontra/scout/__init__.py +9 -0
  94. kontra/scout/backends/__init__.py +17 -0
  95. kontra/scout/backends/base.py +111 -0
  96. kontra/scout/backends/duckdb_backend.py +359 -0
  97. kontra/scout/backends/postgres_backend.py +519 -0
  98. kontra/scout/backends/sqlserver_backend.py +577 -0
  99. kontra/scout/dtype_mapping.py +150 -0
  100. kontra/scout/patterns.py +69 -0
  101. kontra/scout/profiler.py +801 -0
  102. kontra/scout/reporters/__init__.py +39 -0
  103. kontra/scout/reporters/json_reporter.py +165 -0
  104. kontra/scout/reporters/markdown_reporter.py +152 -0
  105. kontra/scout/reporters/rich_reporter.py +144 -0
  106. kontra/scout/store.py +208 -0
  107. kontra/scout/suggest.py +200 -0
  108. kontra/scout/types.py +652 -0
  109. kontra/state/__init__.py +29 -0
  110. kontra/state/backends/__init__.py +79 -0
  111. kontra/state/backends/base.py +348 -0
  112. kontra/state/backends/local.py +480 -0
  113. kontra/state/backends/postgres.py +1010 -0
  114. kontra/state/backends/s3.py +543 -0
  115. kontra/state/backends/sqlserver.py +969 -0
  116. kontra/state/fingerprint.py +166 -0
  117. kontra/state/types.py +1061 -0
  118. kontra/version.py +1 -0
  119. kontra-0.5.2.dist-info/METADATA +122 -0
  120. kontra-0.5.2.dist-info/RECORD +124 -0
  121. kontra-0.5.2.dist-info/WHEEL +5 -0
  122. kontra-0.5.2.dist-info/entry_points.txt +2 -0
  123. kontra-0.5.2.dist-info/licenses/LICENSE +17 -0
  124. kontra-0.5.2.dist-info/top_level.txt +1 -0
kontra/__init__.py ADDED
@@ -0,0 +1,1871 @@
1
+ # src/kontra/__init__.py
2
+ """
3
+ Kontra - Developer-first Data Quality Engine
4
+
5
+ Usage:
6
+ # CLI
7
+ $ kontra validate contract.yml
8
+ $ kontra profile data.parquet
9
+
10
+ # Python API - Simple validation
11
+ import kontra
12
+ result = kontra.validate(df, "contract.yml")
13
+ if result.passed:
14
+ print("All rules passed!")
15
+
16
+ # Python API - Inline rules
17
+ from kontra import rules
18
+ result = kontra.validate(df, rules=[
19
+ rules.not_null("user_id"),
20
+ rules.unique("email"),
21
+ ])
22
+
23
+ # Python API - Profile data
24
+ profile = kontra.profile(df)
25
+ print(profile)
26
+
27
+ # Python API - Draft rules from profile
28
+ suggestions = kontra.draft(profile)
29
+ suggestions.save("contracts/users.yml")
30
+ """
31
+
32
+ from kontra.version import VERSION as __version__
33
+
34
+ # Type imports
35
+ from typing import Any, Dict, List, Optional, Union, TYPE_CHECKING
36
+
37
+ import json
38
+ import os
39
+ import polars as pl
40
+
41
+ if TYPE_CHECKING:
42
+ import pandas as pd
43
+
44
+ # Core engine (for advanced usage)
45
+ from kontra.engine.engine import ValidationEngine
46
+
47
+ # Scout profiler (for advanced usage)
48
+ from kontra.scout.profiler import ScoutProfiler
49
+
50
+ # Scout types
51
+ from kontra.scout.types import DatasetProfile, ColumnProfile, ProfileDiff
52
+
53
+ # Logging
54
+ from kontra.logging import get_logger, log_exception
55
+
56
+ _logger = get_logger(__name__)
57
+
58
+
59
+ def _is_pandas_dataframe(obj: Any) -> bool:
60
+ """Check if object is a pandas DataFrame without importing pandas."""
61
+ # Check module name to avoid importing pandas
62
+ return type(obj).__module__.startswith("pandas") and type(obj).__name__ == "DataFrame"
63
+
64
+
65
+ # Data file extensions that should not be passed to state functions
66
+ _DATA_FILE_EXTENSIONS = {".parquet", ".csv", ".json", ".ndjson", ".jsonl", ".arrow", ".feather"}
67
+
68
+
69
+ def _validate_contract_path(path: str, function_name: str) -> None:
70
+ """
71
+ Validate that a path looks like a contract file, not a data file.
72
+
73
+ Raises ValueError with a helpful message if the file appears to be a data file.
74
+ """
75
+ lower = path.lower()
76
+ for ext in _DATA_FILE_EXTENSIONS:
77
+ if lower.endswith(ext):
78
+ raise ValueError(
79
+ f"{function_name}() requires a contract YAML file path, not a data file. "
80
+ f"Received: '{path}' (appears to be a {ext[1:].upper()} file). "
81
+ f"Example: kontra.{function_name}('contract.yml')"
82
+ )
83
+
84
+
85
+ # API types
86
+ from kontra.api.results import (
87
+ ValidationResult,
88
+ RuleResult,
89
+ DryRunResult,
90
+ Diff,
91
+ Suggestions,
92
+ SuggestedRule,
93
+ )
94
+
95
+ # Probe types
96
+ from kontra.api.compare import CompareResult, RelationshipProfile
97
+
98
+ # Transformation probes
99
+ from kontra.probes import compare, profile_relationship
100
+
101
+ # Rules helpers
102
+ from kontra.api.rules import rules
103
+
104
+ # Decorators
105
+ from kontra.api.decorators import validate as validate_decorator
106
+
107
+ # Errors
108
+ from kontra.errors import ValidationError, StateCorruptedError
109
+
110
+ # Configuration
111
+ from kontra.config.settings import (
112
+ resolve_datasource,
113
+ resolve_effective_config,
114
+ list_datasources,
115
+ KontraConfig,
116
+ )
117
+
118
+
119
+ # =============================================================================
120
+ # Core Functions
121
+ # =============================================================================
122
+
123
+
124
def validate(
    data: Union[str, pl.DataFrame, "pd.DataFrame", List[Dict[str, Any]], Dict[str, Any], Any],
    contract: Optional[str] = None,
    *,
    table: Optional[str] = None,
    rules: Optional[List[Dict[str, Any]]] = None,
    emit_report: bool = False,
    save: bool = True,
    preplan: str = "auto",
    pushdown: str = "auto",
    projection: bool = True,
    csv_mode: str = "auto",
    env: Optional[str] = None,
    stats: str = "none",
    dry_run: bool = False,
    sample: int = 0,
    sample_budget: int = 50,
    sample_columns: Optional[Union[List[str], str]] = None,
    storage_options: Optional[Dict[str, Any]] = None,
    **kwargs,
) -> Union[ValidationResult, DryRunResult]:
    """
    Validate data against a contract and/or inline rules.

    Args:
        data: Data to validate. Accepts:
            - str: File path, URI, or named datasource (e.g., "data.parquet", "s3://...", "prod_db.users")
            - DataFrame: Polars or pandas DataFrame
            - list[dict]: Flat tabular JSON (e.g., API response data)
            - dict: Single record (converted to 1-row DataFrame)
            - Database connection: psycopg2/pyodbc/SQLAlchemy connection (requires `table` param)
        table: Table name for BYOC (Bring Your Own Connection) pattern.
            Required when `data` is a database connection object.
            Formats: "table", "schema.table", or "database.schema.table"
        contract: Path to contract YAML file (optional if rules provided)
        rules: List of inline rule dicts (optional if contract provided)
        emit_report: Print validation report to console
        save: Save result to history (default: True)
        preplan: "on" | "off" | "auto"
        pushdown: "on" | "off" | "auto"
        projection: Enable column pruning
        csv_mode: "auto" | "duckdb" | "parquet"
        env: Environment name from config
        stats: "none" | "summary" | "profile"
        dry_run: If True, validate contract/rules syntax without executing
            against data. Returns DryRunResult with .valid, .rules_count,
            .columns_needed. Use to check contracts before running.
        sample: Per-rule sample cap for failing rows (default: 0 disabled, set to 5 to enable)
        sample_budget: Global sample cap across all rules (default: 50)
        sample_columns: Columns to include in samples for token efficiency.
            - None (default): All columns
            - ["col1", "col2"]: Only specified columns
            - "relevant": Rule's columns + _row_index only
        storage_options: Cloud storage credentials (S3, Azure, GCS).
            For S3/MinIO:
            - aws_access_key_id, aws_secret_access_key
            - aws_region (required for Polars)
            - endpoint_url (for MinIO/S3-compatible)
            For Azure:
            - account_name, account_key, sas_token, etc.
            These override environment variables when provided.
        **kwargs: Additional arguments passed to ValidationEngine

    Returns:
        ValidationResult with .passed, .rules, .to_llm(), etc.
        DryRunResult if dry_run=True, with .valid, .rules_count, .columns_needed

    Example:
        # With contract file
        result = kontra.validate(df, "contract.yml")

        # With inline rules
        from kontra import rules
        result = kontra.validate(df, rules=[
            rules.not_null("user_id"),
            rules.unique("email"),
        ])

        # With list of dicts (e.g., API response)
        data = [{"id": 1, "email": "a@b.com"}, {"id": 2, "email": "c@d.com"}]
        result = kontra.validate(data, rules=[rules.not_null("email")])

        # With single dict (single record validation)
        record = {"id": 1, "email": "test@example.com"}
        result = kontra.validate(record, rules=[rules.regex("email", r".*@.*")])

        # BYOC (Bring Your Own Connection) - database connection + table
        import psycopg2
        conn = psycopg2.connect(host="localhost", dbname="mydb")
        result = kontra.validate(conn, table="public.users", rules=[
            rules.not_null("user_id"),
        ])
        # Note: Kontra does NOT close your connection. You manage its lifecycle.

        # Mix contract and inline rules
        result = kontra.validate(df, "base.yml", rules=[
            rules.freshness("updated_at", max_age="24h"),
        ])

        # Check result
        if result.passed:
            print("All rules passed!")
        else:
            for r in result.blocking_failures:
                print(f"FAILED: {r.rule_id}")

        # Dry run - validate contract syntax without running
        check = kontra.validate(df, "contract.yml", dry_run=True)
        if check.valid:
            print(f"Contract OK: {check.rules_count} rules, needs columns: {check.columns_needed}")
        else:
            print(f"Contract errors: {check.errors}")
    """
    from kontra.errors import InvalidDataError, InvalidPathError
    from kontra.connectors.detection import is_database_connection, is_cursor_object

    # ==========================================================================
    # Input validation - catch invalid data types early with clear errors
    # ==========================================================================

    # Validate inputs
    if contract is None and rules is None:
        raise ValueError("Either contract or rules must be provided")

    # ==========================================================================
    # Dry run - validate contract/rules syntax without executing
    # Data can be None for dry_run since we're not actually validating
    # ==========================================================================
    if dry_run:
        from kontra.config.loader import ContractLoader
        from kontra.rules.factory import RuleFactory
        from kontra.rules.execution_plan import RuleExecutionPlan

        errors: List[str] = []
        contract_name: Optional[str] = None
        datasource: Optional[str] = None
        all_rule_specs: List[Any] = []

        # Load contract if provided
        if contract is not None:
            try:
                contract_obj = ContractLoader.from_path(contract)
                contract_name = contract_obj.name
                datasource = contract_obj.datasource
                all_rule_specs.extend(contract_obj.rules)
            # Ordered most-specific first; the broad catch turns any loader
            # failure into a reported error instead of an exception.
            except FileNotFoundError as e:
                errors.append(f"Contract not found: {e}")
            except ValueError as e:
                errors.append(f"Contract parse error: {e}")
            except Exception as e:
                errors.append(f"Contract error: {e}")

        # Add inline rules if provided
        inline_built_rules = []  # Already-built BaseRule instances
        if rules is not None:
            # Convert inline rules to RuleSpec format (or pass through BaseRule instances)
            from kontra.config.models import RuleSpec
            from kontra.rules.base import BaseRule as BaseRuleType
            for i, r in enumerate(rules):
                try:
                    if isinstance(r, BaseRuleType):
                        # Already a rule instance - use directly
                        inline_built_rules.append(r)
                    elif isinstance(r, dict):
                        spec = RuleSpec(
                            name=r.get("name", ""),
                            id=r.get("id"),
                            params=r.get("params", {}),
                            severity=r.get("severity", "blocking"),
                        )
                        all_rule_specs.append(spec)
                    else:
                        errors.append(
                            f"Inline rule {i}: expected dict or BaseRule, "
                            f"got {type(r).__name__}"
                        )
                except Exception as e:
                    errors.append(f"Inline rule {i} error: {e}")

        # Try to build rules and extract required columns
        columns_needed: List[str] = []
        rules_count = 0

        if not errors and (all_rule_specs or inline_built_rules):
            try:
                built_rules = RuleFactory(all_rule_specs).build_rules() if all_rule_specs else []
                # Merge with already-built rule instances
                built_rules = list(built_rules) + inline_built_rules
                rules_count = len(built_rules)

                # Extract required columns
                plan = RuleExecutionPlan(built_rules)
                compiled = plan.compile()
                columns_needed = list(compiled.required_cols or [])
            except Exception as e:
                errors.append(f"Rule build error: {e}")

        return DryRunResult(
            valid=len(errors) == 0,
            rules_count=rules_count,
            columns_needed=columns_needed,
            contract_name=contract_name,
            datasource=datasource,
            errors=errors,
        )

    # ==========================================================================
    # Input validation for actual validation (not dry_run)
    # ==========================================================================

    # Check for None
    if data is None:
        raise InvalidDataError("NoneType", detail="Data cannot be None")

    # Check for cursor instead of connection (common mistake)
    if is_cursor_object(data):
        raise InvalidDataError(
            type(data).__name__,
            detail="Expected database connection, got cursor object. Pass the connection, not the cursor."
        )

    # Check for BYOC pattern: connection object + table

    is_byoc = False
    if is_database_connection(data):
        if table is None:
            raise ValueError(
                "When passing a database connection, the 'table' parameter is required.\n"
                "Example: kontra.validate(conn, table='public.users', rules=[...])"
            )
        is_byoc = True
    elif table is not None:
        raise ValueError(
            "The 'table' parameter is only valid when 'data' is a database connection.\n"
            "For other data types, use file paths, URIs, or named datasources."
        )

    # Resolve config (always, for severity_weights and other settings)
    cfg = resolve_effective_config(env_name=env)

    # Apply config defaults (CLI args take precedence)
    # NOTE(review): config defaults only apply when an explicit env is given;
    # confirm that is intentional (a default-env config would be ignored here).
    if env:
        if preplan == "auto" and cfg.preplan:
            preplan = cfg.preplan
        if pushdown == "auto" and cfg.pushdown:
            pushdown = cfg.pushdown

    # Build engine kwargs
    engine_kwargs = {
        "contract_path": contract,
        "emit_report": emit_report,
        "save_state": save,
        "preplan": preplan,
        "pushdown": pushdown,
        "enable_projection": projection,
        "csv_mode": csv_mode,
        "stats_mode": stats,
        "inline_rules": rules,
        "storage_options": storage_options,
        **kwargs,
    }

    # Normalize and create engine
    if is_byoc:
        # BYOC: database connection + table
        from kontra.connectors.handle import DatasetHandle

        handle = DatasetHandle.from_connection(data, table)
        engine = ValidationEngine(handle=handle, **engine_kwargs)
    elif isinstance(data, str):
        # File path/URI or datasource name
        # Validate: check if it's a directory (common mistake)
        if os.path.isdir(data):
            raise InvalidPathError(data, "Path is a directory, not a file")
        engine = ValidationEngine(data_path=data, **engine_kwargs)
    elif isinstance(data, list):
        # list[dict] - flat tabular JSON (e.g., API response)
        if not data:
            # Empty list - create empty DataFrame (valid for dataset-level rules like min_rows)
            df = pl.DataFrame()
        else:
            df = pl.DataFrame(data)
        engine = ValidationEngine(dataframe=df, **engine_kwargs)
    elif isinstance(data, dict) and not isinstance(data, pl.DataFrame):
        # Single dict - convert to 1-row DataFrame
        # Note: check for pl.DataFrame first since it's also dict-like in some contexts
        if not data:
            # Empty dict - create empty DataFrame
            df = pl.DataFrame()
        else:
            df = pl.DataFrame([data])
        engine = ValidationEngine(dataframe=df, **engine_kwargs)
    elif isinstance(data, pl.DataFrame):
        # Polars DataFrame
        engine = ValidationEngine(dataframe=data, **engine_kwargs)
    elif _is_pandas_dataframe(data):
        # pandas DataFrame - will be converted by engine
        engine = ValidationEngine(dataframe=data, **engine_kwargs)
    else:
        # Invalid data type
        raise InvalidDataError(type(data).__name__)

    # Run validation
    try:
        raw_result = engine.run()
    except OSError as e:
        # Catch internal errors about unsupported formats and wrap in user-friendly error
        error_str = str(e)
        if "Unsupported format" in error_str or "PolarsConnectorMaterializer" in error_str:
            # Extract the problematic value from the error
            if isinstance(data, str):
                raise InvalidDataError(
                    "str",
                    detail=f"'{data}' is not a valid file path, URI, or datasource name"
                ) from None
            else:
                raise InvalidDataError(type(data).__name__) from None
        raise

    # Determine data source for sample_failures()
    # Priority: DataFrame > handle > data path
    if isinstance(data, pl.DataFrame):
        data_source = data
    elif is_byoc:
        # Store the handle for BYOC
        # NOTE(review): reaches into ValidationEngine private attrs
        # (_handle below, _rules further down) - intentional coupling,
        # confirm these stay stable across engine versions.
        data_source = engine._handle
    elif isinstance(data, str):
        data_source = data
    else:
        # list[dict] or dict - store as DataFrame
        data_source = engine.df

    # Determine loaded data to expose via result.data
    # Priority: engine.df (loaded for Polars) > input DataFrame
    if engine.df is not None:
        loaded_data = engine.df
    elif isinstance(data, pl.DataFrame):
        loaded_data = data  # User passed DataFrame directly
    else:
        loaded_data = None  # Preplan/pushdown handled everything, no data loaded

    # Wrap in ValidationResult with data source and rules for sample_failures()
    return ValidationResult.from_engine_result(
        raw_result,
        data_source=data_source,
        rule_objects=engine._rules,
        sample=sample,
        sample_budget=sample_budget,
        sample_columns=sample_columns,
        severity_weights=cfg.severity_weights,
        data=loaded_data,
    )
476
+
477
+
478
def profile(
    data: Union[str, pl.DataFrame, List[Dict[str, Any]], Dict[str, Any]],
    preset: str = "scan",
    *,
    columns: Optional[List[str]] = None,
    sample: Optional[int] = None,
    save: bool = True,
    storage_options: Optional[Dict[str, Any]] = None,
    **kwargs,
) -> DatasetProfile:
    """
    Profile a dataset.

    Args:
        data: DataFrame (Polars), list[dict], dict, or path/URI to data file
        preset: Profiling depth:
            - "scout": Quick recon (metadata only)
            - "scan": Systematic pass (full stats) [default]
            - "interrogate": Deep investigation (everything + percentiles)
        columns: Only profile these columns
        sample: Sample N rows (default: all)
        save: Save profile to history
        storage_options: Cloud storage credentials (S3, Azure, GCS).
            For S3/MinIO: aws_access_key_id, aws_secret_access_key, aws_region, endpoint_url
            For Azure: account_name, account_key, sas_token, etc.
            These override environment variables when provided.
        **kwargs: Additional arguments passed to ScoutProfiler

    Returns:
        DatasetProfile with column statistics

    Example:
        profile = kontra.profile("data.parquet")
        print(f"Rows: {profile.row_count}")
        for col in profile.columns:
            print(f"{col.name}: {col.dtype}")

        # Quick metadata-only profile
        profile = kontra.profile("big_data.parquet", preset="scout")

        # Deep profile with percentiles
        profile = kontra.profile("data.parquet", preset="interrogate")
    """
    import warnings
    from kontra.scout.profiler import _DEPRECATED_PRESETS

    # NOTE(review): `save` is accepted here but never forwarded to
    # ScoutProfiler in this function - confirm whether history saving is
    # handled elsewhere or this parameter is silently ignored.

    # Warn on deprecated preset names
    if preset in _DEPRECATED_PRESETS:
        new_name = _DEPRECATED_PRESETS[preset]
        warnings.warn(
            f"Preset '{preset}' is deprecated, use '{new_name}' instead",
            DeprecationWarning,
            stacklevel=2,
        )

    # Convert list/dict to DataFrame (mirrors validate()'s input normalization)
    if isinstance(data, list):
        if not data:
            data = pl.DataFrame()
        else:
            data = pl.DataFrame(data)
    elif isinstance(data, dict) and not isinstance(data, pl.DataFrame):
        if not data:
            data = pl.DataFrame()
        else:
            data = pl.DataFrame([data])

    if isinstance(data, pl.DataFrame):
        # Handle empty DataFrame (no columns) - DuckDB can't read parquet with no columns
        if data.width == 0:
            from datetime import datetime, timezone
            from kontra.version import VERSION
            return DatasetProfile(
                source_uri="<inline DataFrame>",
                source_format="dataframe",
                profiled_at=datetime.now(timezone.utc).isoformat(),
                engine_version=VERSION,
                row_count=data.height,
                column_count=0,
                columns=[],
            )

        # For DataFrame input, write to temp file
        # (temp file is local, so storage_options are not needed on this path)
        import tempfile
        import os  # shadows module-level os; harmless but redundant

        # delete=False so the handle can be closed before writing/reading
        # the path by name (required on Windows); cleaned up in `finally`.
        with tempfile.NamedTemporaryFile(suffix=".parquet", delete=False) as f:
            temp_path = f.name
        data.write_parquet(temp_path)

        try:
            profiler = ScoutProfiler(
                temp_path,
                preset=preset,
                columns=columns,
                sample_size=sample,
                **kwargs,
            )
            return profiler.profile()
        finally:
            os.unlink(temp_path)
    else:
        # Resolve named datasources (e.g., "prod_db.users" -> actual URI)
        resolved_data = data
        if isinstance(data, str):
            try:
                resolved_data = resolve_datasource(data)
            except ValueError:
                # Not a named datasource - use as-is (file path or URI)
                pass

        profiler = ScoutProfiler(
            resolved_data,
            preset=preset,
            columns=columns,
            sample_size=sample,
            storage_options=storage_options,
            **kwargs,
        )
        return profiler.profile()
598
+
599
+
600
def draft(
    profile: DatasetProfile,
    min_confidence: float = 0.5,
) -> Suggestions:
    """
    Draft validation rules from a profile.

    Analyzes the profile and suggests rules based on observed patterns.
    These are starting points - refine them based on domain knowledge.

    Args:
        profile: DatasetProfile from kontra.profile()
        min_confidence: Minimum confidence score (0.0-1.0)

    Returns:
        Suggestions with .to_yaml(), .save(), .filter()

    Raises:
        ValueError: If min_confidence is outside the documented 0.0-1.0 range.

    Example:
        profile = kontra.profile(df, preset="interrogate")
        suggestions = kontra.draft(profile)

        # Filter high confidence
        high_conf = suggestions.filter(min_confidence=0.9)

        # Save to file
        high_conf.save("contracts/users.yml")

        # Or use directly
        result = kontra.validate(df, rules=suggestions.to_dict())
    """
    # Enforce the documented contract early; an out-of-range threshold would
    # otherwise silently yield all-or-no suggestions downstream.
    if not 0.0 <= min_confidence <= 1.0:
        raise ValueError(
            f"min_confidence must be between 0.0 and 1.0, got {min_confidence}"
        )
    return Suggestions.from_profile(profile, min_confidence=min_confidence)
631
+
632
+
633
def get_history(
    contract: str,
    *,
    limit: int = 20,
    since: Optional[str] = None,
    failed_only: bool = False,
) -> List[Dict[str, Any]]:
    """
    Get validation history for a contract.

    Args:
        contract: Path to contract YAML file
        limit: Maximum number of runs to return (default: 20)
        since: Only return runs after this date/time. Formats:
            - "24h", "7d" - relative time
            - "2026-01-15" - specific date
        failed_only: Only return failed runs

    Returns:
        List of run summaries, newest first. Each summary contains:
        - run_id: Unique identifier
        - timestamp: When the run occurred (ISO format)
        - passed: Overall pass/fail
        - failed_count: Total failures
        - total_rows: Row count (if available)
        - contract_name: Name of the contract

    Raises:
        ValueError: If `contract` looks like a data file, or `since` is not
            in one of the supported formats.

    Example:
        history = kontra.get_history("contract.yml")
        for run in history:
            print(f"{run['timestamp']}: {'PASS' if run['passed'] else 'FAIL'}")

        # Last 7 days only
        recent = kontra.get_history("contract.yml", since="7d")

        # Only failed runs
        failures = kontra.get_history("contract.yml", failed_only=True)
    """
    from datetime import datetime, timedelta, timezone
    from kontra.config.loader import ContractLoader
    from kontra.state.fingerprint import fingerprint_contract
    from kontra.state.backends import get_default_store

    # Validate that contract is a YAML file, not a data file (BUG-014)
    _validate_contract_path(contract, "get_history")

    # Load contract to get fingerprint (history is keyed by contract fingerprint)
    contract_obj = ContractLoader.from_path(contract)
    fp = fingerprint_contract(contract_obj)

    # Parse the `since` parameter into an aware UTC datetime
    since_dt = None
    if since:
        now = datetime.now(timezone.utc)
        since_lower = since.lower().strip()
        # One friendly error for every malformed variant ("h", "threed",
        # "not-a-date", ...) instead of a raw int()/fromisoformat() traceback.
        format_error = ValueError(
            f"Invalid since format: {since}. Use '24h', '7d', or 'YYYY-MM-DD'"
        )

        if since_lower.endswith(("h", "d")):
            try:
                amount = int(since_lower[:-1])
            except ValueError:
                # e.g. "h" or "threed" - no number before the unit suffix
                raise format_error from None
            if since_lower.endswith("h"):
                since_dt = now - timedelta(hours=amount)
            else:
                since_dt = now - timedelta(days=amount)
        else:
            # Try parsing as date
            try:
                since_dt = datetime.fromisoformat(since)
            except ValueError:
                # `from None` keeps the curated message free of chained noise
                raise format_error from None
            if since_dt.tzinfo is None:
                # Treat naive inputs as UTC for consistent comparisons
                since_dt = since_dt.replace(tzinfo=timezone.utc)

    # Get history from store (no configured store -> empty history, not an error)
    store = get_default_store()
    if store is None:
        return []

    summaries = store.get_run_summaries(
        contract_fingerprint=fp,
        limit=limit,
        since=since_dt,
        failed_only=failed_only,
    )

    return [s.to_dict() for s in summaries]
717
+
718
+
719
+ # =============================================================================
720
+ # Deprecated Aliases (for backward compatibility)
721
+ # =============================================================================
722
+
723
+
724
def scout(
    data: Union[str, pl.DataFrame],
    preset: str = "standard",
    *,
    columns: Optional[List[str]] = None,
    sample: Optional[int] = None,
    save: bool = True,
    **kwargs,
) -> DatasetProfile:
    """
    DEPRECATED: Use kontra.profile() instead.

    Profile a dataset.
    """
    import warnings

    # Kept only for backward compatibility; emits a warning and delegates.
    warnings.warn(
        "kontra.scout() is deprecated, use kontra.profile() instead",
        DeprecationWarning,
        stacklevel=2,
    )
    forwarded = dict(preset=preset, columns=columns, sample=sample, save=save)
    return profile(data, **forwarded, **kwargs)
745
+
746
+
747
def suggest_rules(
    data: Union[str, DatasetProfile, pl.DataFrame],
    min_confidence: float = 0.5,
) -> Suggestions:
    """
    DEPRECATED: Use kontra.profile() then kontra.draft() instead.

    Generate validation rule suggestions from data or a profile.

    Args:
        data: File path, DataFrame, or DatasetProfile
        min_confidence: Minimum confidence score (0.0-1.0)

    Returns:
        Suggestions with .to_yaml(), .save(), .filter()
    """
    import warnings

    warnings.warn(
        "kontra.suggest_rules() is deprecated, use kontra.profile() then kontra.draft() instead",
        DeprecationWarning,
        stacklevel=2,
    )
    # Dispatch on input type: an existing profile is used as-is; raw data is
    # profiled first; anything else is rejected.
    if isinstance(data, DatasetProfile):
        return draft(data, min_confidence=min_confidence)
    if isinstance(data, (str, pl.DataFrame)):
        return draft(profile(data, preset="scan"), min_confidence=min_confidence)
    raise TypeError(
        f"suggest_rules() expects str, DataFrame, or DatasetProfile, got {type(data).__name__}"
    )
779
+
780
+
781
def explain(
    data: Union[str, pl.DataFrame],
    contract: str,
    **kwargs,
) -> Dict[str, Any]:
    """
    Preview the execution plan for a contract without running validation.

    Args:
        data: DataFrame or path/URI to data file
        contract: Path to contract YAML file

    Returns:
        Dict with required_columns, total_rules, predicates, fallback_rules,
        and sql_rules (a list of {"rule_id", "name"} entries)

    Example:
        plan = kontra.explain(df, "contract.yml")
        print(f"Columns needed: {plan['required_columns']}")
        for rule in plan['sql_rules']:
            print(f"{rule['rule_id']}: {rule['sql']}")
    """
    # Basic plan info only for now.
    # TODO: Implement full explain with SQL preview
    from kontra.config.loader import ContractLoader
    from kontra.rules.factory import RuleFactory
    from kontra.rules.execution_plan import RuleExecutionPlan

    loaded_contract = ContractLoader.from_path(contract)
    built_rules = RuleFactory(loaded_contract.rules).build_rules()
    compiled = RuleExecutionPlan(built_rules).compile()

    # Compiled SQL rules may be Rule objects or plain dicts, depending on
    # how compilation produced them; normalize either shape.
    sql_entries = []
    for item in compiled.sql_rules:
        if hasattr(item, "rule_id"):
            sql_entries.append({"rule_id": item.rule_id, "name": item.name})
        elif isinstance(item, dict):
            sql_entries.append({"rule_id": item.get("rule_id", ""), "name": item.get("name", "")})

    return {
        "required_columns": list(compiled.required_cols or []),
        "total_rules": len(built_rules),
        "predicates": len(compiled.predicates),
        "fallback_rules": len(compiled.fallback_rules),
        "sql_rules": sql_entries,
    }
828
+
829
+
830
def diff(
    contract: str,
    *,
    since: Optional[str] = None,
    before: Optional[str] = None,
    after: Optional[str] = None,
) -> Optional[Diff]:
    """
    Compare validation runs over time.

    Args:
        contract: Contract name or path
        since: Compare against the most recent run at or before this time
            ("24h", "7d", or an ISO date like "2024-01-15")
        before: Specific run ID (run timestamp) for the before state
        after: Specific run ID (run timestamp) for the after state
            (default: latest run)

    Returns:
        Diff with .has_changes, .regressed, .new_failures, .to_llm()
        Returns None if no history (or no matching runs) available

    Raises:
        ValueError: If `since` is not of the form "Nh", "Nd", or an ISO date
        StateCorruptedError: If stored state cannot be read

    Example:
        diff = kontra.diff("users_contract", since="7d")
        if diff and diff.regressed:
            print("Quality regressed!")
            for failure in diff.new_failures:
                print(f"  NEW: {failure['rule_id']}")
    """
    from kontra.state.backends import get_default_store
    from kontra.state.types import StateDiff
    from kontra.state.fingerprint import fingerprint_contract
    from kontra.config.loader import ContractLoader
    from kontra.errors import StateCorruptedError

    def _as_utc(dt: datetime) -> datetime:
        # Stored run timestamps may be naive; treat naive values as UTC so
        # they compare cleanly against an aware cutoff.
        return dt if dt.tzinfo is not None else dt.replace(tzinfo=timezone.utc)

    def _since_cutoff(value: str) -> datetime:
        # Accepts relative spans ("24h", "7d") or an ISO date/datetime.
        now = datetime.now(timezone.utc)
        lowered = value.strip().lower()
        try:
            if lowered.endswith("h"):
                return now - timedelta(hours=int(lowered[:-1]))
            if lowered.endswith("d"):
                return now - timedelta(days=int(lowered[:-1]))
            parsed = datetime.fromisoformat(value)
        except ValueError:
            raise ValueError(
                f"Invalid since format: {value}. Use '24h', '7d', or 'YYYY-MM-DD'"
            )
        return _as_utc(parsed)

    store = get_default_store()
    if store is None:
        return None

    # Parse user input up front so a malformed `since` raises ValueError
    # instead of being misreported as corrupted state by the handlers below.
    # (Previously since/before/after were accepted but silently ignored.)
    cutoff = _since_cutoff(since) if since is not None else None

    # Validate that contract is a YAML file, not a data file (BUG-014)
    if os.path.isfile(contract):
        _validate_contract_path(contract, "diff")

    try:
        # If it's a file path, load contract and compute semantic fingerprint
        if os.path.isfile(contract):
            contract_obj = ContractLoader.from_path(contract)
            contract_fp = fingerprint_contract(contract_obj)
        else:
            # Assume it's a contract name - search stored states for a
            # contract whose most recent run carries that name.
            contract_fp = None
            for fp in store.list_contracts():
                history = store.get_history(fp, limit=1)
                if history and history[0].contract_name == contract:
                    contract_fp = fp
                    break

        if contract_fp is None:
            return None

        # Get history for this contract (states are newest first).
        states = store.get_history(contract_fp, limit=100)
        if len(states) < 2:
            return None

        # Resolve the "after" state (default: latest run).
        if after is not None:
            after_state = next(
                (s for s in states if s.run_at.isoformat() == after), None
            )
            if after_state is None:
                return None
        else:
            after_state = states[0]

        # Resolve the "before" state: explicit run ID, time cutoff, or the
        # run immediately preceding the after state.
        if before is not None:
            before_state = next(
                (s for s in states if s.run_at.isoformat() == before), None
            )
        elif cutoff is not None:
            # Newest-first order: first state at or before the cutoff is the
            # most recent run from "that time ago".
            before_state = next(
                (s for s in states if _as_utc(s.run_at) <= cutoff), None
            )
        else:
            idx = states.index(after_state)
            before_state = states[idx + 1] if idx + 1 < len(states) else None

        if before_state is None or before_state is after_state:
            return None

        # Compute diff
        state_diff = StateDiff.compute(before_state, after_state)
        return Diff.from_state_diff(state_diff)

    except (json.JSONDecodeError, KeyError, TypeError, AttributeError) as e:
        # These indicate corrupted state data
        raise StateCorruptedError(contract, str(e))
    except FileNotFoundError:
        # No history available - this is normal
        return None
    except Exception as e:
        # For other exceptions, log and re-raise as state corruption
        # since we've already handled the "no history" case
        log_exception(_logger, "Failed to compute diff", e)
        raise StateCorruptedError(contract, str(e))
914
+
915
+
916
def profile_diff(
    source: str,
    *,
    since: Optional[str] = None,
) -> Optional[ProfileDiff]:
    """
    Compare profile runs over time for a data source.

    Args:
        source: Data source path or name
        since: Compare to the profile from this time ago (e.g. "7d")

    Returns:
        ProfileDiff with .has_changes, .schema_changes, .to_llm(), or None
        when no profile history is available.

    Example:
        diff = kontra.profile_diff("data.parquet", since="7d")
        if diff and diff.has_schema_changes:
            print("Schema changed!")
            for col in diff.columns_added:
                print(f"  NEW: {col}")
    """
    # TODO: Implement profile history lookup; until then no history exists,
    # so there is never anything to diff.
    return None
941
+
942
+
943
def scout_diff(
    source: str,
    *,
    since: Optional[str] = None,
) -> Optional[ProfileDiff]:
    """Deprecated alias for :func:`profile_diff`.

    Emits a DeprecationWarning, then delegates unchanged.
    """
    import warnings

    warnings.warn(
        "kontra.scout_diff() is deprecated, use kontra.profile_diff() instead",
        DeprecationWarning,
        stacklevel=2,
    )
    return profile_diff(source, since=since)
960
+
961
+
962
+ # =============================================================================
963
+ # History Functions
964
+ # =============================================================================
965
+
966
+
967
def _resolve_contract_fingerprint(contract: str, store: Any, caller: str = "state function") -> Optional[str]:
    """
    Map a contract name or file path to its stored fingerprint.

    Args:
        contract: Contract name or file path
        store: State store instance
        caller: Name of the calling function (used in error messages)

    Returns:
        Contract fingerprint, or None when no stored contract matches
    """
    from kontra.state.fingerprint import fingerprint_contract
    from kontra.config.loader import ContractLoader

    # File path: validate it isn't a data file (BUG-014), then compute the
    # semantic fingerprint from the loaded contract.
    if os.path.isfile(contract):
        _validate_contract_path(contract, caller)
        return fingerprint_contract(ContractLoader.from_path(contract))

    # Otherwise treat it as a contract name and scan stored states for a
    # contract whose most recent run carries that name.
    for candidate_fp in store.list_contracts():
        recent = store.get_history(candidate_fp, limit=1)
        if recent and recent[0].contract_name == contract:
            return candidate_fp

    return None
996
+
997
+
998
def list_runs(contract: str) -> List[Dict[str, Any]]:
    """
    List past validation runs for a contract, newest first.

    Args:
        contract: Contract name or path

    Returns:
        List of run summaries with id, timestamp, passed, etc.
        Empty list when no store, no matching contract, or on error.
    """
    from kontra.state.backends import get_default_store

    store = get_default_store()
    if store is None:
        return []

    try:
        fp = _resolve_contract_fingerprint(contract, store, "list_runs")
        if fp is None:
            return []

        summaries = []
        for state in store.get_history(fp, limit=100):
            summaries.append(
                {
                    "id": state.run_at.isoformat(),
                    "fingerprint": state.contract_fingerprint,
                    "timestamp": state.run_at,
                    "passed": state.summary.passed,
                    "total_rules": state.summary.total_rules,
                    "failed_count": state.summary.failed_rules,
                    "dataset": state.dataset_uri,
                }
            )
        return summaries
    except Exception as e:
        log_exception(_logger, "Failed to list runs", e)
        return []
1035
+
1036
+
1037
def get_run(
    contract: str,
    run_id: Optional[str] = None,
) -> Optional[ValidationResult]:
    """
    Get a specific validation run.

    Args:
        contract: Contract name or path
        run_id: Specific run ID - the run timestamp in ISO form
            (default: latest)

    Returns:
        ValidationResult, or None if the contract/run is not found
    """
    from kontra.state.backends import get_default_store

    store = get_default_store()
    if store is None:
        return None

    try:
        fp = _resolve_contract_fingerprint(contract, store, "get_run")
        if fp is None:
            return None

        history = store.get_history(fp, limit=100)
        if not history:
            return None

        # Pick the requested run by timestamp ID, or the latest run
        # (history is ordered newest first).
        if run_id:
            target = next(
                (s for s in history if s.run_at.isoformat() == run_id), None
            )
        else:
            target = history[0]

        if target is None:
            return None

        # Rehydrate the stored state into the public result type.
        rule_results = [
            RuleResult(
                rule_id=r.rule_id,
                name=r.rule_name,
                passed=r.passed,
                failed_count=r.failed_count,
                message=r.message or "",
                severity=r.severity,
                source=r.execution_source,
                column=r.column,
            )
            for r in target.rules
        ]
        return ValidationResult(
            passed=target.summary.passed,
            dataset=target.dataset_uri,
            total_rows=target.summary.row_count or 0,
            total_rules=target.summary.total_rules,
            passed_count=target.summary.passed_rules,
            failed_count=target.summary.blocking_failures,
            warning_count=target.summary.warning_failures,
            rules=rule_results,
        )
    except Exception as e:
        log_exception(_logger, "Failed to get run", e)
        return None
1107
+
1108
+
1109
def has_runs(contract: str) -> bool:
    """
    Report whether any validation history exists for a contract.

    Args:
        contract: Contract name or path

    Returns:
        True if at least one stored run exists
    """
    from kontra.state.backends import get_default_store

    store = get_default_store()
    if store is None:
        return False

    try:
        fp = _resolve_contract_fingerprint(contract, store, "has_runs")
        if fp is None:
            return False
        return bool(store.get_history(fp, limit=1))
    except Exception as e:
        log_exception(_logger, "Failed to check runs", e)
        return False
1135
+
1136
+
1137
def list_profiles(source: str) -> List[Dict[str, Any]]:
    """
    List past profile runs for a data source.

    Args:
        source: Data source path or name

    Returns:
        List of profile summaries. Currently always empty because profile
        history is not implemented yet.
    """
    # TODO: Implement profile history
    return []
1149
+
1150
+
1151
def get_profile(
    source: str,
    run_id: Optional[str] = None,
) -> Optional[DatasetProfile]:
    """
    Fetch a stored profile run for a data source.

    Args:
        source: Data source path or name
        run_id: Specific run ID (default: latest)

    Returns:
        DatasetProfile, or None when not found. Currently always returns
        None because profile history lookup is not implemented yet.
    """
    # TODO: Implement profile history lookup
    return None
1167
+
1168
+
1169
+ # =============================================================================
1170
+ # Configuration Functions
1171
+ # =============================================================================
1172
+
1173
+
1174
def resolve(name: str) -> str:
    """
    Resolve a configured datasource name to its URI.

    Args:
        name: Datasource name (e.g., "users" or "prod_db.users")

    Returns:
        The resolved URI

    Example:
        uri = kontra.resolve("users")
        uri = kontra.resolve("prod_db.users")
    """
    return resolve_datasource(name)
1189
+
1190
+
1191
def config(env: Optional[str] = None) -> KontraConfig:
    """
    Return the effective Kontra configuration.

    Args:
        env: Environment name (default: use KONTRA_ENV or defaults)

    Returns:
        KontraConfig with preplan, pushdown, etc.

    Example:
        cfg = kontra.config()
        cfg = kontra.config(env="production")
        print(cfg.preplan)  # "auto"
    """
    return resolve_effective_config(env_name=env)
1207
+
1208
+
1209
+ # =============================================================================
1210
+ # Annotation Functions
1211
+ # =============================================================================
1212
+
1213
+
1214
def annotate(
    contract: str,
    *,
    run_id: Optional[str] = None,
    rule_id: Optional[str] = None,
    actor_type: str = "agent",
    actor_id: str,
    annotation_type: str,
    summary: str,
    payload: Optional[Dict[str, Any]] = None,
) -> int:
    """
    Attach an annotation to a validation run, or to one rule within it.

    Annotations are "memory without authority": agents and humans record
    context about runs (resolutions, root causes, acknowledgments) without
    affecting Kontra's validation behavior.

    Invariants:
        - Append-only: annotations are never updated or deleted
        - Uninterpreted: annotation_type is stored but its vocabulary is
          not defined by Kontra
        - Never read during validation or diff

    Args:
        contract: Contract name or path
        run_id: Run ID to annotate (default: latest run).
            File-based backends use strings like "2024-01-15T09-30-00_abc123";
            database backends use an integer ID rendered as a string.
        rule_id: Optional rule ID to annotate a specific rule
        actor_type: "agent" | "human" | "system"
        actor_id: Identifier for the actor (e.g., "repair-agent-v2",
            "alice@example.com")
        annotation_type: Free-form type. Suggested (not enforced) values:
            "resolution", "root_cause", "false_positive", "acknowledged",
            "suppressed", "note"
        summary: Human-readable summary
        payload: Optional structured data (dict)

    Returns:
        Annotation ID (integer)

    Raises:
        ValueError: If contract or run not found, or rule_id not in the run
        RuntimeError: If the backend cannot persist the annotation

    Example:
        kontra.annotate(
            "users_contract.yml",
            actor_type="agent",
            actor_id="repair-agent-v2",
            annotation_type="resolution",
            summary="Fixed null emails by backfilling from user_profiles table",
        )
    """
    from kontra.state.backends import get_default_store
    from kontra.state.types import Annotation

    store = get_default_store()
    if store is None:
        raise RuntimeError("State store not available")

    # Resolve contract to fingerprint
    contract_fp = _resolve_contract_fingerprint(contract, store, "annotate")
    if contract_fp is None:
        raise ValueError(f"Contract not found: {contract}")

    # Locate the run to annotate.
    if run_id is None:
        state = store.get_latest(contract_fp)
        if state is None:
            raise ValueError(f"No runs found for contract: {contract}")
    else:
        state = None
        for candidate in store.get_history(contract_fp, limit=100):
            # run_id may be a timestamp string (file backends) or an
            # integer ID rendered as a string (database backends).
            if candidate.run_at.isoformat() == run_id or (
                candidate.id is not None and str(candidate.id) == run_id
            ):
                state = candidate
                break
        if state is None:
            raise ValueError(f"Run not found: {run_id}")

    # If a rule is targeted, resolve its stored result ID.
    rule_result_id = None
    if rule_id is not None:
        matched = next((r for r in state.rules if r.rule_id == rule_id), None)
        if matched is None:
            raise ValueError(f"Rule not found in run: {rule_id}")
        rule_result_id = matched.id  # May be None for file backends

    annotation = Annotation(
        run_id=state.id or 0,
        rule_result_id=rule_result_id,
        rule_id=rule_id,  # Store semantic rule ID for cross-run queries
        actor_type=actor_type,
        actor_id=actor_id,
        annotation_type=annotation_type,
        summary=summary,
        payload=payload,
    )

    # Persist - the mechanism depends on the backend type.
    try:
        # Database backends save directly via save_annotation().
        if hasattr(store, "save_annotation") and not isinstance(store, type):
            try:
                return store.save_annotation(annotation)
            except NotImplementedError:
                pass

        # File-based backends need the string run ID of the on-disk file.
        if hasattr(store, "save_annotation_for_run"):
            run_id_str = _find_run_id_string(store, contract_fp, state)
            if run_id_str is None:
                raise RuntimeError("Could not find run file for annotation")
            return store.save_annotation_for_run(contract_fp, run_id_str, annotation)

        raise RuntimeError("Backend does not support annotations")

    except Exception as e:
        raise RuntimeError(f"Failed to save annotation: {e}") from e
1383
+
1384
+
1385
def _find_run_id_string(store: Any, contract_fp: str, state: Any) -> Optional[str]:
    """
    Find the run_id string for a state in file-based backends.

    This is needed because file-based backends use string run IDs but
    ValidationState.id is an integer hash.

    Args:
        store: State store instance (LocalStore or S3Store expected;
            detected by probing private attributes)
        contract_fp: Contract fingerprint whose runs directory is scanned
        state: Stored state whose .id is matched against each run file

    Returns:
        The run ID string (file name without extension), or None when the
        backend is unrecognized or no run file matches.
    """
    from pathlib import Path  # NOTE(review): unused import - kept as-is

    # LocalStore: scan the on-disk runs directory for this contract.
    if hasattr(store, "_runs_dir"):
        runs_dir = store._runs_dir(contract_fp)
        if runs_dir.exists():
            for filepath in runs_dir.glob("*.json"):
                # NOTE(review): a "*.json" match can never end with
                # ".ann.jsonl", so this guard looks redundant - confirm.
                if filepath.name.endswith(".ann.jsonl"):
                    continue
                loaded = store._load_state(filepath)
                if loaded and loaded.id == state.id:
                    return filepath.stem
        return None

    # S3Store - similar pattern but via fsspec
    if hasattr(store, "_runs_prefix") and hasattr(store, "_get_fs"):
        fs = store._get_fs()
        prefix = store._runs_prefix(contract_fp)
        try:
            all_files = fs.glob(f"s3://{prefix}/*.json")
            files = [f for f in all_files if not f.endswith(".ann.jsonl")]
            for filepath in files:
                loaded = store._load_state(filepath)
                if loaded and loaded.id == state.id:
                    # Strip the directory prefix and ".json" suffix.
                    return filepath.rsplit("/", 1)[-1].replace(".json", "")
        except Exception:
            # Best effort: unreadable listings/files simply yield no match.
            pass
        return None

    # Unknown backend type.
    return None
1422
+
1423
+
1424
def get_run_with_annotations(
    contract: str,
    run_id: Optional[str] = None,
) -> Optional[ValidationResult]:
    """
    Get a validation run with its annotations loaded.

    Annotations are opt-in for performance - plain get_run() skips them.
    Use this function when you need to see them.

    Args:
        contract: Contract name or path
        run_id: Run ID (default: latest run)

    Returns:
        ValidationResult with annotations, or None if not found

    Example:
        result = kontra.get_run_with_annotations("users_contract.yml")
        if result:
            for rule in result.rules:
                print(f"{rule.rule_id}: {rule.annotations}")
    """
    from kontra.state.backends import get_default_store

    store = get_default_store()
    if store is None:
        return None

    try:
        fp = _resolve_contract_fingerprint(contract, store, "get_run_with_annotations")
        if fp is None:
            return None

        # The backend keys annotated runs by integer ID; translate a
        # timestamp-style string ID by scanning history when needed.
        resolved_id: Optional[int] = None
        if run_id is not None:
            try:
                resolved_id = int(run_id)
            except ValueError:
                for s in store.get_history(fp, limit=100):
                    if s.run_at.isoformat() == run_id:
                        resolved_id = s.id
                        break

        state = store.get_run_with_annotations(fp, resolved_id)
        if state is None:
            return None

        # Rehydrate into the public result type, carrying annotations.
        rule_results = [
            RuleResult(
                rule_id=r.rule_id,
                name=r.rule_name,
                passed=r.passed,
                failed_count=r.failed_count,
                message=r.message or "",
                severity=r.severity,
                source=r.execution_source,
                column=r.column,
                annotations=[a.to_dict() for a in r.annotations] if r.annotations else None,
            )
            for r in state.rules
        ]
        return ValidationResult(
            passed=state.summary.passed,
            dataset=state.dataset_uri,
            total_rows=state.summary.row_count or 0,
            total_rules=state.summary.total_rules,
            passed_count=state.summary.passed_rules,
            failed_count=state.summary.blocking_failures,
            warning_count=state.summary.warning_failures,
            rules=rule_results,
            annotations=[a.to_dict() for a in state.annotations] if state.annotations else None,
        )
    except Exception as e:
        log_exception(_logger, "Failed to get run with annotations", e)
        return None
1503
+
1504
+
1505
def get_annotations(
    contract: str,
    *,
    rule_id: Optional[str] = None,
    annotation_type: Optional[str] = None,
    limit: int = 20,
) -> List[Dict[str, Any]]:
    """
    Retrieve annotations across runs for a contract, most recent first.

    Primary use case: an agent sees a failure and checks whether past runs
    carry hints about the same rule - cross-session memory.

    Args:
        contract: Contract name or path
        rule_id: Filter to annotations on this rule (recommended)
        annotation_type: Filter by type (e.g., "resolution", "false_positive")
        limit: Max annotations to return (default 20)

    Returns:
        List of annotation dicts, each with: id, run_id, rule_id (semantic
        ID such as "COL:email:not_null", or None for run-level), actor_type,
        actor_id, annotation_type, summary, payload, created_at.

    Example:
        hints = kontra.get_annotations(
            "users_contract.yml",
            rule_id="COL:email:not_null",
        )
        for hint in hints:
            print(f"[{hint['annotation_type']}] {hint['summary']}")
    """
    from kontra.state.backends import get_default_store

    store = get_default_store()
    if store is None:
        return []

    try:
        fp = _resolve_contract_fingerprint(contract, store, "get_annotations")
        if fp is None:
            return []

        found = store.get_annotations_for_contract(
            fp,
            rule_id=rule_id,
            annotation_type=annotation_type,
            limit=limit,
        )
        return [entry.to_dict() for entry in found]
    except Exception as e:
        log_exception(_logger, "Failed to get annotations", e)
        return []
1575
+
1576
+
1577
+ # =============================================================================
1578
+ # Service/Agent Support Functions
1579
+ # =============================================================================
1580
+
1581
# Global config path override for service/agent use.
# None means auto-discovery (.kontra/config.yml from cwd); mutated by set_config().
_config_path_override: Optional[str] = None
1583
+
1584
+
1585
def set_config(path: Optional[str]) -> None:
    """
    Pin the config file location (for long-running services or agents).

    Kontra normally discovers config from cwd (.kontra/config.yml); pass an
    explicit path to override that, or None to restore auto-discovery.

    Args:
        path: Path to config.yml, or None to reset

    Example:
        kontra.set_config("/etc/kontra/config.yml")
        result = kontra.validate(df, rules=[...])

        # Reset to default behavior
        kontra.set_config(None)
    """
    global _config_path_override
    _config_path_override = path
1604
+
1605
+
1606
def get_config_path() -> Optional[str]:
    """
    Return the config path set via set_config(), or None when
    auto-discovery is active.
    """
    return _config_path_override
1614
+
1615
+
1616
def list_rules() -> List[Dict[str, Any]]:
    """
    List all available validation rules.

    For agents and integrations that need to discover what rules exist.

    Returns:
        List of rule info dicts with name, description, params, scope

    Example:
        rules = kontra.list_rules()
        for rule in rules:
            print(f"{rule['name']}: {rule['description']}")
    """
    from kontra.rules.registry import RULE_REGISTRY

    # Rule metadata - manually maintained for quality descriptions
    # This is better than parsing docstrings which may be inconsistent
    RULE_METADATA = {
        "not_null": {
            "description": "Fails where column contains NULL values (optionally NaN)",
            "params": {"column": "required", "include_nan": "optional (default: False)"},
            "scope": "column",
        },
        "unique": {
            "description": "Fails where column contains duplicate values",
            "params": {"column": "required"},
            "scope": "column",
        },
        "allowed_values": {
            "description": "Fails where column contains values not in allowed list",
            "params": {"column": "required", "values": "required (list)"},
            "scope": "column",
        },
        "disallowed_values": {
            "description": "Fails where column contains values that ARE in the disallowed list",
            "params": {"column": "required", "values": "required (list)"},
            "scope": "column",
        },
        "range": {
            "description": "Fails where column values are outside [min, max] range",
            "params": {"column": "required", "min": "optional", "max": "optional"},
            "scope": "column",
        },
        "length": {
            "description": "Fails where string length is outside [min, max] bounds",
            "params": {"column": "required", "min": "optional", "max": "optional"},
            "scope": "column",
        },
        "regex": {
            "description": "Fails where column values don't match regex pattern",
            "params": {"column": "required", "pattern": "required"},
            "scope": "column",
        },
        "contains": {
            "description": "Fails where column values don't contain the substring",
            "params": {"column": "required", "substring": "required"},
            "scope": "column",
        },
        "starts_with": {
            "description": "Fails where column values don't start with the prefix",
            "params": {"column": "required", "prefix": "required"},
            "scope": "column",
        },
        "ends_with": {
            "description": "Fails where column values don't end with the suffix",
            "params": {"column": "required", "suffix": "required"},
            "scope": "column",
        },
        "dtype": {
            "description": "Fails if column data type doesn't match expected type",
            "params": {"column": "required", "type": "required"},
            "scope": "column",
        },
        "min_rows": {
            "description": "Fails if dataset has fewer than threshold rows",
            "params": {"threshold": "required (int)"},
            "scope": "dataset",
        },
        "max_rows": {
            "description": "Fails if dataset has more than threshold rows",
            "params": {"threshold": "required (int)"},
            "scope": "dataset",
        },
        "freshness": {
            "description": "Fails if timestamp column is older than max_age",
            "params": {"column": "required", "max_age": "required (e.g., '24h', '7d')"},
            "scope": "column",
        },
        "custom_sql_check": {
            "description": "Escape hatch: run arbitrary SQL that returns violation count",
            "params": {"sql": "required", "threshold": "optional (default: 0)"},
            "scope": "dataset",
        },
        "compare": {
            "description": "Fails where left column doesn't satisfy comparison with right column",
            "params": {
                "left": "required (column name)",
                "right": "required (column name)",
                "op": "required (>, >=, <, <=, ==, !=)",
            },
            "scope": "cross-column",
        },
        "conditional_not_null": {
            "description": "Fails where column is NULL when a condition is met",
            "params": {
                "column": "required (column to check)",
                "when": "required (e.g., \"status == 'shipped'\")",
            },
            "scope": "cross-column",
        },
        "conditional_range": {
            "description": "Fails where column is outside range when a condition is met",
            "params": {
                "column": "required (column to check)",
                "when": "required (e.g., \"customer_type == 'premium'\")",
                "min": "optional (minimum value, inclusive)",
                "max": "optional (maximum value, inclusive)",
            },
            "scope": "cross-column",
        },
    }

    catalog = []
    for name in sorted(RULE_REGISTRY.keys()):
        meta = RULE_METADATA.get(name)
        if meta is None:
            # Fallback for registered rules without curated metadata
            catalog.append(
                {
                    "name": name,
                    "description": f"Validation rule: {name}",
                    "params": {},
                    "scope": "unknown",
                }
            )
        else:
            catalog.append(
                {
                    "name": name,
                    "description": meta.get("description", ""),
                    "params": meta.get("params", {}),
                    "scope": meta.get("scope", "unknown"),
                }
            )
    return catalog
1758
+
1759
+
1760
def health() -> Dict[str, Any]:
    """
    Health check for service/agent use.

    Returns version, config status, and available rules.
    Use this to verify Kontra is properly installed and configured.

    Returns:
        Dict with version, config_found, config_path, rule_count, status

    Example:
        health = kontra.health()
        if health["status"] == "ok":
            print(f"Kontra {health['version']} ready")
        else:
            print(f"Issue: {health['status']}")
    """
    # Local imports keep module import time low and avoid cycles.
    from kontra.rules.registry import RULE_REGISTRY
    from kontra.config.settings import find_config_file
    from pathlib import Path

    report: Dict[str, Any] = {"version": __version__, "status": "ok"}

    # Config resolution: an explicit override (set via set_config) takes
    # precedence over filesystem discovery.
    if _config_path_override:
        override = Path(_config_path_override)
        present = override.exists()
        report["config_path"] = str(override)
        report["config_found"] = present
        if not present:
            # An override pointing at a missing file is a degraded state.
            report["status"] = "config_not_found"
    else:
        # No override: discovery failing is not an error (config is optional).
        discovered = find_config_file()
        report["config_path"] = str(discovered) if discovered else None
        report["config_found"] = discovered is not None

    # Registered rule inventory.
    report["rule_count"] = len(RULE_REGISTRY)
    report["rules"] = sorted(RULE_REGISTRY.keys())

    return report
1805
+
1806
+
1807
# =============================================================================
# Exports
# =============================================================================

# Public API surface: controls `from kontra import *` and signals which names
# are supported for external use. Grouped by role; order is preserved as-is.
__all__ = [
    # Version
    "__version__",
    # Core functions
    "validate",
    "profile",
    "draft",
    "explain",
    "diff",
    "profile_diff",
    # Transformation probes
    "compare",
    "profile_relationship",
    # Deprecated aliases (kept for backward compatibility)
    "scout",  # Use profile() instead
    "suggest_rules",  # Use draft() instead
    "scout_diff",  # Use profile_diff() instead
    # History functions
    "list_runs",
    "get_run",
    "has_runs",
    "list_profiles",
    "get_profile",
    # Annotation functions
    "annotate",
    "get_annotations",
    "get_run_with_annotations",
    # Configuration functions
    "resolve",
    "config",
    "list_datasources",
    # Service/Agent support
    "set_config",
    "get_config_path",
    "list_rules",
    "health",
    # Result types
    "ValidationResult",
    "RuleResult",
    "DryRunResult",
    "Diff",
    "Suggestions",
    "SuggestedRule",
    "DatasetProfile",
    "ColumnProfile",
    "ProfileDiff",
    # Probe result types
    "CompareResult",
    "RelationshipProfile",
    # Rules helpers
    "rules",
    # Decorators
    "validate_decorator",
    # Errors
    "ValidationError",
    "StateCorruptedError",
    # Advanced usage (direct engine/profiler/config access)
    "ValidationEngine",
    "ScoutProfiler",
    "KontraConfig",
]