datacheck-cli 2.0.0__tar.gz → 2.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/PKG-INFO +1 -1
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/__init__.py +1 -1
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/cli/schema.py +13 -3
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/cli/validate.py +21 -14
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/config/__init__.py +2 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/config/generator.py +48 -10
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/config/loader.py +88 -17
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/config/templates/basic.yaml +5 -0
- datacheck_cli-2.0.2/datacheck/config/templates/rules-reference.yaml +285 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/config/templates/sources.yaml +7 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/config/validator.py +7 -4
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/connectors/azure.py +7 -7
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/connectors/bigquery.py +2 -2
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/connectors/gcs.py +7 -7
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/connectors/mssql.py +2 -1
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/connectors/mysql.py +2 -2
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/connectors/postgresql.py +2 -1
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/connectors/redshift.py +4 -4
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/connectors/s3.py +7 -7
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/connectors/snowflake.py +2 -2
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/engine.py +45 -6
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/notifications/slack.py +8 -3
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/profiling/profiler.py +170 -12
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/profiling/suggestions.py +73 -14
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/reporting/suggestion_engine.py +7 -2
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/reporting/terminal_reporter.py +28 -10
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/rules/semantic_rules.py +32 -7
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/rules/temporal_rules.py +15 -1
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/pyproject.toml +1 -1
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/LICENSE +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/README_PYPI.md +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/__main__.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/airflow/__init__.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/airflow/operators.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/cli/__init__.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/cli/config.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/cli/profile.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/config/parser.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/config/sample_data.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/config/schema.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/config/source.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/config/templates/__init__.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/config/templates/ecommerce.yaml +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/config/templates/finance.yaml +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/config/templates/healthcare.yaml +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/config/templates/iot.yaml +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/config/templates/saas.yaml +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/connectors/__init__.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/connectors/base.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/connectors/cloud_base.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/connectors/factory.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/exceptions.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/loader.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/logging/__init__.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/logging/config.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/logging/filters.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/logging/formatters.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/logging/utils.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/notifications/__init__.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/output.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/parallel/__init__.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/parallel/executor.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/parallel/progress.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/plugins/__init__.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/plugins/decorators.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/plugins/loader.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/plugins/registry.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/profiling/__init__.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/profiling/formatters/__init__.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/profiling/formatters/json_formatter.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/profiling/formatters/markdown_formatter.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/profiling/formatters/terminal_formatter.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/profiling/models.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/profiling/outliers.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/profiling/quality.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/profiling/statistics.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/reporting/__init__.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/reporting/csv_exporter.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/reporting/distribution_analyzer.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/reporting/json_reporter.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/results.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/rules/__init__.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/rules/base.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/rules/composite_rules.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/rules/factory.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/rules/null_rules.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/rules/numeric_rules.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/rules/string_rules.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/sampling/__init__.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/sampling/sampler.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/sampling/strategies.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/schema/__init__.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/schema/baseline.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/schema/comparator.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/schema/detector.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/schema/models.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/security/__init__.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/security/validators.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/utils/__init__.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/utils/connection_parser.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/validation/__init__.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/validation/config.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/validation/rules.py +0 -0
- {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/validation/validator.py +0 -0
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""Schema commands for DataCheck CLI."""
|
|
2
2
|
|
|
3
|
+
import sys
|
|
3
4
|
from pathlib import Path
|
|
4
5
|
|
|
5
6
|
import typer
|
|
@@ -8,6 +9,15 @@ from rich.table import Table
|
|
|
8
9
|
import pandas as pd
|
|
9
10
|
|
|
10
11
|
from datacheck.cli import console
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _safe_encoding() -> bool:
|
|
15
|
+
"""Check if stdout can handle Unicode symbols."""
|
|
16
|
+
encoding = getattr(sys.stdout, "encoding", None) or ""
|
|
17
|
+
return encoding.lower().replace("-", "") in ("utf8", "utf16", "utf32", "utf16le", "utf16be")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
_TICK = "✓" if _safe_encoding() else "v"
|
|
11
21
|
from datacheck.exceptions import DataLoadError
|
|
12
22
|
|
|
13
23
|
# Schema sub-app for schema evolution commands
|
|
@@ -301,12 +311,12 @@ def schema_capture(
|
|
|
301
311
|
# Save baseline
|
|
302
312
|
manager = BaselineManager(baseline_dir=baseline_dir)
|
|
303
313
|
filepath = manager.save_baseline(schema, name=name)
|
|
304
|
-
console.print(f"[green]
|
|
314
|
+
console.print(f"[green]{_TICK}[/green] Schema saved to: {filepath}")
|
|
305
315
|
|
|
306
316
|
# Save to history if requested
|
|
307
317
|
if save_history:
|
|
308
318
|
history_path = manager.save_to_history(schema)
|
|
309
|
-
console.print(f"[green]
|
|
319
|
+
console.print(f"[green]{_TICK}[/green] Schema added to history: {history_path}")
|
|
310
320
|
|
|
311
321
|
# Display summary
|
|
312
322
|
console.print("\n[bold]Schema Summary:[/bold]")
|
|
@@ -493,7 +503,7 @@ def schema_compare(
|
|
|
493
503
|
else:
|
|
494
504
|
# Terminal output
|
|
495
505
|
if not comparison.changes:
|
|
496
|
-
console.print("[green]
|
|
506
|
+
console.print(f"[green]{_TICK} No schema changes detected[/green]")
|
|
497
507
|
else:
|
|
498
508
|
# Compatibility summary
|
|
499
509
|
compat_style = {
|
|
@@ -428,17 +428,7 @@ def validate(
|
|
|
428
428
|
)
|
|
429
429
|
|
|
430
430
|
try:
|
|
431
|
-
# Initialize
|
|
432
|
-
notifier = None
|
|
433
|
-
if slack_webhook:
|
|
434
|
-
from datacheck.notifications import SlackNotifier
|
|
435
|
-
try:
|
|
436
|
-
notifier = SlackNotifier(slack_webhook)
|
|
437
|
-
except Exception as e:
|
|
438
|
-
console.print(f"[red]Slack Configuration Error:[/red] {e}", style="red")
|
|
439
|
-
raise typer.Exit(code=2) from e
|
|
440
|
-
|
|
441
|
-
# Initialize validation engine
|
|
431
|
+
# Initialize validation engine first (to access config)
|
|
442
432
|
try:
|
|
443
433
|
if config:
|
|
444
434
|
config_path = Path(config)
|
|
@@ -448,7 +438,6 @@ def validate(
|
|
|
448
438
|
workers=workers,
|
|
449
439
|
chunk_size=chunk_size,
|
|
450
440
|
show_progress=show_progress,
|
|
451
|
-
notifier=notifier,
|
|
452
441
|
sources_file=sources_file,
|
|
453
442
|
)
|
|
454
443
|
else:
|
|
@@ -457,13 +446,31 @@ def validate(
|
|
|
457
446
|
workers=workers,
|
|
458
447
|
chunk_size=chunk_size,
|
|
459
448
|
show_progress=show_progress,
|
|
460
|
-
notifier=notifier,
|
|
461
449
|
sources_file=sources_file,
|
|
462
450
|
)
|
|
463
451
|
except ConfigurationError as e:
|
|
464
452
|
console.print(f"[red]Configuration Error:[/red] {e}", style="red")
|
|
465
453
|
raise typer.Exit(code=2) from e
|
|
466
454
|
|
|
455
|
+
# Initialize Slack notifier: CLI flag overrides config
|
|
456
|
+
notifier = None
|
|
457
|
+
effective_webhook = slack_webhook
|
|
458
|
+
mention_on_failure = False
|
|
459
|
+
if not effective_webhook and engine.config.notifications:
|
|
460
|
+
effective_webhook = engine.config.notifications.slack_webhook
|
|
461
|
+
mention_on_failure = engine.config.notifications.mention_on_failure
|
|
462
|
+
|
|
463
|
+
if effective_webhook:
|
|
464
|
+
from datacheck.notifications import SlackNotifier
|
|
465
|
+
try:
|
|
466
|
+
notifier = SlackNotifier(effective_webhook, mention_on_failure=mention_on_failure)
|
|
467
|
+
except Exception as e:
|
|
468
|
+
console.print(f"[red]Slack Configuration Error:[/red] {e}", style="red")
|
|
469
|
+
raise typer.Exit(code=2) from e
|
|
470
|
+
|
|
471
|
+
# Attach notifier to engine
|
|
472
|
+
engine.notifier = notifier
|
|
473
|
+
|
|
467
474
|
# Progress spinner — gives user feedback during load + validation
|
|
468
475
|
num_checks = len(engine.config.checks)
|
|
469
476
|
_status = (
|
|
@@ -524,7 +531,7 @@ def validate(
|
|
|
524
531
|
"loading_inline_data_source",
|
|
525
532
|
extra={"type": inline_source.type, "path": str(source_path)},
|
|
526
533
|
)
|
|
527
|
-
summary = engine.validate_file(str(source_path))
|
|
534
|
+
summary = engine.validate_file(str(source_path), **inline_source.options)
|
|
528
535
|
logger.info(
|
|
529
536
|
"data_loaded",
|
|
530
537
|
extra={"source_type": "inline", "path": str(source_path)},
|
|
@@ -6,6 +6,7 @@ Provides config validation, parsing, generation, and CLI tools.
|
|
|
6
6
|
# Original config module classes (for backward compatibility)
|
|
7
7
|
from datacheck.config.loader import (
|
|
8
8
|
ConfigLoader,
|
|
9
|
+
NotificationsConfig,
|
|
9
10
|
RuleConfig,
|
|
10
11
|
SamplingConfig,
|
|
11
12
|
ValidationConfig,
|
|
@@ -27,6 +28,7 @@ from datacheck.config.templates import (
|
|
|
27
28
|
__all__ = [
|
|
28
29
|
# Original exports (backward compatibility)
|
|
29
30
|
"ConfigLoader",
|
|
31
|
+
"NotificationsConfig",
|
|
30
32
|
"RuleConfig",
|
|
31
33
|
"SamplingConfig",
|
|
32
34
|
"ValidationConfig",
|
|
@@ -105,7 +105,7 @@ class ConfigGenerator:
|
|
|
105
105
|
col_names = cc_rule["columns"]
|
|
106
106
|
cc_check: dict[str, Any] = {
|
|
107
107
|
"name": f"cross_{'_'.join(col_names[:2])}_{cc_rule['rule']}",
|
|
108
|
-
"
|
|
108
|
+
"column": col_names[0],
|
|
109
109
|
"rules": {cc_rule["rule"]: cc_rule["params"]},
|
|
110
110
|
"description": cc_rule.get("reason", "Cross-column rule"),
|
|
111
111
|
}
|
|
@@ -130,6 +130,7 @@ class ConfigGenerator:
|
|
|
130
130
|
config["checks"] = checks
|
|
131
131
|
|
|
132
132
|
config["reporting"] = {
|
|
133
|
+
"output_path": "./output",
|
|
133
134
|
"export_failures": True,
|
|
134
135
|
}
|
|
135
136
|
|
|
@@ -237,8 +238,20 @@ class ConfigGenerator:
|
|
|
237
238
|
"""
|
|
238
239
|
from datacheck.loader import LoaderFactory
|
|
239
240
|
|
|
241
|
+
data_path = Path(data_path)
|
|
240
242
|
df = LoaderFactory.load(str(data_path), **load_kwargs)
|
|
241
|
-
name =
|
|
243
|
+
name = data_path.stem
|
|
244
|
+
|
|
245
|
+
# Determine source type from file extension
|
|
246
|
+
ext = data_path.suffix.lower().lstrip(".")
|
|
247
|
+
source_type_map = {
|
|
248
|
+
"csv": "csv",
|
|
249
|
+
"parquet": "parquet",
|
|
250
|
+
"pq": "parquet",
|
|
251
|
+
"json": "json",
|
|
252
|
+
"avro": "avro",
|
|
253
|
+
}
|
|
254
|
+
source_type = source_type_map.get(ext, "csv")
|
|
242
255
|
|
|
243
256
|
if return_profile:
|
|
244
257
|
if confidence_threshold not in self.CONFIDENCE_LEVELS:
|
|
@@ -250,11 +263,20 @@ class ConfigGenerator:
|
|
|
250
263
|
config = self.generate_from_profile(
|
|
251
264
|
profile, confidence_threshold=confidence_threshold
|
|
252
265
|
)
|
|
266
|
+
config["data_source"] = {
|
|
267
|
+
"type": source_type,
|
|
268
|
+
"path": f"./{data_path.name}",
|
|
269
|
+
}
|
|
253
270
|
return config, profile
|
|
254
271
|
|
|
255
|
-
|
|
272
|
+
config = self.generate_from_dataframe(
|
|
256
273
|
df, name=name, confidence_threshold=confidence_threshold
|
|
257
274
|
)
|
|
275
|
+
config["data_source"] = {
|
|
276
|
+
"type": source_type,
|
|
277
|
+
"path": f"./{data_path.name}",
|
|
278
|
+
}
|
|
279
|
+
return config
|
|
258
280
|
|
|
259
281
|
def save_config(
|
|
260
282
|
self,
|
|
@@ -301,7 +323,7 @@ class ConfigGenerator:
|
|
|
301
323
|
|
|
302
324
|
# Version
|
|
303
325
|
if "version" in config:
|
|
304
|
-
lines.append(f"version:
|
|
326
|
+
lines.append(f"version: '{config['version']}'")
|
|
305
327
|
lines.append("")
|
|
306
328
|
|
|
307
329
|
# Metadata
|
|
@@ -310,11 +332,27 @@ class ConfigGenerator:
|
|
|
310
332
|
lines.append("metadata:")
|
|
311
333
|
for key, value in config["metadata"].items():
|
|
312
334
|
if isinstance(value, str):
|
|
313
|
-
lines.append(f" {key}:
|
|
335
|
+
lines.append(f" {key}: '{value}'")
|
|
314
336
|
else:
|
|
315
337
|
lines.append(f" {key}: {value}")
|
|
316
338
|
lines.append("")
|
|
317
339
|
|
|
340
|
+
# Data source
|
|
341
|
+
if "data_source" in config:
|
|
342
|
+
ds = config["data_source"]
|
|
343
|
+
lines.append("# Data source configuration")
|
|
344
|
+
lines.append("data_source:")
|
|
345
|
+
lines.append(f" type: {ds['type']}")
|
|
346
|
+
lines.append(f" path: '{ds['path']}'")
|
|
347
|
+
if "options" in ds and ds["options"]:
|
|
348
|
+
lines.append(" options:")
|
|
349
|
+
for key, value in ds["options"].items():
|
|
350
|
+
if isinstance(value, str):
|
|
351
|
+
lines.append(f" {key}: '{value}'")
|
|
352
|
+
else:
|
|
353
|
+
lines.append(f" {key}: {value}")
|
|
354
|
+
lines.append("")
|
|
355
|
+
|
|
318
356
|
# Checks
|
|
319
357
|
lines.append("# Validation checks")
|
|
320
358
|
lines.append("# Each check validates a single column with one or more rules")
|
|
@@ -333,7 +371,7 @@ class ConfigGenerator:
|
|
|
333
371
|
lines.append(f" column: {check['column']}")
|
|
334
372
|
|
|
335
373
|
if "description" in check:
|
|
336
|
-
lines.append(f" description:
|
|
374
|
+
lines.append(f" description: '{check['description']}'")
|
|
337
375
|
|
|
338
376
|
lines.append(" rules:")
|
|
339
377
|
rule_reasons = check.get("_rule_reasons", {})
|
|
@@ -401,13 +439,13 @@ class ConfigGenerator:
|
|
|
401
439
|
lines.append(f"{prefix}{rule_name}:")
|
|
402
440
|
for k, v in rule_value.items():
|
|
403
441
|
if isinstance(v, str):
|
|
404
|
-
lines.append(f"{sub_prefix}{k}:
|
|
442
|
+
lines.append(f"{sub_prefix}{k}: '{v}'")
|
|
405
443
|
elif isinstance(v, list):
|
|
406
444
|
lines.append(f"{sub_prefix}{k}:")
|
|
407
445
|
item_prefix = " # " if commented else " "
|
|
408
446
|
for item in v:
|
|
409
447
|
if isinstance(item, str):
|
|
410
|
-
lines.append(f"{item_prefix}-
|
|
448
|
+
lines.append(f"{item_prefix}- '{item}'")
|
|
411
449
|
else:
|
|
412
450
|
lines.append(f"{item_prefix}- {item}")
|
|
413
451
|
else:
|
|
@@ -419,11 +457,11 @@ class ConfigGenerator:
|
|
|
419
457
|
lines.append(f"{prefix}{rule_name}:")
|
|
420
458
|
for item in rule_value:
|
|
421
459
|
if isinstance(item, str):
|
|
422
|
-
lines.append(f"{sub_prefix}-
|
|
460
|
+
lines.append(f"{sub_prefix}- '{item}'")
|
|
423
461
|
else:
|
|
424
462
|
lines.append(f"{sub_prefix}- {item}")
|
|
425
463
|
elif isinstance(rule_value, str):
|
|
426
|
-
lines.append(f"{prefix}{rule_name}:
|
|
464
|
+
lines.append(f"{prefix}{rule_name}: '{rule_value}'{comment}")
|
|
427
465
|
else:
|
|
428
466
|
lines.append(f"{prefix}{rule_name}: {rule_value}{comment}")
|
|
429
467
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"""Configuration parsing and validation (original config module)."""
|
|
2
2
|
|
|
3
|
-
from dataclasses import dataclass
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
from typing import Any
|
|
6
6
|
|
|
@@ -81,10 +81,12 @@ class DataSourceConfig:
|
|
|
81
81
|
Attributes:
|
|
82
82
|
type: Source type (csv, parquet, json, excel, delta)
|
|
83
83
|
path: Path to the data file (relative to config file or absolute)
|
|
84
|
+
options: Loader-specific options (e.g. encoding, delimiter for CSV)
|
|
84
85
|
"""
|
|
85
86
|
|
|
86
87
|
type: str
|
|
87
88
|
path: str
|
|
89
|
+
options: dict[str, Any] = field(default_factory=dict)
|
|
88
90
|
|
|
89
91
|
def __post_init__(self) -> None:
|
|
90
92
|
"""Validate data source configuration."""
|
|
@@ -211,6 +213,34 @@ class SamplingConfig:
|
|
|
211
213
|
)
|
|
212
214
|
|
|
213
215
|
|
|
216
|
+
@dataclass
|
|
217
|
+
class NotificationsConfig:
|
|
218
|
+
"""Configuration for validation notifications.
|
|
219
|
+
|
|
220
|
+
Attributes:
|
|
221
|
+
slack_webhook: Slack webhook URL for sending results
|
|
222
|
+
mention_on_failure: Whether to mention @channel on failures
|
|
223
|
+
"""
|
|
224
|
+
|
|
225
|
+
slack_webhook: str | None = None
|
|
226
|
+
mention_on_failure: bool = False
|
|
227
|
+
|
|
228
|
+
def __post_init__(self) -> None:
|
|
229
|
+
"""Validate notifications configuration."""
|
|
230
|
+
if self.slack_webhook is not None:
|
|
231
|
+
from urllib.parse import urlparse
|
|
232
|
+
|
|
233
|
+
url = self.slack_webhook.strip()
|
|
234
|
+
if not url:
|
|
235
|
+
raise ConfigurationError("Slack webhook URL cannot be empty")
|
|
236
|
+
parsed = urlparse(url)
|
|
237
|
+
if parsed.scheme != "https":
|
|
238
|
+
raise ConfigurationError(
|
|
239
|
+
"Slack webhook URL must use HTTPS scheme"
|
|
240
|
+
)
|
|
241
|
+
self.slack_webhook = url
|
|
242
|
+
|
|
243
|
+
|
|
214
244
|
@dataclass
|
|
215
245
|
class ValidationConfig:
|
|
216
246
|
"""Complete validation configuration.
|
|
@@ -224,6 +254,7 @@ class ValidationConfig:
|
|
|
224
254
|
table: Default table name for all checks
|
|
225
255
|
data_source: Inline data source configuration (for single-source validation)
|
|
226
256
|
reporting: Output and reporting configuration
|
|
257
|
+
notifications: Optional notifications configuration
|
|
227
258
|
"""
|
|
228
259
|
|
|
229
260
|
checks: list[RuleConfig]
|
|
@@ -234,6 +265,7 @@ class ValidationConfig:
|
|
|
234
265
|
table: str | None = None
|
|
235
266
|
data_source: DataSourceConfig | None = None
|
|
236
267
|
reporting: ReportingConfig | None = None
|
|
268
|
+
notifications: NotificationsConfig | None = None
|
|
237
269
|
|
|
238
270
|
def __post_init__(self) -> None:
|
|
239
271
|
"""Validate configuration after initialization."""
|
|
@@ -278,10 +310,14 @@ class ConfigLoader:
|
|
|
278
310
|
if not path.is_file():
|
|
279
311
|
raise ConfigurationError(f"Configuration path is not a file: {config_path}")
|
|
280
312
|
|
|
281
|
-
# Read and parse YAML
|
|
313
|
+
# Read and parse YAML (with env-var substitution and extends resolution)
|
|
282
314
|
try:
|
|
283
|
-
|
|
284
|
-
|
|
315
|
+
from datacheck.config.parser import ConfigParser
|
|
316
|
+
|
|
317
|
+
parser = ConfigParser()
|
|
318
|
+
data = parser.load(path, resolve_env=True, resolve_extends=True)
|
|
319
|
+
except ConfigurationError:
|
|
320
|
+
raise
|
|
285
321
|
except yaml.YAMLError as e:
|
|
286
322
|
raise ConfigurationError(f"Invalid YAML in {config_path}: {e}") from e
|
|
287
323
|
except Exception as e:
|
|
@@ -304,26 +340,33 @@ class ConfigLoader:
|
|
|
304
340
|
f"'checks' must be a list, got {type(data['checks']).__name__}"
|
|
305
341
|
)
|
|
306
342
|
|
|
307
|
-
# Parse checks
|
|
343
|
+
# Parse checks — collect all errors before raising
|
|
308
344
|
checks = []
|
|
345
|
+
check_errors: list[str] = []
|
|
309
346
|
for idx, check_data in enumerate(data["checks"]):
|
|
310
347
|
if not isinstance(check_data, dict):
|
|
311
|
-
|
|
348
|
+
check_errors.append(
|
|
312
349
|
f"Check at index {idx} must be a dictionary, "
|
|
313
350
|
f"got {type(check_data).__name__}"
|
|
314
351
|
)
|
|
352
|
+
continue
|
|
315
353
|
|
|
316
354
|
# Validate required fields
|
|
355
|
+
missing = False
|
|
317
356
|
if "name" not in check_data:
|
|
318
|
-
|
|
357
|
+
check_errors.append(f"Check at index {idx} missing 'name' field")
|
|
358
|
+
missing = True
|
|
319
359
|
if "column" not in check_data:
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
360
|
+
name = check_data.get("name", f"index {idx}")
|
|
361
|
+
check_errors.append(f"Check '{name}' missing 'column' field")
|
|
362
|
+
missing = True
|
|
323
363
|
if "rules" not in check_data:
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
364
|
+
name = check_data.get("name", f"index {idx}")
|
|
365
|
+
check_errors.append(f"Check '{name}' missing 'rules' field")
|
|
366
|
+
missing = True
|
|
367
|
+
|
|
368
|
+
if missing:
|
|
369
|
+
continue
|
|
327
370
|
|
|
328
371
|
try:
|
|
329
372
|
rule_config = RuleConfig(
|
|
@@ -338,12 +381,17 @@ class ConfigLoader:
|
|
|
338
381
|
# Only add enabled checks
|
|
339
382
|
if rule_config.enabled:
|
|
340
383
|
checks.append(rule_config)
|
|
341
|
-
except ConfigurationError:
|
|
342
|
-
|
|
384
|
+
except ConfigurationError as e:
|
|
385
|
+
check_errors.append(str(e))
|
|
343
386
|
except Exception as e:
|
|
344
|
-
|
|
387
|
+
check_errors.append(
|
|
345
388
|
f"Error parsing check '{check_data.get('name', idx)}': {e}"
|
|
346
|
-
)
|
|
389
|
+
)
|
|
390
|
+
|
|
391
|
+
if check_errors:
|
|
392
|
+
raise ConfigurationError(
|
|
393
|
+
"Configuration has errors:\n - " + "\n - ".join(check_errors)
|
|
394
|
+
)
|
|
347
395
|
|
|
348
396
|
# Parse plugins (optional)
|
|
349
397
|
plugins = data.get("plugins", [])
|
|
@@ -400,9 +448,13 @@ class ConfigLoader:
|
|
|
400
448
|
raise ConfigurationError("'data_source' missing 'path' field")
|
|
401
449
|
|
|
402
450
|
try:
|
|
451
|
+
options = ds_data.get("options", {})
|
|
452
|
+
if not isinstance(options, dict):
|
|
453
|
+
raise ConfigurationError("'data_source.options' must be a dictionary")
|
|
403
454
|
data_source = DataSourceConfig(
|
|
404
455
|
type=ds_data["type"],
|
|
405
456
|
path=ds_data["path"],
|
|
457
|
+
options=options,
|
|
406
458
|
)
|
|
407
459
|
except ConfigurationError:
|
|
408
460
|
raise
|
|
@@ -427,6 +479,23 @@ class ConfigLoader:
|
|
|
427
479
|
except Exception as e:
|
|
428
480
|
raise ConfigurationError(f"Error parsing reporting config: {e}") from e
|
|
429
481
|
|
|
482
|
+
# Parse notifications (optional)
|
|
483
|
+
notifications = None
|
|
484
|
+
if "notifications" in data:
|
|
485
|
+
notif_data = data["notifications"]
|
|
486
|
+
if not isinstance(notif_data, dict):
|
|
487
|
+
raise ConfigurationError("'notifications' must be a dictionary")
|
|
488
|
+
|
|
489
|
+
try:
|
|
490
|
+
notifications = NotificationsConfig(
|
|
491
|
+
slack_webhook=notif_data.get("slack_webhook"),
|
|
492
|
+
mention_on_failure=notif_data.get("mention_on_failure", False),
|
|
493
|
+
)
|
|
494
|
+
except ConfigurationError:
|
|
495
|
+
raise
|
|
496
|
+
except Exception as e:
|
|
497
|
+
raise ConfigurationError(f"Error parsing notifications config: {e}") from e
|
|
498
|
+
|
|
430
499
|
return ValidationConfig(
|
|
431
500
|
checks=checks,
|
|
432
501
|
plugins=plugins,
|
|
@@ -436,6 +505,7 @@ class ConfigLoader:
|
|
|
436
505
|
table=default_table,
|
|
437
506
|
data_source=data_source,
|
|
438
507
|
reporting=reporting,
|
|
508
|
+
notifications=notifications,
|
|
439
509
|
)
|
|
440
510
|
|
|
441
511
|
@staticmethod
|
|
@@ -468,6 +538,7 @@ class ConfigLoader:
|
|
|
468
538
|
|
|
469
539
|
__all__ = [
|
|
470
540
|
"DataSourceConfig",
|
|
541
|
+
"NotificationsConfig",
|
|
471
542
|
"ReportingConfig",
|
|
472
543
|
"RuleConfig",
|
|
473
544
|
"SamplingConfig",
|