datacheck-cli 2.0.0__tar.gz → 2.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104):
  1. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/PKG-INFO +1 -1
  2. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/__init__.py +1 -1
  3. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/cli/schema.py +13 -3
  4. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/cli/validate.py +21 -14
  5. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/config/__init__.py +2 -0
  6. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/config/generator.py +48 -10
  7. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/config/loader.py +88 -17
  8. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/config/templates/basic.yaml +5 -0
  9. datacheck_cli-2.0.2/datacheck/config/templates/rules-reference.yaml +285 -0
  10. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/config/templates/sources.yaml +7 -0
  11. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/config/validator.py +7 -4
  12. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/connectors/azure.py +7 -7
  13. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/connectors/bigquery.py +2 -2
  14. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/connectors/gcs.py +7 -7
  15. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/connectors/mssql.py +2 -1
  16. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/connectors/mysql.py +2 -2
  17. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/connectors/postgresql.py +2 -1
  18. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/connectors/redshift.py +4 -4
  19. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/connectors/s3.py +7 -7
  20. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/connectors/snowflake.py +2 -2
  21. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/engine.py +45 -6
  22. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/notifications/slack.py +8 -3
  23. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/profiling/profiler.py +170 -12
  24. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/profiling/suggestions.py +73 -14
  25. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/reporting/suggestion_engine.py +7 -2
  26. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/reporting/terminal_reporter.py +28 -10
  27. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/rules/semantic_rules.py +32 -7
  28. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/rules/temporal_rules.py +15 -1
  29. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/pyproject.toml +1 -1
  30. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/LICENSE +0 -0
  31. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/README_PYPI.md +0 -0
  32. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/__main__.py +0 -0
  33. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/airflow/__init__.py +0 -0
  34. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/airflow/operators.py +0 -0
  35. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/cli/__init__.py +0 -0
  36. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/cli/config.py +0 -0
  37. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/cli/profile.py +0 -0
  38. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/config/parser.py +0 -0
  39. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/config/sample_data.py +0 -0
  40. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/config/schema.py +0 -0
  41. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/config/source.py +0 -0
  42. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/config/templates/__init__.py +0 -0
  43. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/config/templates/ecommerce.yaml +0 -0
  44. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/config/templates/finance.yaml +0 -0
  45. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/config/templates/healthcare.yaml +0 -0
  46. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/config/templates/iot.yaml +0 -0
  47. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/config/templates/saas.yaml +0 -0
  48. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/connectors/__init__.py +0 -0
  49. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/connectors/base.py +0 -0
  50. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/connectors/cloud_base.py +0 -0
  51. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/connectors/factory.py +0 -0
  52. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/exceptions.py +0 -0
  53. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/loader.py +0 -0
  54. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/logging/__init__.py +0 -0
  55. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/logging/config.py +0 -0
  56. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/logging/filters.py +0 -0
  57. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/logging/formatters.py +0 -0
  58. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/logging/utils.py +0 -0
  59. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/notifications/__init__.py +0 -0
  60. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/output.py +0 -0
  61. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/parallel/__init__.py +0 -0
  62. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/parallel/executor.py +0 -0
  63. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/parallel/progress.py +0 -0
  64. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/plugins/__init__.py +0 -0
  65. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/plugins/decorators.py +0 -0
  66. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/plugins/loader.py +0 -0
  67. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/plugins/registry.py +0 -0
  68. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/profiling/__init__.py +0 -0
  69. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/profiling/formatters/__init__.py +0 -0
  70. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/profiling/formatters/json_formatter.py +0 -0
  71. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/profiling/formatters/markdown_formatter.py +0 -0
  72. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/profiling/formatters/terminal_formatter.py +0 -0
  73. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/profiling/models.py +0 -0
  74. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/profiling/outliers.py +0 -0
  75. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/profiling/quality.py +0 -0
  76. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/profiling/statistics.py +0 -0
  77. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/reporting/__init__.py +0 -0
  78. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/reporting/csv_exporter.py +0 -0
  79. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/reporting/distribution_analyzer.py +0 -0
  80. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/reporting/json_reporter.py +0 -0
  81. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/results.py +0 -0
  82. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/rules/__init__.py +0 -0
  83. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/rules/base.py +0 -0
  84. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/rules/composite_rules.py +0 -0
  85. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/rules/factory.py +0 -0
  86. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/rules/null_rules.py +0 -0
  87. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/rules/numeric_rules.py +0 -0
  88. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/rules/string_rules.py +0 -0
  89. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/sampling/__init__.py +0 -0
  90. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/sampling/sampler.py +0 -0
  91. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/sampling/strategies.py +0 -0
  92. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/schema/__init__.py +0 -0
  93. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/schema/baseline.py +0 -0
  94. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/schema/comparator.py +0 -0
  95. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/schema/detector.py +0 -0
  96. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/schema/models.py +0 -0
  97. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/security/__init__.py +0 -0
  98. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/security/validators.py +0 -0
  99. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/utils/__init__.py +0 -0
  100. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/utils/connection_parser.py +0 -0
  101. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/validation/__init__.py +0 -0
  102. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/validation/config.py +0 -0
  103. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/validation/rules.py +0 -0
  104. {datacheck_cli-2.0.0 → datacheck_cli-2.0.2}/datacheck/validation/validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datacheck-cli
3
- Version: 2.0.0
3
+ Version: 2.0.2
4
4
  Summary: CLI-first data validation tool for data engineers. Catch bad data before it breaks pipelines.
5
5
  License: Apache-2.0
6
6
  License-File: LICENSE
@@ -37,7 +37,7 @@ from datacheck.profiling.formatters import (
37
37
  TerminalFormatter,
38
38
  )
39
39
 
40
- __version__ = "2.0.0"
40
+ __version__ = "2.0.2"
41
41
  __author__ = "Squrtech"
42
42
  __email__ = "contact@squrtech.com"
43
43
 
@@ -1,5 +1,6 @@
1
1
  """Schema commands for DataCheck CLI."""
2
2
 
3
+ import sys
3
4
  from pathlib import Path
4
5
 
5
6
  import typer
@@ -8,6 +9,15 @@ from rich.table import Table
8
9
  import pandas as pd
9
10
 
10
11
  from datacheck.cli import console
12
+
13
+
14
+ def _safe_encoding() -> bool:
15
+ """Check if stdout can handle Unicode symbols."""
16
+ encoding = getattr(sys.stdout, "encoding", None) or ""
17
+ return encoding.lower().replace("-", "") in ("utf8", "utf16", "utf32", "utf16le", "utf16be")
18
+
19
+
20
+ _TICK = "✓" if _safe_encoding() else "v"
11
21
  from datacheck.exceptions import DataLoadError
12
22
 
13
23
  # Schema sub-app for schema evolution commands
@@ -301,12 +311,12 @@ def schema_capture(
301
311
  # Save baseline
302
312
  manager = BaselineManager(baseline_dir=baseline_dir)
303
313
  filepath = manager.save_baseline(schema, name=name)
304
- console.print(f"[green][/green] Schema saved to: {filepath}")
314
+ console.print(f"[green]{_TICK}[/green] Schema saved to: {filepath}")
305
315
 
306
316
  # Save to history if requested
307
317
  if save_history:
308
318
  history_path = manager.save_to_history(schema)
309
- console.print(f"[green][/green] Schema added to history: {history_path}")
319
+ console.print(f"[green]{_TICK}[/green] Schema added to history: {history_path}")
310
320
 
311
321
  # Display summary
312
322
  console.print("\n[bold]Schema Summary:[/bold]")
@@ -493,7 +503,7 @@ def schema_compare(
493
503
  else:
494
504
  # Terminal output
495
505
  if not comparison.changes:
496
- console.print("[green] No schema changes detected[/green]")
506
+ console.print(f"[green]{_TICK} No schema changes detected[/green]")
497
507
  else:
498
508
  # Compatibility summary
499
509
  compat_style = {
@@ -428,17 +428,7 @@ def validate(
428
428
  )
429
429
 
430
430
  try:
431
- # Initialize Slack notifier if webhook provided
432
- notifier = None
433
- if slack_webhook:
434
- from datacheck.notifications import SlackNotifier
435
- try:
436
- notifier = SlackNotifier(slack_webhook)
437
- except Exception as e:
438
- console.print(f"[red]Slack Configuration Error:[/red] {e}", style="red")
439
- raise typer.Exit(code=2) from e
440
-
441
- # Initialize validation engine
431
+ # Initialize validation engine first (to access config)
442
432
  try:
443
433
  if config:
444
434
  config_path = Path(config)
@@ -448,7 +438,6 @@ def validate(
448
438
  workers=workers,
449
439
  chunk_size=chunk_size,
450
440
  show_progress=show_progress,
451
- notifier=notifier,
452
441
  sources_file=sources_file,
453
442
  )
454
443
  else:
@@ -457,13 +446,31 @@ def validate(
457
446
  workers=workers,
458
447
  chunk_size=chunk_size,
459
448
  show_progress=show_progress,
460
- notifier=notifier,
461
449
  sources_file=sources_file,
462
450
  )
463
451
  except ConfigurationError as e:
464
452
  console.print(f"[red]Configuration Error:[/red] {e}", style="red")
465
453
  raise typer.Exit(code=2) from e
466
454
 
455
+ # Initialize Slack notifier: CLI flag overrides config
456
+ notifier = None
457
+ effective_webhook = slack_webhook
458
+ mention_on_failure = False
459
+ if not effective_webhook and engine.config.notifications:
460
+ effective_webhook = engine.config.notifications.slack_webhook
461
+ mention_on_failure = engine.config.notifications.mention_on_failure
462
+
463
+ if effective_webhook:
464
+ from datacheck.notifications import SlackNotifier
465
+ try:
466
+ notifier = SlackNotifier(effective_webhook, mention_on_failure=mention_on_failure)
467
+ except Exception as e:
468
+ console.print(f"[red]Slack Configuration Error:[/red] {e}", style="red")
469
+ raise typer.Exit(code=2) from e
470
+
471
+ # Attach notifier to engine
472
+ engine.notifier = notifier
473
+
467
474
  # Progress spinner — gives user feedback during load + validation
468
475
  num_checks = len(engine.config.checks)
469
476
  _status = (
@@ -524,7 +531,7 @@ def validate(
524
531
  "loading_inline_data_source",
525
532
  extra={"type": inline_source.type, "path": str(source_path)},
526
533
  )
527
- summary = engine.validate_file(str(source_path))
534
+ summary = engine.validate_file(str(source_path), **inline_source.options)
528
535
  logger.info(
529
536
  "data_loaded",
530
537
  extra={"source_type": "inline", "path": str(source_path)},
@@ -6,6 +6,7 @@ Provides config validation, parsing, generation, and CLI tools.
6
6
  # Original config module classes (for backward compatibility)
7
7
  from datacheck.config.loader import (
8
8
  ConfigLoader,
9
+ NotificationsConfig,
9
10
  RuleConfig,
10
11
  SamplingConfig,
11
12
  ValidationConfig,
@@ -27,6 +28,7 @@ from datacheck.config.templates import (
27
28
  __all__ = [
28
29
  # Original exports (backward compatibility)
29
30
  "ConfigLoader",
31
+ "NotificationsConfig",
30
32
  "RuleConfig",
31
33
  "SamplingConfig",
32
34
  "ValidationConfig",
@@ -105,7 +105,7 @@ class ConfigGenerator:
105
105
  col_names = cc_rule["columns"]
106
106
  cc_check: dict[str, Any] = {
107
107
  "name": f"cross_{'_'.join(col_names[:2])}_{cc_rule['rule']}",
108
- "columns": col_names,
108
+ "column": col_names[0],
109
109
  "rules": {cc_rule["rule"]: cc_rule["params"]},
110
110
  "description": cc_rule.get("reason", "Cross-column rule"),
111
111
  }
@@ -130,6 +130,7 @@ class ConfigGenerator:
130
130
  config["checks"] = checks
131
131
 
132
132
  config["reporting"] = {
133
+ "output_path": "./output",
133
134
  "export_failures": True,
134
135
  }
135
136
 
@@ -237,8 +238,20 @@ class ConfigGenerator:
237
238
  """
238
239
  from datacheck.loader import LoaderFactory
239
240
 
241
+ data_path = Path(data_path)
240
242
  df = LoaderFactory.load(str(data_path), **load_kwargs)
241
- name = Path(data_path).stem
243
+ name = data_path.stem
244
+
245
+ # Determine source type from file extension
246
+ ext = data_path.suffix.lower().lstrip(".")
247
+ source_type_map = {
248
+ "csv": "csv",
249
+ "parquet": "parquet",
250
+ "pq": "parquet",
251
+ "json": "json",
252
+ "avro": "avro",
253
+ }
254
+ source_type = source_type_map.get(ext, "csv")
242
255
 
243
256
  if return_profile:
244
257
  if confidence_threshold not in self.CONFIDENCE_LEVELS:
@@ -250,11 +263,20 @@ class ConfigGenerator:
250
263
  config = self.generate_from_profile(
251
264
  profile, confidence_threshold=confidence_threshold
252
265
  )
266
+ config["data_source"] = {
267
+ "type": source_type,
268
+ "path": f"./{data_path.name}",
269
+ }
253
270
  return config, profile
254
271
 
255
- return self.generate_from_dataframe(
272
+ config = self.generate_from_dataframe(
256
273
  df, name=name, confidence_threshold=confidence_threshold
257
274
  )
275
+ config["data_source"] = {
276
+ "type": source_type,
277
+ "path": f"./{data_path.name}",
278
+ }
279
+ return config
258
280
 
259
281
  def save_config(
260
282
  self,
@@ -301,7 +323,7 @@ class ConfigGenerator:
301
323
 
302
324
  # Version
303
325
  if "version" in config:
304
- lines.append(f"version: \"{config['version']}\"")
326
+ lines.append(f"version: '{config['version']}'")
305
327
  lines.append("")
306
328
 
307
329
  # Metadata
@@ -310,11 +332,27 @@ class ConfigGenerator:
310
332
  lines.append("metadata:")
311
333
  for key, value in config["metadata"].items():
312
334
  if isinstance(value, str):
313
- lines.append(f" {key}: \"{value}\"")
335
+ lines.append(f" {key}: '{value}'")
314
336
  else:
315
337
  lines.append(f" {key}: {value}")
316
338
  lines.append("")
317
339
 
340
+ # Data source
341
+ if "data_source" in config:
342
+ ds = config["data_source"]
343
+ lines.append("# Data source configuration")
344
+ lines.append("data_source:")
345
+ lines.append(f" type: {ds['type']}")
346
+ lines.append(f" path: '{ds['path']}'")
347
+ if "options" in ds and ds["options"]:
348
+ lines.append(" options:")
349
+ for key, value in ds["options"].items():
350
+ if isinstance(value, str):
351
+ lines.append(f" {key}: '{value}'")
352
+ else:
353
+ lines.append(f" {key}: {value}")
354
+ lines.append("")
355
+
318
356
  # Checks
319
357
  lines.append("# Validation checks")
320
358
  lines.append("# Each check validates a single column with one or more rules")
@@ -333,7 +371,7 @@ class ConfigGenerator:
333
371
  lines.append(f" column: {check['column']}")
334
372
 
335
373
  if "description" in check:
336
- lines.append(f" description: \"{check['description']}\"")
374
+ lines.append(f" description: '{check['description']}'")
337
375
 
338
376
  lines.append(" rules:")
339
377
  rule_reasons = check.get("_rule_reasons", {})
@@ -401,13 +439,13 @@ class ConfigGenerator:
401
439
  lines.append(f"{prefix}{rule_name}:")
402
440
  for k, v in rule_value.items():
403
441
  if isinstance(v, str):
404
- lines.append(f"{sub_prefix}{k}: \"{v}\"")
442
+ lines.append(f"{sub_prefix}{k}: '{v}'")
405
443
  elif isinstance(v, list):
406
444
  lines.append(f"{sub_prefix}{k}:")
407
445
  item_prefix = " # " if commented else " "
408
446
  for item in v:
409
447
  if isinstance(item, str):
410
- lines.append(f"{item_prefix}- \"{item}\"")
448
+ lines.append(f"{item_prefix}- '{item}'")
411
449
  else:
412
450
  lines.append(f"{item_prefix}- {item}")
413
451
  else:
@@ -419,11 +457,11 @@ class ConfigGenerator:
419
457
  lines.append(f"{prefix}{rule_name}:")
420
458
  for item in rule_value:
421
459
  if isinstance(item, str):
422
- lines.append(f"{sub_prefix}- \"{item}\"")
460
+ lines.append(f"{sub_prefix}- '{item}'")
423
461
  else:
424
462
  lines.append(f"{sub_prefix}- {item}")
425
463
  elif isinstance(rule_value, str):
426
- lines.append(f"{prefix}{rule_name}: \"{rule_value}\"{comment}")
464
+ lines.append(f"{prefix}{rule_name}: '{rule_value}'{comment}")
427
465
  else:
428
466
  lines.append(f"{prefix}{rule_name}: {rule_value}{comment}")
429
467
 
@@ -1,6 +1,6 @@
1
1
  """Configuration parsing and validation (original config module)."""
2
2
 
3
- from dataclasses import dataclass
3
+ from dataclasses import dataclass, field
4
4
  from pathlib import Path
5
5
  from typing import Any
6
6
 
@@ -81,10 +81,12 @@ class DataSourceConfig:
81
81
  Attributes:
82
82
  type: Source type (csv, parquet, json, excel, delta)
83
83
  path: Path to the data file (relative to config file or absolute)
84
+ options: Loader-specific options (e.g. encoding, delimiter for CSV)
84
85
  """
85
86
 
86
87
  type: str
87
88
  path: str
89
+ options: dict[str, Any] = field(default_factory=dict)
88
90
 
89
91
  def __post_init__(self) -> None:
90
92
  """Validate data source configuration."""
@@ -211,6 +213,34 @@ class SamplingConfig:
211
213
  )
212
214
 
213
215
 
216
+ @dataclass
217
+ class NotificationsConfig:
218
+ """Configuration for validation notifications.
219
+
220
+ Attributes:
221
+ slack_webhook: Slack webhook URL for sending results
222
+ mention_on_failure: Whether to mention @channel on failures
223
+ """
224
+
225
+ slack_webhook: str | None = None
226
+ mention_on_failure: bool = False
227
+
228
+ def __post_init__(self) -> None:
229
+ """Validate notifications configuration."""
230
+ if self.slack_webhook is not None:
231
+ from urllib.parse import urlparse
232
+
233
+ url = self.slack_webhook.strip()
234
+ if not url:
235
+ raise ConfigurationError("Slack webhook URL cannot be empty")
236
+ parsed = urlparse(url)
237
+ if parsed.scheme != "https":
238
+ raise ConfigurationError(
239
+ "Slack webhook URL must use HTTPS scheme"
240
+ )
241
+ self.slack_webhook = url
242
+
243
+
214
244
  @dataclass
215
245
  class ValidationConfig:
216
246
  """Complete validation configuration.
@@ -224,6 +254,7 @@ class ValidationConfig:
224
254
  table: Default table name for all checks
225
255
  data_source: Inline data source configuration (for single-source validation)
226
256
  reporting: Output and reporting configuration
257
+ notifications: Optional notifications configuration
227
258
  """
228
259
 
229
260
  checks: list[RuleConfig]
@@ -234,6 +265,7 @@ class ValidationConfig:
234
265
  table: str | None = None
235
266
  data_source: DataSourceConfig | None = None
236
267
  reporting: ReportingConfig | None = None
268
+ notifications: NotificationsConfig | None = None
237
269
 
238
270
  def __post_init__(self) -> None:
239
271
  """Validate configuration after initialization."""
@@ -278,10 +310,14 @@ class ConfigLoader:
278
310
  if not path.is_file():
279
311
  raise ConfigurationError(f"Configuration path is not a file: {config_path}")
280
312
 
281
- # Read and parse YAML
313
+ # Read and parse YAML (with env-var substitution and extends resolution)
282
314
  try:
283
- with open(path, encoding="utf-8") as f:
284
- data = yaml.safe_load(f)
315
+ from datacheck.config.parser import ConfigParser
316
+
317
+ parser = ConfigParser()
318
+ data = parser.load(path, resolve_env=True, resolve_extends=True)
319
+ except ConfigurationError:
320
+ raise
285
321
  except yaml.YAMLError as e:
286
322
  raise ConfigurationError(f"Invalid YAML in {config_path}: {e}") from e
287
323
  except Exception as e:
@@ -304,26 +340,33 @@ class ConfigLoader:
304
340
  f"'checks' must be a list, got {type(data['checks']).__name__}"
305
341
  )
306
342
 
307
- # Parse checks
343
+ # Parse checks — collect all errors before raising
308
344
  checks = []
345
+ check_errors: list[str] = []
309
346
  for idx, check_data in enumerate(data["checks"]):
310
347
  if not isinstance(check_data, dict):
311
- raise ConfigurationError(
348
+ check_errors.append(
312
349
  f"Check at index {idx} must be a dictionary, "
313
350
  f"got {type(check_data).__name__}"
314
351
  )
352
+ continue
315
353
 
316
354
  # Validate required fields
355
+ missing = False
317
356
  if "name" not in check_data:
318
- raise ConfigurationError(f"Check at index {idx} missing 'name' field")
357
+ check_errors.append(f"Check at index {idx} missing 'name' field")
358
+ missing = True
319
359
  if "column" not in check_data:
320
- raise ConfigurationError(
321
- f"Check '{check_data.get('name', idx)}' missing 'column' field"
322
- )
360
+ name = check_data.get("name", f"index {idx}")
361
+ check_errors.append(f"Check '{name}' missing 'column' field")
362
+ missing = True
323
363
  if "rules" not in check_data:
324
- raise ConfigurationError(
325
- f"Check '{check_data['name']}' missing 'rules' field"
326
- )
364
+ name = check_data.get("name", f"index {idx}")
365
+ check_errors.append(f"Check '{name}' missing 'rules' field")
366
+ missing = True
367
+
368
+ if missing:
369
+ continue
327
370
 
328
371
  try:
329
372
  rule_config = RuleConfig(
@@ -338,12 +381,17 @@ class ConfigLoader:
338
381
  # Only add enabled checks
339
382
  if rule_config.enabled:
340
383
  checks.append(rule_config)
341
- except ConfigurationError:
342
- raise
384
+ except ConfigurationError as e:
385
+ check_errors.append(str(e))
343
386
  except Exception as e:
344
- raise ConfigurationError(
387
+ check_errors.append(
345
388
  f"Error parsing check '{check_data.get('name', idx)}': {e}"
346
- ) from e
389
+ )
390
+
391
+ if check_errors:
392
+ raise ConfigurationError(
393
+ "Configuration has errors:\n - " + "\n - ".join(check_errors)
394
+ )
347
395
 
348
396
  # Parse plugins (optional)
349
397
  plugins = data.get("plugins", [])
@@ -400,9 +448,13 @@ class ConfigLoader:
400
448
  raise ConfigurationError("'data_source' missing 'path' field")
401
449
 
402
450
  try:
451
+ options = ds_data.get("options", {})
452
+ if not isinstance(options, dict):
453
+ raise ConfigurationError("'data_source.options' must be a dictionary")
403
454
  data_source = DataSourceConfig(
404
455
  type=ds_data["type"],
405
456
  path=ds_data["path"],
457
+ options=options,
406
458
  )
407
459
  except ConfigurationError:
408
460
  raise
@@ -427,6 +479,23 @@ class ConfigLoader:
427
479
  except Exception as e:
428
480
  raise ConfigurationError(f"Error parsing reporting config: {e}") from e
429
481
 
482
+ # Parse notifications (optional)
483
+ notifications = None
484
+ if "notifications" in data:
485
+ notif_data = data["notifications"]
486
+ if not isinstance(notif_data, dict):
487
+ raise ConfigurationError("'notifications' must be a dictionary")
488
+
489
+ try:
490
+ notifications = NotificationsConfig(
491
+ slack_webhook=notif_data.get("slack_webhook"),
492
+ mention_on_failure=notif_data.get("mention_on_failure", False),
493
+ )
494
+ except ConfigurationError:
495
+ raise
496
+ except Exception as e:
497
+ raise ConfigurationError(f"Error parsing notifications config: {e}") from e
498
+
430
499
  return ValidationConfig(
431
500
  checks=checks,
432
501
  plugins=plugins,
@@ -436,6 +505,7 @@ class ConfigLoader:
436
505
  table=default_table,
437
506
  data_source=data_source,
438
507
  reporting=reporting,
508
+ notifications=notifications,
439
509
  )
440
510
 
441
511
  @staticmethod
@@ -468,6 +538,7 @@ class ConfigLoader:
468
538
 
469
539
  __all__ = [
470
540
  "DataSourceConfig",
541
+ "NotificationsConfig",
471
542
  "ReportingConfig",
472
543
  "RuleConfig",
473
544
  "SamplingConfig",
@@ -62,6 +62,11 @@ checks:
62
62
  - inactive
63
63
  - pending
64
64
 
65
+ # Notifications (optional)
66
+ # notifications:
67
+ # slack_webhook: "${SLACK_WEBHOOK}"
68
+ # mention_on_failure: false
69
+
65
70
  # Output configuration
66
71
  reporting:
67
72
  export_failures: true