duckguard-2.3.0.tar.gz → duckguard-3.0.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126)
  1. {duckguard-2.3.0 → duckguard-3.0.1}/PKG-INFO +120 -1
  2. {duckguard-2.3.0 → duckguard-3.0.1}/README.md +112 -0
  3. duckguard-3.0.1/examples/colab_quickstart.ipynb +447 -0
  4. duckguard-3.0.1/examples/getting_started.ipynb +3123 -0
  5. duckguard-3.0.1/examples/kaggle_notebook.ipynb +487 -0
  6. {duckguard-2.3.0 → duckguard-3.0.1}/examples/pytest_example.py +1 -0
  7. {duckguard-2.3.0 → duckguard-3.0.1}/examples/sample_data/duckguard.yaml +1 -1
  8. {duckguard-2.3.0 → duckguard-3.0.1}/examples/sample_data/orders.csv +1 -1
  9. {duckguard-2.3.0 → duckguard-3.0.1}/pyproject.toml +21 -3
  10. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/__init__.py +1 -1
  11. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/anomaly/methods.py +47 -0
  12. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/anomaly/ml_methods.py +146 -21
  13. duckguard-3.0.1/src/duckguard/checks/__init__.py +26 -0
  14. duckguard-3.0.1/src/duckguard/checks/conditional.py +796 -0
  15. duckguard-3.0.1/src/duckguard/checks/distributional.py +524 -0
  16. duckguard-3.0.1/src/duckguard/checks/multicolumn.py +726 -0
  17. duckguard-3.0.1/src/duckguard/checks/query_based.py +643 -0
  18. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/connectors/factory.py +30 -2
  19. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/connectors/files.py +7 -3
  20. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/core/column.py +372 -0
  21. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/core/dataset.py +330 -0
  22. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/core/result.py +5 -0
  23. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/notifications/email.py +9 -0
  24. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/notifications/notifiers.py +39 -1
  25. duckguard-3.0.1/src/duckguard/profiler/distribution_analyzer.py +384 -0
  26. duckguard-3.0.1/src/duckguard/profiler/outlier_detector.py +497 -0
  27. duckguard-3.0.1/src/duckguard/profiler/pattern_matcher.py +301 -0
  28. duckguard-3.0.1/src/duckguard/profiler/quality_scorer.py +445 -0
  29. duckguard-3.0.1/src/duckguard/rules/executor.py +1257 -0
  30. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/rules/schema.py +31 -0
  31. duckguard-3.0.1/tests/test_conditional_checks.py +595 -0
  32. duckguard-3.0.1/tests/test_distributional_checks.py +620 -0
  33. duckguard-3.0.1/tests/test_integration_duckguard_3_0.py +583 -0
  34. duckguard-3.0.1/tests/test_multicolumn_checks.py +913 -0
  35. duckguard-3.0.1/tests/test_performance_benchmarks.py +497 -0
  36. duckguard-3.0.1/tests/test_query_checks.py +874 -0
  37. duckguard-2.3.0/examples/colab_quickstart.ipynb +0 -266
  38. duckguard-2.3.0/examples/getting_started.ipynb +0 -923
  39. duckguard-2.3.0/examples/kaggle_notebook.ipynb +0 -265
  40. duckguard-2.3.0/src/duckguard/rules/executor.py +0 -615
  41. {duckguard-2.3.0 → duckguard-3.0.1}/.gitignore +0 -0
  42. {duckguard-2.3.0 → duckguard-3.0.1}/LICENSE +0 -0
  43. {duckguard-2.3.0 → duckguard-3.0.1}/examples/basic_usage.py +0 -0
  44. {duckguard-2.3.0 → duckguard-3.0.1}/examples/profiler_example.py +0 -0
  45. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/anomaly/__init__.py +0 -0
  46. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/anomaly/baselines.py +0 -0
  47. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/anomaly/detector.py +0 -0
  48. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/cli/__init__.py +0 -0
  49. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/cli/main.py +0 -0
  50. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/connectors/__init__.py +0 -0
  51. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/connectors/base.py +0 -0
  52. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/connectors/bigquery.py +0 -0
  53. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/connectors/databricks.py +0 -0
  54. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/connectors/kafka.py +0 -0
  55. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/connectors/mongodb.py +0 -0
  56. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/connectors/mysql.py +0 -0
  57. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/connectors/oracle.py +0 -0
  58. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/connectors/postgres.py +0 -0
  59. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/connectors/redshift.py +0 -0
  60. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/connectors/snowflake.py +0 -0
  61. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/connectors/sqlite.py +0 -0
  62. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/connectors/sqlserver.py +0 -0
  63. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/contracts/__init__.py +0 -0
  64. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/contracts/diff.py +0 -0
  65. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/contracts/generator.py +0 -0
  66. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/contracts/loader.py +0 -0
  67. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/contracts/schema.py +0 -0
  68. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/contracts/validator.py +0 -0
  69. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/core/__init__.py +0 -0
  70. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/core/engine.py +0 -0
  71. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/core/scoring.py +0 -0
  72. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/errors.py +0 -0
  73. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/freshness/__init__.py +0 -0
  74. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/freshness/monitor.py +0 -0
  75. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/history/__init__.py +0 -0
  76. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/history/schema.py +0 -0
  77. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/history/storage.py +0 -0
  78. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/history/trends.py +0 -0
  79. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/integrations/__init__.py +0 -0
  80. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/integrations/airflow.py +0 -0
  81. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/integrations/dbt.py +0 -0
  82. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/notifications/__init__.py +0 -0
  83. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/notifications/formatter.py +0 -0
  84. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/profiler/__init__.py +0 -0
  85. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/profiler/auto_profile.py +0 -0
  86. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/pytest_plugin/__init__.py +0 -0
  87. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/pytest_plugin/plugin.py +0 -0
  88. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/reporting/__init__.py +0 -0
  89. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/reporting/console.py +0 -0
  90. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/reporting/json_report.py +0 -0
  91. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/reports/__init__.py +0 -0
  92. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/reports/html_reporter.py +0 -0
  93. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/reports/pdf_reporter.py +0 -0
  94. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/rules/__init__.py +0 -0
  95. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/rules/generator.py +0 -0
  96. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/rules/loader.py +0 -0
  97. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/schema_history/__init__.py +0 -0
  98. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/schema_history/analyzer.py +0 -0
  99. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/schema_history/tracker.py +0 -0
  100. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/semantic/__init__.py +0 -0
  101. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/semantic/analyzer.py +0 -0
  102. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/semantic/detector.py +0 -0
  103. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/semantic/validators.py +0 -0
  104. {duckguard-2.3.0 → duckguard-3.0.1}/src/duckguard/validators/__init__.py +0 -0
  105. {duckguard-2.3.0 → duckguard-3.0.1}/tests/conftest.py +0 -0
  106. {duckguard-2.3.0 → duckguard-3.0.1}/tests/test_airflow_integration.py +0 -0
  107. {duckguard-2.3.0 → duckguard-3.0.1}/tests/test_cli.py +0 -0
  108. {duckguard-2.3.0 → duckguard-3.0.1}/tests/test_connectors.py +0 -0
  109. {duckguard-2.3.0 → duckguard-3.0.1}/tests/test_crossref.py +0 -0
  110. {duckguard-2.3.0 → duckguard-3.0.1}/tests/test_dataset.py +0 -0
  111. {duckguard-2.3.0 → duckguard-3.0.1}/tests/test_dbt_integration.py +0 -0
  112. {duckguard-2.3.0 → duckguard-3.0.1}/tests/test_distribution_drift.py +0 -0
  113. {duckguard-2.3.0 → duckguard-3.0.1}/tests/test_email_notifications.py +0 -0
  114. {duckguard-2.3.0 → duckguard-3.0.1}/tests/test_engine.py +0 -0
  115. {duckguard-2.3.0 → duckguard-3.0.1}/tests/test_errors.py +0 -0
  116. {duckguard-2.3.0 → duckguard-3.0.1}/tests/test_freshness.py +0 -0
  117. {duckguard-2.3.0 → duckguard-3.0.1}/tests/test_group_by.py +0 -0
  118. {duckguard-2.3.0 → duckguard-3.0.1}/tests/test_history.py +0 -0
  119. {duckguard-2.3.0 → duckguard-3.0.1}/tests/test_ml_anomaly.py +0 -0
  120. {duckguard-2.3.0 → duckguard-3.0.1}/tests/test_notifications.py +0 -0
  121. {duckguard-2.3.0 → duckguard-3.0.1}/tests/test_profiler.py +0 -0
  122. {duckguard-2.3.0 → duckguard-3.0.1}/tests/test_reconciliation.py +0 -0
  123. {duckguard-2.3.0 → duckguard-3.0.1}/tests/test_reports.py +0 -0
  124. {duckguard-2.3.0 → duckguard-3.0.1}/tests/test_row_level_errors.py +0 -0
  125. {duckguard-2.3.0 → duckguard-3.0.1}/tests/test_schema_history.py +0 -0
  126. {duckguard-2.3.0 → duckguard-3.0.1}/tests/test_validators.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: duckguard
3
- Version: 2.3.0
3
+ Version: 3.0.1
4
4
  Summary: A Python-native data quality tool with AI superpowers, built on DuckDB for speed
5
5
  Project-URL: Homepage, https://github.com/XDataHubAI/duckguard
6
6
  Project-URL: Documentation, https://github.com/XDataHubAI/duckguard
@@ -50,6 +50,7 @@ Requires-Dist: pymongo>=4.0.0; extra == 'all'
50
50
  Requires-Dist: pymysql>=1.0.0; extra == 'all'
51
51
  Requires-Dist: pyodbc>=4.0.0; extra == 'all'
52
52
  Requires-Dist: redshift-connector>=2.0.0; extra == 'all'
53
+ Requires-Dist: scipy>=1.11.0; extra == 'all'
53
54
  Requires-Dist: snowflake-connector-python>=3.0.0; extra == 'all'
54
55
  Requires-Dist: weasyprint>=60.0; extra == 'all'
55
56
  Provides-Extra: bigquery
@@ -70,9 +71,13 @@ Requires-Dist: databricks-sql-connector>=2.0.0; extra == 'databricks'
70
71
  Provides-Extra: dev
71
72
  Requires-Dist: black>=23.0.0; extra == 'dev'
72
73
  Requires-Dist: mypy>=1.0.0; extra == 'dev'
74
+ Requires-Dist: numpy>=1.24.0; extra == 'dev'
75
+ Requires-Dist: pandas>=2.0.0; extra == 'dev'
76
+ Requires-Dist: psutil>=5.9.0; extra == 'dev'
73
77
  Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
74
78
  Requires-Dist: pytest>=7.0.0; extra == 'dev'
75
79
  Requires-Dist: ruff>=0.1.0; extra == 'dev'
80
+ Requires-Dist: scipy>=1.11.0; extra == 'dev'
76
81
  Provides-Extra: kafka
77
82
  Requires-Dist: kafka-python>=2.0.0; extra == 'kafka'
78
83
  Provides-Extra: llm
@@ -95,6 +100,8 @@ Provides-Extra: snowflake
95
100
  Requires-Dist: snowflake-connector-python>=3.0.0; extra == 'snowflake'
96
101
  Provides-Extra: sqlserver
97
102
  Requires-Dist: pyodbc>=4.0.0; extra == 'sqlserver'
103
+ Provides-Extra: statistics
104
+ Requires-Dist: scipy>=1.11.0; extra == 'statistics'
98
105
  Description-Content-Type: text/markdown
99
106
 
100
107
  <div align="center">
@@ -137,6 +144,118 @@ assert orders.status.isin(['pending', 'shipped', 'delivered'])
137
144
 
138
145
  ---
139
146
 
147
+ ## What's New in 3.0
148
+
149
+ DuckGuard 3.0 introduces **23 new check types** and powerful validation capabilities that make complex data quality checks simple.
150
+
151
+ ### Conditional Expectations
152
+
153
+ Apply validation rules only when certain conditions are met:
154
+
155
+ ```python
156
+ # Validate state is not null only for US orders
157
+ orders.state.not_null_when("country = 'USA'")
158
+
159
+ # Check shipping_cost only for orders that were shipped
160
+ orders.shipping_cost.greater_than_when(0, "status = 'shipped'")
161
+
162
+ # Require tracking_number for expedited orders
163
+ orders.tracking_number.not_null_when("shipping_type = 'expedited'")
164
+ ```
165
+
166
+ ### Multi-Column Expectations
167
+
168
+ Validate relationships between columns with cross-column checks:
169
+
170
+ ```python
171
+ # Ensure end_date comes after start_date
172
+ orders.expect_column_pair_satisfy("end_date", "start_date", "end_date >= start_date")
173
+
174
+ # Validate discount doesn't exceed original price
175
+ orders.expect_column_pair_satisfy("discount", "price", "discount <= price")
176
+
177
+ # Check that total matches sum of components
178
+ orders.expect_column_pair_satisfy("total", "subtotal", "total = subtotal + tax")
179
+ ```
180
+
181
+ ### Query-Based Expectations
182
+
183
+ Run custom SQL queries for unlimited flexibility:
184
+
185
+ ```python
186
+ # Ensure no negative amounts
187
+ orders.expect_query_to_return_no_rows("SELECT * FROM table WHERE amount < 0")
188
+
189
+ # Validate business rules
190
+ orders.expect_query_to_return_no_rows(
191
+ "SELECT * FROM table WHERE status = 'shipped' AND tracking_number IS NULL"
192
+ )
193
+
194
+ # Check referential integrity with custom logic
195
+ orders.expect_query_result_equals(
196
+ "SELECT COUNT(*) FROM orders WHERE customer_id NOT IN (SELECT id FROM customers)",
197
+ 0
198
+ )
199
+ ```
200
+
201
+ ### Distributional Checks
202
+
203
+ Test if data follows expected statistical distributions:
204
+
205
+ ```python
206
+ # Test for normal distribution
207
+ data.values.expect_distribution_normal()
208
+
209
+ # Test for uniform distribution
210
+ data.values.expect_distribution_uniform()
211
+
212
+ # Chi-square goodness of fit test
213
+ data.category.expect_distribution_chi_square(expected_freq={'A': 0.5, 'B': 0.3, 'C': 0.2})
214
+
215
+ # Kolmogorov-Smirnov test for distribution matching
216
+ current.amount.expect_distribution_ks_test(baseline.amount)
217
+ ```
218
+
219
+ ### Enhanced Profiling
220
+
221
+ Four new profiling modules for deeper data insights:
222
+
223
+ ```python
224
+ from duckguard.profiling import (
225
+ DistributionProfiler, # Statistical distributions and shape analysis
226
+ CorrelationProfiler, # Column relationships and dependencies
227
+ PatternProfiler, # Detect common patterns in text data
228
+ TimeSeriesProfiler # Temporal patterns and trends
229
+ )
230
+
231
+ # Analyze distributions
232
+ dist_profile = DistributionProfiler().profile(orders)
233
+ print(f"Amount distribution: {dist_profile['amount'].distribution_type}") # 'normal', 'skewed', etc.
234
+
235
+ # Discover correlations
236
+ corr_profile = CorrelationProfiler().profile(orders)
237
+ print(f"Highly correlated pairs: {corr_profile.high_correlations}")
238
+
239
+ # Find patterns in text columns
240
+ pattern_profile = PatternProfiler().profile(orders)
241
+ print(f"Email pattern: {pattern_profile['email'].common_pattern}") # Regex pattern
242
+
243
+ # Analyze time series
244
+ ts_profile = TimeSeriesProfiler().profile(orders, date_column='order_date')
245
+ print(f"Seasonality detected: {ts_profile.has_seasonality}")
246
+ ```
247
+
248
+ ### More Validation Power
249
+
250
+ DuckGuard 3.0 adds 23 new check types including:
251
+ - **Conditional validations**: `not_null_when()`, `between_when()`, `isin_when()`
252
+ - **Multi-column checks**: `expect_column_pair_satisfy()`, `expect_column_sum_equals()`
253
+ - **Query-based**: `expect_query_to_return_no_rows()`, `expect_query_result_equals()`
254
+ - **Distribution tests**: `expect_distribution_normal()`, `expect_distribution_chi_square()`
255
+ - **Advanced string**: `expect_column_values_to_match_strftime()`, `expect_column_values_to_be_json()`
256
+
257
+ ---
258
+
140
259
  ## Why DuckGuard?
141
260
 
142
261
  ### The Problem
@@ -38,6 +38,118 @@ assert orders.status.isin(['pending', 'shipped', 'delivered'])
38
38
 
39
39
  ---
40
40
 
41
+ ## What's New in 3.0
42
+
43
+ DuckGuard 3.0 introduces **23 new check types** and powerful validation capabilities that make complex data quality checks simple.
44
+
45
+ ### Conditional Expectations
46
+
47
+ Apply validation rules only when certain conditions are met:
48
+
49
+ ```python
50
+ # Validate state is not null only for US orders
51
+ orders.state.not_null_when("country = 'USA'")
52
+
53
+ # Check shipping_cost only for orders that were shipped
54
+ orders.shipping_cost.greater_than_when(0, "status = 'shipped'")
55
+
56
+ # Require tracking_number for expedited orders
57
+ orders.tracking_number.not_null_when("shipping_type = 'expedited'")
58
+ ```
59
+
60
+ ### Multi-Column Expectations
61
+
62
+ Validate relationships between columns with cross-column checks:
63
+
64
+ ```python
65
+ # Ensure end_date comes after start_date
66
+ orders.expect_column_pair_satisfy("end_date", "start_date", "end_date >= start_date")
67
+
68
+ # Validate discount doesn't exceed original price
69
+ orders.expect_column_pair_satisfy("discount", "price", "discount <= price")
70
+
71
+ # Check that total matches sum of components
72
+ orders.expect_column_pair_satisfy("total", "subtotal", "total = subtotal + tax")
73
+ ```
74
+
75
+ ### Query-Based Expectations
76
+
77
+ Run custom SQL queries for unlimited flexibility:
78
+
79
+ ```python
80
+ # Ensure no negative amounts
81
+ orders.expect_query_to_return_no_rows("SELECT * FROM table WHERE amount < 0")
82
+
83
+ # Validate business rules
84
+ orders.expect_query_to_return_no_rows(
85
+ "SELECT * FROM table WHERE status = 'shipped' AND tracking_number IS NULL"
86
+ )
87
+
88
+ # Check referential integrity with custom logic
89
+ orders.expect_query_result_equals(
90
+ "SELECT COUNT(*) FROM orders WHERE customer_id NOT IN (SELECT id FROM customers)",
91
+ 0
92
+ )
93
+ ```
94
+
95
+ ### Distributional Checks
96
+
97
+ Test if data follows expected statistical distributions:
98
+
99
+ ```python
100
+ # Test for normal distribution
101
+ data.values.expect_distribution_normal()
102
+
103
+ # Test for uniform distribution
104
+ data.values.expect_distribution_uniform()
105
+
106
+ # Chi-square goodness of fit test
107
+ data.category.expect_distribution_chi_square(expected_freq={'A': 0.5, 'B': 0.3, 'C': 0.2})
108
+
109
+ # Kolmogorov-Smirnov test for distribution matching
110
+ current.amount.expect_distribution_ks_test(baseline.amount)
111
+ ```
112
+
113
+ ### Enhanced Profiling
114
+
115
+ Four new profiling modules for deeper data insights:
116
+
117
+ ```python
118
+ from duckguard.profiling import (
119
+ DistributionProfiler, # Statistical distributions and shape analysis
120
+ CorrelationProfiler, # Column relationships and dependencies
121
+ PatternProfiler, # Detect common patterns in text data
122
+ TimeSeriesProfiler # Temporal patterns and trends
123
+ )
124
+
125
+ # Analyze distributions
126
+ dist_profile = DistributionProfiler().profile(orders)
127
+ print(f"Amount distribution: {dist_profile['amount'].distribution_type}") # 'normal', 'skewed', etc.
128
+
129
+ # Discover correlations
130
+ corr_profile = CorrelationProfiler().profile(orders)
131
+ print(f"Highly correlated pairs: {corr_profile.high_correlations}")
132
+
133
+ # Find patterns in text columns
134
+ pattern_profile = PatternProfiler().profile(orders)
135
+ print(f"Email pattern: {pattern_profile['email'].common_pattern}") # Regex pattern
136
+
137
+ # Analyze time series
138
+ ts_profile = TimeSeriesProfiler().profile(orders, date_column='order_date')
139
+ print(f"Seasonality detected: {ts_profile.has_seasonality}")
140
+ ```
141
+
142
+ ### More Validation Power
143
+
144
+ DuckGuard 3.0 adds 23 new check types including:
145
+ - **Conditional validations**: `not_null_when()`, `between_when()`, `isin_when()`
146
+ - **Multi-column checks**: `expect_column_pair_satisfy()`, `expect_column_sum_equals()`
147
+ - **Query-based**: `expect_query_to_return_no_rows()`, `expect_query_result_equals()`
148
+ - **Distribution tests**: `expect_distribution_normal()`, `expect_distribution_chi_square()`
149
+ - **Advanced string**: `expect_column_values_to_match_strftime()`, `expect_column_values_to_be_json()`
150
+
151
+ ---
152
+
41
153
  ## Why DuckGuard?
42
154
 
43
155
  ### The Problem