duckguard 2.0.0__py3-none-any.whl → 2.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. duckguard/__init__.py +55 -28
  2. duckguard/anomaly/__init__.py +29 -1
  3. duckguard/anomaly/baselines.py +294 -0
  4. duckguard/anomaly/detector.py +1 -5
  5. duckguard/anomaly/methods.py +17 -5
  6. duckguard/anomaly/ml_methods.py +724 -0
  7. duckguard/cli/main.py +561 -56
  8. duckguard/connectors/__init__.py +2 -2
  9. duckguard/connectors/bigquery.py +1 -1
  10. duckguard/connectors/databricks.py +1 -1
  11. duckguard/connectors/factory.py +2 -3
  12. duckguard/connectors/files.py +1 -1
  13. duckguard/connectors/kafka.py +2 -2
  14. duckguard/connectors/mongodb.py +1 -1
  15. duckguard/connectors/mysql.py +1 -1
  16. duckguard/connectors/oracle.py +1 -1
  17. duckguard/connectors/postgres.py +1 -2
  18. duckguard/connectors/redshift.py +1 -1
  19. duckguard/connectors/snowflake.py +1 -2
  20. duckguard/connectors/sqlite.py +1 -1
  21. duckguard/connectors/sqlserver.py +10 -13
  22. duckguard/contracts/__init__.py +6 -6
  23. duckguard/contracts/diff.py +1 -1
  24. duckguard/contracts/generator.py +5 -6
  25. duckguard/contracts/loader.py +4 -4
  26. duckguard/contracts/validator.py +3 -4
  27. duckguard/core/__init__.py +3 -3
  28. duckguard/core/column.py +588 -5
  29. duckguard/core/dataset.py +708 -3
  30. duckguard/core/result.py +328 -1
  31. duckguard/core/scoring.py +1 -2
  32. duckguard/errors.py +362 -0
  33. duckguard/freshness/__init__.py +33 -0
  34. duckguard/freshness/monitor.py +429 -0
  35. duckguard/history/__init__.py +44 -0
  36. duckguard/history/schema.py +301 -0
  37. duckguard/history/storage.py +479 -0
  38. duckguard/history/trends.py +348 -0
  39. duckguard/integrations/__init__.py +31 -0
  40. duckguard/integrations/airflow.py +387 -0
  41. duckguard/integrations/dbt.py +458 -0
  42. duckguard/notifications/__init__.py +61 -0
  43. duckguard/notifications/email.py +508 -0
  44. duckguard/notifications/formatter.py +118 -0
  45. duckguard/notifications/notifiers.py +357 -0
  46. duckguard/profiler/auto_profile.py +3 -3
  47. duckguard/pytest_plugin/__init__.py +1 -1
  48. duckguard/pytest_plugin/plugin.py +1 -1
  49. duckguard/reporting/console.py +2 -2
  50. duckguard/reports/__init__.py +42 -0
  51. duckguard/reports/html_reporter.py +514 -0
  52. duckguard/reports/pdf_reporter.py +114 -0
  53. duckguard/rules/__init__.py +3 -3
  54. duckguard/rules/executor.py +3 -4
  55. duckguard/rules/generator.py +8 -5
  56. duckguard/rules/loader.py +5 -5
  57. duckguard/rules/schema.py +23 -0
  58. duckguard/schema_history/__init__.py +40 -0
  59. duckguard/schema_history/analyzer.py +414 -0
  60. duckguard/schema_history/tracker.py +288 -0
  61. duckguard/semantic/__init__.py +1 -1
  62. duckguard/semantic/analyzer.py +0 -2
  63. duckguard/semantic/detector.py +17 -1
  64. duckguard/semantic/validators.py +2 -1
  65. duckguard-2.3.0.dist-info/METADATA +953 -0
  66. duckguard-2.3.0.dist-info/RECORD +77 -0
  67. duckguard-2.0.0.dist-info/METADATA +0 -221
  68. duckguard-2.0.0.dist-info/RECORD +0 -55
  69. {duckguard-2.0.0.dist-info → duckguard-2.3.0.dist-info}/WHEEL +0 -0
  70. {duckguard-2.0.0.dist-info → duckguard-2.3.0.dist-info}/entry_points.txt +0 -0
  71. {duckguard-2.0.0.dist-info → duckguard-2.3.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,77 @@
1
+ duckguard/__init__.py,sha256=TUiy1yQKA20tv77qAFsFrk_yjWuzQD9csTKCtweQ_S4,3078
2
+ duckguard/errors.py,sha256=xhQPxCCeB3dCQspTbQf58h_DvwHP1vAb6vKI9fHYAJ0,11493
3
+ duckguard/anomaly/__init__.py,sha256=mrTyL70cOR5S7_RNc9QLADdnBimIsbAoFTbKlWiIsbw,1353
4
+ duckguard/anomaly/baselines.py,sha256=k28CjjqBa8IaZxnIgof-wjw_Xdb7NJZImC2OJJkGXQ8,8776
5
+ duckguard/anomaly/detector.py,sha256=voA7WS2x2p5h5cnwH3C_2ly7HdYpXLwC4jDiPL2Xleo,12443
6
+ duckguard/anomaly/methods.py,sha256=CtV2G-kowXGgz0HYvNoi2Ge7eyHUg2GwGa3oZvunS38,13475
7
+ duckguard/anomaly/ml_methods.py,sha256=UyEr8q4K_wNq7pWgTsV23IoBI13aqm0hHIwIFjIxeas,23449
8
+ duckguard/cli/__init__.py,sha256=s5MNXEu_MbRqyV-jeUgCIDlHRQA97a9knM_anJooTl0,87
9
+ duckguard/cli/main.py,sha256=sMq5RfM0-OeXTG_jgTRGyvfw-c4iwojNGUEW8AYQ3fA,46001
10
+ duckguard/connectors/__init__.py,sha256=BMbVyyBPI9_GAFcwkQivf2xMvHwVOHvBMuT5qZ558jc,2232
11
+ duckguard/connectors/base.py,sha256=XzGY6_pUwDJIVNhTfgNMkcGNOBs3xxjbnQ_NeMoz4eM,1864
12
+ duckguard/connectors/bigquery.py,sha256=b-EHAF90dbyCh387qNirkRGY0sEsPAmvy-hNCbY7ilQ,5327
13
+ duckguard/connectors/databricks.py,sha256=vsm5wWGb6V_J1yMdXyREjy9ElR84S0aLk0NgOAbd1J4,6550
14
+ duckguard/connectors/factory.py,sha256=brO5ypD9nriHqWNN4x9KItq3mTtjcy5nM6eu5luS9RU,9156
15
+ duckguard/connectors/files.py,sha256=QU5lFWf9NUv0lX_txx_CLfTzhcF7tAZtCGZOCrzX-tk,3841
16
+ duckguard/connectors/kafka.py,sha256=Oo_axyJck6gHrwLFpnGcUVKEfKqxqz-AEdlVkNBYVVE,10709
17
+ duckguard/connectors/mongodb.py,sha256=3RI3-hiTHXQIk5cg9ZM5q2UDn5HU2wDnq-f8xj-Yc2A,7271
18
+ duckguard/connectors/mysql.py,sha256=EW-VrZiNgOGFVnVccTR-jVrn3S6KHK6GA-Yj3kmmU5w,3875
19
+ duckguard/connectors/oracle.py,sha256=ar_xM4t-X1kCYWAi_mmg1wzUqvMcS8dudWqcSGp5o0M,6178
20
+ duckguard/connectors/postgres.py,sha256=CXRKUSwtsOTbsk1ASCzzjZYDr9V3_MAdd7f6CnTi-F0,3061
21
+ duckguard/connectors/redshift.py,sha256=-G9IgBf2Reb7RWBj5dmILWnuu3oc8pNhfq1XgwTQHGs,4951
22
+ duckguard/connectors/snowflake.py,sha256=TwCaUY-7zVrU96POEqDbJRrp0eiKSjjGBr2RI_4SESk,7108
23
+ duckguard/connectors/sqlite.py,sha256=igfv-PNUqcNhexycQ7WcwskGNjzaqfA1oWkqUU3kSXY,3346
24
+ duckguard/connectors/sqlserver.py,sha256=o2TjW7bbANXImvElCjRIIbD0BkeFB0p_oWHjt-LXObY,7579
25
+ duckguard/contracts/__init__.py,sha256=LFc9iPhpbafcQJILJtf90z979aB5HtSCW8veeOlHJy4,1313
26
+ duckguard/contracts/diff.py,sha256=Dj8yjRlEexCEVF5tSkc62LrXL8-W0DwPyyDXc7ON90M,15085
27
+ duckguard/contracts/generator.py,sha256=Ou1EOAgOxIgEw_APbcyQEPqlvMDTovl5_o4T1VzjUT8,10969
28
+ duckguard/contracts/loader.py,sha256=iTmg9xjSAlYsBpQeTAJ1-ABQnuXs-qpMh3DH4rfN6qs,10878
29
+ duckguard/contracts/schema.py,sha256=pLoR4QIXs68Q93DOZqqTmPnPecCeZ4iy9lDXZMNuVmI,7032
30
+ duckguard/contracts/validator.py,sha256=X972Ns-8UWBL8D4nCCQlNOHJas0Mc4ES8URbKqd0WLw,16432
31
+ duckguard/core/__init__.py,sha256=pHndzrdehB0GFtlSQ46uvw8XgUQj55dVZQP1ZK-aDso,356
32
+ duckguard/core/column.py,sha256=ux3B2HyrgXLkz0tCY4EmR7JVRoedzCfURhzCfuO-tU8,35346
33
+ duckguard/core/dataset.py,sha256=SBwrXLtZyf-bkT1o42OU6tURWP7TOL4uBZ0BBMR3wD8,33287
34
+ duckguard/core/engine.py,sha256=ld_NHsWyBkVynmWyvbyQcHdXHhpIoSaRDyqAAtVx8J0,7897
35
+ duckguard/core/result.py,sha256=BwmP0gNPAKVYHdyque1rDkbAhEvwFaA3PwhxaI7cY14,15178
36
+ duckguard/core/scoring.py,sha256=42CVgxmmfo3Yb3m3Xl8qWnDgR7ndSZd8vXRwy9XSThI,16826
37
+ duckguard/freshness/__init__.py,sha256=8XR7JxH9tz61En5DTMSDHrjhroPzvwCTVzBbBiRFexs,854
38
+ duckguard/freshness/monitor.py,sha256=O_b4fh6unyZ2DXioX6O7KP9VpenGdLTpb9OdNb79dX8,14695
39
+ duckguard/history/__init__.py,sha256=_O4OBEeku1X0-Jo87qA0KKwZbh-s3LwfypYTHp_mST8,970
40
+ duckguard/history/schema.py,sha256=E3pP6u88OESmYQM08-XW8UQOmeUIFrM_JIpkQCM2f_g,9900
41
+ duckguard/history/storage.py,sha256=0r2x2VNBUWjafZCFohy63NX4f4v4-SkyJaSCZRJUCj0,15413
42
+ duckguard/history/trends.py,sha256=t6P3asMAPahDMK9E6sVf3nT3zFEDDZhk7n2Ice2I7BM,10702
43
+ duckguard/integrations/__init__.py,sha256=SuqOzfdaejlMCti372FHD_R6bVaPaUmfEPG9IM6UOW0,831
44
+ duckguard/integrations/airflow.py,sha256=pxC14Kgwou_2xWPvTfx8YWO-xg_vgFeAlGDhgGfXRyM,13195
45
+ duckguard/integrations/dbt.py,sha256=Dw1meY-UhylDFhUZ2s47FnJGMp_gszHvadGn_hqYkSM,14101
46
+ duckguard/notifications/__init__.py,sha256=qEfUvt7d_WXlbsGlLB-FaNF4ksLtAyO8JXi1JCdo89w,1541
47
+ duckguard/notifications/email.py,sha256=jwgxec8r6NUNqrxz3v5B4A3UL0-ZdxnJZhXQXWgMWH4,17168
48
+ duckguard/notifications/formatter.py,sha256=Z2vGMpLdqPWYaYTaVtVjYnIbNU8Haer-7efohZ5IZxM,3991
49
+ duckguard/notifications/notifiers.py,sha256=e-UBvoskFSzIwlCFTxIFdkI-z54zZeEeSQkvOvgV6JI,11703
50
+ duckguard/profiler/__init__.py,sha256=a16GYeeFDZzwCemTsTuzO3Ih4M7_hOPb9hS8yt-nHzU,169
51
+ duckguard/profiler/auto_profile.py,sha256=KbAkty-HrpNbTribi2uD17Fcsb-UiV5eG4zZsbyBOL4,12267
52
+ duckguard/pytest_plugin/__init__.py,sha256=GuhFPvINnpoVSxhvCX9b5dymzdhsn2KZhXU6okk4xQU,168
53
+ duckguard/pytest_plugin/plugin.py,sha256=SA1dvkZ0MYyNyRXzuqelreEo2zK0XTsNZeYwUYd3Gy0,4949
54
+ duckguard/reporting/__init__.py,sha256=R7Fm--yEiuOb_II-Qo7MGXYyCNhsGnVsMVuAzZT6rIM,199
55
+ duckguard/reporting/console.py,sha256=GvXFqKLLkU-LQb1FNkS7HI-NQYbHpQCSBYI4FSUDOMw,3026
56
+ duckguard/reporting/json_report.py,sha256=dqUry9akuPRwNz4ysUM6ZP6ZCXl77nA_Z7mXG-1VGKA,3509
57
+ duckguard/reports/__init__.py,sha256=JGGZ2IJFVOutcQaZ8kpjDDKJru9e5EsVi91au2VFKsk,1025
58
+ duckguard/reports/html_reporter.py,sha256=_8jzHg6WzC4xqXgqzHzYQTjE4vXbQGP-p1FUKmYAtuU,20670
59
+ duckguard/reports/pdf_reporter.py,sha256=u6zuV24y9YCBlpDwDObHTSrVE9W9beTIqj-UQyvA8jQ,3094
60
+ duckguard/rules/__init__.py,sha256=XYVasAnu8ErJ-Cvsqeh1mX5zxqd1wk-sM4OzuBJn72Y,813
61
+ duckguard/rules/executor.py,sha256=0MKi4mA0Ig873J7JDKpE_O2OJsBFSx6w2jgcGQWl_8w,20720
62
+ duckguard/rules/generator.py,sha256=h8NWcRsqBqj4xEddavFRlnWZfCi3eoXsqWyIJmxPGeo,11184
63
+ duckguard/rules/loader.py,sha256=gzFihSX6w3lpldEXVUn0Ysh9MAOEXh3ABNqJrVlGEng,14622
64
+ duckguard/rules/schema.py,sha256=_YHgZSau89SuECHWdwHtUmO65HZrNFZkaIz7l3cqhEI,10755
65
+ duckguard/schema_history/__init__.py,sha256=q7Kofw5PxbJlXTLzXNZyhvpsrYDKJl1OScWVwEGYIkY,949
66
+ duckguard/schema_history/analyzer.py,sha256=NRDQCjhPstmp6zD7Co0D4D6jVSJ9SB-iAmv4GUQdvJc,14396
67
+ duckguard/schema_history/tracker.py,sha256=ZuMYX8knruiodXd22KoGaT7MgQBElDjekNz73aSwkqI,8468
68
+ duckguard/semantic/__init__.py,sha256=FbX60d-Qf7qaVEhnSTy9NzKiXZt66A1G-NZdhvi3TIY,847
69
+ duckguard/semantic/analyzer.py,sha256=2be1oofe-owBhTg-Dy88-wihaoTQ7DPxf1NuA1sgfR0,8297
70
+ duckguard/semantic/detector.py,sha256=MPdb2Rv9VGQBko7nmPk4-Kjga_XVjPZdHCr29gdET0M,15665
71
+ duckguard/semantic/validators.py,sha256=8Zu3vwPwh79U09zGf4_PpcwV85_hbNCwRHcxTIQ7G_I,10945
72
+ duckguard/validators/__init__.py,sha256=g717IM5xlVLCTg1nLRRccLAFHCsbRO-IgjzG4H6K32A,268
73
+ duckguard-2.3.0.dist-info/METADATA,sha256=2CdxNqC8jwvv_cZAk1cMyWkJJiObUOKckPcB0D-74bw,27779
74
+ duckguard-2.3.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
75
+ duckguard-2.3.0.dist-info/entry_points.txt,sha256=teP6JdXUvY20E9P44TW_Z24xuQtXMgnCyOuWtd_KIYU,108
76
+ duckguard-2.3.0.dist-info/licenses/LICENSE,sha256=1Li9P3fainL-epQ9kEHZWKDScWtp4inPd6AkhUTJStk,3841
77
+ duckguard-2.3.0.dist-info/RECORD,,
@@ -1,221 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: duckguard
3
- Version: 2.0.0
4
- Summary: A Python-native data quality tool with AI superpowers, built on DuckDB for speed
5
- Project-URL: Homepage, https://github.com/duckguard/duckguard
6
- Project-URL: Documentation, https://duckguard.dev
7
- Project-URL: Repository, https://github.com/duckguard/duckguard
8
- Author: DuckGuard Team
9
- License-Expression: Elastic-2.0
10
- License-File: LICENSE
11
- Keywords: data-engineering,data-quality,data-validation,duckdb,testing
12
- Classifier: Development Status :: 4 - Beta
13
- Classifier: Intended Audience :: Developers
14
- Classifier: License :: Other/Proprietary License
15
- Classifier: Programming Language :: Python :: 3
16
- Classifier: Programming Language :: Python :: 3.10
17
- Classifier: Programming Language :: Python :: 3.11
18
- Classifier: Programming Language :: Python :: 3.12
19
- Classifier: Topic :: Database
20
- Classifier: Topic :: Software Development :: Testing
21
- Requires-Python: >=3.10
22
- Requires-Dist: duckdb>=1.0.0
23
- Requires-Dist: packaging>=21.0
24
- Requires-Dist: pyarrow>=14.0.0
25
- Requires-Dist: pydantic>=2.0.0
26
- Requires-Dist: pyyaml>=6.0.0
27
- Requires-Dist: rich>=13.0.0
28
- Requires-Dist: typer>=0.9.0
29
- Provides-Extra: all
30
- Requires-Dist: anthropic>=0.18.0; extra == 'all'
31
- Requires-Dist: databricks-sql-connector>=2.0.0; extra == 'all'
32
- Requires-Dist: google-cloud-bigquery>=3.0.0; extra == 'all'
33
- Requires-Dist: kafka-python>=2.0.0; extra == 'all'
34
- Requires-Dist: openai>=1.0.0; extra == 'all'
35
- Requires-Dist: oracledb>=1.0.0; extra == 'all'
36
- Requires-Dist: psycopg2-binary>=2.9.0; extra == 'all'
37
- Requires-Dist: pymongo>=4.0.0; extra == 'all'
38
- Requires-Dist: pymysql>=1.0.0; extra == 'all'
39
- Requires-Dist: pyodbc>=4.0.0; extra == 'all'
40
- Requires-Dist: redshift-connector>=2.0.0; extra == 'all'
41
- Requires-Dist: snowflake-connector-python>=3.0.0; extra == 'all'
42
- Provides-Extra: bigquery
43
- Requires-Dist: google-cloud-bigquery>=3.0.0; extra == 'bigquery'
44
- Provides-Extra: databases
45
- Requires-Dist: databricks-sql-connector>=2.0.0; extra == 'databases'
46
- Requires-Dist: google-cloud-bigquery>=3.0.0; extra == 'databases'
47
- Requires-Dist: kafka-python>=2.0.0; extra == 'databases'
48
- Requires-Dist: oracledb>=1.0.0; extra == 'databases'
49
- Requires-Dist: psycopg2-binary>=2.9.0; extra == 'databases'
50
- Requires-Dist: pymongo>=4.0.0; extra == 'databases'
51
- Requires-Dist: pymysql>=1.0.0; extra == 'databases'
52
- Requires-Dist: pyodbc>=4.0.0; extra == 'databases'
53
- Requires-Dist: redshift-connector>=2.0.0; extra == 'databases'
54
- Requires-Dist: snowflake-connector-python>=3.0.0; extra == 'databases'
55
- Provides-Extra: databricks
56
- Requires-Dist: databricks-sql-connector>=2.0.0; extra == 'databricks'
57
- Provides-Extra: dev
58
- Requires-Dist: black>=23.0.0; extra == 'dev'
59
- Requires-Dist: mypy>=1.0.0; extra == 'dev'
60
- Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
61
- Requires-Dist: pytest>=7.0.0; extra == 'dev'
62
- Requires-Dist: ruff>=0.1.0; extra == 'dev'
63
- Provides-Extra: kafka
64
- Requires-Dist: kafka-python>=2.0.0; extra == 'kafka'
65
- Provides-Extra: llm
66
- Requires-Dist: anthropic>=0.18.0; extra == 'llm'
67
- Requires-Dist: openai>=1.0.0; extra == 'llm'
68
- Provides-Extra: mongodb
69
- Requires-Dist: pymongo>=4.0.0; extra == 'mongodb'
70
- Provides-Extra: mysql
71
- Requires-Dist: pymysql>=1.0.0; extra == 'mysql'
72
- Provides-Extra: oracle
73
- Requires-Dist: oracledb>=1.0.0; extra == 'oracle'
74
- Provides-Extra: postgres
75
- Requires-Dist: psycopg2-binary>=2.9.0; extra == 'postgres'
76
- Provides-Extra: redshift
77
- Requires-Dist: redshift-connector>=2.0.0; extra == 'redshift'
78
- Provides-Extra: snowflake
79
- Requires-Dist: snowflake-connector-python>=3.0.0; extra == 'snowflake'
80
- Provides-Extra: sqlserver
81
- Requires-Dist: pyodbc>=4.0.0; extra == 'sqlserver'
82
- Description-Content-Type: text/markdown
83
-
84
- # DuckGuard
85
-
86
- Data quality that just works. Python-native, DuckDB-powered, 10x faster.
87
-
88
- [![PyPI version](https://badge.fury.io/py/duckguard.svg)](https://badge.fury.io/py/duckguard)
89
- [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
90
- [![License: Elastic-2.0](https://img.shields.io/badge/License-Elastic--2.0-blue.svg)](https://www.elastic.co/licensing/elastic-license)
91
-
92
- ```bash
93
- pip install duckguard
94
- ```
95
-
96
- ## 60-Second Demo
97
-
98
- ```bash
99
- # CLI - instant data quality check
100
- duckguard check data.csv
101
-
102
- # Auto-generate validation rules
103
- duckguard discover data.csv --output duckguard.yaml
104
- ```
105
-
106
- ```python
107
- # Python - feels like pytest
108
- from duckguard import connect
109
-
110
- orders = connect("data/orders.csv")
111
-
112
- assert orders.row_count > 0
113
- assert orders.customer_id.null_percent < 5
114
- assert orders.amount.between(0, 10000)
115
- assert orders.status.isin(['pending', 'shipped', 'delivered'])
116
- ```
117
-
118
- ## Key Features
119
-
120
- | Feature | Description |
121
- |---------|-------------|
122
- | **Quality Scoring** | Get A-F grades for your data |
123
- | **YAML Rules** | Define checks in simple YAML files |
124
- | **Semantic Detection** | Auto-detect emails, phones, SSNs, PII |
125
- | **Data Contracts** | Schema + SLAs with breaking change detection |
126
- | **Anomaly Detection** | Z-score, IQR, and percent change methods |
127
- | **pytest Integration** | Data tests alongside unit tests |
128
-
129
- ## Quick Examples
130
-
131
- ### Quality Score
132
- ```python
133
- quality = orders.score()
134
- print(f"Grade: {quality.grade}") # A, B, C, D, or F
135
- ```
136
-
137
- ### YAML Rules
138
- ```yaml
139
- # duckguard.yaml
140
- dataset: orders
141
- rules:
142
- - order_id is not null
143
- - order_id is unique
144
- - amount >= 0
145
- - status in ['pending', 'shipped', 'delivered']
146
- ```
147
-
148
- ```python
149
- from duckguard import load_rules, execute_rules
150
- result = execute_rules(load_rules("duckguard.yaml"), dataset=orders)
151
- ```
152
-
153
- ### PII Detection
154
- ```python
155
- from duckguard.semantic import SemanticAnalyzer
156
- analysis = SemanticAnalyzer().analyze(orders)
157
- print(f"PII found: {analysis.pii_columns}")
158
- ```
159
-
160
- ### Anomaly Detection
161
- ```python
162
- from duckguard import detect_anomalies
163
- report = detect_anomalies(orders, method="zscore")
164
- ```
165
-
166
- ### Data Contracts
167
- ```python
168
- from duckguard import generate_contract, validate_contract
169
- contract = generate_contract(orders)
170
- result = validate_contract(contract, new_orders)
171
- ```
172
-
173
- ## Supported Sources
174
-
175
- **Files:** CSV, Parquet, JSON, Excel
176
- **Cloud:** S3, GCS, Azure Blob
177
- **Databases:** PostgreSQL, MySQL, SQLite, Snowflake, BigQuery, Redshift, Databricks, SQL Server, Oracle, MongoDB
178
- **Formats:** Delta Lake, Apache Iceberg
179
-
180
- ```python
181
- # Connect to anything
182
- orders = connect("s3://bucket/orders.parquet")
183
- orders = connect("postgres://localhost/db", table="orders")
184
- orders = connect("snowflake://account/db", table="orders")
185
- ```
186
-
187
- ## CLI Commands
188
-
189
- ```bash
190
- duckguard check <file> # Run quality checks
191
- duckguard discover <file> # Auto-generate rules
192
- duckguard contract generate # Create data contract
193
- duckguard contract validate # Validate against contract
194
- duckguard anomaly <file> # Detect anomalies
195
- ```
196
-
197
- ## Column Methods
198
-
199
- ```python
200
- # Statistics
201
- col.null_percent, col.unique_percent
202
- col.min, col.max, col.mean, col.stddev
203
-
204
- # Validations
205
- col.between(0, 100)
206
- col.matches(r'^\d{5}$')
207
- col.isin(['a', 'b', 'c'])
208
- col.has_no_duplicates()
209
- ```
210
-
211
- ## Performance
212
-
213
- Built on DuckDB for speed:
214
-
215
- | | Pandas/GX | DuckGuard |
216
- |---|---|---|
217
- | 1GB CSV | 45s, 4GB RAM | 4s, 200MB RAM |
218
-
219
- ## License
220
-
221
- Elastic License 2.0 - see [LICENSE](LICENSE)
@@ -1,55 +0,0 @@
1
- duckguard/__init__.py,sha256=wxGbL0z4mLna0KajP_Mjlo5ldneGmSZnu1kPlzeRtNo,2339
2
- duckguard/anomaly/__init__.py,sha256=PB7fvywbLVzsA_M1jv-JWIGnCL3uyW6fvdZWO3Xrl1A,741
3
- duckguard/anomaly/detector.py,sha256=6F4BU-Xn97XhS5PzXGS4Ku3Cp_fSUC4s6hLn2YzFgEk,12520
4
- duckguard/anomaly/methods.py,sha256=woLJ3MQsvroawlN1pqFQxA8xqdZEpUlFP4zVSUJo_p4,12774
5
- duckguard/cli/__init__.py,sha256=s5MNXEu_MbRqyV-jeUgCIDlHRQA97a9knM_anJooTl0,87
6
- duckguard/cli/main.py,sha256=zGwT9AiqHBmUFuCNL2qOYjOlHjEadiFbo70iu3CxVhM,24486
7
- duckguard/connectors/__init__.py,sha256=nAZA214EKTQqVJZ0PSgF0hei4NzOKyfdSb994wbToT4,2232
8
- duckguard/connectors/base.py,sha256=XzGY6_pUwDJIVNhTfgNMkcGNOBs3xxjbnQ_NeMoz4eM,1864
9
- duckguard/connectors/bigquery.py,sha256=Zy6sT0z1ve91imLVBHR7f7GlSRv8A6TLKh0VYMa39bc,5327
10
- duckguard/connectors/databricks.py,sha256=yBs2v51WL7jWSoI86log9uAdQ1GZS4iLKVZJis-A-28,6550
11
- duckguard/connectors/factory.py,sha256=dScZqRAQ3BJgpEVmB44VhL6jrLHX8oxhjBgZ_aL5X5A,9157
12
- duckguard/connectors/files.py,sha256=ulDvFhODv9cMqgFgIBKCF68fWrC4bxL13PNZasEBIH0,3841
13
- duckguard/connectors/kafka.py,sha256=xO0Zq-Krj0TDN-svVZEnqR8wYhVunZMF3PbyR26lMd8,10711
14
- duckguard/connectors/mongodb.py,sha256=QtNBMdbc_ZSj00-4MFx7MvmD6GslwxlDWv-h0Gc5MPg,7271
15
- duckguard/connectors/mysql.py,sha256=vYHPhSXByLXcwwj_f67b2NCcu9PAtsbtBQ3xJAbxuI8,3875
16
- duckguard/connectors/oracle.py,sha256=sYERxtanasZaQxD-cXqzA2LeOfWhxY2bm-vPV-xd9DI,6178
17
- duckguard/connectors/postgres.py,sha256=fOb6LFl9NvDsqZAVCyKMSu7oZ6EycmPERs8VdnArfWQ,3071
18
- duckguard/connectors/redshift.py,sha256=-m_eiEo-yTVjUu0RtWYBwM4PZS5QiFcjdrYXZDipBpg,4951
19
- duckguard/connectors/snowflake.py,sha256=a-jO6g7NuFnvR3KXpmYVmilgsJfQe0ZQXF4gjIpBHF8,7118
20
- duckguard/connectors/sqlite.py,sha256=kuS7ZeblORJ1noruwfjIUGuzLIculi2WqX4BldWSlyI,3346
21
- duckguard/connectors/sqlserver.py,sha256=p17F7hguRbDx93nYsjrZ3DXOrfevnPAoGNYIL0p3TG8,7582
22
- duckguard/contracts/__init__.py,sha256=ryEK_amxt0m_sCy7dywYL07MSZA8WNKcVYVcQhe-e9M,1313
23
- duckguard/contracts/diff.py,sha256=Ztcd0mbvMGw9Md8HvGJK4rPwfwhZPXd5fb8upiFIxPM,15085
24
- duckguard/contracts/generator.py,sha256=dZhxbSx0B_-oC2zimL7Jg6W7_l3lTaKsOXJ51fBBcX8,10992
25
- duckguard/contracts/loader.py,sha256=ydUL6_xf-028ug224u7vZiSSpOvtUt408I4l-ONmmIA,10883
26
- duckguard/contracts/schema.py,sha256=pLoR4QIXs68Q93DOZqqTmPnPecCeZ4iy9lDXZMNuVmI,7032
27
- duckguard/contracts/validator.py,sha256=rDUKQZHxcptHmBWI5z4YJxoM871_MG1K13gfW74OGPk,16464
28
- duckguard/core/__init__.py,sha256=E9lCV2G7OqsQt-usfFPjWi4Bn5qgkEM8GZwgohVzyMY,356
29
- duckguard/core/column.py,sha256=3I6e36cZPI29m4T4OiYk6sXkswrvL8KVdmOOqwhyBME,13489
30
- duckguard/core/dataset.py,sha256=OOrKJ-rPl1xCgr-jHH-rpdoADBWSK6j7uw3XVwHMJVM,8287
31
- duckguard/core/engine.py,sha256=ld_NHsWyBkVynmWyvbyQcHdXHhpIoSaRDyqAAtVx8J0,7897
32
- duckguard/core/result.py,sha256=wzggv0ra0EbgjcjhuK0wIS8_mO133XKKc1Hs_JLnzoY,3052
33
- duckguard/core/scoring.py,sha256=W37qJio035M2zOqRV1CDm6IUTzljdGEAZe5Vh610jpg,16876
34
- duckguard/profiler/__init__.py,sha256=a16GYeeFDZzwCemTsTuzO3Ih4M7_hOPb9hS8yt-nHzU,169
35
- duckguard/profiler/auto_profile.py,sha256=hS9Ef1aAbwrqYMAxrsNsFJRV8wNuMlNKR19lqkOxwSE,12275
36
- duckguard/pytest_plugin/__init__.py,sha256=YTu7eG2Kb_d_g4wzsakb5jwJtxleKTVB_MDgHvhSEJ0,168
37
- duckguard/pytest_plugin/plugin.py,sha256=9kVuUoa18DWdzHspMmvkLfJaoXOwpPbTN8cRLZHZ7LE,4949
38
- duckguard/reporting/__init__.py,sha256=R7Fm--yEiuOb_II-Qo7MGXYyCNhsGnVsMVuAzZT6rIM,199
39
- duckguard/reporting/console.py,sha256=NKTnUaiQO9trMCiYyNSym3MZCA_F8C8nd8Ai2HnEh4Y,3026
40
- duckguard/reporting/json_report.py,sha256=dqUry9akuPRwNz4ysUM6ZP6ZCXl77nA_Z7mXG-1VGKA,3509
41
- duckguard/rules/__init__.py,sha256=QvMDHQRKMDzwp2YEPHeW7Nlk4FHeqfwPXjR7BoK2UVA,813
42
- duckguard/rules/executor.py,sha256=353t9sKzQrmNNAhBpoR04X1tGhdcbP2UCIUBN0WIlQ4,20771
43
- duckguard/rules/generator.py,sha256=OMpaHbEsl_wxBDB7gb7DyRmkI1nkJD6BhN6955O4qwE,10989
44
- duckguard/rules/loader.py,sha256=XRFvFEXEFVMqUW3XM1fhFgzzjj992lgaFhpXSMbqeHI,14627
45
- duckguard/rules/schema.py,sha256=KkUAUjQBNbDLRX_XfiXc6DH8EdK4Zbd3NqupKjkoZjc,9326
46
- duckguard/semantic/__init__.py,sha256=Z_nxl5bwSyJZnyHTU2pkiSePX7chreejR6qaDlgzZc0,847
47
- duckguard/semantic/analyzer.py,sha256=nw1kUj_56sHBl6luYMgdRdFgaN3-GGMxh40-sxGYRM8,8336
48
- duckguard/semantic/detector.py,sha256=YUAPj-CEiKQCQn2BjnL5gzETH4N4ffV1EIdGcD4r3ms,14872
49
- duckguard/semantic/validators.py,sha256=iZv0_983fPeX6GLv030qWBIAHq3fRK9gfZIYeZymBUE,10918
50
- duckguard/validators/__init__.py,sha256=g717IM5xlVLCTg1nLRRccLAFHCsbRO-IgjzG4H6K32A,268
51
- duckguard-2.0.0.dist-info/METADATA,sha256=gSkdAUaMl-j6G2OisrQwoaa8WRl5Yh7GIJGra9zqbd4,7054
52
- duckguard-2.0.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
53
- duckguard-2.0.0.dist-info/entry_points.txt,sha256=teP6JdXUvY20E9P44TW_Z24xuQtXMgnCyOuWtd_KIYU,108
54
- duckguard-2.0.0.dist-info/licenses/LICENSE,sha256=1Li9P3fainL-epQ9kEHZWKDScWtp4inPd6AkhUTJStk,3841
55
- duckguard-2.0.0.dist-info/RECORD,,