duckguard-2.0.0-py3-none-any.whl → duckguard-2.3.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- duckguard/__init__.py +55 -28
- duckguard/anomaly/__init__.py +29 -1
- duckguard/anomaly/baselines.py +294 -0
- duckguard/anomaly/detector.py +1 -5
- duckguard/anomaly/methods.py +17 -5
- duckguard/anomaly/ml_methods.py +724 -0
- duckguard/cli/main.py +561 -56
- duckguard/connectors/__init__.py +2 -2
- duckguard/connectors/bigquery.py +1 -1
- duckguard/connectors/databricks.py +1 -1
- duckguard/connectors/factory.py +2 -3
- duckguard/connectors/files.py +1 -1
- duckguard/connectors/kafka.py +2 -2
- duckguard/connectors/mongodb.py +1 -1
- duckguard/connectors/mysql.py +1 -1
- duckguard/connectors/oracle.py +1 -1
- duckguard/connectors/postgres.py +1 -2
- duckguard/connectors/redshift.py +1 -1
- duckguard/connectors/snowflake.py +1 -2
- duckguard/connectors/sqlite.py +1 -1
- duckguard/connectors/sqlserver.py +10 -13
- duckguard/contracts/__init__.py +6 -6
- duckguard/contracts/diff.py +1 -1
- duckguard/contracts/generator.py +5 -6
- duckguard/contracts/loader.py +4 -4
- duckguard/contracts/validator.py +3 -4
- duckguard/core/__init__.py +3 -3
- duckguard/core/column.py +588 -5
- duckguard/core/dataset.py +708 -3
- duckguard/core/result.py +328 -1
- duckguard/core/scoring.py +1 -2
- duckguard/errors.py +362 -0
- duckguard/freshness/__init__.py +33 -0
- duckguard/freshness/monitor.py +429 -0
- duckguard/history/__init__.py +44 -0
- duckguard/history/schema.py +301 -0
- duckguard/history/storage.py +479 -0
- duckguard/history/trends.py +348 -0
- duckguard/integrations/__init__.py +31 -0
- duckguard/integrations/airflow.py +387 -0
- duckguard/integrations/dbt.py +458 -0
- duckguard/notifications/__init__.py +61 -0
- duckguard/notifications/email.py +508 -0
- duckguard/notifications/formatter.py +118 -0
- duckguard/notifications/notifiers.py +357 -0
- duckguard/profiler/auto_profile.py +3 -3
- duckguard/pytest_plugin/__init__.py +1 -1
- duckguard/pytest_plugin/plugin.py +1 -1
- duckguard/reporting/console.py +2 -2
- duckguard/reports/__init__.py +42 -0
- duckguard/reports/html_reporter.py +514 -0
- duckguard/reports/pdf_reporter.py +114 -0
- duckguard/rules/__init__.py +3 -3
- duckguard/rules/executor.py +3 -4
- duckguard/rules/generator.py +8 -5
- duckguard/rules/loader.py +5 -5
- duckguard/rules/schema.py +23 -0
- duckguard/schema_history/__init__.py +40 -0
- duckguard/schema_history/analyzer.py +414 -0
- duckguard/schema_history/tracker.py +288 -0
- duckguard/semantic/__init__.py +1 -1
- duckguard/semantic/analyzer.py +0 -2
- duckguard/semantic/detector.py +17 -1
- duckguard/semantic/validators.py +2 -1
- duckguard-2.3.0.dist-info/METADATA +953 -0
- duckguard-2.3.0.dist-info/RECORD +77 -0
- duckguard-2.0.0.dist-info/METADATA +0 -221
- duckguard-2.0.0.dist-info/RECORD +0 -55
- {duckguard-2.0.0.dist-info → duckguard-2.3.0.dist-info}/WHEEL +0 -0
- {duckguard-2.0.0.dist-info → duckguard-2.3.0.dist-info}/entry_points.txt +0 -0
- {duckguard-2.0.0.dist-info → duckguard-2.3.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
duckguard/__init__.py,sha256=TUiy1yQKA20tv77qAFsFrk_yjWuzQD9csTKCtweQ_S4,3078
|
|
2
|
+
duckguard/errors.py,sha256=xhQPxCCeB3dCQspTbQf58h_DvwHP1vAb6vKI9fHYAJ0,11493
|
|
3
|
+
duckguard/anomaly/__init__.py,sha256=mrTyL70cOR5S7_RNc9QLADdnBimIsbAoFTbKlWiIsbw,1353
|
|
4
|
+
duckguard/anomaly/baselines.py,sha256=k28CjjqBa8IaZxnIgof-wjw_Xdb7NJZImC2OJJkGXQ8,8776
|
|
5
|
+
duckguard/anomaly/detector.py,sha256=voA7WS2x2p5h5cnwH3C_2ly7HdYpXLwC4jDiPL2Xleo,12443
|
|
6
|
+
duckguard/anomaly/methods.py,sha256=CtV2G-kowXGgz0HYvNoi2Ge7eyHUg2GwGa3oZvunS38,13475
|
|
7
|
+
duckguard/anomaly/ml_methods.py,sha256=UyEr8q4K_wNq7pWgTsV23IoBI13aqm0hHIwIFjIxeas,23449
|
|
8
|
+
duckguard/cli/__init__.py,sha256=s5MNXEu_MbRqyV-jeUgCIDlHRQA97a9knM_anJooTl0,87
|
|
9
|
+
duckguard/cli/main.py,sha256=sMq5RfM0-OeXTG_jgTRGyvfw-c4iwojNGUEW8AYQ3fA,46001
|
|
10
|
+
duckguard/connectors/__init__.py,sha256=BMbVyyBPI9_GAFcwkQivf2xMvHwVOHvBMuT5qZ558jc,2232
|
|
11
|
+
duckguard/connectors/base.py,sha256=XzGY6_pUwDJIVNhTfgNMkcGNOBs3xxjbnQ_NeMoz4eM,1864
|
|
12
|
+
duckguard/connectors/bigquery.py,sha256=b-EHAF90dbyCh387qNirkRGY0sEsPAmvy-hNCbY7ilQ,5327
|
|
13
|
+
duckguard/connectors/databricks.py,sha256=vsm5wWGb6V_J1yMdXyREjy9ElR84S0aLk0NgOAbd1J4,6550
|
|
14
|
+
duckguard/connectors/factory.py,sha256=brO5ypD9nriHqWNN4x9KItq3mTtjcy5nM6eu5luS9RU,9156
|
|
15
|
+
duckguard/connectors/files.py,sha256=QU5lFWf9NUv0lX_txx_CLfTzhcF7tAZtCGZOCrzX-tk,3841
|
|
16
|
+
duckguard/connectors/kafka.py,sha256=Oo_axyJck6gHrwLFpnGcUVKEfKqxqz-AEdlVkNBYVVE,10709
|
|
17
|
+
duckguard/connectors/mongodb.py,sha256=3RI3-hiTHXQIk5cg9ZM5q2UDn5HU2wDnq-f8xj-Yc2A,7271
|
|
18
|
+
duckguard/connectors/mysql.py,sha256=EW-VrZiNgOGFVnVccTR-jVrn3S6KHK6GA-Yj3kmmU5w,3875
|
|
19
|
+
duckguard/connectors/oracle.py,sha256=ar_xM4t-X1kCYWAi_mmg1wzUqvMcS8dudWqcSGp5o0M,6178
|
|
20
|
+
duckguard/connectors/postgres.py,sha256=CXRKUSwtsOTbsk1ASCzzjZYDr9V3_MAdd7f6CnTi-F0,3061
|
|
21
|
+
duckguard/connectors/redshift.py,sha256=-G9IgBf2Reb7RWBj5dmILWnuu3oc8pNhfq1XgwTQHGs,4951
|
|
22
|
+
duckguard/connectors/snowflake.py,sha256=TwCaUY-7zVrU96POEqDbJRrp0eiKSjjGBr2RI_4SESk,7108
|
|
23
|
+
duckguard/connectors/sqlite.py,sha256=igfv-PNUqcNhexycQ7WcwskGNjzaqfA1oWkqUU3kSXY,3346
|
|
24
|
+
duckguard/connectors/sqlserver.py,sha256=o2TjW7bbANXImvElCjRIIbD0BkeFB0p_oWHjt-LXObY,7579
|
|
25
|
+
duckguard/contracts/__init__.py,sha256=LFc9iPhpbafcQJILJtf90z979aB5HtSCW8veeOlHJy4,1313
|
|
26
|
+
duckguard/contracts/diff.py,sha256=Dj8yjRlEexCEVF5tSkc62LrXL8-W0DwPyyDXc7ON90M,15085
|
|
27
|
+
duckguard/contracts/generator.py,sha256=Ou1EOAgOxIgEw_APbcyQEPqlvMDTovl5_o4T1VzjUT8,10969
|
|
28
|
+
duckguard/contracts/loader.py,sha256=iTmg9xjSAlYsBpQeTAJ1-ABQnuXs-qpMh3DH4rfN6qs,10878
|
|
29
|
+
duckguard/contracts/schema.py,sha256=pLoR4QIXs68Q93DOZqqTmPnPecCeZ4iy9lDXZMNuVmI,7032
|
|
30
|
+
duckguard/contracts/validator.py,sha256=X972Ns-8UWBL8D4nCCQlNOHJas0Mc4ES8URbKqd0WLw,16432
|
|
31
|
+
duckguard/core/__init__.py,sha256=pHndzrdehB0GFtlSQ46uvw8XgUQj55dVZQP1ZK-aDso,356
|
|
32
|
+
duckguard/core/column.py,sha256=ux3B2HyrgXLkz0tCY4EmR7JVRoedzCfURhzCfuO-tU8,35346
|
|
33
|
+
duckguard/core/dataset.py,sha256=SBwrXLtZyf-bkT1o42OU6tURWP7TOL4uBZ0BBMR3wD8,33287
|
|
34
|
+
duckguard/core/engine.py,sha256=ld_NHsWyBkVynmWyvbyQcHdXHhpIoSaRDyqAAtVx8J0,7897
|
|
35
|
+
duckguard/core/result.py,sha256=BwmP0gNPAKVYHdyque1rDkbAhEvwFaA3PwhxaI7cY14,15178
|
|
36
|
+
duckguard/core/scoring.py,sha256=42CVgxmmfo3Yb3m3Xl8qWnDgR7ndSZd8vXRwy9XSThI,16826
|
|
37
|
+
duckguard/freshness/__init__.py,sha256=8XR7JxH9tz61En5DTMSDHrjhroPzvwCTVzBbBiRFexs,854
|
|
38
|
+
duckguard/freshness/monitor.py,sha256=O_b4fh6unyZ2DXioX6O7KP9VpenGdLTpb9OdNb79dX8,14695
|
|
39
|
+
duckguard/history/__init__.py,sha256=_O4OBEeku1X0-Jo87qA0KKwZbh-s3LwfypYTHp_mST8,970
|
|
40
|
+
duckguard/history/schema.py,sha256=E3pP6u88OESmYQM08-XW8UQOmeUIFrM_JIpkQCM2f_g,9900
|
|
41
|
+
duckguard/history/storage.py,sha256=0r2x2VNBUWjafZCFohy63NX4f4v4-SkyJaSCZRJUCj0,15413
|
|
42
|
+
duckguard/history/trends.py,sha256=t6P3asMAPahDMK9E6sVf3nT3zFEDDZhk7n2Ice2I7BM,10702
|
|
43
|
+
duckguard/integrations/__init__.py,sha256=SuqOzfdaejlMCti372FHD_R6bVaPaUmfEPG9IM6UOW0,831
|
|
44
|
+
duckguard/integrations/airflow.py,sha256=pxC14Kgwou_2xWPvTfx8YWO-xg_vgFeAlGDhgGfXRyM,13195
|
|
45
|
+
duckguard/integrations/dbt.py,sha256=Dw1meY-UhylDFhUZ2s47FnJGMp_gszHvadGn_hqYkSM,14101
|
|
46
|
+
duckguard/notifications/__init__.py,sha256=qEfUvt7d_WXlbsGlLB-FaNF4ksLtAyO8JXi1JCdo89w,1541
|
|
47
|
+
duckguard/notifications/email.py,sha256=jwgxec8r6NUNqrxz3v5B4A3UL0-ZdxnJZhXQXWgMWH4,17168
|
|
48
|
+
duckguard/notifications/formatter.py,sha256=Z2vGMpLdqPWYaYTaVtVjYnIbNU8Haer-7efohZ5IZxM,3991
|
|
49
|
+
duckguard/notifications/notifiers.py,sha256=e-UBvoskFSzIwlCFTxIFdkI-z54zZeEeSQkvOvgV6JI,11703
|
|
50
|
+
duckguard/profiler/__init__.py,sha256=a16GYeeFDZzwCemTsTuzO3Ih4M7_hOPb9hS8yt-nHzU,169
|
|
51
|
+
duckguard/profiler/auto_profile.py,sha256=KbAkty-HrpNbTribi2uD17Fcsb-UiV5eG4zZsbyBOL4,12267
|
|
52
|
+
duckguard/pytest_plugin/__init__.py,sha256=GuhFPvINnpoVSxhvCX9b5dymzdhsn2KZhXU6okk4xQU,168
|
|
53
|
+
duckguard/pytest_plugin/plugin.py,sha256=SA1dvkZ0MYyNyRXzuqelreEo2zK0XTsNZeYwUYd3Gy0,4949
|
|
54
|
+
duckguard/reporting/__init__.py,sha256=R7Fm--yEiuOb_II-Qo7MGXYyCNhsGnVsMVuAzZT6rIM,199
|
|
55
|
+
duckguard/reporting/console.py,sha256=GvXFqKLLkU-LQb1FNkS7HI-NQYbHpQCSBYI4FSUDOMw,3026
|
|
56
|
+
duckguard/reporting/json_report.py,sha256=dqUry9akuPRwNz4ysUM6ZP6ZCXl77nA_Z7mXG-1VGKA,3509
|
|
57
|
+
duckguard/reports/__init__.py,sha256=JGGZ2IJFVOutcQaZ8kpjDDKJru9e5EsVi91au2VFKsk,1025
|
|
58
|
+
duckguard/reports/html_reporter.py,sha256=_8jzHg6WzC4xqXgqzHzYQTjE4vXbQGP-p1FUKmYAtuU,20670
|
|
59
|
+
duckguard/reports/pdf_reporter.py,sha256=u6zuV24y9YCBlpDwDObHTSrVE9W9beTIqj-UQyvA8jQ,3094
|
|
60
|
+
duckguard/rules/__init__.py,sha256=XYVasAnu8ErJ-Cvsqeh1mX5zxqd1wk-sM4OzuBJn72Y,813
|
|
61
|
+
duckguard/rules/executor.py,sha256=0MKi4mA0Ig873J7JDKpE_O2OJsBFSx6w2jgcGQWl_8w,20720
|
|
62
|
+
duckguard/rules/generator.py,sha256=h8NWcRsqBqj4xEddavFRlnWZfCi3eoXsqWyIJmxPGeo,11184
|
|
63
|
+
duckguard/rules/loader.py,sha256=gzFihSX6w3lpldEXVUn0Ysh9MAOEXh3ABNqJrVlGEng,14622
|
|
64
|
+
duckguard/rules/schema.py,sha256=_YHgZSau89SuECHWdwHtUmO65HZrNFZkaIz7l3cqhEI,10755
|
|
65
|
+
duckguard/schema_history/__init__.py,sha256=q7Kofw5PxbJlXTLzXNZyhvpsrYDKJl1OScWVwEGYIkY,949
|
|
66
|
+
duckguard/schema_history/analyzer.py,sha256=NRDQCjhPstmp6zD7Co0D4D6jVSJ9SB-iAmv4GUQdvJc,14396
|
|
67
|
+
duckguard/schema_history/tracker.py,sha256=ZuMYX8knruiodXd22KoGaT7MgQBElDjekNz73aSwkqI,8468
|
|
68
|
+
duckguard/semantic/__init__.py,sha256=FbX60d-Qf7qaVEhnSTy9NzKiXZt66A1G-NZdhvi3TIY,847
|
|
69
|
+
duckguard/semantic/analyzer.py,sha256=2be1oofe-owBhTg-Dy88-wihaoTQ7DPxf1NuA1sgfR0,8297
|
|
70
|
+
duckguard/semantic/detector.py,sha256=MPdb2Rv9VGQBko7nmPk4-Kjga_XVjPZdHCr29gdET0M,15665
|
|
71
|
+
duckguard/semantic/validators.py,sha256=8Zu3vwPwh79U09zGf4_PpcwV85_hbNCwRHcxTIQ7G_I,10945
|
|
72
|
+
duckguard/validators/__init__.py,sha256=g717IM5xlVLCTg1nLRRccLAFHCsbRO-IgjzG4H6K32A,268
|
|
73
|
+
duckguard-2.3.0.dist-info/METADATA,sha256=2CdxNqC8jwvv_cZAk1cMyWkJJiObUOKckPcB0D-74bw,27779
|
|
74
|
+
duckguard-2.3.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
75
|
+
duckguard-2.3.0.dist-info/entry_points.txt,sha256=teP6JdXUvY20E9P44TW_Z24xuQtXMgnCyOuWtd_KIYU,108
|
|
76
|
+
duckguard-2.3.0.dist-info/licenses/LICENSE,sha256=1Li9P3fainL-epQ9kEHZWKDScWtp4inPd6AkhUTJStk,3841
|
|
77
|
+
duckguard-2.3.0.dist-info/RECORD,,
|
|
@@ -1,221 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: duckguard
|
|
3
|
-
Version: 2.0.0
|
|
4
|
-
Summary: A Python-native data quality tool with AI superpowers, built on DuckDB for speed
|
|
5
|
-
Project-URL: Homepage, https://github.com/duckguard/duckguard
|
|
6
|
-
Project-URL: Documentation, https://duckguard.dev
|
|
7
|
-
Project-URL: Repository, https://github.com/duckguard/duckguard
|
|
8
|
-
Author: DuckGuard Team
|
|
9
|
-
License-Expression: Elastic-2.0
|
|
10
|
-
License-File: LICENSE
|
|
11
|
-
Keywords: data-engineering,data-quality,data-validation,duckdb,testing
|
|
12
|
-
Classifier: Development Status :: 4 - Beta
|
|
13
|
-
Classifier: Intended Audience :: Developers
|
|
14
|
-
Classifier: License :: Other/Proprietary License
|
|
15
|
-
Classifier: Programming Language :: Python :: 3
|
|
16
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
-
Classifier: Topic :: Database
|
|
20
|
-
Classifier: Topic :: Software Development :: Testing
|
|
21
|
-
Requires-Python: >=3.10
|
|
22
|
-
Requires-Dist: duckdb>=1.0.0
|
|
23
|
-
Requires-Dist: packaging>=21.0
|
|
24
|
-
Requires-Dist: pyarrow>=14.0.0
|
|
25
|
-
Requires-Dist: pydantic>=2.0.0
|
|
26
|
-
Requires-Dist: pyyaml>=6.0.0
|
|
27
|
-
Requires-Dist: rich>=13.0.0
|
|
28
|
-
Requires-Dist: typer>=0.9.0
|
|
29
|
-
Provides-Extra: all
|
|
30
|
-
Requires-Dist: anthropic>=0.18.0; extra == 'all'
|
|
31
|
-
Requires-Dist: databricks-sql-connector>=2.0.0; extra == 'all'
|
|
32
|
-
Requires-Dist: google-cloud-bigquery>=3.0.0; extra == 'all'
|
|
33
|
-
Requires-Dist: kafka-python>=2.0.0; extra == 'all'
|
|
34
|
-
Requires-Dist: openai>=1.0.0; extra == 'all'
|
|
35
|
-
Requires-Dist: oracledb>=1.0.0; extra == 'all'
|
|
36
|
-
Requires-Dist: psycopg2-binary>=2.9.0; extra == 'all'
|
|
37
|
-
Requires-Dist: pymongo>=4.0.0; extra == 'all'
|
|
38
|
-
Requires-Dist: pymysql>=1.0.0; extra == 'all'
|
|
39
|
-
Requires-Dist: pyodbc>=4.0.0; extra == 'all'
|
|
40
|
-
Requires-Dist: redshift-connector>=2.0.0; extra == 'all'
|
|
41
|
-
Requires-Dist: snowflake-connector-python>=3.0.0; extra == 'all'
|
|
42
|
-
Provides-Extra: bigquery
|
|
43
|
-
Requires-Dist: google-cloud-bigquery>=3.0.0; extra == 'bigquery'
|
|
44
|
-
Provides-Extra: databases
|
|
45
|
-
Requires-Dist: databricks-sql-connector>=2.0.0; extra == 'databases'
|
|
46
|
-
Requires-Dist: google-cloud-bigquery>=3.0.0; extra == 'databases'
|
|
47
|
-
Requires-Dist: kafka-python>=2.0.0; extra == 'databases'
|
|
48
|
-
Requires-Dist: oracledb>=1.0.0; extra == 'databases'
|
|
49
|
-
Requires-Dist: psycopg2-binary>=2.9.0; extra == 'databases'
|
|
50
|
-
Requires-Dist: pymongo>=4.0.0; extra == 'databases'
|
|
51
|
-
Requires-Dist: pymysql>=1.0.0; extra == 'databases'
|
|
52
|
-
Requires-Dist: pyodbc>=4.0.0; extra == 'databases'
|
|
53
|
-
Requires-Dist: redshift-connector>=2.0.0; extra == 'databases'
|
|
54
|
-
Requires-Dist: snowflake-connector-python>=3.0.0; extra == 'databases'
|
|
55
|
-
Provides-Extra: databricks
|
|
56
|
-
Requires-Dist: databricks-sql-connector>=2.0.0; extra == 'databricks'
|
|
57
|
-
Provides-Extra: dev
|
|
58
|
-
Requires-Dist: black>=23.0.0; extra == 'dev'
|
|
59
|
-
Requires-Dist: mypy>=1.0.0; extra == 'dev'
|
|
60
|
-
Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
|
|
61
|
-
Requires-Dist: pytest>=7.0.0; extra == 'dev'
|
|
62
|
-
Requires-Dist: ruff>=0.1.0; extra == 'dev'
|
|
63
|
-
Provides-Extra: kafka
|
|
64
|
-
Requires-Dist: kafka-python>=2.0.0; extra == 'kafka'
|
|
65
|
-
Provides-Extra: llm
|
|
66
|
-
Requires-Dist: anthropic>=0.18.0; extra == 'llm'
|
|
67
|
-
Requires-Dist: openai>=1.0.0; extra == 'llm'
|
|
68
|
-
Provides-Extra: mongodb
|
|
69
|
-
Requires-Dist: pymongo>=4.0.0; extra == 'mongodb'
|
|
70
|
-
Provides-Extra: mysql
|
|
71
|
-
Requires-Dist: pymysql>=1.0.0; extra == 'mysql'
|
|
72
|
-
Provides-Extra: oracle
|
|
73
|
-
Requires-Dist: oracledb>=1.0.0; extra == 'oracle'
|
|
74
|
-
Provides-Extra: postgres
|
|
75
|
-
Requires-Dist: psycopg2-binary>=2.9.0; extra == 'postgres'
|
|
76
|
-
Provides-Extra: redshift
|
|
77
|
-
Requires-Dist: redshift-connector>=2.0.0; extra == 'redshift'
|
|
78
|
-
Provides-Extra: snowflake
|
|
79
|
-
Requires-Dist: snowflake-connector-python>=3.0.0; extra == 'snowflake'
|
|
80
|
-
Provides-Extra: sqlserver
|
|
81
|
-
Requires-Dist: pyodbc>=4.0.0; extra == 'sqlserver'
|
|
82
|
-
Description-Content-Type: text/markdown
|
|
83
|
-
|
|
84
|
-
# DuckGuard
|
|
85
|
-
|
|
86
|
-
Data quality that just works. Python-native, DuckDB-powered, 10x faster.
|
|
87
|
-
|
|
88
|
-
[PyPI version](https://badge.fury.io/py/duckguard)
|
|
89
|
-
[Python 3.10+](https://www.python.org/downloads/)
|
|
90
|
-
[License: Elastic-2.0](https://www.elastic.co/licensing/elastic-license)
|
|
91
|
-
|
|
92
|
-
```bash
|
|
93
|
-
pip install duckguard
|
|
94
|
-
```
|
|
95
|
-
|
|
96
|
-
## 60-Second Demo
|
|
97
|
-
|
|
98
|
-
```bash
|
|
99
|
-
# CLI - instant data quality check
|
|
100
|
-
duckguard check data.csv
|
|
101
|
-
|
|
102
|
-
# Auto-generate validation rules
|
|
103
|
-
duckguard discover data.csv --output duckguard.yaml
|
|
104
|
-
```
|
|
105
|
-
|
|
106
|
-
```python
|
|
107
|
-
# Python - feels like pytest
|
|
108
|
-
from duckguard import connect
|
|
109
|
-
|
|
110
|
-
orders = connect("data/orders.csv")
|
|
111
|
-
|
|
112
|
-
assert orders.row_count > 0
|
|
113
|
-
assert orders.customer_id.null_percent < 5
|
|
114
|
-
assert orders.amount.between(0, 10000)
|
|
115
|
-
assert orders.status.isin(['pending', 'shipped', 'delivered'])
|
|
116
|
-
```
|
|
117
|
-
|
|
118
|
-
## Key Features
|
|
119
|
-
|
|
120
|
-
| Feature | Description |
|
|
121
|
-
|---------|-------------|
|
|
122
|
-
| **Quality Scoring** | Get A-F grades for your data |
|
|
123
|
-
| **YAML Rules** | Define checks in simple YAML files |
|
|
124
|
-
| **Semantic Detection** | Auto-detect emails, phones, SSNs, PII |
|
|
125
|
-
| **Data Contracts** | Schema + SLAs with breaking change detection |
|
|
126
|
-
| **Anomaly Detection** | Z-score, IQR, and percent change methods |
|
|
127
|
-
| **pytest Integration** | Data tests alongside unit tests |
|
|
128
|
-
|
|
129
|
-
## Quick Examples
|
|
130
|
-
|
|
131
|
-
### Quality Score
|
|
132
|
-
```python
|
|
133
|
-
quality = orders.score()
|
|
134
|
-
print(f"Grade: {quality.grade}") # A, B, C, D, or F
|
|
135
|
-
```
|
|
136
|
-
|
|
137
|
-
### YAML Rules
|
|
138
|
-
```yaml
|
|
139
|
-
# duckguard.yaml
|
|
140
|
-
dataset: orders
|
|
141
|
-
rules:
|
|
142
|
-
- order_id is not null
|
|
143
|
-
- order_id is unique
|
|
144
|
-
- amount >= 0
|
|
145
|
-
- status in ['pending', 'shipped', 'delivered']
|
|
146
|
-
```
|
|
147
|
-
|
|
148
|
-
```python
|
|
149
|
-
from duckguard import load_rules, execute_rules
|
|
150
|
-
result = execute_rules(load_rules("duckguard.yaml"), dataset=orders)
|
|
151
|
-
```
|
|
152
|
-
|
|
153
|
-
### PII Detection
|
|
154
|
-
```python
|
|
155
|
-
from duckguard.semantic import SemanticAnalyzer
|
|
156
|
-
analysis = SemanticAnalyzer().analyze(orders)
|
|
157
|
-
print(f"PII found: {analysis.pii_columns}")
|
|
158
|
-
```
|
|
159
|
-
|
|
160
|
-
### Anomaly Detection
|
|
161
|
-
```python
|
|
162
|
-
from duckguard import detect_anomalies
|
|
163
|
-
report = detect_anomalies(orders, method="zscore")
|
|
164
|
-
```
|
|
165
|
-
|
|
166
|
-
### Data Contracts
|
|
167
|
-
```python
|
|
168
|
-
from duckguard import generate_contract, validate_contract
|
|
169
|
-
contract = generate_contract(orders)
|
|
170
|
-
result = validate_contract(contract, new_orders)
|
|
171
|
-
```
|
|
172
|
-
|
|
173
|
-
## Supported Sources
|
|
174
|
-
|
|
175
|
-
**Files:** CSV, Parquet, JSON, Excel
|
|
176
|
-
**Cloud:** S3, GCS, Azure Blob
|
|
177
|
-
**Databases:** PostgreSQL, MySQL, SQLite, Snowflake, BigQuery, Redshift, Databricks, SQL Server, Oracle, MongoDB
|
|
178
|
-
**Formats:** Delta Lake, Apache Iceberg
|
|
179
|
-
|
|
180
|
-
```python
|
|
181
|
-
# Connect to anything
|
|
182
|
-
orders = connect("s3://bucket/orders.parquet")
|
|
183
|
-
orders = connect("postgres://localhost/db", table="orders")
|
|
184
|
-
orders = connect("snowflake://account/db", table="orders")
|
|
185
|
-
```
|
|
186
|
-
|
|
187
|
-
## CLI Commands
|
|
188
|
-
|
|
189
|
-
```bash
|
|
190
|
-
duckguard check <file> # Run quality checks
|
|
191
|
-
duckguard discover <file> # Auto-generate rules
|
|
192
|
-
duckguard contract generate # Create data contract
|
|
193
|
-
duckguard contract validate # Validate against contract
|
|
194
|
-
duckguard anomaly <file> # Detect anomalies
|
|
195
|
-
```
|
|
196
|
-
|
|
197
|
-
## Column Methods
|
|
198
|
-
|
|
199
|
-
```python
|
|
200
|
-
# Statistics
|
|
201
|
-
col.null_percent, col.unique_percent
|
|
202
|
-
col.min, col.max, col.mean, col.stddev
|
|
203
|
-
|
|
204
|
-
# Validations
|
|
205
|
-
col.between(0, 100)
|
|
206
|
-
col.matches(r'^\d{5}$')
|
|
207
|
-
col.isin(['a', 'b', 'c'])
|
|
208
|
-
col.has_no_duplicates()
|
|
209
|
-
```
|
|
210
|
-
|
|
211
|
-
## Performance
|
|
212
|
-
|
|
213
|
-
Built on DuckDB for speed:
|
|
214
|
-
|
|
215
|
-
| | Pandas/GX | DuckGuard |
|
|
216
|
-
|---|---|---|
|
|
217
|
-
| 1GB CSV | 45s, 4GB RAM | 4s, 200MB RAM |
|
|
218
|
-
|
|
219
|
-
## License
|
|
220
|
-
|
|
221
|
-
Elastic License 2.0 - see [LICENSE](LICENSE)
|
duckguard-2.0.0.dist-info/RECORD
DELETED
|
@@ -1,55 +0,0 @@
|
|
|
1
|
-
duckguard/__init__.py,sha256=wxGbL0z4mLna0KajP_Mjlo5ldneGmSZnu1kPlzeRtNo,2339
|
|
2
|
-
duckguard/anomaly/__init__.py,sha256=PB7fvywbLVzsA_M1jv-JWIGnCL3uyW6fvdZWO3Xrl1A,741
|
|
3
|
-
duckguard/anomaly/detector.py,sha256=6F4BU-Xn97XhS5PzXGS4Ku3Cp_fSUC4s6hLn2YzFgEk,12520
|
|
4
|
-
duckguard/anomaly/methods.py,sha256=woLJ3MQsvroawlN1pqFQxA8xqdZEpUlFP4zVSUJo_p4,12774
|
|
5
|
-
duckguard/cli/__init__.py,sha256=s5MNXEu_MbRqyV-jeUgCIDlHRQA97a9knM_anJooTl0,87
|
|
6
|
-
duckguard/cli/main.py,sha256=zGwT9AiqHBmUFuCNL2qOYjOlHjEadiFbo70iu3CxVhM,24486
|
|
7
|
-
duckguard/connectors/__init__.py,sha256=nAZA214EKTQqVJZ0PSgF0hei4NzOKyfdSb994wbToT4,2232
|
|
8
|
-
duckguard/connectors/base.py,sha256=XzGY6_pUwDJIVNhTfgNMkcGNOBs3xxjbnQ_NeMoz4eM,1864
|
|
9
|
-
duckguard/connectors/bigquery.py,sha256=Zy6sT0z1ve91imLVBHR7f7GlSRv8A6TLKh0VYMa39bc,5327
|
|
10
|
-
duckguard/connectors/databricks.py,sha256=yBs2v51WL7jWSoI86log9uAdQ1GZS4iLKVZJis-A-28,6550
|
|
11
|
-
duckguard/connectors/factory.py,sha256=dScZqRAQ3BJgpEVmB44VhL6jrLHX8oxhjBgZ_aL5X5A,9157
|
|
12
|
-
duckguard/connectors/files.py,sha256=ulDvFhODv9cMqgFgIBKCF68fWrC4bxL13PNZasEBIH0,3841
|
|
13
|
-
duckguard/connectors/kafka.py,sha256=xO0Zq-Krj0TDN-svVZEnqR8wYhVunZMF3PbyR26lMd8,10711
|
|
14
|
-
duckguard/connectors/mongodb.py,sha256=QtNBMdbc_ZSj00-4MFx7MvmD6GslwxlDWv-h0Gc5MPg,7271
|
|
15
|
-
duckguard/connectors/mysql.py,sha256=vYHPhSXByLXcwwj_f67b2NCcu9PAtsbtBQ3xJAbxuI8,3875
|
|
16
|
-
duckguard/connectors/oracle.py,sha256=sYERxtanasZaQxD-cXqzA2LeOfWhxY2bm-vPV-xd9DI,6178
|
|
17
|
-
duckguard/connectors/postgres.py,sha256=fOb6LFl9NvDsqZAVCyKMSu7oZ6EycmPERs8VdnArfWQ,3071
|
|
18
|
-
duckguard/connectors/redshift.py,sha256=-m_eiEo-yTVjUu0RtWYBwM4PZS5QiFcjdrYXZDipBpg,4951
|
|
19
|
-
duckguard/connectors/snowflake.py,sha256=a-jO6g7NuFnvR3KXpmYVmilgsJfQe0ZQXF4gjIpBHF8,7118
|
|
20
|
-
duckguard/connectors/sqlite.py,sha256=kuS7ZeblORJ1noruwfjIUGuzLIculi2WqX4BldWSlyI,3346
|
|
21
|
-
duckguard/connectors/sqlserver.py,sha256=p17F7hguRbDx93nYsjrZ3DXOrfevnPAoGNYIL0p3TG8,7582
|
|
22
|
-
duckguard/contracts/__init__.py,sha256=ryEK_amxt0m_sCy7dywYL07MSZA8WNKcVYVcQhe-e9M,1313
|
|
23
|
-
duckguard/contracts/diff.py,sha256=Ztcd0mbvMGw9Md8HvGJK4rPwfwhZPXd5fb8upiFIxPM,15085
|
|
24
|
-
duckguard/contracts/generator.py,sha256=dZhxbSx0B_-oC2zimL7Jg6W7_l3lTaKsOXJ51fBBcX8,10992
|
|
25
|
-
duckguard/contracts/loader.py,sha256=ydUL6_xf-028ug224u7vZiSSpOvtUt408I4l-ONmmIA,10883
|
|
26
|
-
duckguard/contracts/schema.py,sha256=pLoR4QIXs68Q93DOZqqTmPnPecCeZ4iy9lDXZMNuVmI,7032
|
|
27
|
-
duckguard/contracts/validator.py,sha256=rDUKQZHxcptHmBWI5z4YJxoM871_MG1K13gfW74OGPk,16464
|
|
28
|
-
duckguard/core/__init__.py,sha256=E9lCV2G7OqsQt-usfFPjWi4Bn5qgkEM8GZwgohVzyMY,356
|
|
29
|
-
duckguard/core/column.py,sha256=3I6e36cZPI29m4T4OiYk6sXkswrvL8KVdmOOqwhyBME,13489
|
|
30
|
-
duckguard/core/dataset.py,sha256=OOrKJ-rPl1xCgr-jHH-rpdoADBWSK6j7uw3XVwHMJVM,8287
|
|
31
|
-
duckguard/core/engine.py,sha256=ld_NHsWyBkVynmWyvbyQcHdXHhpIoSaRDyqAAtVx8J0,7897
|
|
32
|
-
duckguard/core/result.py,sha256=wzggv0ra0EbgjcjhuK0wIS8_mO133XKKc1Hs_JLnzoY,3052
|
|
33
|
-
duckguard/core/scoring.py,sha256=W37qJio035M2zOqRV1CDm6IUTzljdGEAZe5Vh610jpg,16876
|
|
34
|
-
duckguard/profiler/__init__.py,sha256=a16GYeeFDZzwCemTsTuzO3Ih4M7_hOPb9hS8yt-nHzU,169
|
|
35
|
-
duckguard/profiler/auto_profile.py,sha256=hS9Ef1aAbwrqYMAxrsNsFJRV8wNuMlNKR19lqkOxwSE,12275
|
|
36
|
-
duckguard/pytest_plugin/__init__.py,sha256=YTu7eG2Kb_d_g4wzsakb5jwJtxleKTVB_MDgHvhSEJ0,168
|
|
37
|
-
duckguard/pytest_plugin/plugin.py,sha256=9kVuUoa18DWdzHspMmvkLfJaoXOwpPbTN8cRLZHZ7LE,4949
|
|
38
|
-
duckguard/reporting/__init__.py,sha256=R7Fm--yEiuOb_II-Qo7MGXYyCNhsGnVsMVuAzZT6rIM,199
|
|
39
|
-
duckguard/reporting/console.py,sha256=NKTnUaiQO9trMCiYyNSym3MZCA_F8C8nd8Ai2HnEh4Y,3026
|
|
40
|
-
duckguard/reporting/json_report.py,sha256=dqUry9akuPRwNz4ysUM6ZP6ZCXl77nA_Z7mXG-1VGKA,3509
|
|
41
|
-
duckguard/rules/__init__.py,sha256=QvMDHQRKMDzwp2YEPHeW7Nlk4FHeqfwPXjR7BoK2UVA,813
|
|
42
|
-
duckguard/rules/executor.py,sha256=353t9sKzQrmNNAhBpoR04X1tGhdcbP2UCIUBN0WIlQ4,20771
|
|
43
|
-
duckguard/rules/generator.py,sha256=OMpaHbEsl_wxBDB7gb7DyRmkI1nkJD6BhN6955O4qwE,10989
|
|
44
|
-
duckguard/rules/loader.py,sha256=XRFvFEXEFVMqUW3XM1fhFgzzjj992lgaFhpXSMbqeHI,14627
|
|
45
|
-
duckguard/rules/schema.py,sha256=KkUAUjQBNbDLRX_XfiXc6DH8EdK4Zbd3NqupKjkoZjc,9326
|
|
46
|
-
duckguard/semantic/__init__.py,sha256=Z_nxl5bwSyJZnyHTU2pkiSePX7chreejR6qaDlgzZc0,847
|
|
47
|
-
duckguard/semantic/analyzer.py,sha256=nw1kUj_56sHBl6luYMgdRdFgaN3-GGMxh40-sxGYRM8,8336
|
|
48
|
-
duckguard/semantic/detector.py,sha256=YUAPj-CEiKQCQn2BjnL5gzETH4N4ffV1EIdGcD4r3ms,14872
|
|
49
|
-
duckguard/semantic/validators.py,sha256=iZv0_983fPeX6GLv030qWBIAHq3fRK9gfZIYeZymBUE,10918
|
|
50
|
-
duckguard/validators/__init__.py,sha256=g717IM5xlVLCTg1nLRRccLAFHCsbRO-IgjzG4H6K32A,268
|
|
51
|
-
duckguard-2.0.0.dist-info/METADATA,sha256=gSkdAUaMl-j6G2OisrQwoaa8WRl5Yh7GIJGra9zqbd4,7054
|
|
52
|
-
duckguard-2.0.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
53
|
-
duckguard-2.0.0.dist-info/entry_points.txt,sha256=teP6JdXUvY20E9P44TW_Z24xuQtXMgnCyOuWtd_KIYU,108
|
|
54
|
-
duckguard-2.0.0.dist-info/licenses/LICENSE,sha256=1Li9P3fainL-epQ9kEHZWKDScWtp4inPd6AkhUTJStk,3841
|
|
55
|
-
duckguard-2.0.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|