pointblank 0.9.0__py3-none-any.whl → 0.9.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pointblank/_constants.py +15 -0
- pointblank/_constants_translations.py +162 -0
- pointblank/_interrogation.py +117 -0
- pointblank/_utils.py +1 -0
- pointblank/data/api-docs.txt +306 -3
- pointblank/validate.py +401 -7
- {pointblank-0.9.0.dist-info → pointblank-0.9.1.dist-info}/METADATA +1 -1
- {pointblank-0.9.0.dist-info → pointblank-0.9.1.dist-info}/RECORD +11 -11
- {pointblank-0.9.0.dist-info → pointblank-0.9.1.dist-info}/WHEEL +1 -1
- {pointblank-0.9.0.dist-info → pointblank-0.9.1.dist-info}/licenses/LICENSE +0 -0
- {pointblank-0.9.0.dist-info → pointblank-0.9.1.dist-info}/top_level.txt +0 -0
pointblank/_constants.py
CHANGED
|
@@ -39,6 +39,7 @@ ASSERTION_TYPE_METHOD_MAP = {
|
|
|
39
39
|
"col_vals_expr": "expr",
|
|
40
40
|
"col_exists": "col_exists",
|
|
41
41
|
"rows_distinct": "rows_distinct",
|
|
42
|
+
"rows_complete": "rows_complete",
|
|
42
43
|
"col_schema_match": "col_schema_match",
|
|
43
44
|
"row_count_match": "row_count_match",
|
|
44
45
|
"col_count_match": "col_count_match",
|
|
@@ -63,6 +64,7 @@ METHOD_CATEGORY_MAP = {
|
|
|
63
64
|
"col_exists": "COL_EXISTS_HAS_TYPE",
|
|
64
65
|
"expr": "COMPARE_EXPR",
|
|
65
66
|
"rows_distinct": "ROWS_DISTINCT",
|
|
67
|
+
"rows_complete": "ROWS_COMPLETE",
|
|
66
68
|
"col_schema_match": "COL_SCHEMA_MATCH",
|
|
67
69
|
"row_count_match": "ROW_COUNT_MATCH",
|
|
68
70
|
"col_count_match": "COL_COUNT_MATCH",
|
|
@@ -375,6 +377,19 @@ SVG_ICONS_FOR_ASSERTION_TYPES = {
|
|
|
375
377
|
</g>
|
|
376
378
|
</g>
|
|
377
379
|
</g>
|
|
380
|
+
</svg>""",
|
|
381
|
+
"rows_complete": """<?xml version="1.0" encoding="UTF-8"?>
|
|
382
|
+
<svg width="67px" height="67px" viewBox="0 0 67 67" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
|
|
383
|
+
<title>rows_complete</title>
|
|
384
|
+
<g id="All-Icons" stroke="none" stroke-width="1" fill="none" fill-rule="evenodd">
|
|
385
|
+
<g id="rows_complete" transform="translate(0.000000, 0.965517)">
|
|
386
|
+
<path d="M56.712234,1 C59.1975153,1 61.4475153,2.00735931 63.076195,3.63603897 C64.7048747,5.26471863 65.712234,7.51471863 65.712234,10 L65.712234,10 L65.712234,65 L10.712234,65 C8.22695259,65 5.97695259,63.9926407 4.34827294,62.363961 C2.71959328,60.7352814 1.71223397,58.4852814 1.71223397,56 L1.71223397,56 L1.71223397,10 C1.71223397,7.51471863 2.71959328,5.26471863 4.34827294,3.63603897 C5.97695259,2.00735931 8.22695259,1 10.712234,1 L10.712234,1 Z" id="rectangle" stroke="#000000" stroke-width="2" fill="#FFFFFF"></path>
|
|
387
|
+
<g id="complete_me" transform="translate(12.500000, 9.500000)" fill="#000000">
|
|
388
|
+
<path d="M8,0 L8,10 L16,10 L16,18 L26,18 L26,10 L34,10 L34,0 L8,0 Z M10,2 L16,2 L16,8 L10,8 L10,2 Z M18,2 L24,2 L24,8 L18,8 L18,2 Z M26,2 L32,2 L32,8 L26,8 L26,2 Z M18,10 L24,10 L24,16 L18,16 L18,10 Z M0,21 L0,47 L42,47 L42,21 L32,21 L32,29 L24,29 L24,37 L18,37 L18,29 L10,29 L10,21 L0,21 Z M2,23 L8,23 L8,29 L2,29 L2,23 Z M34,23 L40,23 L40,29 L34,29 L34,23 Z M2,31 L8,31 L8,37 L2,37 L2,31 Z M10,31 L16,31 L16,37 L10,37 L10,31 Z M26,31 L32,31 L32,37 L26,37 L26,31 Z M34,31 L40,31 L40,37 L34,37 L34,31 Z M2,39 L8,39 L8,45 L2,45 L2,39 Z M10,39 L16,39 L16,45 L10,45 L10,39 Z M18,39 L24,39 L24,45 L18,45 L18,39 Z M26,39 L32,39 L32,45 L26,45 L26,39 Z M34,39 L40,39 L40,45 L34,45 L34,39 Z" id="Shape" fill-rule="nonzero"></path>
|
|
389
|
+
<path d="M22.4566476,18.35817 C22.9253976,18.29567 23.3746166,18.569108 23.5308666,19.01442 C23.6910226,19.459733 23.5152416,19.955826 23.1128976,20.20192 L23.1128976,20.20192 L20.2066476,22.38942 L25.7989286,22.3893123 L25.7989286,24.3893123 L20.2066476,24.38942 L23.1128976,26.57692 C23.5621166,26.912858 23.6519606,27.549576 23.3160226,27.998795 C22.9800856,28.448014 22.3433666,28.537858 21.8941476,28.20192 L21.8941476,28.20192 L16.6128976,24.20192 C16.3511786,24.01442 16.1949286,23.709733 16.1949286,23.38942 C16.1949286,23.069108 16.3511786,22.76442 16.6128976,22.57692 L16.6128976,22.57692 L21.8941476,18.57692 C22.0230536,18.479264 22.1714916,18.416764 22.3316476,18.38942 C22.3707106,18.377701 22.4136786,18.365983 22.4566476,18.35817 Z" id="arrow_right" transform="translate(20.997393, 23.377149) rotate(-90.000000) translate(-20.997393, -23.377149) "></path>
|
|
390
|
+
</g>
|
|
391
|
+
</g>
|
|
392
|
+
</g>
|
|
378
393
|
</svg>""",
|
|
379
394
|
"col_schema_match": """<?xml version="1.0" encoding="UTF-8"?>
|
|
380
395
|
<svg width="67px" height="67px" viewBox="0 0 67 67" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
|
|
@@ -728,6 +728,114 @@ EXPECT_FAIL_TEXT = {
|
|
|
728
728
|
"hi": "चयनित स्तंभों में पंक्तियां पूरी तरह से अलग नहीं थीं, ऐसे असफल परीक्षण इकाइयों की अधिकता।",
|
|
729
729
|
"el": "Υπέρβαση αποτυχημένων μονάδων δοκιμής όπου δεν υπήρχαν διακριτές γραμμές στις επιλεγμένες στήλες.",
|
|
730
730
|
},
|
|
731
|
+
"all_row_complete_expectation_text": {
|
|
732
|
+
"en": "Expect entirely complete rows across all columns.",
|
|
733
|
+
"fr": "On s'attend à des lignes entièrement complètes dans toutes les colonnes.",
|
|
734
|
+
"de": "Erwarten Sie vollständig komplette Zeilen über alle Spalten hinweg.",
|
|
735
|
+
"it": "Aspettati righe completamente complete su tutte le colonne.",
|
|
736
|
+
"es": "Se espera que las filas estén completamente completas en todas las columnas.",
|
|
737
|
+
"pt": "Espera-se linhas completamente preenchidas em todas as colunas.",
|
|
738
|
+
"ro": "Se așteaptă ca rândurile să fie complet complete în toate coloanele.",
|
|
739
|
+
"tr": "Tüm sütunlarda tamamen eksiksiz satırlar bekleyin.",
|
|
740
|
+
"zh-Hans": "预期所有列中的行都是完整的。",
|
|
741
|
+
"zh-Hant": "預期所有列中的行都是完整的。",
|
|
742
|
+
"ja": "すべての列で完全に完全な行を期待します。",
|
|
743
|
+
"ko": "모든 열에서 완전히 완성된 행을 기대합니다.",
|
|
744
|
+
"vi": "Kỳ vọng các hàng hoàn toàn đầy đủ trên tất cả các cột.",
|
|
745
|
+
"ru": "Ожидайте полностью заполненные строки по всем столбцам.",
|
|
746
|
+
"cs": "Očekávejte zcela kompletní řádky ve všech sloupcích.",
|
|
747
|
+
"pl": "Spodziewaj się w pełni kompletnych wierszy we wszystkich kolumnach.",
|
|
748
|
+
"da": "Forvent helt komplette rækker på tværs af alle kolonner.",
|
|
749
|
+
"sv": "Förvänta dig helt kompletta rader över alla kolumner.",
|
|
750
|
+
"nb": "Forvent helt komplette rader på tvers av alle kolonner.",
|
|
751
|
+
"nl": "Verwacht volledig complete rijen in alle kolommen.",
|
|
752
|
+
"fi": "Odota täysin täydellisiä rivejä kaikissa sarakkeissa.",
|
|
753
|
+
"is": "Væntir þess að allar raðir séu heildstæðar yfir alla dálka.",
|
|
754
|
+
"ar": "توقع صفوف مكتملة تمامًا عبر جميع الأعمدة.",
|
|
755
|
+
"hi": "सभी स्तंभों में पूरी तरह से पूर्ण पंक्तियों की अपेक्षा करें।",
|
|
756
|
+
"el": "Αναμένεται πλήρως ολοκληρωμένες γραμμές σε όλες τις στήλες.",
|
|
757
|
+
},
|
|
758
|
+
"all_row_complete_failure_text": {
|
|
759
|
+
"en": "Exceedance of failed test units where there weren't complete rows across all columns.",
|
|
760
|
+
"fr": "Dépassement des unités de test ayant échoué là où il n'y avait pas de lignes complètes dans toutes les colonnes.",
|
|
761
|
+
"de": "Überschreitung fehlgeschlagener Testeinheiten, bei denen nicht vollständige Zeilen über alle Spalten hinweg vorhanden waren.",
|
|
762
|
+
"it": "Superamento delle unità di test fallite in cui non c'erano righe complete su tutte le colonne.",
|
|
763
|
+
"es": "Se superó el número de unidades de prueba fallidas donde no había filas completas en todas las columnas.",
|
|
764
|
+
"pt": "Excedeu o número de unidades de teste com falha onde não havia linhas completas em todas as colunas.",
|
|
765
|
+
"ro": "Depășirea unităților de test eșuate unde nu au existat rânduri complete în toate coloanele.",
|
|
766
|
+
"tr": "Tüm sütunlarda eksiksiz satırların olmadığı başarısız test birimlerinin aşılması.",
|
|
767
|
+
"zh-Hans": "错误过多,其中在所有列中行不完整。",
|
|
768
|
+
"zh-Hant": "錯誤過多,在所有列中沒有完整的行。",
|
|
769
|
+
"ja": "すべての列で完全な行がないテスト単位の失敗の超過。",
|
|
770
|
+
"ko": "모든 열에 걸쳐 완전한 행이 아니었던 실패한 테스트 단위 초과.",
|
|
771
|
+
"vi": "Vượt quá số đơn vị kiểm tra thất bại trong đó không có các hàng đầy đủ trên tất cả các cột.",
|
|
772
|
+
"ru": "Превышение неудачных тестовых единиц, где не было полных строк по всем столбцам.",
|
|
773
|
+
"cs": "Překročení počtu neúspěšných testů, kde nebyly úplné řádky ve všech sloupcích.",
|
|
774
|
+
"pl": "Przekroczenie liczby niepomyślnych jednostek testowych, w których nie było kompletnych wierszy we wszystkich kolumnach.",
|
|
775
|
+
"da": "Overskridelse af antal fejlslagne enhedstests, hvor der ikke var komplette rækker på tværs af alle kolonner.",
|
|
776
|
+
"sv": "Överstiger antalet misslyckade enhetstest där det inte fanns kompletta rader över alla kolumner.",
|
|
777
|
+
"nb": "Overskridelse av mislykkede testenheter hvor det ikke var komplette rader på tvers av alle kolonner.",
|
|
778
|
+
"nl": "Overschrijding van mislukte testeenheden waarbij er geen complete rijen waren in alle kolommen.",
|
|
779
|
+
"fi": "Epäonnistuneiden testiyksikköjen ylitys, joissa ei ollut täydellisiä rivejä kaikissa sarakkeissa.",
|
|
780
|
+
"is": "Of mörg misheppnuð próf þar sem raðir voru ekki heildstæðar yfir alla dálka.",
|
|
781
|
+
"ar": "تجاوز وحدات الاختبار الفاشلة حيث لم تكن هناك صفوف مكتملة عبر جميع الأعمدة.",
|
|
782
|
+
"hi": "सभी स्तंभों में पूर्ण पंक्तियां नहीं थीं, ऐसे असफल परीक्षण इकाइयों की अधिकता।",
|
|
783
|
+
"el": "Υπέρβαση αποτυχημένων μονάδων δοκιμής όπου δεν υπήρχαν πλήρεις γραμμές σε όλες τις στήλες.",
|
|
784
|
+
},
|
|
785
|
+
"across_row_complete_expectation_text": {
|
|
786
|
+
"en": "Expect entirely complete rows across {column_text}.",
|
|
787
|
+
"fr": "On s'attend à des lignes entièrement complètes dans {column_text}.",
|
|
788
|
+
"de": "Erwarten Sie vollständig komplette Zeilen über {column_text} hinweg.",
|
|
789
|
+
"it": "Aspettati righe completamente complete su {column_text}.",
|
|
790
|
+
"es": "Se espera que las filas estén completamente completas en {column_text}.",
|
|
791
|
+
"pt": "Espera-se linhas completamente preenchidas em {column_text}.",
|
|
792
|
+
"ro": "Se așteaptă ca rândurile să fie complet complete în {column_text}.",
|
|
793
|
+
"tr": "{column_text} boyunca tamamen eksiksiz satırlar bekleyin.",
|
|
794
|
+
"zh-Hans": "预期在{column_text}中的行是完整的。",
|
|
795
|
+
"zh-Hant": "預期在{column_text}中的行是完整的。",
|
|
796
|
+
"ja": "{column_text}において完全に完全な行を期待します。",
|
|
797
|
+
"ko": "{column_text}에서 완전히 완성된 행을 기대합니다.",
|
|
798
|
+
"vi": "Kỳ vọng các hàng hoàn toàn đầy đủ trên {column_text}.",
|
|
799
|
+
"ru": "Ожидайте полностью заполненные строки в {column_text}.",
|
|
800
|
+
"cs": "Očekávejte zcela kompletní řádky v {column_text}.",
|
|
801
|
+
"pl": "Spodziewaj się w pełni kompletnych wierszy w {column_text}.",
|
|
802
|
+
"da": "Forvent helt komplette rækker på tværs af {column_text}.",
|
|
803
|
+
"sv": "Förvänta dig helt kompletta rader över {column_text}.",
|
|
804
|
+
"nb": "Forvent helt komplette rader på tvers av {column_text}.",
|
|
805
|
+
"nl": "Verwacht volledig complete rijen in {column_text}.",
|
|
806
|
+
"fi": "Odota täysin täydellisiä rivejä sarakkeissa {column_text}.",
|
|
807
|
+
"is": "Væntir þess að allar raðir séu heildstæðar yfir {column_text}.",
|
|
808
|
+
"ar": "توقع صفوف مكتملة تمامًا عبر {column_text}.",
|
|
809
|
+
"hi": "{column_text} में पूरी तरह से पूर्ण पंक्तियों की अपेक्षा करें।",
|
|
810
|
+
"el": "Αναμένεται πλήρως ολοκληρωμένες γραμμές στις στήλες {column_text}.",
|
|
811
|
+
},
|
|
812
|
+
"across_row_complete_failure_text": {
|
|
813
|
+
"en": "Exceedance of failed test units where there weren't complete rows across selected columns.",
|
|
814
|
+
"fr": "Dépassement des unités de test ayant échoué là où il n'y avait pas de lignes complètes dans les colonnes sélectionnées.",
|
|
815
|
+
"de": "Überschreitung fehlgeschlagener Testeinheiten, bei denen nicht vollständige Zeilen über die ausgewählten Spalten hinweg vorhanden waren.",
|
|
816
|
+
"it": "Superamento delle unità di test fallite in cui non c'erano righe complete nelle colonne selezionate.",
|
|
817
|
+
"es": "Se superó el número de unidades de prueba fallidas donde no había filas completas en las columnas seleccionadas.",
|
|
818
|
+
"pt": "Excedeu o número de unidades de teste com falha onde não havia linhas completas nas colunas selecionadas.",
|
|
819
|
+
"ro": "Depășirea unităților de test eșuate unde nu au existat rânduri complete în coloanele selectate.",
|
|
820
|
+
"tr": "Seçili sütunlarda eksiksiz satırların olmadığı başarısız test birimlerinin aşılması.",
|
|
821
|
+
"zh-Hans": "错误过多,其中在所选列中行不完整。",
|
|
822
|
+
"zh-Hant": "錯誤過多,在所選列中沒有完整的行。",
|
|
823
|
+
"ja": "選択された列で完全な行がないテスト単位の失敗の超過。",
|
|
824
|
+
"ko": "선택된 열에서 완전한 행이 아니었던 실패한 테스트 단위 초과.",
|
|
825
|
+
"vi": "Vượt quá số đơn vị kiểm tra thất bại trong đó không có các hàng đầy đủ trên các cột đã chọn.",
|
|
826
|
+
"ru": "Превышение неудачных тестовых единиц, где не было полных строк в выбранных столбцах.",
|
|
827
|
+
"cs": "Překročení počtu neúspěšných testů, kde nebyly úplné řádky ve vybraných sloupcích.",
|
|
828
|
+
"pl": "Przekroczenie liczby niepomyślnych jednostek testowych, w których nie było kompletnych wierszy w wybranych kolumnach.",
|
|
829
|
+
"da": "Overskridelse af antal fejlslagne enhedstests, hvor der ikke var komplette rækker på tværs af valgte kolonner.",
|
|
830
|
+
"sv": "Överstiger antalet misslyckade enhetstest där det inte fanns kompletta rader över valda kolumner.",
|
|
831
|
+
"nb": "Overskridelse av mislykkede testenheter hvor det ikke var komplette rader på tvers av valgte kolonner.",
|
|
832
|
+
"nl": "Overschrijding van mislukte testeenheden waarbij er geen complete rijen waren in geselecteerde kolommen.",
|
|
833
|
+
"fi": "Epäonnistuneiden testiyksikköjen ylitys, joissa ei ollut täydellisiä rivejä valituissa sarakkeissa.",
|
|
834
|
+
"is": "Of mörg misheppnuð próf þar sem raðir voru ekki heildstæðar yfir valda dálka.",
|
|
835
|
+
"ar": "تجاوز وحدات الاختبار الفاشلة حيث لم تكن هناك صفوف مكتملة عبر الأعمدة المحددة.",
|
|
836
|
+
"hi": "चयनित स्तंभों में पूर्ण पंक्तियां नहीं थीं, ऐसे असफल परीक्षण इकाइयों की अधिकता।",
|
|
837
|
+
"el": "Υπέρβαση αποτυχημένων μονάδων δοκιμής όπου δεν υπήρχαν πλήρεις γραμμές στις επιλεγμένες στήλες.",
|
|
838
|
+
},
|
|
731
839
|
"col_schema_match_expectation_text": {
|
|
732
840
|
"en": "Expect that column schemas match.",
|
|
733
841
|
"fr": "On s'attend à ce que les schémas de colonnes correspondent.",
|
|
@@ -1735,6 +1843,60 @@ STEP_REPORT_TEXT = {
|
|
|
1735
1843
|
"hi": "पंक्तियां स्तंभों के एक उपसमूह में अलग-अलग हैं",
|
|
1736
1844
|
"el": "Οι γραμμές είναι διακριτές σε ένα υποσύνολο στηλών",
|
|
1737
1845
|
},
|
|
1846
|
+
"rows_complete_all": {
|
|
1847
|
+
"en": "All rows are complete",
|
|
1848
|
+
"fr": "Toutes les lignes sont complètes",
|
|
1849
|
+
"de": "Alle Zeilen sind vollständig",
|
|
1850
|
+
"it": "Tutte le righe sono complete",
|
|
1851
|
+
"es": "Todas las filas están completas",
|
|
1852
|
+
"pt": "Todas as linhas estão completas",
|
|
1853
|
+
"ro": "Toate rândurile sunt complete",
|
|
1854
|
+
"tr": "Tüm satırlar eksiksizdir",
|
|
1855
|
+
"zh-Hans": "所有行都是完整的",
|
|
1856
|
+
"zh-Hant": "所有行都是完整的",
|
|
1857
|
+
"ja": "すべての行が完全です",
|
|
1858
|
+
"ko": "모든 행이 완전합니다",
|
|
1859
|
+
"vi": "Tất cả các hàng đều đầy đủ",
|
|
1860
|
+
"ru": "Все строки заполнены полностью",
|
|
1861
|
+
"cs": "Všechny řádky jsou úplné",
|
|
1862
|
+
"pl": "Wszystkie wiersze są kompletne",
|
|
1863
|
+
"da": "Alle rækker er komplette",
|
|
1864
|
+
"sv": "Alla rader är kompletta",
|
|
1865
|
+
"nb": "Alle rader er komplette",
|
|
1866
|
+
"nl": "Alle rijen zijn compleet",
|
|
1867
|
+
"fi": "Kaikki rivit ovat täydellisiä",
|
|
1868
|
+
"is": "Allar raðir eru heildstæðar",
|
|
1869
|
+
"ar": "جميع الصفوف مكتملة",
|
|
1870
|
+
"hi": "सभी पंक्तियां पूर्ण हैं",
|
|
1871
|
+
"el": "Όλες οι γραμμές είναι πλήρεις",
|
|
1872
|
+
},
|
|
1873
|
+
"rows_complete_subset": {
|
|
1874
|
+
"en": "Rows are complete across a subset of columns",
|
|
1875
|
+
"fr": "Les lignes sont complètes sur un sous-ensemble de colonnes",
|
|
1876
|
+
"de": "Zeilen sind in einer Teilmenge von Spalten vollständig",
|
|
1877
|
+
"it": "Le righe sono complete in un sottoinsieme di colonne",
|
|
1878
|
+
"es": "Las filas están completas en un subconjunto de columnas",
|
|
1879
|
+
"pt": "As linhas estão completas em um subconjunto de colunas",
|
|
1880
|
+
"ro": "Rândurile sunt complete într-un subset de coloane",
|
|
1881
|
+
"tr": "Satırlar, sütunların bir alt kümesinde eksiksizdir",
|
|
1882
|
+
"zh-Hans": "行在列的子集中是完整的",
|
|
1883
|
+
"zh-Hant": "行在列的子集中是完整的",
|
|
1884
|
+
"ja": "行は列のサブセット間で完全です",
|
|
1885
|
+
"ko": "행이 열의 하위 집합에서 완전합니다",
|
|
1886
|
+
"vi": "Các hàng đầy đủ trong một tập con của các cột",
|
|
1887
|
+
"ru": "Строки полностью заполнены в подмножестве столбцов",
|
|
1888
|
+
"cs": "Řádky jsou úplné napříč podmnožinou sloupců",
|
|
1889
|
+
"pl": "Wiersze są kompletne w podzbiorze kolumn",
|
|
1890
|
+
"da": "Rækker er komplette på tværs af en delmængde af kolonner",
|
|
1891
|
+
"sv": "Rader är kompletta över en delmängd av kolumner",
|
|
1892
|
+
"nb": "Rader er komplette på tvers av en delmengde av kolonner",
|
|
1893
|
+
"nl": "Rijen zijn compleet over een subset van kolommen",
|
|
1894
|
+
"fi": "Rivit ovat täydellisiä sarakkeiden osajoukossa",
|
|
1895
|
+
"is": "Raðir eru heildstæðar í undirsafni dálka",
|
|
1896
|
+
"ar": "الصفوف مكتملة عبر مجموعة فرعية من الأعمدة",
|
|
1897
|
+
"hi": "पंक्तियां स्तंभों के एक उपसमूह में पूर्ण हैं",
|
|
1898
|
+
"el": "Οι γραμμές είναι πλήρεις σε ένα υποσύνολο στηλών",
|
|
1899
|
+
},
|
|
1738
1900
|
"report_for_step_i": {
|
|
1739
1901
|
"en": "Report for Validation Step {i}",
|
|
1740
1902
|
"fr": "Rapport pour l'étape de validation {i}",
|
pointblank/_interrogation.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import functools
|
|
3
4
|
from dataclasses import dataclass
|
|
4
5
|
from typing import TYPE_CHECKING, Any
|
|
5
6
|
|
|
@@ -1219,6 +1220,36 @@ class Interrogator:
|
|
|
1219
1220
|
|
|
1220
1221
|
return tbl.to_native()
|
|
1221
1222
|
|
|
1223
|
+
def rows_complete(self) -> FrameT | Any:
    """
    Flag each row of `self.x` as complete or incomplete.

    A boolean `pb_is_good_` column is appended that is `True` for rows in which none of the
    checked columns is null. The checked columns come from `self.columns_subset`; the
    null-check helpers fall back to every column of the table when that subset is empty or
    `None`.

    Returns
    -------
    FrameT | Any
        For Ibis backends, the table with `pb_is_good_` added. For all other table types,
        the result of calling `to_native()` on the table with `pb_is_good_` added.
    """
    # Ibis backends ---------------------------------------------
    if self.tbl_type in IBIS_BACKENDS:
        # The helper appends `_any_is_null_`, which is `True` for rows containing a null
        flagged = _check_nulls_across_columns_ibis(
            table=self.x, columns_subset=self.columns_subset
        )

        # A passing row is the negation of that flag; the temporary flag column is removed
        # once `pb_is_good_` has been derived from it
        row_is_complete = ~flagged["_any_is_null_"]
        return flagged.mutate(pb_is_good_=row_is_complete).drop("_any_is_null_")

    # Local backends (Narwhals) ---------------------------------
    flagged = _check_nulls_across_columns_nw(table=self.x, columns_subset=self.columns_subset)

    # Same negation as above, expressed with a Narwhals column expression
    flagged = flagged.with_columns(pb_is_good_=~nw.col("_any_is_null_")).drop("_any_is_null_")

    # Hand back the table in its native format
    return flagged.to_native()
|
|
1252
|
+
|
|
1222
1253
|
|
|
1223
1254
|
@dataclass
|
|
1224
1255
|
class ColValsCompareOne:
|
|
@@ -1794,6 +1825,58 @@ class RowsDistinct:
|
|
|
1794
1825
|
return self.test_unit_res
|
|
1795
1826
|
|
|
1796
1827
|
|
|
1828
|
+
@dataclass
class RowsComplete:
    """
    Check if rows in a DataFrame are complete.

    The check is run when the object is constructed; the per-row results are then available
    through `get_test_results()`.

    Parameters
    ----------
    data_tbl
        A data table.
    columns_subset
        A list of columns to check for completeness.
    threshold
        The maximum number of failing test units to allow. The value is stored but is not
        consulted by the code in this class.
    tbl_type
        The type of table to use for the assertion: either `"local"` or one of the entries in
        `IBIS_BACKENDS`.

    Raises
    ------
    ValueError
        If `tbl_type` is neither `"local"` nor one of the `IBIS_BACKENDS`.
    """

    data_tbl: FrameT
    columns_subset: list[str] | None
    threshold: int
    tbl_type: str = "local"

    def __post_init__(self):
        is_local = self.tbl_type == "local"
        is_ibis = self.tbl_type in IBIS_BACKENDS

        # Fail with a clear message instead of an `UnboundLocalError` on `tbl` further down
        if not (is_local or is_ibis):
            raise ValueError(
                f"Unsupported `tbl_type`: {self.tbl_type!r}; expected 'local' or an Ibis backend."
            )

        if is_local:
            # Prepare the table for the column-subset test
            # NOTE(review): presumably this converts the table for Narwhals and validates
            # the `columns_subset=` columns -- confirm against `_column_subset_test_prep()`
            tbl = _column_subset_test_prep(df=self.data_tbl, columns_subset=self.columns_subset)

        # TODO: For Ibis backends, check if the columns exist and if the column types are
        # compatible; for now, just pass the table as is
        if is_ibis:
            tbl = self.data_tbl

        # Collect results for the test units; the result is the table returned by
        # `Interrogator.rows_complete()`, where `pb_is_good_` is `True` for a passing test unit
        self.test_unit_res = Interrogator(
            x=tbl,
            columns_subset=self.columns_subset,
            tbl_type=self.tbl_type,
        ).rows_complete()

    def get_test_results(self):
        """Return the test unit results computed during initialization."""
        return self.test_unit_res
|
|
1878
|
+
|
|
1879
|
+
|
|
1797
1880
|
@dataclass
|
|
1798
1881
|
class ColSchemaMatch:
|
|
1799
1882
|
"""
|
|
@@ -2207,6 +2290,40 @@ def _column_has_null_values(table: FrameT, column: str) -> bool:
|
|
|
2207
2290
|
return True
|
|
2208
2291
|
|
|
2209
2292
|
|
|
2293
|
+
def _check_nulls_across_columns_ibis(table, columns_subset):
    """
    Add an `_any_is_null_` column that flags rows having a null in any checked column.

    Parameters
    ----------
    table
        The table to inspect. It must support `table[col].isnull()`, `.columns`, and
        `.mutate()`.
    columns_subset
        A column name, a list of column names, or `None`. When empty or `None`, every column
        in `table.columns` is checked.

    Returns
    -------
    The result of `table.mutate(_any_is_null_=...)`, where the new column is the OR of the
    `isnull()` result of each checked column.
    """
    # A bare column name would otherwise be iterated character by character
    if isinstance(columns_subset, str) and columns_subset:
        columns_subset = [columns_subset]

    # Use the requested subset, falling back to every column in the table
    column_names = columns_subset if columns_subset else table.columns

    # Combine each column's `isnull()` with OR operations
    null_expr = None
    for col in column_names:
        col_is_null = table[col].isnull()
        null_expr = col_is_null if null_expr is None else null_expr | col_is_null

    # NOTE: `null_expr` is still `None` here if there were no columns to check
    return table.mutate(_any_is_null_=null_expr)
|
|
2308
|
+
|
|
2309
|
+
|
|
2310
|
+
def _check_nulls_across_columns_nw(table, columns_subset):
    """
    Add an `_any_is_null_` column that flags rows having a null in any checked column.

    Parameters
    ----------
    table
        The table to inspect. It must support `table[col].is_null()`, `.columns`, and
        `.with_columns()`.
    columns_subset
        A column name, a list of column names, or `None`. When empty or `None`, every column
        in `table.columns` is checked.

    Returns
    -------
    The result of `table.with_columns(_any_is_null_=...)`, where the new column is the OR of
    the `is_null()` result of each checked column.
    """
    # A bare column name would otherwise be iterated character by character
    if isinstance(columns_subset, str) and columns_subset:
        columns_subset = [columns_subset]

    # Use the requested subset, falling back to every column in the table
    column_names = columns_subset if columns_subset else table.columns

    # Combine each column's `is_null()` with OR operations
    null_expr = None
    for col in column_names:
        col_is_null = table[col].is_null()
        null_expr = col_is_null if null_expr is None else null_expr | col_is_null

    # NOTE: `null_expr` is still `None` here if there were no columns to check
    return table.with_columns(_any_is_null_=null_expr)
|
|
2325
|
+
|
|
2326
|
+
|
|
2210
2327
|
def _modify_datetime_compare_val(tgt_column: any, compare_val: any) -> any:
|
|
2211
2328
|
tgt_col_dtype_str = str(tgt_column.dtype).lower()
|
|
2212
2329
|
|
pointblank/_utils.py
CHANGED
pointblank/data/api-docs.txt
CHANGED
|
@@ -4367,6 +4367,192 @@ rows_distinct(self, columns_subset: 'str | list[str] | None' = None, pre: 'Calla
|
|
|
4367
4367
|
others.
|
|
4368
4368
|
|
|
4369
4369
|
|
|
4370
|
+
rows_complete(self, columns_subset: 'str | list[str] | None' = None, pre: 'Callable | None' = None, segments: 'SegmentSpec | None' = None, thresholds: 'int | float | bool | tuple | dict | Thresholds' = None, actions: 'Actions | None' = None, brief: 'str | bool | None' = None, active: 'bool' = True) -> 'Validate'
|
|
4371
|
+
|
|
4372
|
+
Validate whether row data are complete by having no missing values.
|
|
4373
|
+
|
|
4374
|
+
The `rows_complete()` method checks whether rows in the table are complete. Completeness
|
|
4375
|
+
of a row means that there are no missing values within the row. This validation will operate
|
|
4376
|
+
over the number of test units that is equal to the number of rows in the table (determined
|
|
4377
|
+
after any `pre=` mutation has been applied). A subset of columns can be specified for the
|
|
4378
|
+
completeness check. If no subset is provided, all columns in the table will be used.
|
|
4379
|
+
|
|
4380
|
+
Parameters
|
|
4381
|
+
----------
|
|
4382
|
+
columns_subset
|
|
4383
|
+
A single column or a list of columns to use as a subset for the completeness check. If
|
|
4384
|
+
`None` (the default), then all columns in the table will be used.
|
|
4385
|
+
pre
|
|
4386
|
+
An optional preprocessing function or lambda to apply to the data table during
|
|
4387
|
+
interrogation. This function should take a table as input and return a modified table.
|
|
4388
|
+
Have a look at the *Preprocessing* section for more information on how to use this
|
|
4389
|
+
argument.
|
|
4390
|
+
segments
|
|
4391
|
+
An optional directive on segmentation, which serves to split a validation step into
|
|
4392
|
+
multiple (one step per segment). Can be a single column name, a tuple that specifies a
|
|
4393
|
+
column name and its corresponding values to segment on, or a combination of both
|
|
4394
|
+
(provided as a list). Read the *Segmentation* section for usage information.
|
|
4395
|
+
thresholds
|
|
4396
|
+
Set threshold failure levels for reporting and reacting to exceedances of the levels.
|
|
4397
|
+
The thresholds are set at the step level and will override any global thresholds set in
|
|
4398
|
+
`Validate(thresholds=...)`. The default is `None`, which means that no thresholds will
|
|
4399
|
+
be set locally and global thresholds (if any) will take effect. Look at the *Thresholds*
|
|
4400
|
+
section for information on how to set threshold levels.
|
|
4401
|
+
actions
|
|
4402
|
+
Optional actions to take when the validation step meets or exceeds any set threshold
|
|
4403
|
+
levels. If provided, the [`Actions`](`pointblank.Actions`) class should be used to
|
|
4404
|
+
define the actions.
|
|
4405
|
+
brief
|
|
4406
|
+
An optional brief description of the validation step that will be displayed in the
|
|
4407
|
+
reporting table. You can use the templating elements like `"{step}"` to insert
|
|
4408
|
+
the step number, or `"{auto}"` to include an automatically generated brief. If `True`
|
|
4409
|
+
the entire brief will be automatically generated. If `None` (the default) then there
|
|
4410
|
+
won't be a brief.
|
|
4411
|
+
active
|
|
4412
|
+
A boolean value indicating whether the validation step should be active. Using `False`
|
|
4413
|
+
will make the validation step inactive (still reporting its presence and keeping indexes
|
|
4414
|
+
for the steps unchanged).
|
|
4415
|
+
|
|
4416
|
+
Returns
|
|
4417
|
+
-------
|
|
4418
|
+
Validate
|
|
4419
|
+
The `Validate` object with the added validation step.
|
|
4420
|
+
|
|
4421
|
+
Preprocessing
|
|
4422
|
+
-------------
|
|
4423
|
+
The `pre=` argument allows for a preprocessing function or lambda to be applied to the data
|
|
4424
|
+
table during interrogation. This function should take a table as input and return a modified
|
|
4425
|
+
table. This is useful for performing any necessary transformations or filtering on the data
|
|
4426
|
+
before the validation step is applied.
|
|
4427
|
+
|
|
4428
|
+
The preprocessing function can be any callable that takes a table as input and returns a
|
|
4429
|
+
modified table. For example, you could use a lambda function to filter the table based on
|
|
4430
|
+
certain criteria or to apply a transformation to the data. Note that you can refer to
|
|
4431
|
+
columns via `columns_subset=` that are expected to be present in the transformed table, but
|
|
4432
|
+
may not exist in the table before preprocessing. Regarding the lifetime of the transformed
|
|
4433
|
+
table, it only exists during the validation step and is not stored in the `Validate` object
|
|
4434
|
+
or used in subsequent validation steps.
|
|
4435
|
+
|
|
4436
|
+
Segmentation
|
|
4437
|
+
------------
|
|
4438
|
+
The `segments=` argument allows for the segmentation of a validation step into multiple
|
|
4439
|
+
segments. This is useful for applying the same validation step to different subsets of the
|
|
4440
|
+
data. The segmentation can be done based on a single column or specific fields within a
|
|
4441
|
+
column.
|
|
4442
|
+
|
|
4443
|
+
Providing a single column name will result in a separate validation step for each unique
|
|
4444
|
+
value in that column. For example, if you have a column called `"region"` with values
|
|
4445
|
+
`"North"`, `"South"`, and `"East"`, the validation step will be applied separately to each
|
|
4446
|
+
region.
|
|
4447
|
+
|
|
4448
|
+
Alternatively, you can provide a tuple that specifies a column name and its corresponding
|
|
4449
|
+
values to segment on. For example, if you have a column called `"date"` and you want to
|
|
4450
|
+
segment on only specific dates, you can provide a tuple like
|
|
4451
|
+
`("date", ["2023-01-01", "2023-01-02"])`. Any other values in the column will be disregarded
|
|
4452
|
+
(i.e., no validation steps will be created for them).
|
|
4453
|
+
|
|
4454
|
+
A list with a combination of column names and tuples can be provided as well. This allows
|
|
4455
|
+
for more complex segmentation scenarios. The following inputs are all valid:
|
|
4456
|
+
|
|
4457
|
+
- `segments=["region", ("date", ["2023-01-01", "2023-01-02"])]`: segments on unique values
|
|
4458
|
+
in the `"region"` column and specific dates in the `"date"` column
|
|
4459
|
+
- `segments=["region", "date"]`: segments on unique values in the `"region"` and `"date"`
|
|
4460
|
+
columns
|
|
4461
|
+
|
|
4462
|
+
The segmentation is performed during interrogation, and the resulting validation steps will
|
|
4463
|
+
be numbered sequentially. Each segment will have its own validation step, and the results
|
|
4464
|
+
will be reported separately. This allows for a more granular analysis of the data and helps
|
|
4465
|
+
identify issues within specific segments.
|
|
4466
|
+
|
|
4467
|
+
Importantly, the segmentation process will be performed after any preprocessing of the data
|
|
4468
|
+
table. Because of this, one can conceivably use the `pre=` argument to generate a column
|
|
4469
|
+
that can be used for segmentation. For example, you could create a new column called
|
|
4470
|
+
`"segment"` through use of `pre=` and then use that column for segmentation.
|
|
4471
|
+
|
|
4472
|
+
Thresholds
|
|
4473
|
+
----------
|
|
4474
|
+
The `thresholds=` parameter is used to set the failure-condition levels for the validation
|
|
4475
|
+
step. If they are set here at the step level, these thresholds will override any thresholds
|
|
4476
|
+
set at the global level in `Validate(thresholds=...)`.
|
|
4477
|
+
|
|
4478
|
+
There are three threshold levels: 'warning', 'error', and 'critical'. The threshold values
|
|
4479
|
+
can either be set as the proportion of all test units that fail (a value between `0` and `1`),
|
|
4480
|
+
or, the absolute number of failing test units (as an integer that's `1` or greater).
|
|
4481
|
+
|
|
4482
|
+
Thresholds can be defined using one of these input schemes:
|
|
4483
|
+
|
|
4484
|
+
1. use the [`Thresholds`](`pointblank.Thresholds`) class (the most direct way to create
|
|
4485
|
+
thresholds)
|
|
4486
|
+
2. provide a tuple of 1-3 values, where position `0` is the 'warning' level, position `1` is
|
|
4487
|
+
the 'error' level, and position `2` is the 'critical' level
|
|
4488
|
+
3. create a dictionary of 1-3 value entries; the valid keys are 'warning', 'error', and
|
|
4489
|
+
'critical'
|
|
4490
|
+
4. a single integer/float value denoting absolute number or fraction of failing test units
|
|
4491
|
+
for the 'warning' level only
|
|
4492
|
+
|
|
4493
|
+
If the number of failing test units exceeds set thresholds, the validation step will be
|
|
4494
|
+
marked as 'warning', 'error', or 'critical'. All of the threshold levels don't need to be
|
|
4495
|
+
set, you're free to set any combination of them.
|
|
4496
|
+
|
|
4497
|
+
Aside from reporting failure conditions, thresholds can be used to determine the actions to
|
|
4498
|
+
take for each level of failure (using the `actions=` parameter).
|
|
4499
|
+
|
|
4500
|
+
Examples
|
|
4501
|
+
--------
|
|
4502
|
+
For the examples here, we'll use a simple Polars DataFrame with three string columns
|
|
4503
|
+
(`col_1`, `col_2`, and `col_3`). The table is shown below:
|
|
4504
|
+
|
|
4505
|
+
```python
|
|
4506
|
+
import pointblank as pb
|
|
4507
|
+
import polars as pl
|
|
4508
|
+
|
|
4509
|
+
tbl = pl.DataFrame(
|
|
4510
|
+
{
|
|
4511
|
+
"col_1": ["a", None, "c", "d"],
|
|
4512
|
+
"col_2": ["a", "a", "c", None],
|
|
4513
|
+
"col_3": ["a", "a", "d", None],
|
|
4514
|
+
}
|
|
4515
|
+
)
|
|
4516
|
+
|
|
4517
|
+
pb.preview(tbl)
|
|
4518
|
+
```
|
|
4519
|
+
|
|
4520
|
+
Let's validate that the rows in the table are complete with `rows_complete()`. We'll
|
|
4521
|
+
determine if this validation had any failing test units (there are four test units, one for
|
|
4522
|
+
each row). A failing test unit means that a given row is not complete (i.e., has at least
|
|
4523
|
+
one missing value).
|
|
4524
|
+
|
|
4525
|
+
```python
|
|
4526
|
+
validation = (
|
|
4527
|
+
pb.Validate(data=tbl)
|
|
4528
|
+
.rows_complete()
|
|
4529
|
+
.interrogate()
|
|
4530
|
+
)
|
|
4531
|
+
|
|
4532
|
+
validation
|
|
4533
|
+
```
|
|
4534
|
+
|
|
4535
|
+
From this validation table we see that there are two failing test units. This is because
|
|
4536
|
+
two rows in the table have at least one missing value (the second row and the last row).
|
|
4537
|
+
|
|
4538
|
+
We can also use a subset of columns to determine completeness. Let's specify the subset
|
|
4539
|
+
using columns `col_2` and `col_3` for the next validation.
|
|
4540
|
+
|
|
4541
|
+
```python
|
|
4542
|
+
validation = (
|
|
4543
|
+
pb.Validate(data=tbl)
|
|
4544
|
+
.rows_complete(columns_subset=["col_2", "col_3"])
|
|
4545
|
+
.interrogate()
|
|
4546
|
+
)
|
|
4547
|
+
|
|
4548
|
+
validation
|
|
4549
|
+
```
|
|
4550
|
+
|
|
4551
|
+
The validation table reports a single failing test unit. The last row contains missing
|
|
4552
|
+
values in both the `col_2` and `col_3` columns.
|
|
4553
|
+
All other rows are complete across these two columns.
|
|
4554
|
+
|
|
4555
|
+
|
|
4370
4556
|
col_schema_match(self, schema: 'Schema', complete: 'bool' = True, in_order: 'bool' = True, case_sensitive_colnames: 'bool' = True, case_sensitive_dtypes: 'bool' = True, full_match_dtypes: 'bool' = True, pre: 'Callable | None' = None, thresholds: 'int | float | bool | tuple | dict | Thresholds' = None, actions: 'Actions | None' = None, brief: 'str | bool | None' = None, active: 'bool' = True) -> 'Validate'
|
|
4371
4557
|
|
|
4372
4558
|
Do columns in the table (and their types) match a predefined schema?
|
|
@@ -6614,6 +6800,7 @@ get_step_report(self, i: 'int', columns_subset: 'str | list[str] | Column | None
|
|
|
6614
6800
|
- [`col_vals_regex()`](`pointblank.Validate.col_vals_regex`)
|
|
6615
6801
|
- [`col_vals_null()`](`pointblank.Validate.col_vals_null`)
|
|
6616
6802
|
- [`col_vals_not_null()`](`pointblank.Validate.col_vals_not_null`)
|
|
6803
|
+
- [`rows_complete()`](`pointblank.Validate.rows_complete`)
|
|
6617
6804
|
- [`conjointly()`](`pointblank.Validate.conjointly`)
|
|
6618
6805
|
|
|
6619
6806
|
The [`rows_distinct()`](`pointblank.Validate.rows_distinct`) validation step will produce a
|
|
@@ -6698,17 +6885,133 @@ get_json_report(self, use_fields: 'list[str] | None' = None, exclude_fields: 'li
|
|
|
6698
6885
|
|
|
6699
6886
|
Get a report of the validation results as a JSON-formatted string.
|
|
6700
6887
|
|
|
6888
|
+
The `get_json_report()` method provides a machine-readable report of validation results in
|
|
6889
|
+
JSON format. This is particularly useful for programmatic processing, storing validation
|
|
6890
|
+
results, or integrating with other systems. The report includes detailed information about
|
|
6891
|
+
each validation step, such as assertion type, columns validated, threshold values, test
|
|
6892
|
+
results, and more.
|
|
6893
|
+
|
|
6894
|
+
By default, all available validation information fields are included in the report. However,
|
|
6895
|
+
you can customize the fields to include or exclude using the `use_fields=` and
|
|
6896
|
+
`exclude_fields=` parameters.
|
|
6897
|
+
|
|
6701
6898
|
Parameters
|
|
6702
6899
|
----------
|
|
6703
6900
|
use_fields
|
|
6704
|
-
|
|
6901
|
+
An optional list of specific fields to include in the report. If provided, only these
|
|
6902
|
+
fields will be included in the JSON output. If `None` (the default), all standard
|
|
6903
|
+
validation report fields are included. Have a look at the *Available Report Fields*
|
|
6904
|
+
section below for a list of fields that can be included in the report.
|
|
6705
6905
|
exclude_fields
|
|
6706
|
-
|
|
6906
|
+
An optional list of fields to exclude from the report. If provided, these fields will
|
|
6907
|
+
be omitted from the JSON output. If `None` (the default), no fields are excluded.
|
|
6908
|
+
This parameter cannot be used together with `use_fields=`. The *Available Report Fields*
|
|
6909
|
+
section provides a listing of fields that can be excluded from the report.
|
|
6707
6910
|
|
|
6708
6911
|
Returns
|
|
6709
6912
|
-------
|
|
6710
6913
|
str
|
|
6711
|
-
A JSON-formatted string representing the validation report
|
|
6914
|
+
A JSON-formatted string representing the validation report, with each validation step
|
|
6915
|
+
as an object in the report array.
|
|
6916
|
+
|
|
6917
|
+
Available Report Fields
|
|
6918
|
+
-----------------------
|
|
6919
|
+
The JSON report can include any of the standard validation report fields, including:
|
|
6920
|
+
|
|
6921
|
+
- `i`: the step number (1-indexed)
|
|
6922
|
+
- `i_o`: the original step index from the validation plan (pre-expansion)
|
|
6923
|
+
- `assertion_type`: the type of validation assertion (e.g., `"col_vals_gt"`, etc.)
|
|
6924
|
+
- `column`: the column being validated (or columns used in certain validations)
|
|
6925
|
+
- `values`: the comparison values or parameters used in the validation
|
|
6926
|
+
- `inclusive`: whether the comparison is inclusive (for range-based validations)
|
|
6927
|
+
- `na_pass`: whether `NA`/`Null` values are considered passing (for certain validations)
|
|
6928
|
+
- `pre`: preprocessing function applied before validation
|
|
6929
|
+
- `segments`: data segments to which the validation was applied
|
|
6930
|
+
- `thresholds`: threshold level statement that was used for the validation step
|
|
6931
|
+
- `label`: custom label for the validation step
|
|
6932
|
+
- `brief`: a brief description of the validation step
|
|
6933
|
+
- `active`: whether the validation step is active
|
|
6934
|
+
- `all_passed`: whether all test units passed in the step
|
|
6935
|
+
- `n`: total number of test units
|
|
6936
|
+
- `n_passed`, `n_failed`: number of test units that passed and failed
|
|
6937
|
+
- `f_passed`, `f_failed`: fraction of test units that passed and failed
|
|
6938
|
+
- `warning`, `error`, `critical`: whether the namesake threshold level was exceeded (is
|
|
6939
|
+
`null` if threshold not set)
|
|
6940
|
+
- `time_processed`: when the validation step was processed (ISO 8601 format)
|
|
6941
|
+
- `proc_duration_s`: the processing duration in seconds
|
|
6942
|
+
|
|
6943
|
+
Examples
|
|
6944
|
+
--------
|
|
6945
|
+
Let's create a validation plan with a few validation steps and generate a JSON report of the
|
|
6946
|
+
results:
|
|
6947
|
+
|
|
6948
|
+
```python
|
|
6949
|
+
import pointblank as pb
|
|
6950
|
+
import polars as pl
|
|
6951
|
+
|
|
6952
|
+
# Create a sample DataFrame
|
|
6953
|
+
tbl = pl.DataFrame({
|
|
6954
|
+
"a": [5, 7, 8, 9],
|
|
6955
|
+
"b": [3, 4, 2, 1]
|
|
6956
|
+
})
|
|
6957
|
+
|
|
6958
|
+
# Create and execute a validation plan
|
|
6959
|
+
validation = (
|
|
6960
|
+
pb.Validate(data=tbl)
|
|
6961
|
+
.col_vals_gt(columns="a", value=6)
|
|
6962
|
+
.col_vals_lt(columns="b", value=4)
|
|
6963
|
+
.interrogate()
|
|
6964
|
+
)
|
|
6965
|
+
|
|
6966
|
+
# Get the full JSON report
|
|
6967
|
+
json_report = validation.get_json_report()
|
|
6968
|
+
|
|
6969
|
+
print(json_report)
|
|
6970
|
+
```
|
|
6971
|
+
|
|
6972
|
+
You can also customize which fields to include:
|
|
6973
|
+
|
|
6974
|
+
```python
|
|
6975
|
+
json_report = validation.get_json_report(
|
|
6976
|
+
use_fields=["i", "assertion_type", "column", "n_passed", "n_failed"]
|
|
6977
|
+
)
|
|
6978
|
+
|
|
6979
|
+
print(json_report)
|
|
6980
|
+
```
|
|
6981
|
+
|
|
6982
|
+
Or which fields to exclude:
|
|
6983
|
+
|
|
6984
|
+
```python
|
|
6985
|
+
json_report = validation.get_json_report(
|
|
6986
|
+
exclude_fields=[
|
|
6987
|
+
"i_o", "thresholds", "pre", "segments", "values",
|
|
6988
|
+
"na_pass", "inclusive", "label", "brief", "active",
|
|
6989
|
+
"time_processed", "proc_duration_s"
|
|
6990
|
+
]
|
|
6991
|
+
)
|
|
6992
|
+
|
|
6993
|
+
print(json_report)
|
|
6994
|
+
```
|
|
6995
|
+
|
|
6996
|
+
The JSON output can be further processed or analyzed programmatically:
|
|
6997
|
+
|
|
6998
|
+
```python
|
|
6999
|
+
import json
|
|
7000
|
+
|
|
7001
|
+
# Parse the JSON report
|
|
7002
|
+
report_data = json.loads(validation.get_json_report())
|
|
7003
|
+
|
|
7004
|
+
# Extract and analyze validation results
|
|
7005
|
+
failing_steps = [step for step in report_data if step["n_failed"] > 0]
|
|
7006
|
+
print(f"Number of failing validation steps: {len(failing_steps)}")
|
|
7007
|
+
```
|
|
7008
|
+
|
|
7009
|
+
See Also
|
|
7010
|
+
--------
|
|
7011
|
+
- [`get_tabular_report()`](`pointblank.Validate.get_tabular_report`): Get a formatted HTML
|
|
7012
|
+
report as a GT table
|
|
7013
|
+
- [`get_data_extracts()`](`pointblank.Validate.get_data_extracts`): Get rows that
|
|
7014
|
+
failed validation
|
|
6712
7015
|
|
|
6713
7016
|
|
|
6714
7017
|
get_sundered_data(self, type='pass') -> 'FrameT'
|
pointblank/validate.py
CHANGED
|
@@ -56,6 +56,7 @@ from pointblank._interrogation import (
|
|
|
56
56
|
ConjointlyValidation,
|
|
57
57
|
NumberOfTestUnits,
|
|
58
58
|
RowCountMatch,
|
|
59
|
+
RowsComplete,
|
|
59
60
|
RowsDistinct,
|
|
60
61
|
)
|
|
61
62
|
from pointblank._typing import SegmentSpec
|
|
@@ -6546,6 +6547,243 @@ class Validate:
|
|
|
6546
6547
|
|
|
6547
6548
|
return self
|
|
6548
6549
|
|
|
6550
|
+
def rows_complete(
|
|
6551
|
+
self,
|
|
6552
|
+
columns_subset: str | list[str] | None = None,
|
|
6553
|
+
pre: Callable | None = None,
|
|
6554
|
+
segments: SegmentSpec | None = None,
|
|
6555
|
+
thresholds: int | float | bool | tuple | dict | Thresholds = None,
|
|
6556
|
+
actions: Actions | None = None,
|
|
6557
|
+
brief: str | bool | None = None,
|
|
6558
|
+
active: bool = True,
|
|
6559
|
+
) -> Validate:
|
|
6560
|
+
"""
|
|
6561
|
+
Validate whether row data are complete by having no missing values.
|
|
6562
|
+
|
|
6563
|
+
The `rows_complete()` method checks whether rows in the table are complete. Completeness
|
|
6564
|
+
of a row means that there are no missing values within the row. This validation will operate
|
|
6565
|
+
over the number of test units that is equal to the number of rows in the table (determined
|
|
6566
|
+
after any `pre=` mutation has been applied). A subset of columns can be specified for the
|
|
6567
|
+
completeness check. If no subset is provided, all columns in the table will be used.
|
|
6568
|
+
|
|
6569
|
+
Parameters
|
|
6570
|
+
----------
|
|
6571
|
+
columns_subset
|
|
6572
|
+
A single column or a list of columns to use as a subset for the completeness check. If
|
|
6573
|
+
`None` (the default), then all columns in the table will be used.
|
|
6574
|
+
pre
|
|
6575
|
+
An optional preprocessing function or lambda to apply to the data table during
|
|
6576
|
+
interrogation. This function should take a table as input and return a modified table.
|
|
6577
|
+
Have a look at the *Preprocessing* section for more information on how to use this
|
|
6578
|
+
argument.
|
|
6579
|
+
segments
|
|
6580
|
+
An optional directive on segmentation, which serves to split a validation step into
|
|
6581
|
+
multiple (one step per segment). Can be a single column name, a tuple that specifies a
|
|
6582
|
+
column name and its corresponding values to segment on, or a combination of both
|
|
6583
|
+
(provided as a list). Read the *Segmentation* section for usage information.
|
|
6584
|
+
thresholds
|
|
6585
|
+
Set threshold failure levels for reporting and reacting to exceedances of the levels.
|
|
6586
|
+
The thresholds are set at the step level and will override any global thresholds set in
|
|
6587
|
+
`Validate(thresholds=...)`. The default is `None`, which means that no thresholds will
|
|
6588
|
+
be set locally and global thresholds (if any) will take effect. Look at the *Thresholds*
|
|
6589
|
+
section for information on how to set threshold levels.
|
|
6590
|
+
actions
|
|
6591
|
+
Optional actions to take when the validation step meets or exceeds any set threshold
|
|
6592
|
+
levels. If provided, the [`Actions`](`pointblank.Actions`) class should be used to
|
|
6593
|
+
define the actions.
|
|
6594
|
+
brief
|
|
6595
|
+
An optional brief description of the validation step that will be displayed in the
|
|
6596
|
+
reporting table. You can use the templating elements like `"{step}"` to insert
|
|
6597
|
+
the step number, or `"{auto}"` to include an automatically generated brief. If `True`
|
|
6598
|
+
the entire brief will be automatically generated. If `None` (the default) then there
|
|
6599
|
+
won't be a brief.
|
|
6600
|
+
active
|
|
6601
|
+
A boolean value indicating whether the validation step should be active. Using `False`
|
|
6602
|
+
will make the validation step inactive (still reporting its presence and keeping indexes
|
|
6603
|
+
for the steps unchanged).
|
|
6604
|
+
|
|
6605
|
+
Returns
|
|
6606
|
+
-------
|
|
6607
|
+
Validate
|
|
6608
|
+
The `Validate` object with the added validation step.
|
|
6609
|
+
|
|
6610
|
+
Preprocessing
|
|
6611
|
+
-------------
|
|
6612
|
+
The `pre=` argument allows for a preprocessing function or lambda to be applied to the data
|
|
6613
|
+
table during interrogation. This function should take a table as input and return a modified
|
|
6614
|
+
table. This is useful for performing any necessary transformations or filtering on the data
|
|
6615
|
+
before the validation step is applied.
|
|
6616
|
+
|
|
6617
|
+
The preprocessing function can be any callable that takes a table as input and returns a
|
|
6618
|
+
modified table. For example, you could use a lambda function to filter the table based on
|
|
6619
|
+
certain criteria or to apply a transformation to the data. Note that you can refer to
|
|
6620
|
+
columns via `columns_subset=` that are expected to be present in the transformed table, but
|
|
6621
|
+
may not exist in the table before preprocessing. Regarding the lifetime of the transformed
|
|
6622
|
+
table, it only exists during the validation step and is not stored in the `Validate` object
|
|
6623
|
+
or used in subsequent validation steps.
|
|
6624
|
+
|
|
6625
|
+
Segmentation
|
|
6626
|
+
------------
|
|
6627
|
+
The `segments=` argument allows for the segmentation of a validation step into multiple
|
|
6628
|
+
segments. This is useful for applying the same validation step to different subsets of the
|
|
6629
|
+
data. The segmentation can be done based on a single column or specific fields within a
|
|
6630
|
+
column.
|
|
6631
|
+
|
|
6632
|
+
Providing a single column name will result in a separate validation step for each unique
|
|
6633
|
+
value in that column. For example, if you have a column called `"region"` with values
|
|
6634
|
+
`"North"`, `"South"`, and `"East"`, the validation step will be applied separately to each
|
|
6635
|
+
region.
|
|
6636
|
+
|
|
6637
|
+
Alternatively, you can provide a tuple that specifies a column name and its corresponding
|
|
6638
|
+
values to segment on. For example, if you have a column called `"date"` and you want to
|
|
6639
|
+
segment on only specific dates, you can provide a tuple like
|
|
6640
|
+
`("date", ["2023-01-01", "2023-01-02"])`. Any other values in the column will be disregarded
|
|
6641
|
+
(i.e., no validation steps will be created for them).
|
|
6642
|
+
|
|
6643
|
+
A list with a combination of column names and tuples can be provided as well. This allows
|
|
6644
|
+
for more complex segmentation scenarios. The following inputs are all valid:
|
|
6645
|
+
|
|
6646
|
+
- `segments=["region", ("date", ["2023-01-01", "2023-01-02"])]`: segments on unique values
|
|
6647
|
+
in the `"region"` column and specific dates in the `"date"` column
|
|
6648
|
+
- `segments=["region", "date"]`: segments on unique values in the `"region"` and `"date"`
|
|
6649
|
+
columns
|
|
6650
|
+
|
|
6651
|
+
The segmentation is performed during interrogation, and the resulting validation steps will
|
|
6652
|
+
be numbered sequentially. Each segment will have its own validation step, and the results
|
|
6653
|
+
will be reported separately. This allows for a more granular analysis of the data and helps
|
|
6654
|
+
identify issues within specific segments.
|
|
6655
|
+
|
|
6656
|
+
Importantly, the segmentation process will be performed after any preprocessing of the data
|
|
6657
|
+
table. Because of this, one can conceivably use the `pre=` argument to generate a column
|
|
6658
|
+
that can be used for segmentation. For example, you could create a new column called
|
|
6659
|
+
`"segment"` through use of `pre=` and then use that column for segmentation.
|
|
6660
|
+
|
|
6661
|
+
Thresholds
|
|
6662
|
+
----------
|
|
6663
|
+
The `thresholds=` parameter is used to set the failure-condition levels for the validation
|
|
6664
|
+
step. If they are set here at the step level, these thresholds will override any thresholds
|
|
6665
|
+
set at the global level in `Validate(thresholds=...)`.
|
|
6666
|
+
|
|
6667
|
+
There are three threshold levels: 'warning', 'error', and 'critical'. The threshold values
|
|
6668
|
+
can either be set as the proportion of all test units that fail (a value between `0` and `1`),
|
|
6669
|
+
or as the absolute number of failing test units (an integer that's `1` or greater).
|
|
6670
|
+
|
|
6671
|
+
Thresholds can be defined using one of these input schemes:
|
|
6672
|
+
|
|
6673
|
+
1. use the [`Thresholds`](`pointblank.Thresholds`) class (the most direct way to create
|
|
6674
|
+
thresholds)
|
|
6675
|
+
2. provide a tuple of 1-3 values, where position `0` is the 'warning' level, position `1` is
|
|
6676
|
+
the 'error' level, and position `2` is the 'critical' level
|
|
6677
|
+
3. create a dictionary of 1-3 value entries; the valid keys are 'warning', 'error', and
|
|
6678
|
+
'critical'
|
|
6679
|
+
4. a single integer/float value denoting absolute number or fraction of failing test units
|
|
6680
|
+
for the 'warning' level only
|
|
6681
|
+
|
|
6682
|
+
If the number of failing test units exceeds set thresholds, the validation step will be
|
|
6683
|
+
marked as 'warning', 'error', or 'critical'. Not all of the threshold levels need to be
|
|
6684
|
+
set, you're free to set any combination of them.
|
|
6685
|
+
|
|
6686
|
+
Aside from reporting failure conditions, thresholds can be used to determine the actions to
|
|
6687
|
+
take for each level of failure (using the `actions=` parameter).
|
|
6688
|
+
|
|
6689
|
+
Examples
|
|
6690
|
+
--------
|
|
6691
|
+
```{python}
|
|
6692
|
+
#| echo: false
|
|
6693
|
+
#| output: false
|
|
6694
|
+
import pointblank as pb
|
|
6695
|
+
pb.config(report_incl_header=False, report_incl_footer=False, preview_incl_header=False)
|
|
6696
|
+
```
|
|
6697
|
+
For the examples here, we'll use a simple Polars DataFrame with three string columns
|
|
6698
|
+
(`col_1`, `col_2`, and `col_3`). The table is shown below:
|
|
6699
|
+
|
|
6700
|
+
```{python}
|
|
6701
|
+
import pointblank as pb
|
|
6702
|
+
import polars as pl
|
|
6703
|
+
|
|
6704
|
+
tbl = pl.DataFrame(
|
|
6705
|
+
{
|
|
6706
|
+
"col_1": ["a", None, "c", "d"],
|
|
6707
|
+
"col_2": ["a", "a", "c", None],
|
|
6708
|
+
"col_3": ["a", "a", "d", None],
|
|
6709
|
+
}
|
|
6710
|
+
)
|
|
6711
|
+
|
|
6712
|
+
pb.preview(tbl)
|
|
6713
|
+
```
|
|
6714
|
+
|
|
6715
|
+
Let's validate that the rows in the table are complete with `rows_complete()`. We'll
|
|
6716
|
+
determine if this validation had any failing test units (there are four test units, one for
|
|
6717
|
+
each row). A failing test unit means that a given row is not complete (i.e., has at least
|
|
6718
|
+
one missing value).
|
|
6719
|
+
|
|
6720
|
+
```{python}
|
|
6721
|
+
validation = (
|
|
6722
|
+
pb.Validate(data=tbl)
|
|
6723
|
+
.rows_complete()
|
|
6724
|
+
.interrogate()
|
|
6725
|
+
)
|
|
6726
|
+
|
|
6727
|
+
validation
|
|
6728
|
+
```
|
|
6729
|
+
|
|
6730
|
+
From this validation table we see that there are two failing test units. This is because
|
|
6731
|
+
two rows in the table have at least one missing value (the second row and the last row).
|
|
6732
|
+
|
|
6733
|
+
We can also use a subset of columns to determine completeness. Let's specify the subset
|
|
6734
|
+
using columns `col_2` and `col_3` for the next validation.
|
|
6735
|
+
|
|
6736
|
+
```{python}
|
|
6737
|
+
validation = (
|
|
6738
|
+
pb.Validate(data=tbl)
|
|
6739
|
+
.rows_complete(columns_subset=["col_2", "col_3"])
|
|
6740
|
+
.interrogate()
|
|
6741
|
+
)
|
|
6742
|
+
|
|
6743
|
+
validation
|
|
6744
|
+
```
|
|
6745
|
+
|
|
6746
|
+
The validation table reports a single failing test unit. The last row contains missing
|
|
6747
|
+
values in both the `col_2` and `col_3` columns.
|
|
6748
|
+
All other rows are complete across these two columns.
|
|
6749
|
+
"""
|
|
6750
|
+
|
|
6751
|
+
assertion_type = _get_fn_name()
|
|
6752
|
+
|
|
6753
|
+
_check_pre(pre=pre)
|
|
6754
|
+
# TODO: add check for segments
|
|
6755
|
+
# _check_segments(segments=segments)
|
|
6756
|
+
_check_thresholds(thresholds=thresholds)
|
|
6757
|
+
_check_boolean_input(param=active, param_name="active")
|
|
6758
|
+
|
|
6759
|
+
# Determine threshold to use (global or local) and normalize a local `thresholds=` value
|
|
6760
|
+
thresholds = (
|
|
6761
|
+
self.thresholds if thresholds is None else _normalize_thresholds_creation(thresholds)
|
|
6762
|
+
)
|
|
6763
|
+
|
|
6764
|
+
if columns_subset is not None and isinstance(columns_subset, str):
|
|
6765
|
+
columns_subset = [columns_subset]
|
|
6766
|
+
|
|
6767
|
+
# TODO: incorporate Column object
|
|
6768
|
+
|
|
6769
|
+
# Determine brief to use (global or local) and transform any shorthands of `brief=`
|
|
6770
|
+
brief = self.brief if brief is None else _transform_auto_brief(brief=brief)
|
|
6771
|
+
|
|
6772
|
+
val_info = _ValidationInfo(
|
|
6773
|
+
assertion_type=assertion_type,
|
|
6774
|
+
column=columns_subset,
|
|
6775
|
+
pre=pre,
|
|
6776
|
+
segments=segments,
|
|
6777
|
+
thresholds=thresholds,
|
|
6778
|
+
actions=actions,
|
|
6779
|
+
brief=brief,
|
|
6780
|
+
active=active,
|
|
6781
|
+
)
|
|
6782
|
+
|
|
6783
|
+
self._add_validation(validation_info=val_info)
|
|
6784
|
+
|
|
6785
|
+
return self
|
|
6786
|
+
|
|
6549
6787
|
def col_schema_match(
|
|
6550
6788
|
self,
|
|
6551
6789
|
schema: Schema,
|
|
@@ -7724,6 +7962,14 @@ class Validate:
|
|
|
7724
7962
|
tbl_type=tbl_type,
|
|
7725
7963
|
).get_test_results()
|
|
7726
7964
|
|
|
7965
|
+
if assertion_category == "ROWS_COMPLETE":
|
|
7966
|
+
results_tbl = RowsComplete(
|
|
7967
|
+
data_tbl=data_tbl_step,
|
|
7968
|
+
columns_subset=column,
|
|
7969
|
+
threshold=threshold,
|
|
7970
|
+
tbl_type=tbl_type,
|
|
7971
|
+
).get_test_results()
|
|
7972
|
+
|
|
7727
7973
|
if assertion_category == "COL_EXISTS_HAS_TYPE":
|
|
7728
7974
|
result_bool = ColExistsHasType(
|
|
7729
7975
|
data_tbl=data_tbl_step,
|
|
@@ -7994,7 +8240,8 @@ class Validate:
|
|
|
7994
8240
|
# TODO: Add support for extraction of rows for Ibis backends
|
|
7995
8241
|
if (
|
|
7996
8242
|
collect_extracts
|
|
7997
|
-
and assertion_type
|
|
8243
|
+
and assertion_type
|
|
8244
|
+
in ROW_BASED_VALIDATION_TYPES + ["rows_distinct", "rows_complete"]
|
|
7998
8245
|
and tbl_type not in IBIS_BACKENDS
|
|
7999
8246
|
):
|
|
8000
8247
|
# Add row numbers to the results table
|
|
@@ -9076,19 +9323,134 @@ class Validate:
|
|
|
9076
9323
|
"""
|
|
9077
9324
|
Get a report of the validation results as a JSON-formatted string.
|
|
9078
9325
|
|
|
9326
|
+
The `get_json_report()` method provides a machine-readable report of validation results in
|
|
9327
|
+
JSON format. This is particularly useful for programmatic processing, storing validation
|
|
9328
|
+
results, or integrating with other systems. The report includes detailed information about
|
|
9329
|
+
each validation step, such as assertion type, columns validated, threshold values, test
|
|
9330
|
+
results, and more.
|
|
9331
|
+
|
|
9332
|
+
By default, all available validation information fields are included in the report. However,
|
|
9333
|
+
you can customize the fields to include or exclude using the `use_fields=` and
|
|
9334
|
+
`exclude_fields=` parameters.
|
|
9335
|
+
|
|
9079
9336
|
Parameters
|
|
9080
9337
|
----------
|
|
9081
9338
|
use_fields
|
|
9082
|
-
|
|
9339
|
+
An optional list of specific fields to include in the report. If provided, only these
|
|
9340
|
+
fields will be included in the JSON output. If `None` (the default), all standard
|
|
9341
|
+
validation report fields are included. Have a look at the *Available Report Fields*
|
|
9342
|
+
section below for a list of fields that can be included in the report.
|
|
9083
9343
|
exclude_fields
|
|
9084
|
-
|
|
9344
|
+
An optional list of fields to exclude from the report. If provided, these fields will
|
|
9345
|
+
be omitted from the JSON output. If `None` (the default), no fields are excluded.
|
|
9346
|
+
This parameter cannot be used together with `use_fields=`. The *Available Report Fields*
|
|
9347
|
+
section provides a listing of fields that can be excluded from the report.
|
|
9085
9348
|
|
|
9086
9349
|
Returns
|
|
9087
9350
|
-------
|
|
9088
9351
|
str
|
|
9089
|
-
A JSON-formatted string representing the validation report
|
|
9090
|
-
|
|
9352
|
+
A JSON-formatted string representing the validation report, with each validation step
|
|
9353
|
+
as an object in the report array.
|
|
9354
|
+
|
|
9355
|
+
Available Report Fields
|
|
9356
|
+
-----------------------
|
|
9357
|
+
The JSON report can include any of the standard validation report fields, including:
|
|
9358
|
+
|
|
9359
|
+
- `i`: the step number (1-indexed)
|
|
9360
|
+
- `i_o`: the original step index from the validation plan (pre-expansion)
|
|
9361
|
+
- `assertion_type`: the type of validation assertion (e.g., `"col_vals_gt"`, etc.)
|
|
9362
|
+
- `column`: the column being validated (or columns used in certain validations)
|
|
9363
|
+
- `values`: the comparison values or parameters used in the validation
|
|
9364
|
+
- `inclusive`: whether the comparison is inclusive (for range-based validations)
|
|
9365
|
+
- `na_pass`: whether `NA`/`Null` values are considered passing (for certain validations)
|
|
9366
|
+
- `pre`: preprocessing function applied before validation
|
|
9367
|
+
- `segments`: data segments to which the validation was applied
|
|
9368
|
+
- `thresholds`: threshold level statement that was used for the validation step
|
|
9369
|
+
- `label`: custom label for the validation step
|
|
9370
|
+
- `brief`: a brief description of the validation step
|
|
9371
|
+
- `active`: whether the validation step is active
|
|
9372
|
+
- `all_passed`: whether all test units passed in the step
|
|
9373
|
+
- `n`: total number of test units
|
|
9374
|
+
- `n_passed`, `n_failed`: number of test units that passed and failed
|
|
9375
|
+
- `f_passed`, `f_failed`: fraction of test units that passed and failed
|
|
9376
|
+
- `warning`, `error`, `critical`: whether the namesake threshold level was exceeded (is
|
|
9377
|
+
`null` if threshold not set)
|
|
9378
|
+
- `time_processed`: when the validation step was processed (ISO 8601 format)
|
|
9379
|
+
- `proc_duration_s`: the processing duration in seconds
|
|
9380
|
+
|
|
9381
|
+
Examples
|
|
9382
|
+
--------
|
|
9383
|
+
Let's create a validation plan with a few validation steps and generate a JSON report of the
|
|
9384
|
+
results:
|
|
9385
|
+
|
|
9386
|
+
```{python}
|
|
9387
|
+
import pointblank as pb
|
|
9388
|
+
import polars as pl
|
|
9389
|
+
|
|
9390
|
+
# Create a sample DataFrame
|
|
9391
|
+
tbl = pl.DataFrame({
|
|
9392
|
+
"a": [5, 7, 8, 9],
|
|
9393
|
+
"b": [3, 4, 2, 1]
|
|
9394
|
+
})
|
|
9395
|
+
|
|
9396
|
+
# Create and execute a validation plan
|
|
9397
|
+
validation = (
|
|
9398
|
+
pb.Validate(data=tbl)
|
|
9399
|
+
.col_vals_gt(columns="a", value=6)
|
|
9400
|
+
.col_vals_lt(columns="b", value=4)
|
|
9401
|
+
.interrogate()
|
|
9402
|
+
)
|
|
9403
|
+
|
|
9404
|
+
# Get the full JSON report
|
|
9405
|
+
json_report = validation.get_json_report()
|
|
9406
|
+
|
|
9407
|
+
print(json_report)
|
|
9408
|
+
```
|
|
9409
|
+
|
|
9410
|
+
You can also customize which fields to include:
|
|
9411
|
+
|
|
9412
|
+
```{python}
|
|
9413
|
+
json_report = validation.get_json_report(
|
|
9414
|
+
use_fields=["i", "assertion_type", "column", "n_passed", "n_failed"]
|
|
9415
|
+
)
|
|
9416
|
+
|
|
9417
|
+
print(json_report)
|
|
9418
|
+
```
|
|
9419
|
+
|
|
9420
|
+
Or which fields to exclude:
|
|
9421
|
+
|
|
9422
|
+
```{python}
|
|
9423
|
+
json_report = validation.get_json_report(
|
|
9424
|
+
exclude_fields=[
|
|
9425
|
+
"i_o", "thresholds", "pre", "segments", "values",
|
|
9426
|
+
"na_pass", "inclusive", "label", "brief", "active",
|
|
9427
|
+
"time_processed", "proc_duration_s"
|
|
9428
|
+
]
|
|
9429
|
+
)
|
|
9430
|
+
|
|
9431
|
+
print(json_report)
|
|
9432
|
+
```
|
|
9433
|
+
|
|
9434
|
+
The JSON output can be further processed or analyzed programmatically:
|
|
9435
|
+
|
|
9436
|
+
```{python}
|
|
9437
|
+
import json
|
|
9438
|
+
|
|
9439
|
+
# Parse the JSON report
|
|
9440
|
+
report_data = json.loads(validation.get_json_report())
|
|
9091
9441
|
|
|
9442
|
+
# Extract and analyze validation results
|
|
9443
|
+
failing_steps = [step for step in report_data if step["n_failed"] > 0]
|
|
9444
|
+
print(f"Number of failing validation steps: {len(failing_steps)}")
|
|
9445
|
+
```
|
|
9446
|
+
|
|
9447
|
+
See Also
|
|
9448
|
+
--------
|
|
9449
|
+
- [`get_tabular_report()`](`pointblank.Validate.get_tabular_report`): Get a formatted HTML
|
|
9450
|
+
report as a GT table
|
|
9451
|
+
- [`get_data_extracts()`](`pointblank.Validate.get_data_extracts`): Get rows that
|
|
9452
|
+
failed validation
|
|
9453
|
+
"""
|
|
9092
9454
|
if use_fields is not None and exclude_fields is not None:
|
|
9093
9455
|
raise ValueError("Cannot specify both `use_fields=` and `exclude_fields=`.")
|
|
9094
9456
|
|
|
@@ -9597,7 +9959,7 @@ class Validate:
|
|
|
9597
9959
|
"col_vals_expr",
|
|
9598
9960
|
]:
|
|
9599
9961
|
columns_upd.append("—")
|
|
9600
|
-
elif assertion_type[i] in ["rows_distinct"]:
|
|
9962
|
+
elif assertion_type[i] in ["rows_distinct", "rows_complete"]:
|
|
9601
9963
|
if not column:
|
|
9602
9964
|
# If there is no column subset, then all columns are used
|
|
9603
9965
|
columns_upd.append("ALL COLUMNS")
|
|
@@ -9660,6 +10022,7 @@ class Validate:
|
|
|
9660
10022
|
"col_vals_not_null",
|
|
9661
10023
|
"col_exists",
|
|
9662
10024
|
"rows_distinct",
|
|
10025
|
+
"rows_complete",
|
|
9663
10026
|
]:
|
|
9664
10027
|
values_upd.append("—")
|
|
9665
10028
|
|
|
@@ -10213,6 +10576,7 @@ class Validate:
|
|
|
10213
10576
|
- [`col_vals_regex()`](`pointblank.Validate.col_vals_regex`)
|
|
10214
10577
|
- [`col_vals_null()`](`pointblank.Validate.col_vals_null`)
|
|
10215
10578
|
- [`col_vals_not_null()`](`pointblank.Validate.col_vals_not_null`)
|
|
10579
|
+
- [`rows_complete()`](`pointblank.Validate.rows_complete`)
|
|
10216
10580
|
- [`conjointly()`](`pointblank.Validate.conjointly`)
|
|
10217
10581
|
|
|
10218
10582
|
The [`rows_distinct()`](`pointblank.Validate.rows_distinct`) validation step will produce a
|
|
@@ -10372,7 +10736,7 @@ class Validate:
|
|
|
10372
10736
|
# if get_row_count(extract) == 0:
|
|
10373
10737
|
# return "No rows were extracted."
|
|
10374
10738
|
|
|
10375
|
-
if assertion_type in ROW_BASED_VALIDATION_TYPES:
|
|
10739
|
+
if assertion_type in ROW_BASED_VALIDATION_TYPES + ["rows_complete"]:
|
|
10376
10740
|
# Get the extracted data for the step
|
|
10377
10741
|
extract = self.get_data_extracts(i=i, frame=True)
|
|
10378
10742
|
|
|
@@ -11082,6 +11446,13 @@ def _create_autobrief_or_failure_text(
|
|
|
11082
11446
|
for_failure=for_failure,
|
|
11083
11447
|
)
|
|
11084
11448
|
|
|
11449
|
+
if assertion_type == "rows_complete":
|
|
11450
|
+
return _create_text_rows_complete(
|
|
11451
|
+
lang=lang,
|
|
11452
|
+
columns_subset=column,
|
|
11453
|
+
for_failure=for_failure,
|
|
11454
|
+
)
|
|
11455
|
+
|
|
11085
11456
|
if assertion_type == "row_count_match":
|
|
11086
11457
|
return _create_text_row_count_match(
|
|
11087
11458
|
lang=lang,
|
|
@@ -11257,6 +11628,24 @@ def _create_text_rows_distinct(
|
|
|
11257
11628
|
return text
|
|
11258
11629
|
|
|
11259
11630
|
|
|
11631
|
+
def _create_text_rows_complete(
|
|
11632
|
+
lang: str, columns_subset: list[str] | None, for_failure: bool = False
|
|
11633
|
+
) -> str:
|
|
11634
|
+
type_ = _expect_failure_type(for_failure=for_failure)
|
|
11635
|
+
|
|
11636
|
+
if columns_subset is None:
|
|
11637
|
+
text = EXPECT_FAIL_TEXT[f"all_row_complete_{type_}_text"][lang]
|
|
11638
|
+
|
|
11639
|
+
else:
|
|
11640
|
+
column_text = _prep_values_text(values=columns_subset, lang=lang, limit=3)
|
|
11641
|
+
|
|
11642
|
+
text = EXPECT_FAIL_TEXT[f"across_row_complete_{type_}_text"][lang].format(
|
|
11643
|
+
column_text=column_text
|
|
11644
|
+
)
|
|
11645
|
+
|
|
11646
|
+
return text
|
|
11647
|
+
|
|
11648
|
+
|
|
11260
11649
|
def _create_text_row_count_match(lang: str, value: int, for_failure: bool = False) -> str:
|
|
11261
11650
|
type_ = _expect_failure_type(for_failure=for_failure)
|
|
11262
11651
|
|
|
@@ -12057,6 +12446,11 @@ def _step_report_row_based(
|
|
|
12057
12446
|
text = STEP_REPORT_TEXT["column_is_null"][lang].format(column=column)
|
|
12058
12447
|
elif assertion_type == "col_vals_not_null":
|
|
12059
12448
|
text = STEP_REPORT_TEXT["column_is_not_null"][lang].format(column=column)
|
|
12449
|
+
elif assertion_type == "rows_complete":
|
|
12450
|
+
if column is None:
|
|
12451
|
+
text = STEP_REPORT_TEXT["rows_complete_all"][lang]
|
|
12452
|
+
else:
|
|
12453
|
+
text = STEP_REPORT_TEXT["rows_complete_subset"][lang]
|
|
12060
12454
|
|
|
12061
12455
|
# Wrap assertion text in a <code> tag
|
|
12062
12456
|
text = (
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
pointblank/__init__.py,sha256=uHrX-ARZOhvWogXXqKV65RO2DXdYLZNCD1oNcm8hE6o,1585
|
|
2
|
-
pointblank/_constants.py,sha256=
|
|
2
|
+
pointblank/_constants.py,sha256=tlelmeuftW4BpVeEILbsbuuCaMQ7yA_FYqM6PJPXH58,78561
|
|
3
3
|
pointblank/_constants_docs.py,sha256=JBmtt16zTYQ-zaM4ElLExtKs-dKlnN553Ys2ML1Y1C8,2099
|
|
4
|
-
pointblank/_constants_translations.py,sha256=
|
|
5
|
-
pointblank/_interrogation.py,sha256=
|
|
4
|
+
pointblank/_constants_translations.py,sha256=FHkY2Bh1VBmBwbiGRIRSMU1tNGxgQAkjoPoYlwOHSKU,180685
|
|
5
|
+
pointblank/_interrogation.py,sha256=BjN60ed7BH4ZnoPtkmVSvVEqJgf8k9mce4Zb63_jv_s,85155
|
|
6
6
|
pointblank/_typing.py,sha256=ConITAbsFxU8CkNXY7l0Lua9hGofeDDJAWw-lGAIVgI,764
|
|
7
|
-
pointblank/_utils.py,sha256=
|
|
7
|
+
pointblank/_utils.py,sha256=g7vbvV33tKNvznUoYsHcZW90bYm1LPb76njQeDJDPyQ,24715
|
|
8
8
|
pointblank/_utils_check_args.py,sha256=rFEc1nbCN8ftsQQWVjCNWmQ2QmUDxkfgmoJclrZeTLs,5489
|
|
9
9
|
pointblank/_utils_html.py,sha256=sTcmnBljkPjRZF1hbpoHl4HmnXOazsA91gC9iWVIrRk,2848
|
|
10
10
|
pointblank/actions.py,sha256=oazJk4pe3lIA14hjyCDtPOr4r_sp4vGGo2eyU_LX5_0,18268
|
|
@@ -15,8 +15,8 @@ pointblank/draft.py,sha256=cusr4fBiNncCKIOU8UwvJcvkBeBuUnqH_UfYp9dtNss,15777
|
|
|
15
15
|
pointblank/schema.py,sha256=gzUCmtccO2v15MH2bo9uHUYjkKEEne1okQucxcH39pc,44291
|
|
16
16
|
pointblank/tf.py,sha256=8o_8m4i01teulEe3-YYMotSNf3tImjBMInsvdjSAO5Q,8844
|
|
17
17
|
pointblank/thresholds.py,sha256=aAPfdo3VMCw_G_OAh4nEsCYfIynDfNRJOMrG8yDM6U8,25717
|
|
18
|
-
pointblank/validate.py,sha256=
|
|
19
|
-
pointblank/data/api-docs.txt,sha256=
|
|
18
|
+
pointblank/validate.py,sha256=EPqtxw5sQG4Xh7WSaViVEUtm4FmpFOsyh4KM9EzuqkU,588834
|
|
19
|
+
pointblank/data/api-docs.txt,sha256=JkV9SdXyB3ftBMXVFdFqGZNpyfBdWUpyisn4QHco56w,467666
|
|
20
20
|
pointblank/data/game_revenue-duckdb.zip,sha256=tKIVx48OGLYGsQPS3h5AjA2Nyq_rfEpLCjBiFUWhagU,35880
|
|
21
21
|
pointblank/data/game_revenue.zip,sha256=7c9EvHLyi93CHUd4p3dM4CZ-GucFCtXKSPxgLojL32U,33749
|
|
22
22
|
pointblank/data/nycflights-duckdb.zip,sha256=GQrHO9tp7d9cNGFNSbA9EKF19MLf6t2wZE0U9-hIKow,5293077
|
|
@@ -24,8 +24,8 @@ pointblank/data/nycflights.zip,sha256=yVjbUaKUz2LydSdF9cABuir0VReHBBgV7shiNWSd0m
|
|
|
24
24
|
pointblank/data/polars-api-docs.txt,sha256=KGcS-BOtUs9zgpkWfXD-GFdFh4O_zjdkpX7msHjztLg,198045
|
|
25
25
|
pointblank/data/small_table-duckdb.zip,sha256=BhTaZ2CRS4-9Z1uVhOU6HggvW3XCar7etMznfENIcOc,2028
|
|
26
26
|
pointblank/data/small_table.zip,sha256=lmFb90Nb-v5X559Ikjg31YLAXuRyMkD9yLRElkXPMzQ,472
|
|
27
|
-
pointblank-0.9.
|
|
28
|
-
pointblank-0.9.
|
|
29
|
-
pointblank-0.9.
|
|
30
|
-
pointblank-0.9.
|
|
31
|
-
pointblank-0.9.
|
|
27
|
+
pointblank-0.9.1.dist-info/licenses/LICENSE,sha256=apLF-HWPNU7pT5bmf5KmZpD5Cklpy2u-BN_0xBoRMLY,1081
|
|
28
|
+
pointblank-0.9.1.dist-info/METADATA,sha256=1o11OgPSmpB4qBDEG1HyHDfVj5emxcT_yxHeFsVPVUc,14732
|
|
29
|
+
pointblank-0.9.1.dist-info/WHEEL,sha256=wXxTzcEDnjrTwFYjLPcsW_7_XihufBwmpiBeiXNBGEA,91
|
|
30
|
+
pointblank-0.9.1.dist-info/top_level.txt,sha256=-wHrS1SvV8-nhvc3w-PPYs1C1WtEc1pK-eGjubbCCKc,11
|
|
31
|
+
pointblank-0.9.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|