pointblank 0.8.4__py3-none-any.whl → 0.8.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pointblank/_constants_translations.py +162 -0
- pointblank/validate.py +181 -2
- {pointblank-0.8.4.dist-info → pointblank-0.8.5.dist-info}/METADATA +1 -1
- {pointblank-0.8.4.dist-info → pointblank-0.8.5.dist-info}/RECORD +7 -7
- {pointblank-0.8.4.dist-info → pointblank-0.8.5.dist-info}/WHEEL +0 -0
- {pointblank-0.8.4.dist-info → pointblank-0.8.5.dist-info}/licenses/LICENSE +0 -0
- {pointblank-0.8.4.dist-info → pointblank-0.8.5.dist-info}/top_level.txt +0 -0
|
@@ -1627,6 +1627,60 @@ STEP_REPORT_TEXT = {
|
|
|
1627
1627
|
"hi": "{column} Null नहीं है",
|
|
1628
1628
|
"el": "{column} δεν είναι Null",
|
|
1629
1629
|
},
|
|
1630
|
+
"rows_distinct_all": {
|
|
1631
|
+
"en": "All rows are distinct",
|
|
1632
|
+
"fr": "Toutes les lignes sont distinctes",
|
|
1633
|
+
"de": "Alle Zeilen sind eindeutig",
|
|
1634
|
+
"it": "Tutte le righe sono distinte",
|
|
1635
|
+
"es": "Todas las filas son distintas",
|
|
1636
|
+
"pt": "Todas as linhas são distintas",
|
|
1637
|
+
"ro": "Toate rândurile sunt distincte",
|
|
1638
|
+
"tr": "Tüm satırlar benzersizdir",
|
|
1639
|
+
"zh-Hans": "所有行都是不同的",
|
|
1640
|
+
"zh-Hant": "所有行都是不同的",
|
|
1641
|
+
"ja": "すべての行が一意です",
|
|
1642
|
+
"ko": "모든 행이 고유합니다",
|
|
1643
|
+
"vi": "Tất cả các hàng đều khác nhau",
|
|
1644
|
+
"ru": "Все строки уникальны",
|
|
1645
|
+
"cs": "Všechny řádky jsou jedinečné",
|
|
1646
|
+
"pl": "Wszystkie wiersze są unikalne",
|
|
1647
|
+
"da": "Alle rækker er unikke",
|
|
1648
|
+
"sv": "Alla rader är distinkta",
|
|
1649
|
+
"nb": "Alle rader er unike",
|
|
1650
|
+
"nl": "Alle rijen zijn uniek",
|
|
1651
|
+
"fi": "Kaikki rivit ovat erillisiä",
|
|
1652
|
+
"is": "Allar raðir eru einstakar",
|
|
1653
|
+
"ar": "جميع الصفوف متميزة",
|
|
1654
|
+
"hi": "सभी पंक्तियां अलग-अलग हैं",
|
|
1655
|
+
"el": "Όλες οι γραμμές είναι διακριτές",
|
|
1656
|
+
},
|
|
1657
|
+
"rows_distinct_subset": {
|
|
1658
|
+
"en": "Rows are distinct across a subset of columns",
|
|
1659
|
+
"fr": "Les lignes sont distinctes sur un sous-ensemble de colonnes",
|
|
1660
|
+
"de": "Zeilen sind in einer Teilmenge von Spalten eindeutig",
|
|
1661
|
+
"it": "Le righe sono distinte in un sottoinsieme di colonne",
|
|
1662
|
+
"es": "Las filas son distintas en un subconjunto de columnas",
|
|
1663
|
+
"pt": "As linhas são distintas em um subconjunto de colunas",
|
|
1664
|
+
"ro": "Rândurile sunt distincte într-un subset de coloane",
|
|
1665
|
+
"tr": "Satırlar, sütunların bir alt kümesinde benzersizdir",
|
|
1666
|
+
"zh-Hans": "行在列的子集中是不同的",
|
|
1667
|
+
"zh-Hant": "行在列的子集中是不同的",
|
|
1668
|
+
"ja": "行は列のサブセット間で一意です",
|
|
1669
|
+
"ko": "행이 열의 하위 집합에서 고유합니다",
|
|
1670
|
+
"vi": "Các hàng là khác biệt trong một tập con của các cột",
|
|
1671
|
+
"ru": "Строки уникальны в подмножестве столбцов",
|
|
1672
|
+
"cs": "Řádky jsou jedinečné napříč podmnožinou sloupců",
|
|
1673
|
+
"pl": "Wiersze są unikalne w podzbiorze kolumn",
|
|
1674
|
+
"da": "Rækker er unikke på tværs af en delmængde af kolonner",
|
|
1675
|
+
"sv": "Rader är distinkta över en delmängd av kolumner",
|
|
1676
|
+
"nb": "Rader er unike på tvers av en delmengde av kolonner",
|
|
1677
|
+
"nl": "Rijen zijn uniek over een subset van kolommen",
|
|
1678
|
+
"fi": "Rivit ovat erillisiä sarakkeiden osajoukossa",
|
|
1679
|
+
"is": "Raðir eru einstakar í undirsafni dálka",
|
|
1680
|
+
"ar": "الصفوف متميزة عبر مجموعة فرعية من الأعمدة",
|
|
1681
|
+
"hi": "पंक्तियां स्तंभों के एक उपसमूह में अलग-अलग हैं",
|
|
1682
|
+
"el": "Οι γραμμές είναι διακριτές σε ένα υποσύνολο στηλών",
|
|
1683
|
+
},
|
|
1630
1684
|
"report_for_step_i": {
|
|
1631
1685
|
"en": "Report for Validation Step {i}",
|
|
1632
1686
|
"fr": "Rapport pour l'étape de validation {i}",
|
|
@@ -1816,6 +1870,87 @@ STEP_REPORT_TEXT = {
|
|
|
1816
1870
|
"hi": "(<span style='color: #B22222;'>लाल रंग में</span> परीक्षण इकाई विफलताओं के साथ)",
|
|
1817
1871
|
"el": "(ΜΕ <span style='color: #B22222;'>ΑΠΟΤΥΧΙΕΣ ΜΟΝΑΔΩΝ ΔΟΚΙΜΗΣ ΜΕ ΚΟΚΚΙΝΟ</span>)",
|
|
1818
1872
|
},
|
|
1873
|
+
"extract_text_first_rows_distinct": {
|
|
1874
|
+
"en": "<div>EXTRACT OF FIRST <strong>{extract_length_resolved}</strong> ROWS:</div>",
|
|
1875
|
+
"fr": "<div>EXTRAIT DES PREMIÈRES <strong>{extract_length_resolved}</strong> LIGNES:</div>",
|
|
1876
|
+
"de": "<div>AUSZUG DER ERSTEN <strong>{extract_length_resolved}</strong> ZEILEN:</div>",
|
|
1877
|
+
"it": "<div>ESTRATTO DELLE PRIME <strong>{extract_length_resolved}</strong> RIGHE:</div>",
|
|
1878
|
+
"es": "<div>EXTRACTO DE LAS PRIMERAS <strong>{extract_length_resolved}</strong> FILAS:</div>",
|
|
1879
|
+
"pt": "<div>EXTRATO DAS PRIMEIRAS <strong>{extract_length_resolved}</strong> LINHAS:</div>",
|
|
1880
|
+
"ro": "<div>EXTRAS DIN PRIMELE <strong>{extract_length_resolved}</strong> RÂNDURI:</div>",
|
|
1881
|
+
"tr": "<div>İLK <strong>{extract_length_resolved}</strong> SATIRIN ÇIKARIMI:</div>",
|
|
1882
|
+
"zh-Hans": "<div>前 <strong>{extract_length_resolved}</strong> 行提取数据:</div>",
|
|
1883
|
+
"zh-Hant": "<div>前 <strong>{extract_length_resolved}</strong> 行提取資料:</div>",
|
|
1884
|
+
"ja": "<div>最初の <strong>{extract_length_resolved}</strong> 行の抽出:</div>",
|
|
1885
|
+
"ko": "<div>첫 <strong>{extract_length_resolved}</strong>행 추출:</div>",
|
|
1886
|
+
"vi": "<div>TRÍCH XUẤT <strong>{extract_length_resolved}</strong> HÀNG ĐẦU TIÊN:</div>",
|
|
1887
|
+
"ru": "<div>ВЫДЕРЖКА ПЕРВЫХ <strong>{extract_length_resolved}</strong> СТРОК:</div>",
|
|
1888
|
+
"cs": "<div>VÝPIS PRVNÍCH <strong>{extract_length_resolved}</strong> ŘÁDKŮ:</div>",
|
|
1889
|
+
"pl": "<div>WYCIĄG Z PIERWSZYCH <strong>{extract_length_resolved}</strong> WIERSZY:</div>",
|
|
1890
|
+
"da": "<div>UDDRAG AF FØRSTE <strong>{extract_length_resolved}</strong> RÆKKER:</div>",
|
|
1891
|
+
"sv": "<div>UTDRAG AV FÖRSTA <strong>{extract_length_resolved}</strong> RADERNA:</div>",
|
|
1892
|
+
"nb": "<div>UTDRAG AV FØRSTE <strong>{extract_length_resolved}</strong> RADER:</div>",
|
|
1893
|
+
"nl": "<div>EXTRACT VAN EERSTE <strong>{extract_length_resolved}</strong> RIJEN:</div>",
|
|
1894
|
+
"fi": "<div>OTE ENSIMMÄISISTÄ <strong>{extract_length_resolved}</strong> RIVISTÄ:</div>",
|
|
1895
|
+
"is": "<div>ÚTDRÁTTUR AF FYRSTU <strong>{extract_length_resolved}</strong> RÖÐUM:</div>",
|
|
1896
|
+
"ar": "<div>مقتطف من أول <strong>{extract_length_resolved}</strong> صفوف:</div>",
|
|
1897
|
+
"hi": "<div>पहली <strong>{extract_length_resolved}</strong> पंक्तियों का निष्कर्ष:</div>",
|
|
1898
|
+
"el": "<div>ΕΞΑΓΩΓΗ ΤΩΝ ΠΡΩΤΩΝ <strong>{extract_length_resolved}</strong> ΓΡΑΜΜΩΝ:</div>",
|
|
1899
|
+
},
|
|
1900
|
+
"extract_text_all_rows_distinct": {
|
|
1901
|
+
"en": "<div>EXTRACT OF ALL <strong>{extract_length_resolved}</strong> ROWS:</div>",
|
|
1902
|
+
"fr": "<div>EXTRAIT DE TOUTES LES <strong>{extract_length_resolved}</strong> LIGNES:</div>",
|
|
1903
|
+
"de": "<div>AUSZUG ALLER <strong>{extract_length_resolved}</strong> ZEILEN:</div>",
|
|
1904
|
+
"it": "<div>ESTRATTO DI TUTTE LE <strong>{extract_length_resolved}</strong> RIGHE:</div>",
|
|
1905
|
+
"es": "<div>EXTRACTO DE TODAS LAS <strong>{extract_length_resolved}</strong> FILAS:</div>",
|
|
1906
|
+
"pt": "<div>EXTRATO DE TODAS AS <strong>{extract_length_resolved}</strong> LINHAS:</div>",
|
|
1907
|
+
"ro": "<div>EXTRAS DIN TOATE CELE <strong>{extract_length_resolved}</strong> RÂNDURI:</div>",
|
|
1908
|
+
"tr": "<div>TÜM <strong>{extract_length_resolved}</strong> SATIRIN ÇIKARIMI:</div>",
|
|
1909
|
+
"zh-Hans": "<div>所有 <strong>{extract_length_resolved}</strong> 行提取数据:</div>",
|
|
1910
|
+
"zh-Hant": "<div>所有 <strong>{extract_length_resolved}</strong> 行提取資料:</div>",
|
|
1911
|
+
"ja": "<div>すべての <strong>{extract_length_resolved}</strong> 行の抽出:</div>",
|
|
1912
|
+
"ko": "<div>모든 <strong>{extract_length_resolved}</strong>행 추출:</div>",
|
|
1913
|
+
"vi": "<div>TRÍCH XUẤT TẤT CẢ <strong>{extract_length_resolved}</strong> HÀNG:</div>",
|
|
1914
|
+
"ru": "<div>ВЫДЕРЖКА ВСЕХ <strong>{extract_length_resolved}</strong> СТРОК:</div>",
|
|
1915
|
+
"cs": "<div>VÝPIS VŠECH <strong>{extract_length_resolved}</strong> ŘÁDKŮ:</div>",
|
|
1916
|
+
"pl": "<div>WYCIĄG ZE WSZYSTKICH <strong>{extract_length_resolved}</strong> WIERSZY:</div>",
|
|
1917
|
+
"da": "<div>UDDRAG AF ALLE <strong>{extract_length_resolved}</strong> RÆKKER:</div>",
|
|
1918
|
+
"sv": "<div>UTDRAG AV ALLA <strong>{extract_length_resolved}</strong> RADERNA:</div>",
|
|
1919
|
+
"nb": "<div>UTDRAG AV ALLE <strong>{extract_length_resolved}</strong> RADER:</div>",
|
|
1920
|
+
"nl": "<div>EXTRACT VAN ALLE <strong>{extract_length_resolved}</strong> RIJEN:</div>",
|
|
1921
|
+
"fi": "<div>OTE KAIKISTA <strong>{extract_length_resolved}</strong> RIVISTÄ:</div>",
|
|
1922
|
+
"is": "<div>ÚTDRÁTTUR AF ÖLLUM <strong>{extract_length_resolved}</strong> RÖÐUM:</div>",
|
|
1923
|
+
"ar": "<div>مقتطف من جميع <strong>{extract_length_resolved}</strong> صفوف:</div>",
|
|
1924
|
+
"hi": "<div>सभी <strong>{extract_length_resolved}</strong> पंक्तियों का निष्कर्ष:</div>",
|
|
1925
|
+
"el": "<div>ΕΞΑΓΩΓΗ ΟΛΩΝ ΤΩΝ <strong>{extract_length_resolved}</strong> ΓΡΑΜΜΩΝ:</div>",
|
|
1926
|
+
},
|
|
1927
|
+
"failure_rate_summary_rows_distinct": {
|
|
1928
|
+
"en": "{failure_rate} TEST UNIT FAILURES",
|
|
1929
|
+
"fr": "{failure_rate} ÉCHECS D'UNITÉS DE TEST",
|
|
1930
|
+
"de": "{failure_rate} TESTEINHEITENFEHLER",
|
|
1931
|
+
"it": "{failure_rate} FALLIMENTI DI UNITÀ DI TEST",
|
|
1932
|
+
"es": "{failure_rate} FALLOS DE UNIDAD DE PRUEBA",
|
|
1933
|
+
"pt": "{failure_rate} FALHAS DE UNIDADE DE TESTE",
|
|
1934
|
+
"ro": "{failure_rate} EȘECURI ALE UNITĂȚILOR DE TEST",
|
|
1935
|
+
"tr": "{failure_rate} TEST BİRİMİ HATALARI",
|
|
1936
|
+
"zh-Hans": "{failure_rate} 个测试单元失败",
|
|
1937
|
+
"zh-Hant": "{failure_rate} 個測試單元失敗",
|
|
1938
|
+
"ja": "{failure_rate} テストユニットの失敗",
|
|
1939
|
+
"ko": "{failure_rate} 테스트 단위 실패",
|
|
1940
|
+
"vi": "{failure_rate} LỖI ĐƠN VỊ KIỂM TRA",
|
|
1941
|
+
"ru": "{failure_rate} СБОЕВ ТЕСТОВЫХ ЕДИНИЦ",
|
|
1942
|
+
"cs": "{failure_rate} SELHÁNÍ TESTOVACÍCH JEDNOTEK",
|
|
1943
|
+
"pl": "{failure_rate} NIEPOWODZEŃ JEDNOSTEK TESTOWYCH",
|
|
1944
|
+
"da": "{failure_rate} TEST ENHED FEJL",
|
|
1945
|
+
"sv": "{failure_rate} TESTENHETSFEL",
|
|
1946
|
+
"nb": "{failure_rate} TESTENHETSFEIL",
|
|
1947
|
+
"nl": "{failure_rate} TESTEENHEID FOUTEN",
|
|
1948
|
+
"fi": "{failure_rate} TESTIYKSIKÖN VIRHEITÄ",
|
|
1949
|
+
"is": "{failure_rate} PRÓFUNAREININGAR VILLUR",
|
|
1950
|
+
"ar": "{failure_rate} فشل وحدات الاختبار",
|
|
1951
|
+
"hi": "{failure_rate} परीक्षण इकाई विफलताएँ",
|
|
1952
|
+
"el": "{failure_rate} ΑΠΟΤΥΧΙΕΣ ΜΟΝΑΔΩΝ ΔΟΚΙΜΗΣ",
|
|
1953
|
+
},
|
|
1819
1954
|
"success_statement": {
|
|
1820
1955
|
"en": "<strong>{n}</strong> TEST UNITS <em>ALL PASSED</em> IN COLUMN <strong>{column_position}</strong>",
|
|
1821
1956
|
"fr": "<strong>{n}</strong> UNITÉS DE TEST <em>TOUTES RÉUSSIES</em> DANS LA COLONNE <strong>{column_position}</strong>",
|
|
@@ -1843,6 +1978,33 @@ STEP_REPORT_TEXT = {
|
|
|
1843
1978
|
"hi": "कॉलम <strong>{column_position}</strong> में <strong>{n}</strong> परीक्षण इकाइयाँ <em>सभी सफल</em>",
|
|
1844
1979
|
"el": "<strong>{n}</strong> ΜΟΝΑΔΕΣ ΔΟΚΙΜΗΣ <em>ΟΛΕΣ ΕΠΙΤΥΧΕΙΣ</em> ΣΤΗ ΣΤΗΛΗ <strong>{column_position}</strong>",
|
|
1845
1980
|
},
|
|
1981
|
+
"success_statement_no_column": {
|
|
1982
|
+
"en": "<strong>{n}</strong> TEST UNITS <em>ALL PASSED</em>",
|
|
1983
|
+
"fr": "<strong>{n}</strong> UNITÉS DE TEST <em>TOUTES RÉUSSIES</em>",
|
|
1984
|
+
"de": "<strong>{n}</strong> TESTEINHEITEN <em>ALLE BESTANDEN</em>",
|
|
1985
|
+
"it": "<strong>{n}</strong> UNITÀ DI TEST <em>TUTTE SUPERATE</em>",
|
|
1986
|
+
"es": "<strong>{n}</strong> UNIDADES DE PRUEBA <em>TODAS APROBADAS</em>",
|
|
1987
|
+
"pt": "<strong>{n}</strong> UNIDADES DE TESTE <em>TODAS APROVADAS</em>",
|
|
1988
|
+
"ro": "<strong>{n}</strong> UNITĂȚI DE TEST <em>TOATE TRECUTE</em>",
|
|
1989
|
+
"tr": "<strong>{n}</strong> TEST BİRİMİNİN <em>TÜMÜ GEÇTİ</em>",
|
|
1990
|
+
"zh-Hans": "<strong>{n}</strong> 个测试单元<em>全部通过</em>",
|
|
1991
|
+
"zh-Hant": "<strong>{n}</strong> 個測試單元<em>全部通過</em>",
|
|
1992
|
+
"ja": "<strong>{n}</strong> のテストユニットが<em>すべて合格</em>",
|
|
1993
|
+
"ko": "<strong>{n}</strong> 테스트 단위 <em>모두 통과</em>",
|
|
1994
|
+
"vi": "<strong>{n}</strong> ĐƠN VỊ KIỂM TRA <em>ĐỀU ĐẠT</em>",
|
|
1995
|
+
"ru": "<strong>{n}</strong> ТЕСТОВЫХ ЕДИНИЦ <em>ВСЕ ПРОШЛИ</em>",
|
|
1996
|
+
"cs": "<strong>{n}</strong> TESTOVACÍCH JEDNOTEK <em>VŠECHNY PROŠLY</em>",
|
|
1997
|
+
"pl": "<strong>{n}</strong> JEDNOSTEK TESTOWYCH <em>WSZYSTKIE ZALICZONE</em>",
|
|
1998
|
+
"da": "<strong>{n}</strong> TEST ENHEDER <em>ALLE BESTÅET</em>",
|
|
1999
|
+
"sv": "<strong>{n}</strong> TESTENHETER <em>ALLA GODKÄNDA</em>",
|
|
2000
|
+
"nb": "<strong>{n}</strong> TESTENHETER <em>ALLE BESTÅTT</em>",
|
|
2001
|
+
"nl": "<strong>{n}</strong> TESTEENHEDEN <em>ALLEMAAL GESLAAGD</em>",
|
|
2002
|
+
"fi": "<strong>{n}</strong> TESTIYKSIKKÖÄ <em>KAIKKI LÄPÄISTY</em>",
|
|
2003
|
+
"is": "<strong>{n}</strong> PRÓFUNAREININGAR <em>ALLAR STAÐIST</em>",
|
|
2004
|
+
"ar": "<strong>{n}</strong> وحدات اختبار <em>جميعها نجحت</em>",
|
|
2005
|
+
"hi": "<strong>{n}</strong> परीक्षण इकाइयाँ <em>सभी सफल</em>",
|
|
2006
|
+
"el": "<strong>{n}</strong> ΜΟΝΑΔΕΣ ΔΟΚΙΜΗΣ <em>ΟΛΕΣ ΕΠΙΤΥΧΕΙΣ</em>",
|
|
2007
|
+
},
|
|
1846
2008
|
"preview_statement": {
|
|
1847
2009
|
"en": "<div>PREVIEW OF TARGET TABLE:</div>",
|
|
1848
2010
|
"fr": "<div>APERÇU DE LA TABLE CIBLE :</div>",
|
pointblank/validate.py
CHANGED
|
@@ -7012,7 +7012,7 @@ class Validate:
|
|
|
7012
7012
|
# TODO: Add support for extraction of rows for Ibis backends
|
|
7013
7013
|
if (
|
|
7014
7014
|
collect_extracts
|
|
7015
|
-
and assertion_type in ROW_BASED_VALIDATION_TYPES
|
|
7015
|
+
and assertion_type in ROW_BASED_VALIDATION_TYPES + ["rows_distinct"]
|
|
7016
7016
|
and tbl_type not in IBIS_BACKENDS
|
|
7017
7017
|
):
|
|
7018
7018
|
# Add row numbers to the results table
|
|
@@ -7038,6 +7038,32 @@ class Validate:
|
|
|
7038
7038
|
if len(validation_extract_nw) > extract_limit:
|
|
7039
7039
|
validation_extract_nw = validation_extract_nw.head(extract_limit)
|
|
7040
7040
|
|
|
7041
|
+
# If a 'rows_distinct' validation step, then the extract should have the
|
|
7042
|
+
# duplicate rows arranged together
|
|
7043
|
+
if assertion_type == "rows_distinct":
|
|
7044
|
+
# Get the list of column names in the extract, excluding the `_row_num_` column
|
|
7045
|
+
column_names = validation_extract_nw.columns
|
|
7046
|
+
column_names.remove("_row_num_")
|
|
7047
|
+
|
|
7048
|
+
# Only include the columns that were defined in `rows_distinct(columns_subset=)`
|
|
7049
|
+
# (stored here in `column`), if supplied
|
|
7050
|
+
if column is not None:
|
|
7051
|
+
column_names = column
|
|
7052
|
+
column_names_subset = ["_row_num_"] + column
|
|
7053
|
+
validation_extract_nw = validation_extract_nw.select(column_names_subset)
|
|
7054
|
+
|
|
7055
|
+
validation_extract_nw = (
|
|
7056
|
+
validation_extract_nw.with_columns(
|
|
7057
|
+
group_min_row=nw.min("_row_num_").over(*column_names)
|
|
7058
|
+
)
|
|
7059
|
+
# First sort by the columns to group duplicates and by row numbers
|
|
7060
|
+
# within groups; this type of sorting will preserve the original order in a
|
|
7061
|
+
# single operation
|
|
7062
|
+
.sort(by=["group_min_row"] + column_names + ["_row_num_"])
|
|
7063
|
+
.drop("group_min_row")
|
|
7064
|
+
)
|
|
7065
|
+
|
|
7066
|
+
# Ensure that the extract is set to its native format
|
|
7041
7067
|
validation.extract = nw.to_native(validation_extract_nw)
|
|
7042
7068
|
|
|
7043
7069
|
# Get the end time for this step
|
|
@@ -7976,6 +8002,7 @@ class Validate:
|
|
|
7976
8002
|
- [`col_vals_null()`](`pointblank.Validate.col_vals_null`)
|
|
7977
8003
|
- [`col_vals_not_null()`](`pointblank.Validate.col_vals_not_null`)
|
|
7978
8004
|
- [`col_vals_regex()`](`pointblank.Validate.col_vals_regex`)
|
|
8005
|
+
- [`rows_distinct()`](`pointblank.Validate.rows_distinct`)
|
|
7979
8006
|
|
|
7980
8007
|
An extracted row means that a test unit failed for that row in the validation step. The
|
|
7981
8008
|
extracted rows are a subset of the original table and are useful for further analysis or for
|
|
@@ -9330,6 +9357,24 @@ class Validate:
|
|
|
9330
9357
|
lang=lang,
|
|
9331
9358
|
)
|
|
9332
9359
|
|
|
9360
|
+
elif assertion_type == "rows_distinct":
|
|
9361
|
+
extract = self.get_data_extracts(i=i, frame=True)
|
|
9362
|
+
|
|
9363
|
+
step_report = _step_report_rows_distinct(
|
|
9364
|
+
i=i,
|
|
9365
|
+
column=column,
|
|
9366
|
+
column_position=column_position,
|
|
9367
|
+
columns_subset=columns_subset,
|
|
9368
|
+
n=n,
|
|
9369
|
+
n_failed=n_failed,
|
|
9370
|
+
all_passed=all_passed,
|
|
9371
|
+
extract=extract,
|
|
9372
|
+
tbl_preview=tbl_preview,
|
|
9373
|
+
header=header,
|
|
9374
|
+
limit=limit,
|
|
9375
|
+
lang=lang,
|
|
9376
|
+
)
|
|
9377
|
+
|
|
9333
9378
|
elif assertion_type == "col_schema_match":
|
|
9334
9379
|
# Get the parameters for column-schema matching
|
|
9335
9380
|
values_dict = validation_step["values"]
|
|
@@ -10672,7 +10717,7 @@ def _step_report_row_based(
|
|
|
10672
10717
|
header: str,
|
|
10673
10718
|
limit: int | None,
|
|
10674
10719
|
lang: str,
|
|
10675
|
-
):
|
|
10720
|
+
) -> GT:
|
|
10676
10721
|
# Get the length of the extracted data for the step
|
|
10677
10722
|
extract_length = get_row_count(extract)
|
|
10678
10723
|
|
|
@@ -10889,6 +10934,140 @@ def _step_report_row_based(
|
|
|
10889
10934
|
return step_report
|
|
10890
10935
|
|
|
10891
10936
|
|
|
10937
|
+
def _rows_distinct_details_html(
    direction_rtl: str, text: str, statement: str, trailing_html: str
) -> str:
    # Assemble the details markup shown under the step report title.
    return (
        f"<div style='font-size: 13.6px; {direction_rtl}'>"
        "<div style='padding-top: 7px;'>"
        f"{text}"
        "</div>"
        "<div style='padding-top: 7px;'>"
        f"{statement}"
        "</div>"
        f"{trailing_html}"
        "</div>"
    )


def _rows_distinct_apply_header(step_report: GT, header: str, title: str, details: str) -> GT:
    # Resolve the header template, render it through commonmark and attach it to the table.

    # Generate the default template text for the header when `":default:"` is used
    if header == ":default:":
        header = "{title}{details}"

    # Use commonmark to convert the header text to HTML
    header = commonmark.commonmark(header)

    # Place any templated text in the header
    header = header.format(title=title, details=details)

    # Create the header with `header` string
    return step_report.tab_header(title=md(header))


def _step_report_rows_distinct(
    i: int,
    column: list[str] | None,
    column_position: list[int],
    columns_subset: list[str] | None,
    n: int,
    n_failed: int,
    all_passed: bool,
    extract: any,
    tbl_preview: GT,
    header: str | None,
    limit: int | None,
    lang: str,
) -> GT:
    """
    Build the step report table for a `rows_distinct()` validation step.

    When every test unit passed, the report is the supplied table preview; otherwise it is a
    display table generated from the extract of failing rows. In both cases a header (title plus
    details) is attached unless `header` is `None`.

    Parameters
    ----------
    i
        The validation step number, interpolated into the report title.
    column
        The column names joined for the subset wording, or `None` to select the
        "all rows are distinct" wording.
    column_position
        Passed through to the text templates as `column_position`.
    columns_subset
        Forwarded to `_generate_display_table()` as its `columns_subset=` argument.
    n
        The number of test units, shown in the success or failure statement.
    n_failed
        The number of failing test units, shown in the failure statement.
    all_passed
        Whether all test units passed; selects between the preview and the extract report.
    extract
        The extracted rows for the step; its row count is obtained with `get_row_count()`.
    tbl_preview
        The table preview used as the report body when `all_passed` is true.
    header
        A header template that may use `{title}` and `{details}`; `":default:"` expands to
        `"{title}{details}"`, and `None` suppresses the header entirely.
    limit
        The maximum number of extract rows to show; `None` means the full extract length.
    lang
        The language key used to look up text in `STEP_REPORT_TEXT`.

    Returns
    -------
    GT
        The step report table, with a header attached unless `header` is `None`.
    """
    # Get the length of the extracted data for the step
    extract_length = get_row_count(extract)

    # Determine whether the `lang` value represents a right-to-left language
    direction_rtl = " direction: rtl;" if lang in RTL_LANGUAGES else ""

    # NOTE(review): the `rows_distinct_all` / `rows_distinct_subset` templates visible alongside
    # this function contain no placeholders, so the keyword arguments below are currently
    # ignored by `str.format()`; they are kept so that a template can start using them later
    if column is None:
        text = STEP_REPORT_TEXT["rows_distinct_all"][lang].format(column=column)
    else:
        columns_list = ", ".join(column)
        text = STEP_REPORT_TEXT["rows_distinct_subset"][lang].format(columns_subset=columns_list)

    if all_passed:
        # If `header` is None then don't add a header and just return the table preview
        if header is None:
            return tbl_preview

        title = STEP_REPORT_TEXT["report_for_step_i"][lang].format(i=i) + " " + CHECK_MARK_SPAN

        success_stmt = STEP_REPORT_TEXT["success_statement_no_column"][lang].format(
            n=n,
            column_position=column_position,
        )
        preview_stmt = STEP_REPORT_TEXT["preview_statement"][lang]

        details = _rows_distinct_details_html(
            direction_rtl=direction_rtl,
            text=text,
            statement=success_stmt,
            trailing_html=preview_stmt,
        )

        return _rows_distinct_apply_header(
            step_report=tbl_preview, header=header, title=title, details=details
        )

    if limit is None:
        limit = extract_length

    # Create a preview of the extracted data
    step_report = _generate_display_table(
        data=extract,
        columns_subset=columns_subset,
        n_head=limit,
        n_tail=0,
        limit=limit,
        min_tbl_width=600,
        incl_header=False,
        mark_missing_values=False,
    )

    # If `header` is None then don't add a header and just return the step report; checking
    # this before composing the header text avoids building strings that would be discarded
    if header is None:
        return step_report

    title = STEP_REPORT_TEXT["report_for_step_i"][lang].format(i=i)
    failure_rate_metrics = f"<strong>{n_failed}</strong> / <strong>{n}</strong>"

    failure_rate_stmt = STEP_REPORT_TEXT["failure_rate_summary_rows_distinct"][lang].format(
        failure_rate=failure_rate_metrics,
        column_position=column_position,
    )

    # Report either the first `limit` rows or the whole extract, whichever is shorter
    if limit < extract_length:  # pragma: no cover
        extract_text = STEP_REPORT_TEXT["extract_text_first_rows_distinct"][lang].format(
            extract_length_resolved=limit
        )
    else:
        extract_text = STEP_REPORT_TEXT["extract_text_all_rows_distinct"][lang].format(
            extract_length_resolved=extract_length
        )

    details = _rows_distinct_details_html(
        direction_rtl=direction_rtl,
        text=text,
        statement=failure_rate_stmt,
        trailing_html=extract_text,
    )

    return _rows_distinct_apply_header(
        step_report=step_report, header=header, title=title, details=details
    )
|
|
11069
|
+
|
|
11070
|
+
|
|
10892
11071
|
def _step_report_schema_in_order(
|
|
10893
11072
|
step: int, schema_info: dict, header: str, lang: str, debug_return_df: bool = False
|
|
10894
11073
|
) -> GT | any:
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
pointblank/__init__.py,sha256=c1lZsS_xsMq3OfkCuYQPxDByK_IRLGTYtd5n6uIveks,1555
|
|
2
2
|
pointblank/_constants.py,sha256=xbvHGDi5mt85FBnznXupwE79KttHFbORLVSQVXBKdXE,72533
|
|
3
3
|
pointblank/_constants_docs.py,sha256=JBmtt16zTYQ-zaM4ElLExtKs-dKlnN553Ys2ML1Y1C8,2099
|
|
4
|
-
pointblank/_constants_translations.py,sha256=
|
|
4
|
+
pointblank/_constants_translations.py,sha256=5I-QNY6b3wTIvDS1PzMG-uP2OkCB6c86NP2hr-RHji4,161031
|
|
5
5
|
pointblank/_interrogation.py,sha256=AtygXSb5iaqUcobnfVF3HjO9mjrtPWkLJ8No9XFSvR8,73186
|
|
6
6
|
pointblank/_typing.py,sha256=YQ6Bt-j-W6Cg91qXHHDzBM-ptc-IEvhMg6T5ugWnGwM,306
|
|
7
7
|
pointblank/_utils.py,sha256=Loyu9qo_QR3lgtsWYmFsxfVQCxdU_GWOAk9LqrQq0Wo,24630
|
|
@@ -15,7 +15,7 @@ pointblank/draft.py,sha256=lIbSlY9Avi1GbRvJhqR-69sGWCfD11im3Go20XsX8L0,15783
|
|
|
15
15
|
pointblank/schema.py,sha256=gzUCmtccO2v15MH2bo9uHUYjkKEEne1okQucxcH39pc,44291
|
|
16
16
|
pointblank/tf.py,sha256=8o_8m4i01teulEe3-YYMotSNf3tImjBMInsvdjSAO5Q,8844
|
|
17
17
|
pointblank/thresholds.py,sha256=C8_Rn2z3MVFu4UH5eaGRd7DkW3slgkWB3Hhim2h5CfU,25340
|
|
18
|
-
pointblank/validate.py,sha256=
|
|
18
|
+
pointblank/validate.py,sha256=fnm3xy85AcMPA7v2n9s2NsbCWHjG7A8hcwo7a7lm2N8,500689
|
|
19
19
|
pointblank/data/api-docs.txt,sha256=u9Q0eWlTLW396YSp2lY15bh_omw01XnGPF_jirODLCQ,397547
|
|
20
20
|
pointblank/data/game_revenue-duckdb.zip,sha256=tKIVx48OGLYGsQPS3h5AjA2Nyq_rfEpLCjBiFUWhagU,35880
|
|
21
21
|
pointblank/data/game_revenue.zip,sha256=7c9EvHLyi93CHUd4p3dM4CZ-GucFCtXKSPxgLojL32U,33749
|
|
@@ -24,8 +24,8 @@ pointblank/data/nycflights.zip,sha256=yVjbUaKUz2LydSdF9cABuir0VReHBBgV7shiNWSd0m
|
|
|
24
24
|
pointblank/data/polars-api-docs.txt,sha256=KGcS-BOtUs9zgpkWfXD-GFdFh4O_zjdkpX7msHjztLg,198045
|
|
25
25
|
pointblank/data/small_table-duckdb.zip,sha256=BhTaZ2CRS4-9Z1uVhOU6HggvW3XCar7etMznfENIcOc,2028
|
|
26
26
|
pointblank/data/small_table.zip,sha256=lmFb90Nb-v5X559Ikjg31YLAXuRyMkD9yLRElkXPMzQ,472
|
|
27
|
-
pointblank-0.8.
|
|
28
|
-
pointblank-0.8.
|
|
29
|
-
pointblank-0.8.
|
|
30
|
-
pointblank-0.8.
|
|
31
|
-
pointblank-0.8.
|
|
27
|
+
pointblank-0.8.5.dist-info/licenses/LICENSE,sha256=apLF-HWPNU7pT5bmf5KmZpD5Cklpy2u-BN_0xBoRMLY,1081
|
|
28
|
+
pointblank-0.8.5.dist-info/METADATA,sha256=BPfM_mGzEYNoSFSys35V2vBCGhR1hywdgyoRxGTmpEo,12839
|
|
29
|
+
pointblank-0.8.5.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
|
30
|
+
pointblank-0.8.5.dist-info/top_level.txt,sha256=-wHrS1SvV8-nhvc3w-PPYs1C1WtEc1pK-eGjubbCCKc,11
|
|
31
|
+
pointblank-0.8.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|