pointblank 0.9.0__py3-none-any.whl → 0.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pointblank/_constants.py CHANGED
@@ -39,6 +39,7 @@ ASSERTION_TYPE_METHOD_MAP = {
    "col_vals_expr": "expr",
    "col_exists": "col_exists",
    "rows_distinct": "rows_distinct",
+   "rows_complete": "rows_complete",
    "col_schema_match": "col_schema_match",
    "row_count_match": "row_count_match",
    "col_count_match": "col_count_match",
@@ -63,6 +64,7 @@ METHOD_CATEGORY_MAP = {
    "col_exists": "COL_EXISTS_HAS_TYPE",
    "expr": "COMPARE_EXPR",
    "rows_distinct": "ROWS_DISTINCT",
+   "rows_complete": "ROWS_COMPLETE",
    "col_schema_match": "COL_SCHEMA_MATCH",
    "row_count_match": "ROW_COUNT_MATCH",
    "col_count_match": "COL_COUNT_MATCH",
@@ -375,6 +377,19 @@ SVG_ICONS_FOR_ASSERTION_TYPES = {
    </g>
    </g>
    </g>
+   </svg>""",
+   "rows_complete": """<?xml version="1.0" encoding="UTF-8"?>
+   <svg width="67px" height="67px" viewBox="0 0 67 67" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+   <title>rows_complete</title>
+   <g id="All-Icons" stroke="none" stroke-width="1" fill="none" fill-rule="evenodd">
+   <g id="rows_complete" transform="translate(0.000000, 0.965517)">
+   <path d="M56.712234,1 C59.1975153,1 61.4475153,2.00735931 63.076195,3.63603897 C64.7048747,5.26471863 65.712234,7.51471863 65.712234,10 L65.712234,10 L65.712234,65 L10.712234,65 C8.22695259,65 5.97695259,63.9926407 4.34827294,62.363961 C2.71959328,60.7352814 1.71223397,58.4852814 1.71223397,56 L1.71223397,56 L1.71223397,10 C1.71223397,7.51471863 2.71959328,5.26471863 4.34827294,3.63603897 C5.97695259,2.00735931 8.22695259,1 10.712234,1 L10.712234,1 Z" id="rectangle" stroke="#000000" stroke-width="2" fill="#FFFFFF"></path>
+   <g id="complete_me" transform="translate(12.500000, 9.500000)" fill="#000000">
+   <path d="M8,0 L8,10 L16,10 L16,18 L26,18 L26,10 L34,10 L34,0 L8,0 Z M10,2 L16,2 L16,8 L10,8 L10,2 Z M18,2 L24,2 L24,8 L18,8 L18,2 Z M26,2 L32,2 L32,8 L26,8 L26,2 Z M18,10 L24,10 L24,16 L18,16 L18,10 Z M0,21 L0,47 L42,47 L42,21 L32,21 L32,29 L24,29 L24,37 L18,37 L18,29 L10,29 L10,21 L0,21 Z M2,23 L8,23 L8,29 L2,29 L2,23 Z M34,23 L40,23 L40,29 L34,29 L34,23 Z M2,31 L8,31 L8,37 L2,37 L2,31 Z M10,31 L16,31 L16,37 L10,37 L10,31 Z M26,31 L32,31 L32,37 L26,37 L26,31 Z M34,31 L40,31 L40,37 L34,37 L34,31 Z M2,39 L8,39 L8,45 L2,45 L2,39 Z M10,39 L16,39 L16,45 L10,45 L10,39 Z M18,39 L24,39 L24,45 L18,45 L18,39 Z M26,39 L32,39 L32,45 L26,45 L26,39 Z M34,39 L40,39 L40,45 L34,45 L34,39 Z" id="Shape" fill-rule="nonzero"></path>
+   <path d="M22.4566476,18.35817 C22.9253976,18.29567 23.3746166,18.569108 23.5308666,19.01442 C23.6910226,19.459733 23.5152416,19.955826 23.1128976,20.20192 L23.1128976,20.20192 L20.2066476,22.38942 L25.7989286,22.3893123 L25.7989286,24.3893123 L20.2066476,24.38942 L23.1128976,26.57692 C23.5621166,26.912858 23.6519606,27.549576 23.3160226,27.998795 C22.9800856,28.448014 22.3433666,28.537858 21.8941476,28.20192 L21.8941476,28.20192 L16.6128976,24.20192 C16.3511786,24.01442 16.1949286,23.709733 16.1949286,23.38942 C16.1949286,23.069108 16.3511786,22.76442 16.6128976,22.57692 L16.6128976,22.57692 L21.8941476,18.57692 C22.0230536,18.479264 22.1714916,18.416764 22.3316476,18.38942 C22.3707106,18.377701 22.4136786,18.365983 22.4566476,18.35817 Z" id="arrow_right" transform="translate(20.997393, 23.377149) rotate(-90.000000) translate(-20.997393, -23.377149) "></path>
+   </g>
+   </g>
+   </g>
    </svg>""",
    "col_schema_match": """<?xml version="1.0" encoding="UTF-8"?>
    <svg width="67px" height="67px" viewBox="0 0 67 67" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
@@ -728,6 +728,114 @@ EXPECT_FAIL_TEXT = {
        "hi": "चयनित स्तंभों में पंक्तियां पूरी तरह से अलग नहीं थीं, ऐसे असफल परीक्षण इकाइयों की अधिकता।",
        "el": "Υπέρβαση αποτυχημένων μονάδων δοκιμής όπου δεν υπήρχαν διακριτές γραμμές στις επιλεγμένες στήλες.",
    },
+   "all_row_complete_expectation_text": {
+       "en": "Expect entirely complete rows across all columns.",
+       "fr": "On s'attend à des lignes entièrement complètes dans toutes les colonnes.",
+       "de": "Erwarten Sie vollständig komplette Zeilen über alle Spalten hinweg.",
+       "it": "Aspettati righe completamente complete su tutte le colonne.",
+       "es": "Se espera que las filas estén completamente completas en todas las columnas.",
+       "pt": "Espera-se linhas completamente preenchidas em todas as colunas.",
+       "ro": "Se așteaptă ca rândurile să fie complet complete în toate coloanele.",
+       "tr": "Tüm sütunlarda tamamen eksiksiz satırlar bekleyin.",
+       "zh-Hans": "预期所有列中的行都是完整的。",
+       "zh-Hant": "預期所有列中的行都是完整的。",
+       "ja": "すべての列で完全に完全な行を期待します。",
+       "ko": "모든 열에서 완전히 완성된 행을 기대합니다.",
+       "vi": "Kỳ vọng các hàng hoàn toàn đầy đủ trên tất cả các cột.",
+       "ru": "Ожидайте полностью заполненные строки по всем столбцам.",
+       "cs": "Očekávejte zcela kompletní řádky ve všech sloupcích.",
+       "pl": "Spodziewaj się w pełni kompletnych wierszy we wszystkich kolumnach.",
+       "da": "Forvent helt komplette rækker på tværs af alle kolonner.",
+       "sv": "Förvänta dig helt kompletta rader över alla kolumner.",
+       "nb": "Forvent helt komplette rader på tvers av alle kolonner.",
+       "nl": "Verwacht volledig complete rijen in alle kolommen.",
+       "fi": "Odota täysin täydellisiä rivejä kaikissa sarakkeissa.",
+       "is": "Væntir þess að allar raðir séu heildstæðar yfir alla dálka.",
+       "ar": "توقع صفوف مكتملة تمامًا عبر جميع الأعمدة.",
+       "hi": "सभी स्तंभों में पूरी तरह से पूर्ण पंक्तियों की अपेक्षा करें।",
+       "el": "Αναμένεται πλήρως ολοκληρωμένες γραμμές σε όλες τις στήλες.",
+   },
+   "all_row_complete_failure_text": {
+       "en": "Exceedance of failed test units where there weren't complete rows across all columns.",
+       "fr": "Dépassement des unités de test ayant échoué là où il n'y avait pas de lignes complètes dans toutes les colonnes.",
+       "de": "Überschreitung fehlgeschlagener Testeinheiten, bei denen nicht vollständige Zeilen über alle Spalten hinweg vorhanden waren.",
+       "it": "Superamento delle unità di test fallite in cui non c'erano righe complete su tutte le colonne.",
+       "es": "Se superó el número de unidades de prueba fallidas donde no había filas completas en todas las columnas.",
+       "pt": "Excedeu o número de unidades de teste com falha onde não havia linhas completas em todas as colunas.",
+       "ro": "Depășirea unităților de test eșuate unde nu au existat rânduri complete în toate coloanele.",
+       "tr": "Tüm sütunlarda eksiksiz satırların olmadığı başarısız test birimlerinin aşılması.",
+       "zh-Hans": "错误过多,其中在所有列中行不完整。",
+       "zh-Hant": "錯誤過多,在所有列中沒有完整的行。",
+       "ja": "すべての列で完全な行がないテスト単位の失敗の超過。",
+       "ko": "모든 열에 걸쳐 완전한 행이 아니었던 실패한 테스트 단위 초과.",
+       "vi": "Vượt quá số đơn vị kiểm tra thất bại trong đó không có các hàng đầy đủ trên tất cả các cột.",
+       "ru": "Превышение неудачных тестовых единиц, где не было полных строк по всем столбцам.",
+       "cs": "Překročení počtu neúspěšných testů, kde nebyly úplné řádky ve všech sloupcích.",
+       "pl": "Przekroczenie liczby niepomyślnych jednostek testowych, w których nie było kompletnych wierszy we wszystkich kolumnach.",
+       "da": "Overskridelse af antal fejlslagne enhedstests, hvor der ikke var komplette rækker på tværs af alle kolonner.",
+       "sv": "Överstiger antalet misslyckade enhetstest där det inte fanns kompletta rader över alla kolumner.",
+       "nb": "Overskridelse av mislykkede testenheter hvor det ikke var komplette rader på tvers av alle kolonner.",
+       "nl": "Overschrijding van mislukte testeenheden waarbij er geen complete rijen waren in alle kolommen.",
+       "fi": "Epäonnistuneiden testiyksikköjen ylitys, joissa ei ollut täydellisiä rivejä kaikissa sarakkeissa.",
+       "is": "Of mörg misheppnuð próf þar sem raðir voru ekki heildstæðar yfir alla dálka.",
+       "ar": "تجاوز وحدات الاختبار الفاشلة حيث لم تكن هناك صفوف مكتملة عبر جميع الأعمدة.",
+       "hi": "सभी स्तंभों में पूर्ण पंक्तियां नहीं थीं, ऐसे असफल परीक्षण इकाइयों की अधिकता।",
+       "el": "Υπέρβαση αποτυχημένων μονάδων δοκιμής όπου δεν υπήρχαν πλήρεις γραμμές σε όλες τις στήλες.",
+   },
+   "across_row_complete_expectation_text": {
+       "en": "Expect entirely complete rows across {column_text}.",
+       "fr": "On s'attend à des lignes entièrement complètes dans {column_text}.",
+       "de": "Erwarten Sie vollständig komplette Zeilen über {column_text} hinweg.",
+       "it": "Aspettati righe completamente complete su {column_text}.",
+       "es": "Se espera que las filas estén completamente completas en {column_text}.",
+       "pt": "Espera-se linhas completamente preenchidas em {column_text}.",
+       "ro": "Se așteaptă ca rândurile să fie complet complete în {column_text}.",
+       "tr": "{column_text} boyunca tamamen eksiksiz satırlar bekleyin.",
+       "zh-Hans": "预期在{column_text}中的行是完整的。",
+       "zh-Hant": "預期在{column_text}中的行是完整的。",
+       "ja": "{column_text}において完全に完全な行を期待します。",
+       "ko": "{column_text}에서 완전히 완성된 행을 기대합니다.",
+       "vi": "Kỳ vọng các hàng hoàn toàn đầy đủ trên {column_text}.",
+       "ru": "Ожидайте полностью заполненные строки в {column_text}.",
+       "cs": "Očekávejte zcela kompletní řádky v {column_text}.",
+       "pl": "Spodziewaj się w pełni kompletnych wierszy w {column_text}.",
+       "da": "Forvent helt komplette rækker på tværs af {column_text}.",
+       "sv": "Förvänta dig helt kompletta rader över {column_text}.",
+       "nb": "Forvent helt komplette rader på tvers av {column_text}.",
+       "nl": "Verwacht volledig complete rijen in {column_text}.",
+       "fi": "Odota täysin täydellisiä rivejä sarakkeissa {column_text}.",
+       "is": "Væntir þess að allar raðir séu heildstæðar yfir {column_text}.",
+       "ar": "توقع صفوف مكتملة تمامًا عبر {column_text}.",
+       "hi": "{column_text} में पूरी तरह से पूर्ण पंक्तियों की अपेक्षा करें।",
+       "el": "Αναμένεται πλήρως ολοκληρωμένες γραμμές στις στήλες {column_text}.",
+   },
+   "across_row_complete_failure_text": {
+       "en": "Exceedance of failed test units where there weren't complete rows across selected columns.",
+       "fr": "Dépassement des unités de test ayant échoué là où il n'y avait pas de lignes complètes dans les colonnes sélectionnées.",
+       "de": "Überschreitung fehlgeschlagener Testeinheiten, bei denen nicht vollständige Zeilen über die ausgewählten Spalten hinweg vorhanden waren.",
+       "it": "Superamento delle unità di test fallite in cui non c'erano righe complete nelle colonne selezionate.",
+       "es": "Se superó el número de unidades de prueba fallidas donde no había filas completas en las columnas seleccionadas.",
+       "pt": "Excedeu o número de unidades de teste com falha onde não havia linhas completas nas colunas selecionadas.",
+       "ro": "Depășirea unităților de test eșuate unde nu au existat rânduri complete în coloanele selectate.",
+       "tr": "Seçili sütunlarda eksiksiz satırların olmadığı başarısız test birimlerinin aşılması.",
+       "zh-Hans": "错误过多,其中在所选列中行不完整。",
+       "zh-Hant": "錯誤過多,在所選列中沒有完整的行。",
+       "ja": "選択された列で完全な行がないテスト単位の失敗の超過。",
+       "ko": "선택된 열에서 완전한 행이 아니었던 실패한 테스트 단위 초과.",
+       "vi": "Vượt quá số đơn vị kiểm tra thất bại trong đó không có các hàng đầy đủ trên các cột đã chọn.",
+       "ru": "Превышение неудачных тестовых единиц, где не было полных строк в выбранных столбцах.",
+       "cs": "Překročení počtu neúspěšných testů, kde nebyly úplné řádky ve vybraných sloupcích.",
+       "pl": "Przekroczenie liczby niepomyślnych jednostek testowych, w których nie było kompletnych wierszy w wybranych kolumnach.",
+       "da": "Overskridelse af antal fejlslagne enhedstests, hvor der ikke var komplette rækker på tværs af valgte kolonner.",
+       "sv": "Överstiger antalet misslyckade enhetstest där det inte fanns kompletta rader över valda kolumner.",
+       "nb": "Overskridelse av mislykkede testenheter hvor det ikke var komplette rader på tvers av valgte kolonner.",
+       "nl": "Overschrijding van mislukte testeenheden waarbij er geen complete rijen waren in geselecteerde kolommen.",
+       "fi": "Epäonnistuneiden testiyksikköjen ylitys, joissa ei ollut täydellisiä rivejä valituissa sarakkeissa.",
+       "is": "Of mörg misheppnuð próf þar sem raðir voru ekki heildstæðar yfir valda dálka.",
+       "ar": "تجاوز وحدات الاختبار الفاشلة حيث لم تكن هناك صفوف مكتملة عبر الأعمدة المحددة.",
+       "hi": "चयनित स्तंभों में पूर्ण पंक्तियां नहीं थीं, ऐसे असफल परीक्षण इकाइयों की अधिकता।",
+       "el": "Υπέρβαση αποτυχημένων μονάδων δοκιμής όπου δεν υπήρχαν πλήρεις γραμμές στις επιλεγμένες στήλες.",
+   },
    "col_schema_match_expectation_text": {
        "en": "Expect that column schemas match.",
        "fr": "On s'attend à ce que les schémas de colonnes correspondent.",
@@ -1735,6 +1843,60 @@ STEP_REPORT_TEXT = {
        "hi": "पंक्तियां स्तंभों के एक उपसमूह में अलग-अलग हैं",
        "el": "Οι γραμμές είναι διακριτές σε ένα υποσύνολο στηλών",
    },
+   "rows_complete_all": {
+       "en": "All rows are complete",
+       "fr": "Toutes les lignes sont complètes",
+       "de": "Alle Zeilen sind vollständig",
+       "it": "Tutte le righe sono complete",
+       "es": "Todas las filas están completas",
+       "pt": "Todas as linhas estão completas",
+       "ro": "Toate rândurile sunt complete",
+       "tr": "Tüm satırlar eksiksizdir",
+       "zh-Hans": "所有行都是完整的",
+       "zh-Hant": "所有行都是完整的",
+       "ja": "すべての行が完全です",
+       "ko": "모든 행이 완전합니다",
+       "vi": "Tất cả các hàng đều đầy đủ",
+       "ru": "Все строки заполнены полностью",
+       "cs": "Všechny řádky jsou úplné",
+       "pl": "Wszystkie wiersze są kompletne",
+       "da": "Alle rækker er komplette",
+       "sv": "Alla rader är kompletta",
+       "nb": "Alle rader er komplette",
+       "nl": "Alle rijen zijn compleet",
+       "fi": "Kaikki rivit ovat täydellisiä",
+       "is": "Allar raðir eru heildstæðar",
+       "ar": "جميع الصفوف مكتملة",
+       "hi": "सभी पंक्तियां पूर्ण हैं",
+       "el": "Όλες οι γραμμές είναι πλήρεις",
+   },
+   "rows_complete_subset": {
+       "en": "Rows are complete across a subset of columns",
+       "fr": "Les lignes sont complètes sur un sous-ensemble de colonnes",
+       "de": "Zeilen sind in einer Teilmenge von Spalten vollständig",
+       "it": "Le righe sono complete in un sottoinsieme di colonne",
+       "es": "Las filas están completas en un subconjunto de columnas",
+       "pt": "As linhas estão completas em um subconjunto de colunas",
+       "ro": "Rândurile sunt complete într-un subset de coloane",
+       "tr": "Satırlar, sütunların bir alt kümesinde eksiksizdir",
+       "zh-Hans": "行在列的子集中是完整的",
+       "zh-Hant": "行在列的子集中是完整的",
+       "ja": "行は列のサブセット間で完全です",
+       "ko": "행이 열의 하위 집합에서 완전합니다",
+       "vi": "Các hàng đầy đủ trong một tập con của các cột",
+       "ru": "Строки полностью заполнены в подмножестве столбцов",
+       "cs": "Řádky jsou úplné napříč podmnožinou sloupců",
+       "pl": "Wiersze są kompletne w podzbiorze kolumn",
+       "da": "Rækker er komplette på tværs af en delmængde af kolonner",
+       "sv": "Rader är kompletta över en delmängd av kolumner",
+       "nb": "Rader er komplette på tvers av en delmengde av kolonner",
+       "nl": "Rijen zijn compleet over een subset van kolommen",
+       "fi": "Rivit ovat täydellisiä sarakkeiden osajoukossa",
+       "is": "Raðir eru heildstæðar í undirsafni dálka",
+       "ar": "الصفوف مكتملة عبر مجموعة فرعية من الأعمدة",
+       "hi": "पंक्तियां स्तंभों के एक उपसमूह में पूर्ण हैं",
+       "el": "Οι γραμμές είναι πλήρεις σε ένα υποσύνολο στηλών",
+   },
    "report_for_step_i": {
        "en": "Report for Validation Step {i}",
        "fr": "Rapport pour l'étape de validation {i}",
pointblank/_interrogation.py CHANGED
@@ -1,5 +1,6 @@
from __future__ import annotations

+import functools
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any

@@ -1219,6 +1220,36 @@ class Interrogator:

        return tbl.to_native()

+    def rows_complete(self) -> FrameT | Any:
+        # Ibis backends ---------------------------------------------
+
+        if self.tbl_type in IBIS_BACKENDS:
+            tbl = self.x
+
+            # Determine the number of null values in each row (column subsets are handled in
+            # the `_check_nulls_across_columns_ibis()` function)
+            tbl = _check_nulls_across_columns_ibis(table=tbl, columns_subset=self.columns_subset)
+
+            # Failing rows will have the value `True` in the generated column, so we need to negate
+            # the result to get the passing rows
+            return tbl.mutate(pb_is_good_=~tbl["_any_is_null_"]).drop("_any_is_null_")
+
+        # Local backends (Narwhals) ---------------------------------
+
+        tbl = self.x
+
+        # Determine the number of null values in each row (column subsets are handled in
+        # the `_check_nulls_across_columns_nw()` function)
+        tbl = _check_nulls_across_columns_nw(table=tbl, columns_subset=self.columns_subset)
+
+        # Failing rows will have the value `True` in the generated column, so we need to negate
+        # the result to get the passing rows
+        tbl = tbl.with_columns(pb_is_good_=~nw.col("_any_is_null_"))
+        tbl = tbl.drop("_any_is_null_")
+
+        # Convert the table to a native format
+        return tbl.to_native()
+

@dataclass
class ColValsCompareOne:
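The new interrogation method reduces to one idea: flag every row that holds at least one null, then negate that flag into the pass/fail column. A minimal standalone sketch of the same transformation in plain Polars (an illustrative equivalent; the package itself routes this through Narwhals or Ibis):

```python
import polars as pl

tbl = pl.DataFrame(
    {
        "col_1": ["a", None, "c", "d"],
        "col_2": ["a", "a", "c", None],
    }
)

# Mirror of the `pb_is_good_` column: True where the row has no nulls at all
out = tbl.with_columns(pb_is_good_=~pl.any_horizontal(pl.all().is_null()))
print(out)
```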
@@ -1794,6 +1825,58 @@ class RowsDistinct:
        return self.test_unit_res


+@dataclass
+class RowsComplete:
+    """
+    Check if rows in a DataFrame are complete.
+
+    Parameters
+    ----------
+    data_tbl
+        A data table.
+    columns_subset
+        A list of columns to check for completeness.
+    threshold
+        The maximum number of failing test units to allow.
+    tbl_type
+        The type of table to use for the assertion.
+
+    Returns
+    -------
+    bool
+        `True` when test units pass below the threshold level for failing test units, `False`
+        otherwise.
+    """
+
+    data_tbl: FrameT
+    columns_subset: list[str] | None
+    threshold: int
+    tbl_type: str = "local"
+
+    def __post_init__(self):
+        if self.tbl_type == "local":
+            # Convert the DataFrame to a format that narwhals can work with, and:
+            # - check if the `column=` exists
+            # - check if the `column=` type is compatible with the test
+            tbl = _column_subset_test_prep(df=self.data_tbl, columns_subset=self.columns_subset)
+
+        # TODO: For Ibis backends, check if the column exists and if the column type is compatible;
+        # for now, just pass the table as is
+        if self.tbl_type in IBIS_BACKENDS:
+            tbl = self.data_tbl
+
+        # Collect results for the test units; the results are a list of booleans where
+        # `True` indicates a passing test unit
+        self.test_unit_res = Interrogator(
+            x=tbl,
+            columns_subset=self.columns_subset,
+            tbl_type=self.tbl_type,
+        ).rows_complete()
+
+    def get_test_results(self):
+        return self.test_unit_res
+
+
@dataclass
class ColSchemaMatch:
    """
@@ -2207,6 +2290,40 @@ def _column_has_null_values(table: FrameT, column: str) -> bool:
    return True


+def _check_nulls_across_columns_ibis(table, columns_subset):
+    # Get all column names from the table
+    column_names = columns_subset if columns_subset else table.columns
+
+    # Build the expression by combining each column's isnull() with OR operations
+    null_expr = functools.reduce(
+        lambda acc, col: acc | table[col].isnull() if acc is not None else table[col].isnull(),
+        column_names,
+        None,
+    )
+
+    # Add the expression as a new column to the table
+    result = table.mutate(_any_is_null_=null_expr)
+
+    return result
+
+
+def _check_nulls_across_columns_nw(table, columns_subset):
+    # Get all column names from the table
+    column_names = columns_subset if columns_subset else table.columns
+
+    # Build the expression by combining each column's `is_null()` with OR operations
+    null_expr = functools.reduce(
+        lambda acc, col: acc | table[col].is_null() if acc is not None else table[col].is_null(),
+        column_names,
+        None,
+    )
+
+    # Add the expression as a new column to the table
+    result = table.with_columns(_any_is_null_=null_expr)
+
+    return result
+
+
def _modify_datetime_compare_val(tgt_column: any, compare_val: any) -> any:
    tgt_col_dtype_str = str(tgt_column.dtype).lower()

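Both helpers rely on the same `functools.reduce` fold, seeded with `None` so no identity element is needed for the first `|`; note that the conditional inside the lambda binds over the whole `acc | ...` expression, which is what makes the seed work. The same fold over plain Python values, as a simplified sketch (not package code):

```python
import functools

row = {"col_1": "a", "col_2": None, "col_3": "c"}

# acc starts as None; the first column's null check replaces it, and every
# later column is OR-ed in, just like the column expressions above.
any_is_null = functools.reduce(
    lambda acc, col: acc or (row[col] is None) if acc is not None else (row[col] is None),
    row.keys(),
    None,
)
print(any_is_null)  # True: this row is incomplete
```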
pointblank/_utils.py CHANGED
@@ -485,6 +485,7 @@ def _get_api_text() -> str:
        "Validate.col_vals_expr",
        "Validate.col_exists",
        "Validate.rows_distinct",
+       "Validate.rows_complete",
        "Validate.col_schema_match",
        "Validate.row_count_match",
        "Validate.col_count_match",
@@ -4367,6 +4367,192 @@ rows_distinct(self, columns_subset: 'str | list[str] | None' = None, pre: 'Calla
    others.


+rows_complete(self, columns_subset: 'str | list[str] | None' = None, pre: 'Callable | None' = None, segments: 'SegmentSpec | None' = None, thresholds: 'int | float | bool | tuple | dict | Thresholds' = None, actions: 'Actions | None' = None, brief: 'str | bool | None' = None, active: 'bool' = True) -> 'Validate'
+
+    Validate whether row data are complete by having no missing values.
+
+    The `rows_complete()` method checks whether rows in the table are complete. Completeness
+    of a row means that there are no missing values within the row. This validation will operate
+    over the number of test units that is equal to the number of rows in the table (determined
+    after any `pre=` mutation has been applied). A subset of columns can be specified for the
+    completeness check. If no subset is provided, all columns in the table will be used.
+
+    Parameters
+    ----------
+    columns_subset
+        A single column or a list of columns to use as a subset for the completeness check. If
+        `None` (the default), then all columns in the table will be used.
+    pre
+        An optional preprocessing function or lambda to apply to the data table during
+        interrogation. This function should take a table as input and return a modified table.
+        Have a look at the *Preprocessing* section for more information on how to use this
+        argument.
+    segments
+        An optional directive on segmentation, which serves to split a validation step into
+        multiple (one step per segment). Can be a single column name, a tuple that specifies a
+        column name and its corresponding values to segment on, or a combination of both
+        (provided as a list). Read the *Segmentation* section for usage information.
+    thresholds
+        Set threshold failure levels for reporting and reacting to exceedances of the levels.
+        The thresholds are set at the step level and will override any global thresholds set in
+        `Validate(thresholds=...)`. The default is `None`, which means that no thresholds will
+        be set locally and global thresholds (if any) will take effect. Look at the *Thresholds*
+        section for information on how to set threshold levels.
+    actions
+        Optional actions to take when the validation step meets or exceeds any set threshold
+        levels. If provided, the [`Actions`](`pointblank.Actions`) class should be used to
+        define the actions.
+    brief
+        An optional brief description of the validation step that will be displayed in the
+        reporting table. You can use templating elements like `"{step}"` to insert
+        the step number, or `"{auto}"` to include an automatically generated brief. If `True`,
+        the entire brief will be automatically generated. If `None` (the default) then there
+        won't be a brief.
+    active
+        A boolean value indicating whether the validation step should be active. Using `False`
+        will make the validation step inactive (still reporting its presence and keeping indexes
+        for the steps unchanged).
+
+    Returns
+    -------
+    Validate
+        The `Validate` object with the added validation step.
+
+    Preprocessing
+    -------------
+    The `pre=` argument allows for a preprocessing function or lambda to be applied to the data
+    table during interrogation. This function should take a table as input and return a modified
+    table. This is useful for performing any necessary transformations or filtering on the data
+    before the validation step is applied.
+
+    The preprocessing function can be any callable that takes a table as input and returns a
+    modified table. For example, you could use a lambda function to filter the table based on
+    certain criteria or to apply a transformation to the data. Note that you can refer to
+    columns via `columns_subset=` that are expected to be present in the transformed table, but
+    may not exist in the table before preprocessing. Regarding the lifetime of the transformed
+    table, it only exists during the validation step and is not stored in the `Validate` object
+    or used in subsequent validation steps.
+
+    Segmentation
+    ------------
+    The `segments=` argument allows for the segmentation of a validation step into multiple
+    segments. This is useful for applying the same validation step to different subsets of the
+    data. The segmentation can be done based on a single column or specific fields within a
+    column.
+
+    Providing a single column name will result in a separate validation step for each unique
+    value in that column. For example, if you have a column called `"region"` with values
+    `"North"`, `"South"`, and `"East"`, the validation step will be applied separately to each
+    region.
+
+    Alternatively, you can provide a tuple that specifies a column name and its corresponding
+    values to segment on. For example, if you have a column called `"date"` and you want to
+    segment on only specific dates, you can provide a tuple like
+    `("date", ["2023-01-01", "2023-01-02"])`. Any other values in the column will be disregarded
+    (i.e., no validation steps will be created for them).
+
+    A list with a combination of column names and tuples can be provided as well. This allows
+    for more complex segmentation scenarios. The following inputs are all valid:
+
+    - `segments=["region", ("date", ["2023-01-01", "2023-01-02"])]`: segments on unique values
+      in the `"region"` column and specific dates in the `"date"` column
+    - `segments=["region", "date"]`: segments on unique values in the `"region"` and `"date"`
+      columns
+
+    The segmentation is performed during interrogation, and the resulting validation steps will
+    be numbered sequentially. Each segment will have its own validation step, and the results
+    will be reported separately. This allows for a more granular analysis of the data and helps
+    identify issues within specific segments.
+
+    Importantly, the segmentation process will be performed after any preprocessing of the data
+    table. Because of this, one can conceivably use the `pre=` argument to generate a column
+    that can be used for segmentation. For example, you could create a new column called
+    `"segment"` through use of `pre=` and then use that column for segmentation.
+
+    Thresholds
+    ----------
+    The `thresholds=` parameter is used to set the failure-condition levels for the validation
+    step. If they are set here at the step level, these thresholds will override any thresholds
+    set at the global level in `Validate(thresholds=...)`.
+
+    There are three threshold levels: 'warning', 'error', and 'critical'. The threshold values
+    can either be set as a proportion failing of all test units (a value between `0` and `1`),
+    or as the absolute number of failing test units (an integer that's `1` or greater).
+
+    Thresholds can be defined using one of these input schemes:
+
+    1. use the [`Thresholds`](`pointblank.Thresholds`) class (the most direct way to create
+       thresholds)
+    2. provide a tuple of 1-3 values, where position `0` is the 'warning' level, position `1` is
+       the 'error' level, and position `2` is the 'critical' level
+    3. create a dictionary of 1-3 value entries; the valid keys are: 'warning', 'error', and
+       'critical'
+    4. a single integer/float value denoting absolute number or fraction of failing test units
+       for the 'warning' level only
+
+    If the number of failing test units exceeds set thresholds, the validation step will be
+    marked as 'warning', 'error', or 'critical'. All of the threshold levels don't need to be
+    set; you're free to set any combination of them.
+
+    Aside from reporting failure conditions, thresholds can be used to determine the actions to
+    take for each level of failure (using the `actions=` parameter).
+
+    Examples
+    --------
+    For the examples here, we'll use a simple Polars DataFrame with three string columns
+    (`col_1`, `col_2`, and `col_3`). The table is shown below:
+
+    ```python
+    import pointblank as pb
+    import polars as pl
+
+    tbl = pl.DataFrame(
+        {
+            "col_1": ["a", None, "c", "d"],
+            "col_2": ["a", "a", "c", None],
+            "col_3": ["a", "a", "d", None],
+        }
+    )
+
+    pb.preview(tbl)
+    ```
+
+    Let's validate that the rows in the table are complete with `rows_complete()`. We'll
+    determine if this validation had any failing test units (there are four test units, one for
+    each row). A failing test unit means that a given row is not complete (i.e., has at least
+    one missing value).
+
+    ```python
+    validation = (
+        pb.Validate(data=tbl)
+        .rows_complete()
+        .interrogate()
+    )
+
+    validation
+    ```
+
+    From this validation table we see that there are two failing test units. This is because
+    two rows in the table have at least one missing value (the second row and the last row).
+
+    We can also use a subset of columns to determine completeness. Let's specify the subset
+    using columns `col_2` and `col_3` for the next validation.
+
+    ```python
+    validation = (
+        pb.Validate(data=tbl)
+        .rows_complete(columns_subset=["col_2", "col_3"])
+        .interrogate()
+    )
+
+    validation
+    ```
+
+    The validation table reports a single failing test unit. The last row contains missing
+    values in both the `col_2` and `col_3` columns.
+
+

col_schema_match(self, schema: 'Schema', complete: 'bool' = True, in_order: 'bool' = True, case_sensitive_colnames: 'bool' = True, case_sensitive_dtypes: 'bool' = True, full_match_dtypes: 'bool' = True, pre: 'Callable | None' = None, thresholds: 'int | float | bool | tuple | dict | Thresholds' = None, actions: 'Actions | None' = None, brief: 'str | bool | None' = None, active: 'bool' = True) -> 'Validate'

    Do columns in the table (and their types) match a predefined schema?
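As a quick cross-check of the "two failing test units" claim in the example above, incomplete rows can be counted directly; a sketch in plain Polars using the same data:

```python
import polars as pl

tbl = pl.DataFrame(
    {
        "col_1": ["a", None, "c", "d"],
        "col_2": ["a", "a", "c", None],
        "col_3": ["a", "a", "d", None],
    }
)

# Failing test units are exactly the rows holding at least one null
n_failed = tbl.filter(pl.any_horizontal(pl.all().is_null())).height
print(n_failed)  # 2 -> the second row and the last row
```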
@@ -6614,6 +6800,7 @@ get_step_report(self, i: 'int', columns_subset: 'str | list[str] | Column | None
    - [`col_vals_regex()`](`pointblank.Validate.col_vals_regex`)
    - [`col_vals_null()`](`pointblank.Validate.col_vals_null`)
    - [`col_vals_not_null()`](`pointblank.Validate.col_vals_not_null`)
+   - [`rows_complete()`](`pointblank.Validate.rows_complete`)
    - [`conjointly()`](`pointblank.Validate.conjointly`)

    The [`rows_distinct()`](`pointblank.Validate.rows_distinct`) validation step will produce a
@@ -6698,17 +6885,133 @@ get_json_report(self, use_fields: 'list[str] | None' = None, exclude_fields: 'li

    Get a report of the validation results as a JSON-formatted string.

+    The `get_json_report()` method provides a machine-readable report of validation results in
+    JSON format. This is particularly useful for programmatic processing, storing validation
+    results, or integrating with other systems. The report includes detailed information about
+    each validation step, such as assertion type, columns validated, threshold values, test
+    results, and more.
+
+    By default, all available validation information fields are included in the report. However,
+    you can customize the fields to include or exclude using the `use_fields=` and
+    `exclude_fields=` parameters.
+
    Parameters
    ----------
    use_fields
-       A list of fields to include in the report. If `None`, all fields are included.
+       An optional list of specific fields to include in the report. If provided, only these
+       fields will be included in the JSON output. If `None` (the default), all standard
+       validation report fields are included. Have a look at the *Available Report Fields*
+       section below for a list of fields that can be included in the report.
    exclude_fields
-       A list of fields to exclude from the report. If `None`, no fields are excluded.
+       An optional list of fields to exclude from the report. If provided, these fields will
+       be omitted from the JSON output. If `None` (the default), no fields are excluded.
+       This parameter cannot be used together with `use_fields=`. The *Available Report Fields*
+       section provides a listing of fields that can be excluded from the report.

    Returns
    -------
    str
-       A JSON-formatted string representing the validation report.
+       A JSON-formatted string representing the validation report, with each validation step
+       as an object in the report array.
+
+    Available Report Fields
+    -----------------------
+    The JSON report can include any of the standard validation report fields, including:
+
+    - `i`: the step number (1-indexed)
+    - `i_o`: the original step index from the validation plan (pre-expansion)
+    - `assertion_type`: the type of validation assertion (e.g., `"col_vals_gt"`, etc.)
+    - `column`: the column being validated (or columns used in certain validations)
+    - `values`: the comparison values or parameters used in the validation
+    - `inclusive`: whether the comparison is inclusive (for range-based validations)
+    - `na_pass`: whether `NA`/`Null` values are considered passing (for certain validations)
+    - `pre`: preprocessing function applied before validation
+    - `segments`: data segments to which the validation was applied
+    - `thresholds`: threshold level statement that was used for the validation step
+    - `label`: custom label for the validation step
+    - `brief`: a brief description of the validation step
+    - `active`: whether the validation step is active
+    - `all_passed`: whether all test units passed in the step
+    - `n`: total number of test units
+    - `n_passed`, `n_failed`: number of test units that passed and failed
+    - `f_passed`, `f_failed`: fraction of test units that passed and failed
+    - `warning`, `error`, `critical`: whether the namesake threshold level was exceeded (is
+      `null` if threshold not set)
+    - `time_processed`: when the validation step was processed (ISO 8601 format)
+    - `proc_duration_s`: the processing duration in seconds
+
+    Examples
+    --------
+    Let's create a validation plan with a few validation steps and generate a JSON report of the
+    results:
+
+    ```python
+    import pointblank as pb
+    import polars as pl
+
+    # Create a sample DataFrame
+    tbl = pl.DataFrame({
+        "a": [5, 7, 8, 9],
+        "b": [3, 4, 2, 1]
+    })
+
+    # Create and execute a validation plan
+    validation = (
+        pb.Validate(data=tbl)
+        .col_vals_gt(columns="a", value=6)
+        .col_vals_lt(columns="b", value=4)
+        .interrogate()
+    )
+
+    # Get the full JSON report
+    json_report = validation.get_json_report()
+
+    print(json_report)
+    ```
+
+    You can also customize which fields to include:
+
+    ```python
+    json_report = validation.get_json_report(
+        use_fields=["i", "assertion_type", "column", "n_passed", "n_failed"]
+    )
+
+    print(json_report)
+    ```
+
+    Or which fields to exclude:
+
+    ```python
+    json_report = validation.get_json_report(
+        exclude_fields=[
+            "i_o", "thresholds", "pre", "segments", "values",
+            "na_pass", "inclusive", "label", "brief", "active",
+            "time_processed", "proc_duration_s"
+        ]
+    )
+
+    print(json_report)
+    ```
+
+    The JSON output can be further processed or analyzed programmatically:
+
+    ```python
+    import json
+
+    # Parse the JSON report
+    report_data = json.loads(validation.get_json_report())
+
+    # Extract and analyze validation results
+    failing_steps = [step for step in report_data if step["n_failed"] > 0]
+    print(f"Number of failing validation steps: {len(failing_steps)}")
+    ```
+
+    See Also
+    --------
+    - [`get_tabular_report()`](`pointblank.Validate.get_tabular_report`): Get a formatted HTML
+      report as a GT table
+    - [`get_data_extracts()`](`pointblank.Validate.get_data_extracts`): Get rows that
+      failed validation


get_sundered_data(self, type='pass') -> 'FrameT'
pointblank/validate.py CHANGED
@@ -56,6 +56,7 @@ from pointblank._interrogation import (
    ConjointlyValidation,
    NumberOfTestUnits,
    RowCountMatch,
+   RowsComplete,
    RowsDistinct,
)
from pointblank._typing import SegmentSpec
@@ -6546,6 +6547,243 @@ class Validate:

        return self

+    def rows_complete(
+        self,
+        columns_subset: str | list[str] | None = None,
+        pre: Callable | None = None,
+        segments: SegmentSpec | None = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        actions: Actions | None = None,
+        brief: str | bool | None = None,
+        active: bool = True,
+    ) -> Validate:
+        """
+        Validate whether row data are complete by having no missing values.
+
+        The `rows_complete()` method checks whether rows in the table are complete. Completeness
+        of a row means that there are no missing values within the row. This validation will operate
+        over the number of test units that is equal to the number of rows in the table (determined
+        after any `pre=` mutation has been applied). A subset of columns can be specified for the
+        completeness check. If no subset is provided, all columns in the table will be used.
+
+        Parameters
+        ----------
+        columns_subset
+            A single column or a list of columns to use as a subset for the completeness check. If
+            `None` (the default), then all columns in the table will be used.
+        pre
+            An optional preprocessing function or lambda to apply to the data table during
+            interrogation. This function should take a table as input and return a modified table.
+            Have a look at the *Preprocessing* section for more information on how to use this
+            argument.
+        segments
+            An optional directive on segmentation, which serves to split a validation step into
+            multiple (one step per segment). Can be a single column name, a tuple that specifies a
+            column name and its corresponding values to segment on, or a combination of both
+            (provided as a list). Read the *Segmentation* section for usage information.
+        thresholds
+            Set threshold failure levels for reporting and reacting to exceedances of the levels.
+            The thresholds are set at the step level and will override any global thresholds set in
+            `Validate(thresholds=...)`. The default is `None`, which means that no thresholds will
+            be set locally and global thresholds (if any) will take effect. Look at the *Thresholds*
+            section for information on how to set threshold levels.
+        actions
+            Optional actions to take when the validation step meets or exceeds any set threshold
+            levels. If provided, the [`Actions`](`pointblank.Actions`) class should be used to
+            define the actions.
+        brief
+            An optional brief description of the validation step that will be displayed in the
+            reporting table. You can use templating elements like `"{step}"` to insert
+            the step number, or `"{auto}"` to include an automatically generated brief. If `True`,
+            the entire brief will be automatically generated. If `None` (the default) then there
+            won't be a brief.
+        active
+            A boolean value indicating whether the validation step should be active. Using `False`
+            will make the validation step inactive (still reporting its presence and keeping indexes
+            for the steps unchanged).
+
+        Returns
+        -------
+        Validate
+            The `Validate` object with the added validation step.
+
+        Preprocessing
+        -------------
+        The `pre=` argument allows for a preprocessing function or lambda to be applied to the data
+        table during interrogation. This function should take a table as input and return a modified
+        table. This is useful for performing any necessary transformations or filtering on the data
+        before the validation step is applied.
+
+        The preprocessing function can be any callable that takes a table as input and returns a
+        modified table. For example, you could use a lambda function to filter the table based on
+        certain criteria or to apply a transformation to the data. Note that you can refer to
+        columns via `columns_subset=` that are expected to be present in the transformed table, but
+        may not exist in the table before preprocessing. Regarding the lifetime of the transformed
+        table, it only exists during the validation step and is not stored in the `Validate` object
+        or used in subsequent validation steps.
+
+        Segmentation
+        ------------
+        The `segments=` argument allows for the segmentation of a validation step into multiple
+        segments. This is useful for applying the same validation step to different subsets of the
+        data. The segmentation can be done based on a single column or specific fields within a
+        column.
+
+        Providing a single column name will result in a separate validation step for each unique
+        value in that column. For example, if you have a column called `"region"` with values
+        `"North"`, `"South"`, and `"East"`, the validation step will be applied separately to each
+        region.
+
+        Alternatively, you can provide a tuple that specifies a column name and its corresponding
+        values to segment on. For example, if you have a column called `"date"` and you want to
+        segment on only specific dates, you can provide a tuple like
+        `("date", ["2023-01-01", "2023-01-02"])`. Any other values in the column will be disregarded
+        (i.e., no validation steps will be created for them).
+
+        A list with a combination of column names and tuples can be provided as well. This allows
+        for more complex segmentation scenarios. The following inputs are all valid:
+
+        - `segments=["region", ("date", ["2023-01-01", "2023-01-02"])]`: segments on unique values
+          in the `"region"` column and specific dates in the `"date"` column
+        - `segments=["region", "date"]`: segments on unique values in the `"region"` and `"date"`
+          columns
+
+        The segmentation is performed during interrogation, and the resulting validation steps will
+        be numbered sequentially. Each segment will have its own validation step, and the results
+        will be reported separately. This allows for a more granular analysis of the data and helps
+        identify issues within specific segments.
+
+        Importantly, the segmentation process will be performed after any preprocessing of the data
+        table. Because of this, one can conceivably use the `pre=` argument to generate a column
+        that can be used for segmentation. For example, you could create a new column called
+        `"segment"` through use of `pre=` and then use that column for segmentation.
+
+        Thresholds
+        ----------
+        The `thresholds=` parameter is used to set the failure-condition levels for the validation
+        step. If they are set here at the step level, these thresholds will override any thresholds
+        set at the global level in `Validate(thresholds=...)`.
+
+        There are three threshold levels: 'warning', 'error', and 'critical'. The threshold values
+        can either be set as a proportion failing of all test units (a value between `0` and `1`),
+        or as the absolute number of failing test units (an integer that's `1` or greater).
+
+        Thresholds can be defined using one of these input schemes:
+
+        1. use the [`Thresholds`](`pointblank.Thresholds`) class (the most direct way to create
+           thresholds)
+        2. provide a tuple of 1-3 values, where position `0` is the 'warning' level, position `1` is
+           the 'error' level, and position `2` is the 'critical' level
+        3. create a dictionary of 1-3 value entries; the valid keys are: 'warning', 'error', and
+           'critical'
+        4. a single integer/float value denoting absolute number or fraction of failing test units
+           for the 'warning' level only
+
+        If the number of failing test units exceeds set thresholds, the validation step will be
+        marked as 'warning', 'error', or 'critical'. All of the threshold levels don't need to be
+        set; you're free to set any combination of them.
+
+        Aside from reporting failure conditions, thresholds can be used to determine the actions to
+        take for each level of failure (using the `actions=` parameter).
+
+        Examples
+        --------
+        ```{python}
+        #| echo: false
+        #| output: false
+        import pointblank as pb
+        pb.config(report_incl_header=False, report_incl_footer=False, preview_incl_header=False)
+        ```
+        For the examples here, we'll use a simple Polars DataFrame with three string columns
+        (`col_1`, `col_2`, and `col_3`). The table is shown below:
+
+        ```{python}
+        import pointblank as pb
+        import polars as pl
+
+        tbl = pl.DataFrame(
+            {
+                "col_1": ["a", None, "c", "d"],
+                "col_2": ["a", "a", "c", None],
+                "col_3": ["a", "a", "d", None],
+            }
+        )
+
+        pb.preview(tbl)
+        ```
+
+        Let's validate that the rows in the table are complete with `rows_complete()`. We'll
+        determine if this validation had any failing test units (there are four test units, one for
+        each row). A failing test unit means that a given row is not complete (i.e., has at least
+        one missing value).
+
+        ```{python}
+        validation = (
+            pb.Validate(data=tbl)
+            .rows_complete()
+            .interrogate()
+        )
+
+        validation
+        ```
+
+        From this validation table we see that there are two failing test units. This is because
+        two rows in the table have at least one missing value (the second row and the last row).
+
+        We can also use a subset of columns to determine completeness. Let's specify the subset
+        using columns `col_2` and `col_3` for the next validation.
+
+        ```{python}
+        validation = (
+            pb.Validate(data=tbl)
+            .rows_complete(columns_subset=["col_2", "col_3"])
+            .interrogate()
+        )
+
+        validation
+        ```
+
+        The validation table reports a single failing test unit. The last row contains missing
+        values in both the `col_2` and `col_3` columns.
+        """
+
+        assertion_type = _get_fn_name()
+
+        _check_pre(pre=pre)
+        # TODO: add check for segments
+        # _check_segments(segments=segments)
+        _check_thresholds(thresholds=thresholds)
+        _check_boolean_input(param=active, param_name="active")
+
+        # Determine threshold to use (global or local) and normalize a local `thresholds=` value
+        thresholds = (
+            self.thresholds if thresholds is None else _normalize_thresholds_creation(thresholds)
+        )
+
+        if columns_subset is not None and isinstance(columns_subset, str):
+            columns_subset = [columns_subset]
+
+        # TODO: incorporate Column object
+
+        # Determine brief to use (global or local) and transform any shorthands of `brief=`
+        brief = self.brief if brief is None else _transform_auto_brief(brief=brief)
+
+        val_info = _ValidationInfo(
+            assertion_type=assertion_type,
+            column=columns_subset,
+            pre=pre,
+            segments=segments,
+            thresholds=thresholds,
+            actions=actions,
+            brief=brief,
+            active=active,
+        )
+
+        self._add_validation(validation_info=val_info)
+
+        return self
+

    def col_schema_match(
        self,
        schema: Schema,
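The Thresholds section above is easiest to read end to end; a short sketch pairing `rows_complete()` with step-level thresholds, using the tuple form documented above (position `0` is 'warning', position `1` is 'error'):

```python
import pointblank as pb
import polars as pl

tbl = pl.DataFrame(
    {
        "col_1": ["a", None, "c", "d"],
        "col_2": ["a", "a", "c", None],
    }
)

# One failing row reaches 'warning'; half of all rows failing reaches 'error'
validation = (
    pb.Validate(data=tbl)
    .rows_complete(thresholds=(1, 0.5))
    .interrogate()
)

validation
```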
@@ -7724,6 +7962,14 @@ class Validate:
                    tbl_type=tbl_type,
                ).get_test_results()

+            if assertion_category == "ROWS_COMPLETE":
+                results_tbl = RowsComplete(
+                    data_tbl=data_tbl_step,
+                    columns_subset=column,
+                    threshold=threshold,
+                    tbl_type=tbl_type,
+                ).get_test_results()
+

            if assertion_category == "COL_EXISTS_HAS_TYPE":
                result_bool = ColExistsHasType(
                    data_tbl=data_tbl_step,
@@ -7994,7 +8240,8 @@ class Validate:
            # TODO: Add support for extraction of rows for Ibis backends
            if (
                collect_extracts
-               and assertion_type in ROW_BASED_VALIDATION_TYPES + ["rows_distinct"]
+               and assertion_type
+               in ROW_BASED_VALIDATION_TYPES + ["rows_distinct", "rows_complete"]
                and tbl_type not in IBIS_BACKENDS
            ):
                # Add row numbers to the results table
@@ -9076,19 +9323,134 @@ class Validate:
9076
9323
  """
9077
9324
  Get a report of the validation results as a JSON-formatted string.
9078
9325
 
9326
+ The `get_json_report()` method provides a machine-readable report of validation results in
9327
+ JSON format. This is particularly useful for programmatic processing, storing validation
9328
+ results, or integrating with other systems. The report includes detailed information about
9329
+ each validation step, such as assertion type, columns validated, threshold values, test
9330
+ results, and more.
9331
+
9332
+ By default, all available validation information fields are included in the report. However,
9333
+ you can customize the fields to include or exclude using the `use_fields=` and
9334
+ `exclude_fields=` parameters.
9335
+
9079
9336
  Parameters
9080
9337
  ----------
9081
9338
  use_fields
9082
- A list of fields to include in the report. If `None`, all fields are included.
9339
+ An optional list of specific fields to include in the report. If provided, only these
9340
+ fields will be included in the JSON output. If `None` (the default), all standard
9341
+ validation report fields are included. Have a look at the *Available Report Fields*
9342
+ section below for a list of fields that can be included in the report.
9083
9343
  exclude_fields
9084
- A list of fields to exclude from the report. If `None`, no fields are excluded.
9344
+ An optional list of fields to exclude from the report. If provided, these fields will
9345
+ be omitted from the JSON output. If `None` (the default), no fields are excluded.
9346
+ This parameter cannot be used together with `use_fields=`. The *Available Report Fields*
9347
+ provides a listing of fields that can be excluded from the report.
9085
9348
 
9086
9349
  Returns
9087
9350
  -------
9088
9351
  str
9089
- A JSON-formatted string representing the validation report.
9090
- """
9352
+ A JSON-formatted string representing the validation report, with each validation step
9353
+ as an object in the report array.
9354
+
9355
+ Available Report Fields
9356
+ -----------------------
9357
+ The JSON report can include any of the standard validation report fields, including:
9358
+
9359
+ - `i`: the step number (1-indexed)
9360
+ - `i_o`: the original step index from the validation plan (pre-expansion)
9361
+ - `assertion_type`: the type of validation assertion (e.g., `"col_vals_gt"`, etc.)
9362
+ - `column`: the column being validated (or columns used in certain validations)
9363
+ - `values`: the comparison values or parameters used in the validation
9364
+ - `inclusive`: whether the comparison is inclusive (for range-based validations)
9365
+ - `na_pass`: whether `NA`/`Null` values are considered passing (for certain validations)
9366
+ - `pre`: preprocessing function applied before validation
9367
+ - `segments`: data segments to which the validation was applied
9368
+ - `thresholds`: threshold level statement that was used for the validation step
9369
+ - `label`: custom label for the validation step
9370
+ - `brief`: a brief description of the validation step
9371
+ - `active`: whether the validation step is active
9372
+ - `all_passed`: whether all test units passed in the step
9373
+ - `n`: total number of test units
9374
+ - `n_passed`, `n_failed`: number of test units that passed and failed
9375
+ - `f_passed`, `f_failed`: Fraction of test units that passed and failed
9376
+ - `warning`, `error`, `critical`: whether the namesake threshold level was exceeded (is
9377
+ `null` if threshold not set)
9378
+ - `time_processed`: when the validation step was processed (ISO 8601 format)
9379
+ - `proc_duration_s`: the processing duration in seconds
9380
+
9381
+ Examples
9382
+ --------
9383
+ Let's create a validation plan with a few validation steps and generate a JSON report of the
9384
+ results:
9385
+
9386
+ ```{python}
9387
+ import pointblank as pb
9388
+ import polars as pl
9389
+
9390
+ # Create a sample DataFrame
9391
+ tbl = pl.DataFrame({
9392
+ "a": [5, 7, 8, 9],
9393
+ "b": [3, 4, 2, 1]
9394
+ })
9395
+
9396
+ # Create and execute a validation plan
9397
+ validation = (
9398
+ pb.Validate(data=tbl)
9399
+ .col_vals_gt(columns="a", value=6)
9400
+ .col_vals_lt(columns="b", value=4)
9401
+ .interrogate()
9402
+ )
9403
+
9404
+ # Get the full JSON report
9405
+ json_report = validation.get_json_report()
9406
+
9407
+ print(json_report)
9408
+ ```
9409
+
9410
+ You can also customize which fields to include:
9411
+
9412
+ ```{python}
9413
+ json_report = validation.get_json_report(
9414
+ use_fields=["i", "assertion_type", "column", "n_passed", "n_failed"]
9415
+ )
9416
+
9417
+ print(json_report)
9418
+ ```
9419
+
9420
+ Or which fields to exclude:
9421
+
9422
+ ```{python}
9423
+ json_report = validation.get_json_report(
9424
+ exclude_fields=[
9425
+ "i_o", "thresholds", "pre", "segments", "values",
9426
+ "na_pass", "inclusive", "label", "brief", "active",
9427
+ "time_processed", "proc_duration_s"
9428
+ ]
9429
+ )
9430
+
9431
+ print(json_report)
9432
+ ```
9433
+
9434
+ The JSON output can be further processed or analyzed programmatically:
9435
+
9436
+ ```{python}
9437
+ import json
9438
+
9439
+ # Parse the JSON report
9440
+ report_data = json.loads(validation.get_json_report())
9091
9441
 
9442
+ # Extract and analyze validation results
9443
+ failing_steps = [step for step in report_data if step["n_failed"] > 0]
9444
+ print(f"Number of failing validation steps: {len(failing_steps)}")
9445
+ ```
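Going one step further, the same parsed list supports quick per-step summaries; a minimal sketch reusing `report_data` from the block above:

```python
# Print a one-line summary per validation step, using only fields
# documented in the field list above; assumes `report_data` was
# produced by json.loads() as in the previous block.
for step in report_data:
    print(
        f"Step {step['i']} ({step['assertion_type']}): "
        f"{step['n_passed']}/{step['n']} test units passed"
    )
```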
9446
+
9447
+ See Also
9448
+ --------
9449
+ - [`get_tabular_report()`](`pointblank.Validate.get_tabular_report`): Get a formatted HTML
9450
+ report as a GT table
9451
+ - [`get_data_extracts()`](`pointblank.Validate.get_data_extracts`): Get rows that
9452
+ failed validation
9453
+ """
9092
9454
  if use_fields is not None and exclude_fields is not None:
9093
9455
  raise ValueError("Cannot specify both `use_fields=` and `exclude_fields=`.")
9094
9456
 
@@ -9597,7 +9959,7 @@ class Validate:
9597
9959
  "col_vals_expr",
9598
9960
  ]:
9599
9961
  columns_upd.append("&mdash;")
9600
- elif assertion_type[i] in ["rows_distinct"]:
9962
+ elif assertion_type[i] in ["rows_distinct", "rows_complete"]:
9601
9963
  if not column:
9602
9964
  # If there is no column subset, then all columns are used
9603
9965
  columns_upd.append("ALL COLUMNS")
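This branch determines the COLUMNS entry shown in the tabular report for these two step types; a minimal sketch, assuming `rows_complete()` accepts the same optional `columns_subset=` argument as `rows_distinct()`:

```python
import pointblank as pb
import polars as pl

tbl = pl.DataFrame({"a": [1, None, 3], "b": [4, 5, None]})

validation = (
    pb.Validate(data=tbl)
    # No column subset: the report's COLUMNS entry reads "ALL COLUMNS"
    .rows_complete()
    # With a subset: only the named column(s) are listed
    .rows_complete(columns_subset=["a"])
    .interrogate()
)
```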
@@ -9660,6 +10022,7 @@ class Validate:
9660
10022
  "col_vals_not_null",
9661
10023
  "col_exists",
9662
10024
  "rows_distinct",
10025
+ "rows_complete",
9663
10026
  ]:
9664
10027
  values_upd.append("&mdash;")
9665
10028
 
@@ -10213,6 +10576,7 @@ class Validate:
10213
10576
  - [`col_vals_regex()`](`pointblank.Validate.col_vals_regex`)
10214
10577
  - [`col_vals_null()`](`pointblank.Validate.col_vals_null`)
10215
10578
  - [`col_vals_not_null()`](`pointblank.Validate.col_vals_not_null`)
10579
+ - [`rows_complete()`](`pointblank.Validate.rows_complete`)
10216
10580
  - [`conjointly()`](`pointblank.Validate.conjointly`)
10217
10581
 
10218
10582
  The [`rows_distinct()`](`pointblank.Validate.rows_distinct`) validation step will produce a
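For the newly cross-referenced method, a minimal sketch contrasting the two row-level checks (the completeness semantics are inferred from the method name and its R analogue, not stated in this diff):

```python
import pointblank as pb
import polars as pl

tbl = pl.DataFrame({
    "x": [1, 1, None],
    "y": ["a", "a", "b"],
})

validation = (
    pb.Validate(data=tbl)
    .rows_distinct()   # row 2 duplicates row 1
    .rows_complete()   # row 3 is incomplete (Null in `x`)
    .interrogate()
)
```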
@@ -10372,7 +10736,7 @@ class Validate:
10372
10736
  # if get_row_count(extract) == 0:
10373
10737
  # return "No rows were extracted."
10374
10738
 
10375
- if assertion_type in ROW_BASED_VALIDATION_TYPES:
10739
+ if assertion_type in ROW_BASED_VALIDATION_TYPES + ["rows_complete"]:
10376
10740
  # Get the extracted data for the step
10377
10741
  extract = self.get_data_extracts(i=i, frame=True)
10378
10742
 
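Since `"rows_complete"` is now treated as row-based here, rows failing such a step can be pulled out as a data extract; a minimal sketch, assuming an interrogated validation with a `rows_complete()` step at index 1:

```python
# Rows failing a rows_complete() step (i.e., rows containing missing
# values) become available as an extract; the step index `i=1` is
# assumed for illustration.
incomplete_rows = validation.get_data_extracts(i=1, frame=True)
print(incomplete_rows)
```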
@@ -11082,6 +11446,13 @@ def _create_autobrief_or_failure_text(
11082
11446
  for_failure=for_failure,
11083
11447
  )
11084
11448
 
11449
+ if assertion_type == "rows_complete":
11450
+ return _create_text_rows_complete(
11451
+ lang=lang,
11452
+ columns_subset=column,
11453
+ for_failure=for_failure,
11454
+ )
11455
+
11085
11456
  if assertion_type == "row_count_match":
11086
11457
  return _create_text_row_count_match(
11087
11458
  lang=lang,
@@ -11257,6 +11628,24 @@ def _create_text_rows_distinct(
11257
11628
  return text
11258
11629
 
11259
11630
 
11631
+ def _create_text_rows_complete(
11632
+ lang: str, columns_subset: list[str] | None, for_failure: bool = False
11633
+ ) -> str:
11634
+ type_ = _expect_failure_type(for_failure=for_failure)
11635
+
11636
+ if columns_subset is None:
11637
+ text = EXPECT_FAIL_TEXT[f"all_row_complete_{type_}_text"][lang]
11638
+
11639
+ else:
11640
+ column_text = _prep_values_text(values=columns_subset, lang=lang, limit=3)
11641
+
11642
+ text = EXPECT_FAIL_TEXT[f"across_row_complete_{type_}_text"][lang].format(
11643
+ column_text=column_text
11644
+ )
11645
+
11646
+ return text
11647
+
11648
+
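The helper mirrors `_create_text_rows_distinct()` above, dispatching on whether a column subset was given. A toy illustration of the lookup pattern follows, with placeholder strings; the real translations live in `_constants_translations.py`, and the `{type_}` suffix is assumed to be `"expectation"` in the non-failure case:

```python
# Toy stand-in for the EXPECT_FAIL_TEXT lookup used above; the keys
# follow the same naming scheme, but the strings are placeholders
# rather than the package's actual translations.
TOY_TEXT = {
    "all_row_complete_expectation_text": {"en": "Expect that rows are complete."},
    "across_row_complete_expectation_text": {
        "en": "Expect complete rows across {column_text}."
    },
}

def toy_rows_complete_text(columns_subset=None, lang="en"):
    if columns_subset is None:
        return TOY_TEXT["all_row_complete_expectation_text"][lang]
    column_text = ", ".join(columns_subset)  # stand-in for _prep_values_text()
    return TOY_TEXT["across_row_complete_expectation_text"][lang].format(
        column_text=column_text
    )

print(toy_rows_complete_text())            # all-columns variant
print(toy_rows_complete_text(["a", "b"]))  # column-subset variant
```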
11260
11649
  def _create_text_row_count_match(lang: str, value: int, for_failure: bool = False) -> str:
11261
11650
  type_ = _expect_failure_type(for_failure=for_failure)
11262
11651
 
@@ -12057,6 +12446,11 @@ def _step_report_row_based(
12057
12446
  text = STEP_REPORT_TEXT["column_is_null"][lang].format(column=column)
12058
12447
  elif assertion_type == "col_vals_not_null":
12059
12448
  text = STEP_REPORT_TEXT["column_is_not_null"][lang].format(column=column)
12449
+ elif assertion_type == "rows_complete":
12450
+ if column is None:
12451
+ text = STEP_REPORT_TEXT["rows_complete_all"][lang]
12452
+ else:
12453
+ text = STEP_REPORT_TEXT["rows_complete_subset"][lang]
12060
12454
 
12061
12455
  # Wrap assertion text in a <code> tag
12062
12456
  text = (
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pointblank
3
- Version: 0.9.0
3
+ Version: 0.9.1
4
4
  Summary: Find out if your data is what you think it is.
5
5
  Author-email: Richard Iannone <riannone@me.com>
6
6
  License: MIT License
@@ -1,10 +1,10 @@
1
1
  pointblank/__init__.py,sha256=uHrX-ARZOhvWogXXqKV65RO2DXdYLZNCD1oNcm8hE6o,1585
2
- pointblank/_constants.py,sha256=l7jNb-UqrKER30FnrRKlIK5onc0Id37CVV6l7i4Pn00,75777
2
+ pointblank/_constants.py,sha256=tlelmeuftW4BpVeEILbsbuuCaMQ7yA_FYqM6PJPXH58,78561
3
3
  pointblank/_constants_docs.py,sha256=JBmtt16zTYQ-zaM4ElLExtKs-dKlnN553Ys2ML1Y1C8,2099
4
- pointblank/_constants_translations.py,sha256=QfOmVESwWFokWXpgLkEFHGik8o1EUBhIXYtaEqtGGNg,166575
5
- pointblank/_interrogation.py,sha256=SkW0DUoCafQbpPToVseUPLzaYXXTTzN9y6mbzjbRmNw,81082
4
+ pointblank/_constants_translations.py,sha256=FHkY2Bh1VBmBwbiGRIRSMU1tNGxgQAkjoPoYlwOHSKU,180685
5
+ pointblank/_interrogation.py,sha256=BjN60ed7BH4ZnoPtkmVSvVEqJgf8k9mce4Zb63_jv_s,85155
6
6
  pointblank/_typing.py,sha256=ConITAbsFxU8CkNXY7l0Lua9hGofeDDJAWw-lGAIVgI,764
7
- pointblank/_utils.py,sha256=0V-LxUjSjGfcZV2_IH-5KPikYiVWdt4QSMQDioyZoZc,24681
7
+ pointblank/_utils.py,sha256=g7vbvV33tKNvznUoYsHcZW90bYm1LPb76njQeDJDPyQ,24715
8
8
  pointblank/_utils_check_args.py,sha256=rFEc1nbCN8ftsQQWVjCNWmQ2QmUDxkfgmoJclrZeTLs,5489
9
9
  pointblank/_utils_html.py,sha256=sTcmnBljkPjRZF1hbpoHl4HmnXOazsA91gC9iWVIrRk,2848
10
10
  pointblank/actions.py,sha256=oazJk4pe3lIA14hjyCDtPOr4r_sp4vGGo2eyU_LX5_0,18268
@@ -15,8 +15,8 @@ pointblank/draft.py,sha256=cusr4fBiNncCKIOU8UwvJcvkBeBuUnqH_UfYp9dtNss,15777
15
15
  pointblank/schema.py,sha256=gzUCmtccO2v15MH2bo9uHUYjkKEEne1okQucxcH39pc,44291
16
16
  pointblank/tf.py,sha256=8o_8m4i01teulEe3-YYMotSNf3tImjBMInsvdjSAO5Q,8844
17
17
  pointblank/thresholds.py,sha256=aAPfdo3VMCw_G_OAh4nEsCYfIynDfNRJOMrG8yDM6U8,25717
18
- pointblank/validate.py,sha256=pr1Riar-axz17aUwiinZdJE67tH8x24eBPUB1Dw5aYk,570820
19
- pointblank/data/api-docs.txt,sha256=Jf_akggFaJPh0chntpq2cRTa1Enuupk723zty_x0k-s,452511
18
+ pointblank/validate.py,sha256=EPqtxw5sQG4Xh7WSaViVEUtm4FmpFOsyh4KM9EzuqkU,588834
19
+ pointblank/data/api-docs.txt,sha256=JkV9SdXyB3ftBMXVFdFqGZNpyfBdWUpyisn4QHco56w,467666
20
20
  pointblank/data/game_revenue-duckdb.zip,sha256=tKIVx48OGLYGsQPS3h5AjA2Nyq_rfEpLCjBiFUWhagU,35880
21
21
  pointblank/data/game_revenue.zip,sha256=7c9EvHLyi93CHUd4p3dM4CZ-GucFCtXKSPxgLojL32U,33749
22
22
  pointblank/data/nycflights-duckdb.zip,sha256=GQrHO9tp7d9cNGFNSbA9EKF19MLf6t2wZE0U9-hIKow,5293077
@@ -24,8 +24,8 @@ pointblank/data/nycflights.zip,sha256=yVjbUaKUz2LydSdF9cABuir0VReHBBgV7shiNWSd0m
24
24
  pointblank/data/polars-api-docs.txt,sha256=KGcS-BOtUs9zgpkWfXD-GFdFh4O_zjdkpX7msHjztLg,198045
25
25
  pointblank/data/small_table-duckdb.zip,sha256=BhTaZ2CRS4-9Z1uVhOU6HggvW3XCar7etMznfENIcOc,2028
26
26
  pointblank/data/small_table.zip,sha256=lmFb90Nb-v5X559Ikjg31YLAXuRyMkD9yLRElkXPMzQ,472
27
- pointblank-0.9.0.dist-info/licenses/LICENSE,sha256=apLF-HWPNU7pT5bmf5KmZpD5Cklpy2u-BN_0xBoRMLY,1081
28
- pointblank-0.9.0.dist-info/METADATA,sha256=091J9RejW9b-vT63C6w_5shh-82G0t451895oNm479M,14732
29
- pointblank-0.9.0.dist-info/WHEEL,sha256=ck4Vq1_RXyvS4Jt6SI0Vz6fyVs4GWg7AINwpsaGEgPE,91
30
- pointblank-0.9.0.dist-info/top_level.txt,sha256=-wHrS1SvV8-nhvc3w-PPYs1C1WtEc1pK-eGjubbCCKc,11
31
- pointblank-0.9.0.dist-info/RECORD,,
27
+ pointblank-0.9.1.dist-info/licenses/LICENSE,sha256=apLF-HWPNU7pT5bmf5KmZpD5Cklpy2u-BN_0xBoRMLY,1081
28
+ pointblank-0.9.1.dist-info/METADATA,sha256=1o11OgPSmpB4qBDEG1HyHDfVj5emxcT_yxHeFsVPVUc,14732
29
+ pointblank-0.9.1.dist-info/WHEEL,sha256=wXxTzcEDnjrTwFYjLPcsW_7_XihufBwmpiBeiXNBGEA,91
30
+ pointblank-0.9.1.dist-info/top_level.txt,sha256=-wHrS1SvV8-nhvc3w-PPYs1C1WtEc1pK-eGjubbCCKc,11
31
+ pointblank-0.9.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.0.0)
2
+ Generator: setuptools (80.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5