pointblank 0.9.0__py3-none-any.whl → 0.9.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pointblank/validate.py CHANGED
@@ -56,7 +56,9 @@ from pointblank._interrogation import (
     ConjointlyValidation,
     NumberOfTestUnits,
     RowCountMatch,
+    RowsComplete,
     RowsDistinct,
+    SpeciallyValidation,
 )
 from pointblank._typing import SegmentSpec
 from pointblank._utils import (
@@ -6546,6 +6548,243 @@ class Validate:

         return self

+    def rows_complete(
+        self,
+        columns_subset: str | list[str] | None = None,
+        pre: Callable | None = None,
+        segments: SegmentSpec | None = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        actions: Actions | None = None,
+        brief: str | bool | None = None,
+        active: bool = True,
+    ) -> Validate:
+        """
+        Validate whether row data are complete by having no missing values.
+
+        The `rows_complete()` method checks whether rows in the table are complete. Completeness
+        of a row means that there are no missing values within the row. This validation operates
+        over a number of test units equal to the number of rows in the table (determined after
+        any `pre=` mutation has been applied). A subset of columns can be specified for the
+        completeness check. If no subset is provided, all columns in the table will be used.
+
+        Parameters
+        ----------
+        columns_subset
+            A single column or a list of columns to use as a subset for the completeness check. If
+            `None` (the default), then all columns in the table will be used.
+        pre
+            An optional preprocessing function or lambda to apply to the data table during
+            interrogation. This function should take a table as input and return a modified table.
+            Have a look at the *Preprocessing* section for more information on how to use this
+            argument.
+        segments
+            An optional directive on segmentation, which serves to split a validation step into
+            multiple (one step per segment). Can be a single column name, a tuple that specifies a
+            column name and its corresponding values to segment on, or a combination of both
+            (provided as a list). Read the *Segmentation* section for usage information.
+        thresholds
+            Set threshold failure levels for reporting and reacting to exceedances of the levels.
+            The thresholds are set at the step level and will override any global thresholds set
+            in `Validate(thresholds=...)`. The default is `None`, which means that no thresholds
+            will be set locally and global thresholds (if any) will take effect. Look at the
+            *Thresholds* section for information on how to set threshold levels.
+        actions
+            Optional actions to take when the validation step meets or exceeds any set threshold
+            levels. If provided, the [`Actions`](`pointblank.Actions`) class should be used to
+            define the actions.
+        brief
+            An optional brief description of the validation step that will be displayed in the
+            reporting table. You can use templating elements like `"{step}"` to insert the step
+            number, or `"{auto}"` to include an automatically generated brief. If `True`, the
+            entire brief will be automatically generated. If `None` (the default) there won't be
+            a brief.
+        active
+            A boolean value indicating whether the validation step should be active. Using `False`
+            will make the validation step inactive (still reporting its presence and keeping
+            indexes for the steps unchanged).
+
+        Returns
+        -------
+        Validate
+            The `Validate` object with the added validation step.
+
+        Preprocessing
+        -------------
+        The `pre=` argument allows for a preprocessing function or lambda to be applied to the
+        data table during interrogation. This function should take a table as input and return a
+        modified table. This is useful for performing any necessary transformations or filtering
+        on the data before the validation step is applied.
+
+        The preprocessing function can be any callable that takes a table as input and returns a
+        modified table. For example, you could use a lambda function to filter the table based on
+        certain criteria or to apply a transformation to the data. Note that you can refer to
+        columns via `columns_subset=` that are expected to be present in the transformed table,
+        but may not exist in the table before preprocessing. Regarding the lifetime of the
+        transformed table, it only exists during the validation step and is not stored in the
+        `Validate` object or used in subsequent validation steps.
+
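+        As a minimal sketch (the table and lambda here are illustrative, not part of the API), a
+        filtering step supplied through `pre=` might look like this:
+
+        ```{python}
+        import pointblank as pb
+        import polars as pl
+
+        tbl_pre = pl.DataFrame(
+            {
+                "group": ["x", "x", "y", "y"],
+                "value": [1, None, 3, 4],
+            }
+        )
+
+        (
+            pb.Validate(data=tbl_pre)
+            # Only rows where `group == "y"` are checked for completeness
+            .rows_complete(pre=lambda df: df.filter(pl.col("group") == "y"))
+            .interrogate()
+        )
+        ```
+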
+        Segmentation
+        ------------
+        The `segments=` argument allows for the segmentation of a validation step into multiple
+        segments. This is useful for applying the same validation step to different subsets of
+        the data. The segmentation can be done based on a single column or specific fields within
+        a column.
+
+        Providing a single column name will result in a separate validation step for each unique
+        value in that column. For example, if you have a column called `"region"` with values
+        `"North"`, `"South"`, and `"East"`, the validation step will be applied separately to
+        each region.
+
+        Alternatively, you can provide a tuple that specifies a column name and its corresponding
+        values to segment on. For example, if you have a column called `"date"` and you want to
+        segment on only specific dates, you can provide a tuple like
+        `("date", ["2023-01-01", "2023-01-02"])`. Any other values in the column will be
+        disregarded (i.e., no validation steps will be created for them).
+
+        A list with a combination of column names and tuples can be provided as well. This allows
+        for more complex segmentation scenarios. The following inputs are all valid:
+
+        - `segments=["region", ("date", ["2023-01-01", "2023-01-02"])]`: segments on unique
+          values in the `"region"` column and specific dates in the `"date"` column
+        - `segments=["region", "date"]`: segments on unique values in the `"region"` and `"date"`
+          columns
+
+        The segmentation is performed during interrogation, and the resulting validation steps
+        will be numbered sequentially. Each segment will have its own validation step, and the
+        results will be reported separately. This allows for a more granular analysis of the data
+        and helps identify issues within specific segments.
+
+        Importantly, the segmentation process will be performed after any preprocessing of the
+        data table. Because of this, one can conceivably use the `pre=` argument to generate a
+        column that can be used for segmentation. For example, you could create a new column
+        called `"segment"` through use of `pre=` and then use that column for segmentation.
+
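+        As a minimal sketch (with an illustrative `"region"` column), segmenting a completeness
+        check by a single column might look like this:
+
+        ```{python}
+        import pointblank as pb
+        import polars as pl
+
+        tbl_seg = pl.DataFrame(
+            {
+                "region": ["North", "North", "South", "South"],
+                "value": [1, None, 3, 4],
+            }
+        )
+
+        (
+            pb.Validate(data=tbl_seg)
+            .rows_complete(segments="region")  # one step per unique region
+            .interrogate()
+        )
+        ```
+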
+        Thresholds
+        ----------
+        The `thresholds=` parameter is used to set the failure-condition levels for the
+        validation step. If they are set here at the step level, these thresholds will override
+        any thresholds set at the global level in `Validate(thresholds=...)`.
+
+        There are three threshold levels: 'warning', 'error', and 'critical'. The threshold
+        values can either be set as a proportion of failing test units (a value between `0` and
+        `1`) or as the absolute number of failing test units (an integer that's `1` or greater).
+
+        Thresholds can be defined using one of these input schemes:
+
+        1. use the [`Thresholds`](`pointblank.Thresholds`) class (the most direct way to create
+           thresholds)
+        2. provide a tuple of 1-3 values, where position `0` is the 'warning' level, position `1`
+           is the 'error' level, and position `2` is the 'critical' level
+        3. create a dictionary of 1-3 value entries; the valid keys are 'warning', 'error', and
+           'critical'
+        4. a single integer/float value denoting the absolute number or fraction of failing test
+           units for the 'warning' level only
+
+        If the number of failing test units exceeds set thresholds, the validation step will be
+        marked as 'warning', 'error', or 'critical'. Not all threshold levels need to be set;
+        you're free to set any combination of them.
+
+        Aside from reporting failure conditions, thresholds can be used to determine the actions
+        to take for each level of failure (using the `actions=` parameter).
+
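+        As a minimal sketch (with an illustrative table and threshold values), the tuple scheme
+        might be used like this:
+
+        ```{python}
+        import pointblank as pb
+        import polars as pl
+
+        tbl_thr = pl.DataFrame({"a": [1, None, 3, 4], "b": [1, 2, None, 4]})
+
+        (
+            pb.Validate(data=tbl_thr)
+            .rows_complete(thresholds=(1, 2))  # 'warning' at 1 failing row, 'error' at 2
+            .interrogate()
+        )
+        ```
+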
+        Examples
+        --------
+        ```{python}
+        #| echo: false
+        #| output: false
+        import pointblank as pb
+        pb.config(report_incl_header=False, report_incl_footer=False, preview_incl_header=False)
+        ```
+        For the examples here, we'll use a simple Polars DataFrame with three string columns
+        (`col_1`, `col_2`, and `col_3`). The table is shown below:
+
+        ```{python}
+        import pointblank as pb
+        import polars as pl
+
+        tbl = pl.DataFrame(
+            {
+                "col_1": ["a", None, "c", "d"],
+                "col_2": ["a", "a", "c", None],
+                "col_3": ["a", "a", "d", None],
+            }
+        )
+
+        pb.preview(tbl)
+        ```
+
+        Let's validate that the rows in the table are complete with `rows_complete()`. We'll
+        determine if this validation had any failing test units (there are four test units, one
+        for each row). A failing test unit means that a given row is not complete (i.e., has at
+        least one missing value).
+
+        ```{python}
+        validation = (
+            pb.Validate(data=tbl)
+            .rows_complete()
+            .interrogate()
+        )
+
+        validation
+        ```
+
+        From this validation table we see that there are two failing test units. This is because
+        two rows in the table have at least one missing value (the second row and the last row).
+
+        We can also use a subset of columns to determine completeness. Let's specify the subset
+        using columns `col_2` and `col_3` for the next validation.
+
+        ```{python}
+        validation = (
+            pb.Validate(data=tbl)
+            .rows_complete(columns_subset=["col_2", "col_3"])
+            .interrogate()
+        )
+
+        validation
+        ```
+
+        The validation table reports a single failing test unit: the last row contains missing
+        values in both the `col_2` and `col_3` columns.
+        """
+
+        assertion_type = _get_fn_name()
+
+        _check_pre(pre=pre)
+        # TODO: add check for segments
+        # _check_segments(segments=segments)
+        _check_thresholds(thresholds=thresholds)
+        _check_boolean_input(param=active, param_name="active")
+
+        # Determine threshold to use (global or local) and normalize a local `thresholds=` value
+        thresholds = (
+            self.thresholds if thresholds is None else _normalize_thresholds_creation(thresholds)
+        )
+
+        if columns_subset is not None and isinstance(columns_subset, str):
+            columns_subset = [columns_subset]
+
+        # TODO: incorporate Column object
+
+        # Determine brief to use (global or local) and transform any shorthands of `brief=`
+        brief = self.brief if brief is None else _transform_auto_brief(brief=brief)
+
+        val_info = _ValidationInfo(
+            assertion_type=assertion_type,
+            column=columns_subset,
+            pre=pre,
+            segments=segments,
+            thresholds=thresholds,
+            actions=actions,
+            brief=brief,
+            active=active,
+        )
+
+        self._add_validation(validation_info=val_info)
+
+        return self
+
     def col_schema_match(
         self,
         schema: Schema,
@@ -7395,7 +7634,7 @@ class Validate:

         val_info = _ValidationInfo(
             assertion_type=assertion_type,
-            column=None,  # This is a rowwise validation, not specific to any column
+            column=None,  # This validation is not specific to any column(s)
             values=values,
             pre=pre,
             thresholds=thresholds,
@@ -7408,6 +7647,351 @@ class Validate:

         return self

+    def specially(
+        self,
+        expr: Callable,
+        pre: Callable | None = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds = None,
+        actions: Actions | None = None,
+        brief: str | bool | None = None,
+        active: bool = True,
+    ) -> Validate:
+        """
+        Perform a specialized validation with customized logic.
+
+        The `specially()` validation method allows for the creation of specialized validation
+        expressions that can be used to validate specific conditions or logic in the data. This
+        method provides maximum flexibility by accepting a custom callable that encapsulates
+        your validation logic.
+
+        The callable function can have one of two signatures:
+
+        - a function accepting a single parameter (the data table): `def validate(data): ...`
+        - a function with no parameters: `def validate(): ...`
+
+        The second form is particularly useful for environment validations that don't need to
+        inspect the data table.
+
+        The callable function must ultimately return one of:
+
+        1. a single boolean value or boolean list
+        2. a table where the final column contains boolean values (column name is unimportant)
+
+        The validation will operate over a number of test units equal to the number of rows in
+        the data table (if returning a table with boolean values). If returning a scalar boolean
+        value, the validation will operate over a single test unit. For a return of a list of
+        boolean values, the length of the list constitutes the number of test units.
+
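+        As a minimal sketch (the function and column names here are illustrative), the accepted
+        return forms look like this:
+
+        ```{python}
+        import polars as pl
+
+        def check_scalar(data):
+            return data.height > 0  # single boolean: one test unit
+
+        def check_list(data):
+            return [v is not None for v in data["a"].to_list()]  # list: one unit per element
+
+        def check_table(data):
+            return data.select(pl.col("a").is_not_null())  # boolean column: one unit per row
+        ```
+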
+        Parameters
+        ----------
+        expr
+            A callable function that defines the specialized validation logic. This function
+            should: (1) accept the target data table as its single argument (though it may ignore
+            it), or (2) take no parameters at all (for environment validations). The function
+            must ultimately return boolean values representing validation results. Design your
+            function to incorporate any custom parameters directly within the function itself
+            using closure variables or default parameters.
+        pre
+            An optional preprocessing function or lambda to apply to the data table during
+            interrogation. This function should take a table as input and return a modified table.
+            Have a look at the *Preprocessing* section for more information on how to use this
+            argument.
+        thresholds
+            Set threshold failure levels for reporting and reacting to exceedances of the levels.
+            The thresholds are set at the step level and will override any global thresholds set
+            in `Validate(thresholds=...)`. The default is `None`, which means that no thresholds
+            will be set locally and global thresholds (if any) will take effect. Look at the
+            *Thresholds* section for information on how to set threshold levels.
+        actions
+            Optional actions to take when the validation step meets or exceeds any set threshold
+            levels. If provided, the [`Actions`](`pointblank.Actions`) class should be used to
+            define the actions.
+        brief
+            An optional brief description of the validation step that will be displayed in the
+            reporting table. You can use templating elements like `"{step}"` to insert the step
+            number, or `"{auto}"` to include an automatically generated brief. If `True`, the
+            entire brief will be automatically generated. If `None` (the default) there won't be
+            a brief.
+        active
+            A boolean value indicating whether the validation step should be active. Using `False`
+            will make the validation step inactive (still reporting its presence and keeping
+            indexes for the steps unchanged).
+
+        Returns
+        -------
+        Validate
+            The `Validate` object with the added validation step.
+
+        Preprocessing
+        -------------
+        The `pre=` argument allows for a preprocessing function or lambda to be applied to the
+        data table during interrogation. This function should take a table as input and return a
+        modified table. This is useful for performing any necessary transformations or filtering
+        on the data before the validation step is applied.
+
+        The preprocessing function can be any callable that takes a table as input and returns a
+        modified table. For example, you could use a lambda function to filter the table based on
+        certain criteria or to apply a transformation to the data. Regarding the lifetime of the
+        transformed table, it only exists during the validation step and is not stored in the
+        `Validate` object or used in subsequent validation steps.
+
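+        As a minimal sketch (the table and lambdas here are illustrative), combining `pre=` with
+        a custom expression might look like this:
+
+        ```{python}
+        import pointblank as pb
+        import polars as pl
+
+        tbl_pre = pl.DataFrame({"a": [5, -1, 7], "b": [1, 2, 3]})
+
+        (
+            pb.Validate(data=tbl_pre)
+            .specially(
+                expr=lambda data: data.select(pl.col("a") > 0),
+                pre=lambda df: df.filter(pl.col("b") < 3),  # validate only rows where b < 3
+            )
+            .interrogate()
+        )
+        ```
+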
+        Thresholds
+        ----------
+        The `thresholds=` parameter is used to set the failure-condition levels for the
+        validation step. If they are set here at the step level, these thresholds will override
+        any thresholds set at the global level in `Validate(thresholds=...)`.
+
+        There are three threshold levels: 'warning', 'error', and 'critical'. The threshold
+        values can either be set as a proportion of failing test units (a value between `0` and
+        `1`) or as the absolute number of failing test units (an integer that's `1` or greater).
+
+        Thresholds can be defined using one of these input schemes:
+
+        1. use the [`Thresholds`](`pointblank.Thresholds`) class (the most direct way to create
+           thresholds)
+        2. provide a tuple of 1-3 values, where position `0` is the 'warning' level, position `1`
+           is the 'error' level, and position `2` is the 'critical' level
+        3. create a dictionary of 1-3 value entries; the valid keys are 'warning', 'error', and
+           'critical'
+        4. a single integer/float value denoting the absolute number or fraction of failing test
+           units for the 'warning' level only
+
+        If the number of failing test units exceeds set thresholds, the validation step will be
+        marked as 'warning', 'error', or 'critical'. Not all threshold levels need to be set;
+        you're free to set any combination of them.
+
+        Aside from reporting failure conditions, thresholds can be used to determine the actions
+        to take for each level of failure (using the `actions=` parameter).
+
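+        As a minimal sketch (the table and threshold values here are illustrative), the
+        dictionary scheme might be used like this:
+
+        ```{python}
+        import pointblank as pb
+        import polars as pl
+
+        tbl_thr = pl.DataFrame({"a": [1, -2, 3, -4]})
+
+        (
+            pb.Validate(data=tbl_thr)
+            .specially(
+                expr=lambda data: data.select(pl.col("a") > 0),
+                thresholds={"warning": 1, "error": 3},  # 2 failing units exceed 'warning' only
+            )
+            .interrogate()
+        )
+        ```
+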
7766
+ Examples
7767
+ --------
7768
+ ```{python}
7769
+ #| echo: false
7770
+ #| output: false
7771
+ import pointblank as pb
7772
+ pb.config(report_incl_header=False, report_incl_footer=False, preview_incl_header=False)
7773
+ ```
7774
+ The `specially()` method offers maximum flexibility for validation, allowing you to create
7775
+ custom validation logic that fits your specific needs. The following examples demonstrate
7776
+ different patterns and use cases for this powerful validation approach.
7777
+
7778
+ ### Simple validation with direct table access
7779
+
7780
+ This example shows the most straightforward use case where we create a function that
7781
+ directly checks if the sum of two columns is positive.
7782
+
7783
+ ```{python}
7784
+ import pointblank as pb
7785
+ import polars as pl
7786
+
7787
+ simple_tbl = pl.DataFrame({
7788
+ "a": [5, 7, 1, 3, 9, 4],
7789
+ "b": [6, 3, 0, 5, 8, 2]
7790
+ })
7791
+
7792
+ # Simple function that validates directly on the table
7793
+ def validate_sum_positive(data):
7794
+ return data.select(pl.col("a") + pl.col("b") > 0)
7795
+
7796
+ (
7797
+ pb.Validate(data=simple_tbl)
7798
+ .specially(expr=validate_sum_positive)
7799
+ .interrogate()
7800
+ )
7801
+ ```
7802
+
7803
+ The function returns a Polars DataFrame with a single boolean column indicating whether
7804
+ the sum of columns `a` and `b` is positive for each row. Each row in the resulting DataFrame
7805
+ is a distinct test unit. This pattern works well for simple validations where you don't need
7806
+ configurable parameters.
7807
+
7808
+ ### Advanced validation with closure variables for parameters
7809
+
7810
+ When you need to make your validation configurable, you can use the function factory pattern
7811
+ (also known as closures) to create parameterized validations:
7812
+
7813
+ ```{python}
7814
+ # Create a parameterized validation function using closures
7815
+ def make_column_ratio_validator(col1, col2, min_ratio):
7816
+ def validate_column_ratio(data):
7817
+ return data.select((pl.col(col1) / pl.col(col2)) > min_ratio)
7818
+ return validate_column_ratio
7819
+
7820
+ (
7821
+ pb.Validate(data=simple_tbl)
7822
+ .specially(
7823
+ expr=make_column_ratio_validator(col1="a", col2="b", min_ratio=0.5)
7824
+ )
7825
+ .interrogate()
7826
+ )
7827
+ ```
7828
+
7829
+ This approach allows you to create reusable validation functions that can be configured with
7830
+ different parameters without modifying the function itself.
7831
+
7832
+ ### Validation function returning a list of booleans
7833
+
7834
+ This example demonstrates how to create a validation function that returns a list of boolean
7835
+ values, where each element represents a separate test unit:
7836
+
7837
+ ```{python}
7838
+ import pointblank as pb
7839
+ import polars as pl
7840
+ import random
7841
+
7842
+ # Create sample data
7843
+ transaction_tbl = pl.DataFrame({
7844
+ "transaction_id": [f"TX{i:04d}" for i in range(1, 11)],
7845
+ "amount": [120.50, 85.25, 50.00, 240.75, 35.20, 150.00, 85.25, 65.00, 210.75, 90.50],
7846
+ "category": ["food", "shopping", "entertainment", "travel", "utilities",
7847
+ "food", "shopping", "entertainment", "travel", "utilities"]
7848
+ })
7849
+
7850
+ # Define a validation function that returns a list of booleans
7851
+ def validate_transaction_rules(data):
7852
+ # Create a list to store individual test results
7853
+ test_results = []
7854
+
7855
+ # Check each row individually against multiple business rules
7856
+ for row in data.iter_rows(named=True):
7857
+ # Rule: transaction IDs must start with "TX" and be 6 chars long
7858
+ valid_id = row["transaction_id"].startswith("TX") and len(row["transaction_id"]) == 6
7859
+
7860
+ # Rule: Amounts must be appropriate for their category
7861
+ valid_amount = True
7862
+ if row["category"] == "food" and (row["amount"] < 10 or row["amount"] > 200):
7863
+ valid_amount = False
7864
+ elif row["category"] == "utilities" and (row["amount"] < 20 or row["amount"] > 300):
7865
+ valid_amount = False
7866
+ elif row["category"] == "entertainment" and row["amount"] > 100:
7867
+ valid_amount = False
7868
+
7869
+ # A transaction passes if it satisfies both rules
7870
+ test_results.append(valid_id and valid_amount)
7871
+
7872
+ return test_results
7873
+
7874
+ (
7875
+ pb.Validate(data=transaction_tbl)
7876
+ .specially(
7877
+ expr=validate_transaction_rules,
7878
+ brief="Validate transaction IDs and amounts by category."
7879
+ )
7880
+ .interrogate()
7881
+ )
7882
+ ```
7883
+
7884
+ This example shows how to create a validation function that applies multiple business rules
7885
+ to each row and returns a list of boolean results. Each boolean in the list represents a
7886
+ separate test unit, and a test unit passes only if all rules are satisfied for a given row.
7887
+
7888
+ The function iterates through each row in the data table, checking:
7889
+
7890
+ 1. if transaction IDs follow the required format
7891
+ 2. if transaction amounts are appropriate for their respective categories
7892
+
7893
+ This approach is powerful when you need to apply complex, conditional logic that can't be
7894
+ easily expressed using the built-in validation functions.
7895
+
7896
+ ### Table-level validation returning a single boolean
7897
+
7898
+ Sometimes you need to validate properties of the entire table rather than row-by-row. In
7899
+ these cases, your function can return a single boolean value:
7900
+
7901
+ ```{python}
7902
+ def validate_table_properties(data):
7903
+ # Check if table has at least one row with column 'a' > 10
7904
+ has_large_values = data.filter(pl.col("a") > 10).height > 0
7905
+
7906
+ # Check if mean of column 'b' is positive
7907
+ has_positive_mean = data.select(pl.mean("b")).item() > 0
7908
+
7909
+ # Return a single boolean for the entire table
7910
+ return has_large_values and has_positive_mean
7911
+
7912
+ (
7913
+ pb.Validate(data=simple_tbl)
7914
+ .specially(expr=validate_table_properties)
7915
+ .interrogate()
7916
+ )
7917
+ ```
7918
+
7919
+ This example demonstrates how to perform multiple checks on the table as a whole and combine
7920
+ them into a single validation result.
7921
+
7922
+ ### Environment validation that doesn't use the data table
7923
+
7924
+ The `specially()` validation method can even be used to validate aspects of your environment
7925
+ that are completely independent of the data:
7926
+
7927
+ ```{python}
7928
+ def validate_pointblank_version():
7929
+ try:
7930
+ import importlib.metadata
7931
+ version = importlib.metadata.version("pointblank")
7932
+ version_parts = version.split(".")
7933
+
7934
+ # Get major and minor components regardless of how many parts there are
7935
+ major = int(version_parts[0])
7936
+ minor = int(version_parts[1])
7937
+
7938
+ # Check both major and minor components for version `0.9+`
7939
+ return (major > 0) or (major == 0 and minor >= 9)
7940
+
7941
+ except Exception as e:
7942
+ # More specific error handling could be added here
7943
+ print(f"Version check failed: {e}")
7944
+ return False
7945
+
7946
+ (
7947
+ pb.Validate(data=simple_tbl)
7948
+ .specially(
7949
+ expr=validate_pointblank_version,
7950
+ brief="Check Pointblank version `>=0.9.0`."
7951
+ )
7952
+ .interrogate()
7953
+ )
7954
+ ```
7955
+
7956
+ This pattern shows how to validate external dependencies or environment conditions as part
7957
+ of your validation workflow. Notice that the function doesn't take any parameters at all,
7958
+ which makes it cleaner when the validation doesn't need to access the data table.
7959
+
7960
+ By combining these patterns, you can create sophisticated validation workflows that address
7961
+ virtually any data quality requirement in your organization.
7962
+ """
+
+        assertion_type = _get_fn_name()
+
+        # TODO: add a check for the expression to be a callable
+        # _check_expr_specially(expr=expr)
+        _check_pre(pre=pre)
+        _check_thresholds(thresholds=thresholds)
+        _check_boolean_input(param=active, param_name="active")
+
+        # Determine threshold to use (global or local) and normalize a local `thresholds=` value
+        thresholds = (
+            self.thresholds if thresholds is None else _normalize_thresholds_creation(thresholds)
+        )
+
+        # Determine brief to use (global or local) and transform any shorthands of `brief=`
+        brief = self.brief if brief is None else _transform_auto_brief(brief=brief)
+
+        val_info = _ValidationInfo(
+            assertion_type=assertion_type,
+            column=None,  # This validation is not specific to any column(s)
+            values=expr,
+            pre=pre,
+            thresholds=thresholds,
+            actions=actions,
+            brief=brief,
+            active=active,
+        )
+
+        self._add_validation(validation_info=val_info)
+
+        return self
+
     def interrogate(
         self,
         collect_extracts: bool = True,
@@ -7724,6 +8308,14 @@ class Validate:
                     tbl_type=tbl_type,
                 ).get_test_results()

+            if assertion_category == "ROWS_COMPLETE":
+                results_tbl = RowsComplete(
+                    data_tbl=data_tbl_step,
+                    columns_subset=column,
+                    threshold=threshold,
+                    tbl_type=tbl_type,
+                ).get_test_results()
+
             if assertion_category == "COL_EXISTS_HAS_TYPE":
                 result_bool = ColExistsHasType(
                     data_tbl=data_tbl_step,
@@ -7814,12 +8406,39 @@ class Validate:
                     tbl_type=tbl_type,
                 ).get_test_results()

-            if assertion_category not in [
-                "COL_EXISTS_HAS_TYPE",
-                "COL_SCHEMA_MATCH",
-                "ROW_COUNT_MATCH",
-                "COL_COUNT_MATCH",
-            ]:
+            if assertion_category == "SPECIALLY":
+                results_tbl_list = SpeciallyValidation(
+                    data_tbl=data_tbl_step,
+                    expression=value,
+                    threshold=threshold,
+                    tbl_type=tbl_type,
+                ).get_test_results()
+
+                #
+                # The result from this could either be a table in the conventional form or
+                # a list of boolean values; handle both cases
+                #
+
+                if isinstance(results_tbl_list, list):
+                    # If the result is a list of boolean values, then set the validation
+                    # results directly from the list
+                    validation.all_passed = all(results_tbl_list)
+                    validation.n = len(results_tbl_list)
+                    validation.n_passed = results_tbl_list.count(True)
+                    validation.n_failed = results_tbl_list.count(False)
+
+                    results_tbl = None
+
+                else:
+                    # If the result is not a list, then we assume it's a table in the
+                    # conventional form (where a column named `pb_is_good_` exists, with
+                    # boolean values)
+
+                    results_tbl = results_tbl_list
+
+            # If the results table is not `None`, then we assume there is a table with a column
+            # called `pb_is_good_` that contains boolean values; we can then use this table to
+            # determine the number of test units that passed and failed
+            if results_tbl is not None:
                 # Extract the `pb_is_good_` column from the table as a results list
                 if tbl_type in IBIS_BACKENDS:
                     # Select the DataFrame library to use for getting the results list
@@ -7994,7 +8613,8 @@ class Validate:
             # TODO: Add support for extraction of rows for Ibis backends
             if (
                 collect_extracts
-                and assertion_type in ROW_BASED_VALIDATION_TYPES + ["rows_distinct"]
+                and assertion_type
+                in ROW_BASED_VALIDATION_TYPES + ["rows_distinct", "rows_complete"]
                 and tbl_type not in IBIS_BACKENDS
             ):
                 # Add row numbers to the results table
@@ -9076,19 +9696,134 @@ class Validate:
         """
         Get a report of the validation results as a JSON-formatted string.

+        The `get_json_report()` method provides a machine-readable report of validation results
+        in JSON format. This is particularly useful for programmatic processing, storing
+        validation results, or integrating with other systems. The report includes detailed
+        information about each validation step, such as assertion type, columns validated,
+        threshold values, test results, and more.
+
+        By default, all available validation information fields are included in the report.
+        However, you can customize the fields to include or exclude using the `use_fields=` and
+        `exclude_fields=` parameters.
+
         Parameters
         ----------
         use_fields
-            A list of fields to include in the report. If `None`, all fields are included.
+            An optional list of specific fields to include in the report. If provided, only these
+            fields will be included in the JSON output. If `None` (the default), all standard
+            validation report fields are included. Have a look at the *Available Report Fields*
+            section below for a list of fields that can be included in the report.
         exclude_fields
-            A list of fields to exclude from the report. If `None`, no fields are excluded.
+            An optional list of fields to exclude from the report. If provided, these fields will
+            be omitted from the JSON output. If `None` (the default), no fields are excluded.
+            This parameter cannot be used together with `use_fields=`. The *Available Report
+            Fields* section provides a listing of fields that can be excluded from the report.

         Returns
         -------
         str
-            A JSON-formatted string representing the validation report.
-        """
+            A JSON-formatted string representing the validation report, with each validation step
+            as an object in the report array.
+
+        Available Report Fields
+        -----------------------
+        The JSON report can include any of the standard validation report fields, including:
+
+        - `i`: the step number (1-indexed)
+        - `i_o`: the original step index from the validation plan (pre-expansion)
+        - `assertion_type`: the type of validation assertion (e.g., `"col_vals_gt"`, etc.)
+        - `column`: the column being validated (or columns used in certain validations)
+        - `values`: the comparison values or parameters used in the validation
+        - `inclusive`: whether the comparison is inclusive (for range-based validations)
+        - `na_pass`: whether `NA`/`Null` values are considered passing (for certain validations)
+        - `pre`: preprocessing function applied before validation
+        - `segments`: data segments to which the validation was applied
+        - `thresholds`: threshold level statement that was used for the validation step
+        - `label`: custom label for the validation step
+        - `brief`: a brief description of the validation step
+        - `active`: whether the validation step is active
+        - `all_passed`: whether all test units passed in the step
+        - `n`: total number of test units
+        - `n_passed`, `n_failed`: number of test units that passed and failed
+        - `f_passed`, `f_failed`: fraction of test units that passed and failed
+        - `warning`, `error`, `critical`: whether the namesake threshold level was exceeded (is
+          `null` if the threshold was not set)
+        - `time_processed`: when the validation step was processed (ISO 8601 format)
+        - `proc_duration_s`: the processing duration in seconds
+
+        Examples
+        --------
+        Let's create a validation plan with a few validation steps and generate a JSON report of
+        the results:

+        ```{python}
+        import pointblank as pb
+        import polars as pl
+
+        # Create a sample DataFrame
+        tbl = pl.DataFrame({
+            "a": [5, 7, 8, 9],
+            "b": [3, 4, 2, 1]
+        })
+
+        # Create and execute a validation plan
+        validation = (
+            pb.Validate(data=tbl)
+            .col_vals_gt(columns="a", value=6)
+            .col_vals_lt(columns="b", value=4)
+            .interrogate()
+        )
+
+        # Get the full JSON report
+        json_report = validation.get_json_report()
+
+        print(json_report)
+        ```
+
+        You can also customize which fields to include:
+
+        ```{python}
+        json_report = validation.get_json_report(
+            use_fields=["i", "assertion_type", "column", "n_passed", "n_failed"]
+        )
+
+        print(json_report)
+        ```
+
+        Or which fields to exclude:
+
+        ```{python}
+        json_report = validation.get_json_report(
+            exclude_fields=[
+                "i_o", "thresholds", "pre", "segments", "values",
+                "na_pass", "inclusive", "label", "brief", "active",
+                "time_processed", "proc_duration_s"
+            ]
+        )
+
+        print(json_report)
+        ```
+
+        The JSON output can be further processed or analyzed programmatically:
+
+        ```{python}
+        import json
+
+        # Parse the JSON report
+        report_data = json.loads(validation.get_json_report())
+
+        # Extract and analyze validation results
+        failing_steps = [step for step in report_data if step["n_failed"] > 0]
+        print(f"Number of failing validation steps: {len(failing_steps)}")
+        ```
+
+        See Also
+        --------
+        - [`get_tabular_report()`](`pointblank.Validate.get_tabular_report`): Get a formatted
+          HTML report as a GT table
+        - [`get_data_extracts()`](`pointblank.Validate.get_data_extracts`): Get rows that failed
+          validation
+        """
         if use_fields is not None and exclude_fields is not None:
             raise ValueError("Cannot specify both `use_fields=` and `exclude_fields=`.")

@@ -9597,7 +10332,7 @@ class Validate:
                 "col_vals_expr",
             ]:
                 columns_upd.append("&mdash;")
-            elif assertion_type[i] in ["rows_distinct"]:
+            elif assertion_type[i] in ["rows_distinct", "rows_complete"]:
                 if not column:
                     # If there is no column subset, then all columns are used
                     columns_upd.append("ALL COLUMNS")
@@ -9605,7 +10340,7 @@ class Validate:
                     # With a column subset list, format with commas between the column names
                     columns_upd.append(", ".join(column))

-            elif assertion_type[i] in ["conjointly"]:
+            elif assertion_type[i] in ["conjointly", "specially"]:
                 columns_upd.append("")
             else:
                 columns_upd.append(str(column))
@@ -9660,13 +10395,14 @@ class Validate:
                 "col_vals_not_null",
                 "col_exists",
                 "rows_distinct",
+                "rows_complete",
             ]:
                 values_upd.append("&mdash;")

             elif assertion_type[i] in ["col_schema_match"]:
                 values_upd.append("SCHEMA")

-            elif assertion_type[i] in ["col_vals_expr"]:
+            elif assertion_type[i] in ["col_vals_expr", "conjointly"]:
                 values_upd.append("COLUMN EXPR")

             elif assertion_type[i] in ["row_count_match", "col_count_match"]:
@@ -9678,8 +10414,8 @@ class Validate:

                 values_upd.append(str(count))

-            elif assertion_type[i] in ["conjointly"]:
-                values_upd.append("COLUMN EXPR")
+            elif assertion_type[i] in ["specially"]:
+                values_upd.append("EXPR")

             # If the assertion type is not recognized, add the value as a string
             else:
@@ -10213,6 +10949,7 @@ class Validate:
         - [`col_vals_regex()`](`pointblank.Validate.col_vals_regex`)
         - [`col_vals_null()`](`pointblank.Validate.col_vals_null`)
         - [`col_vals_not_null()`](`pointblank.Validate.col_vals_not_null`)
+        - [`rows_complete()`](`pointblank.Validate.rows_complete`)
         - [`conjointly()`](`pointblank.Validate.conjointly`)

         The [`rows_distinct()`](`pointblank.Validate.rows_distinct`) validation step will produce a
@@ -10372,7 +11109,7 @@ class Validate:
         # if get_row_count(extract) == 0:
         #     return "No rows were extracted."

-        if assertion_type in ROW_BASED_VALIDATION_TYPES:
+        if assertion_type in ROW_BASED_VALIDATION_TYPES + ["rows_complete"]:
             # Get the extracted data for the step
             extract = self.get_data_extracts(i=i, frame=True)

@@ -11082,6 +11819,13 @@ def _create_autobrief_or_failure_text(
             for_failure=for_failure,
         )

+    if assertion_type == "rows_complete":
+        return _create_text_rows_complete(
+            lang=lang,
+            columns_subset=column,
+            for_failure=for_failure,
+        )
+
     if assertion_type == "row_count_match":
         return _create_text_row_count_match(
             lang=lang,
@@ -11099,6 +11843,9 @@ def _create_autobrief_or_failure_text(
     if assertion_type == "conjointly":
         return _create_text_conjointly(lang=lang, for_failure=for_failure)

+    if assertion_type == "specially":
+        return _create_text_specially(lang=lang, for_failure=for_failure)
+
     return None  # pragma: no cover


@@ -11257,6 +12004,24 @@ def _create_text_rows_distinct(
     return text


+def _create_text_rows_complete(
+    lang: str, columns_subset: list[str] | None, for_failure: bool = False
+) -> str:
+    type_ = _expect_failure_type(for_failure=for_failure)
+
+    if columns_subset is None:
+        text = EXPECT_FAIL_TEXT[f"all_row_complete_{type_}_text"][lang]
+
+    else:
+        column_text = _prep_values_text(values=columns_subset, lang=lang, limit=3)
+
+        text = EXPECT_FAIL_TEXT[f"across_row_complete_{type_}_text"][lang].format(
+            column_text=column_text
+        )
+
+    return text
+
+
 def _create_text_row_count_match(lang: str, value: int, for_failure: bool = False) -> str:
     type_ = _expect_failure_type(for_failure=for_failure)

@@ -11279,6 +12044,12 @@ def _create_text_conjointly(lang: str, for_failure: bool = False) -> str:
     return EXPECT_FAIL_TEXT[f"conjointly_{type_}_text"][lang]


+def _create_text_specially(lang: str, for_failure: bool = False) -> str:
+    type_ = _expect_failure_type(for_failure=for_failure)
+
+    return EXPECT_FAIL_TEXT[f"specially_{type_}_text"][lang]
+
+
 def _prep_column_text(column: str | list[str]) -> str:
     if isinstance(column, list):
         return "`" + str(column[0]) + "`"
@@ -12057,6 +12828,11 @@ def _step_report_row_based(
         text = STEP_REPORT_TEXT["column_is_null"][lang].format(column=column)
     elif assertion_type == "col_vals_not_null":
         text = STEP_REPORT_TEXT["column_is_not_null"][lang].format(column=column)
+    elif assertion_type == "rows_complete":
+        if column is None:
+            text = STEP_REPORT_TEXT["rows_complete_all"][lang]
+        else:
+            text = STEP_REPORT_TEXT["rows_complete_subset"][lang]

     # Wrap assertion text in a <code> tag
     text = (