pointblank 0.12.1__py3-none-any.whl → 0.13.0__py3-none-any.whl

This diff shows the content changes between two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
pointblank/_interrogation.py CHANGED
@@ -1388,6 +1388,17 @@ class RowsDistinct:
1388
1388
  def get_test_results(self):
1389
1389
  return self.test_unit_res
1390
1390
 
1391
+ def test(self):
1392
+ # Get the number of failing test units by counting instances of `False` in the `pb_is_good_`
1393
+ # column and then determine if the test passes overall by comparing the number of failing
1394
+ # test units to the threshold for failing test units
1395
+
1396
+ results_list = nw.from_native(self.test_unit_res)["pb_is_good_"].to_list()
1397
+
1398
+ return _threshold_check(
1399
+ failing_test_units=results_list.count(False), threshold=self.threshold
1400
+ )
1401
+
1391
1402
 
1392
1403
  @dataclass
1393
1404
  class RowsComplete:
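
The hunk above adds a `test()` method to `RowsDistinct` that counts `False` entries in the `pb_is_good_` results column and compares that count against a failure threshold. Below is a minimal sketch of that pass/fail pattern; `threshold_check` is a stand-in for pointblank's internal `_threshold_check()` helper, and the "pass while failures stay below the threshold" semantics are an assumption, not a confirmed behavior of the real helper.

```python
# Hedged sketch only: `threshold_check` approximates what `_threshold_check()`
# is assumed to do; the real helper in pointblank may differ.
def threshold_check(failing_test_units: int, threshold: int) -> bool:
    # Pass while the count of failing test units stays below the threshold
    return failing_test_units < threshold

results_list = [True, False, True, False, True]  # stand-in for the `pb_is_good_` column
print(threshold_check(results_list.count(False), threshold=3))  # True: 2 failures < 3
```
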
@@ -2029,23 +2040,6 @@ def _column_has_null_values(table: FrameT, column: str) -> bool:
2029
2040
  return True
2030
2041
 
2031
2042
 
2032
- def _check_nulls_across_columns_ibis(table, columns_subset):
2033
- # Get all column names from the table
2034
- column_names = columns_subset if columns_subset else table.columns
2035
-
2036
- # Build the expression by combining each column's isnull() with OR operations
2037
- null_expr = functools.reduce(
2038
- lambda acc, col: acc | table[col].isnull() if acc is not None else table[col].isnull(),
2039
- column_names,
2040
- None,
2041
- )
2042
-
2043
- # Add the expression as a new column to the table
2044
- result = table.mutate(_any_is_null_=null_expr)
2045
-
2046
- return result
2047
-
2048
-
2049
2043
  def _check_nulls_across_columns_nw(table, columns_subset):
2050
2044
  # Get all column names from the table
2051
2045
  column_names = columns_subset if columns_subset else table.columns
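
The removed `_check_nulls_across_columns_ibis()` combined per-column `isnull()` checks with `functools.reduce()`, while the surviving narwhals helper (`_check_nulls_across_columns_nw()`) is cut off by the hunk above. The sketch below shows how such a narwhals-based version could plausibly be written; `any_null_column` and the column names are illustrative, not the package's actual implementation.

```python
import functools

import narwhals as nw
import polars as pl


def any_null_column(native_df, columns_subset=None):
    # Build one boolean expression that is True wherever any selected column is null
    df = nw.from_native(native_df)
    cols = columns_subset if columns_subset else df.columns
    null_expr = functools.reduce(
        lambda acc, col: acc | nw.col(col).is_null(),
        cols[1:],
        nw.col(cols[0]).is_null(),
    )
    # Attach the combined check as a new column and hand back the native frame
    return df.with_columns(null_expr.alias("_any_is_null_")).to_native()


print(any_null_column(pl.DataFrame({"a": [1, None], "b": [None, 2]})))
```
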
pointblank/data/api-docs.txt CHANGED
@@ -9798,7 +9798,7 @@ validation workflows. The `yaml_interrogate()` function can be used to run a val
9798
9798
  YAML strings or files. The `validate_yaml()` function checks if the YAML configuration
9799
9799
  passes its own validity checks.
9800
9800
 
9801
- yaml_interrogate(yaml: 'Union[str, Path]') -> 'Validate'
9801
+ yaml_interrogate(yaml: 'Union[str, Path]', set_tbl: 'Union[FrameT, Any, None]' = None) -> 'Validate'
9802
9802
  Execute a YAML-based validation workflow.
9803
9803
 
9804
9804
  This is the main entry point for YAML-based validation workflows. It takes YAML configuration
@@ -9813,13 +9813,20 @@ Execute a YAML-based validation workflow.
9813
9813
  yaml
9814
9814
  YAML configuration as string or file path. Can be: (1) a YAML string containing the
9815
9815
  validation configuration, or (2) a Path object or string path to a YAML file.
9816
+ set_tbl
9817
+ An optional table to override the table specified in the YAML configuration. This allows you
9818
+ to apply a YAML-defined validation workflow to a different table than what's specified in
9819
+ the configuration. If provided, this table will replace the table defined in the YAML's
9820
+ `tbl` field before executing the validation workflow. This can be any supported table type
9821
+ including DataFrame objects, Ibis table objects, CSV file paths, Parquet file paths, GitHub
9822
+ URLs, or database connection strings.
9816
9823
 
9817
9824
  Returns
9818
9825
  -------
9819
9826
  Validate
9820
- An instance of the `Validate` class that has been configured based on the YAML input.
9821
- This object contains the results of the validation steps defined in the YAML configuration.
9822
- It includes metadata like table name, label, language, and thresholds if specified.
9827
+ An instance of the `Validate` class that has been configured based on the YAML input. This
9828
+ object contains the results of the validation steps defined in the YAML configuration. It
9829
+ includes metadata like table name, label, language, and thresholds if specified.
9823
9830
 
9824
9831
  Raises
9825
9832
  ------
@@ -9918,6 +9925,44 @@ Execute a YAML-based validation workflow.
9918
9925
  This approach is particularly useful for storing validation configurations as part of your data
9919
9926
  pipeline or version control system, allowing you to maintain validation rules alongside your
9920
9927
  code.
9928
+
9929
+ ### Using `set_tbl=` to Override the Table
9930
+
9931
+ The `set_tbl=` parameter allows you to override the table specified in the YAML configuration.
9932
+ This is useful when you have a template validation workflow but want to apply it to different
9933
+ tables:
9934
+
9935
+ ```python
9936
+ import polars as pl
9937
+
9938
+ # Create a test table with similar structure to small_table
9939
+ test_table = pl.DataFrame({
9940
+ "date": ["2023-01-01", "2023-01-02", "2023-01-03"],
9941
+ "a": [1, 2, 3],
9942
+ "b": ["1-abc-123", "2-def-456", "3-ghi-789"],
9943
+ "d": [150, 200, 250]
9944
+ })
9945
+
9946
+ # Use the same YAML config but apply it to our test table
9947
+ yaml_config = '''
9948
+ tbl: small_table # This will be overridden
9949
+ tbl_name: Test Table # This name will be used
9950
+ steps:
9951
+ - col_exists:
9952
+ columns: [date, a, b, d]
9953
+ - col_vals_gt:
9954
+ columns: [d]
9955
+ value: 100
9956
+ '''
9957
+
9958
+ # Execute with table override
9959
+ result = pb.yaml_interrogate(yaml_config, set_tbl=test_table)
9960
+ print(f"Validation applied to: {result.tbl_name}")
9961
+ result
9962
+ ```
9963
+
9964
+ This feature makes YAML configurations more reusable and flexible, allowing you to define
9965
+ validation logic once and apply it to multiple similar tables.
9921
9966
 
9922
9967
 
9923
9968
  validate_yaml(yaml: 'Union[str, Path]') -> 'None'
pointblank/validate.py CHANGED
@@ -740,9 +740,9 @@ def _process_data(data: FrameT | Any) -> FrameT | Any:
740
740
  """
741
741
  Centralized data processing pipeline that handles all supported input types.
742
742
 
743
- This function consolidates the data processing pipeline used across multiple
744
- classes and functions in Pointblank. It processes data through a consistent
745
- sequence of transformations to handle different data source types.
743
+ This function consolidates the data processing pipeline used across multiple classes and
744
+ functions in Pointblank. It processes data through a consistent sequence of transformations to
745
+ handle different data source types.
746
746
 
747
747
  The processing order is important:
748
748
 
@@ -829,7 +829,9 @@ def _process_github_url(data: FrameT | Any) -> FrameT | Any:
829
829
  # Parse the URL to check if it's a GitHub URL
830
830
  try:
831
831
  parsed = urlparse(data)
832
- except Exception:
832
+ except ValueError:
833
+ # urlparse can raise ValueError for malformed URLs (e.g., invalid IPv6)
834
+ # Return original data as it's likely not a GitHub URL we can process
833
835
  return data
834
836
 
835
837
  # Check if it's a GitHub URL (standard or raw)
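
As the new comment notes, `urlparse()` raises `ValueError` on certain malformed inputs, so narrowing the `except` clause makes the handled failure explicit. A quick illustration of that failure mode:

```python
from urllib.parse import urlparse

try:
    urlparse("https://[not-a-valid-ipv6")  # unbalanced bracket in the host part
except ValueError as exc:
    print(exc)  # "Invalid IPv6 URL"
```
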
@@ -881,13 +883,10 @@ def _process_github_url(data: FrameT | Any) -> FrameT | Any:
881
883
  else: # .parquet
882
884
  return _process_parquet_input(tmp_file_path)
883
885
 
884
- except Exception:
886
+ except Exception: # pragma: no cover
885
887
  # If download or processing fails, return original data
886
888
  return data
887
889
 
888
- except Exception as e:
889
- raise RuntimeError(f"Failed to download or process GitHub file from {raw_url}: {e}") from e
890
-
891
890
 
892
891
  def _process_connection_string(data: FrameT | Any) -> FrameT | Any:
893
892
  """
@@ -943,8 +942,7 @@ def _process_csv_input(data: FrameT | Any) -> FrameT | Any:
943
942
  if not csv_path.exists():
944
943
  raise FileNotFoundError(f"CSV file not found: {csv_path}")
945
944
 
946
- # Determine which library to use for reading CSV
947
- # Prefer Polars, fallback to Pandas
945
+ # Determine which library to use for reading CSV: prefer Polars but fallback to Pandas
948
946
  if _is_lib_present(lib_name="polars"):
949
947
  try:
950
948
  import polars as pl
@@ -956,7 +954,7 @@ def _process_csv_input(data: FrameT | Any) -> FrameT | Any:
956
954
  import pandas as pd
957
955
 
958
956
  return pd.read_csv(csv_path)
959
- else:
957
+ else: # pragma: no cover
960
958
  raise RuntimeError(
961
959
  f"Failed to read CSV file with Polars: {e}. "
962
960
  "Pandas is not available as fallback."
@@ -1093,7 +1091,7 @@ def _process_parquet_input(data: FrameT | Any) -> FrameT | Any:
1093
1091
  # Multiple files: concatenate them
1094
1092
  dfs = [pd.read_parquet(path) for path in parquet_paths]
1095
1093
  return pd.concat(dfs, ignore_index=True)
1096
- else:
1094
+ else: # pragma: no cover
1097
1095
  raise RuntimeError(
1098
1096
  f"Failed to read Parquet file(s) with Polars: {e}. "
1099
1097
  "Pandas is not available as fallback."
@@ -1615,24 +1613,9 @@ def _generate_display_table(
1615
1613
  # This is used to highlight these values in the table
1616
1614
  if df_lib_name_gt == "polars":
1617
1615
  none_values = {k: data[k].is_null().to_list() for k in col_names}
1618
- elif df_lib_name_gt == "pyspark":
1619
- # For PySpark, check if data has been converted to pandas already
1620
- if hasattr(data, "isnull"):
1621
- # Data has been converted to pandas
1622
- none_values = {k: data[k].isnull() for k in col_names}
1623
- else:
1624
- # Data is still a PySpark DataFrame - use narwhals
1625
- import narwhals as nw
1626
-
1627
- df_nw = nw.from_native(data)
1628
- none_values = {}
1629
- for col in col_names:
1630
- # Get null mask, collect to pandas, then convert to list
1631
- null_mask = (
1632
- df_nw.select(nw.col(col).is_null()).collect().to_pandas().iloc[:, 0].tolist()
1633
- )
1634
- none_values[col] = null_mask
1635
1616
  else:
1617
+ # PySpark data has been converted to Pandas by this point so the 'isnull()'
1618
+ # method can be used
1636
1619
  none_values = {k: data[k].isnull() for k in col_names}
1637
1620
 
1638
1621
  none_values = [(k, i) for k, v in none_values.items() for i, val in enumerate(v) if val]
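
The simplified branch above relies on PySpark data already having been converted to Pandas, so a single `isnull()` path suffices; the final comprehension then flattens the per-column null masks into `(column, row_index)` pairs. A small stdlib-only illustration of that flattening step:

```python
# Dict of per-column null masks -> list of coordinates of the null cells
none_values = {"a": [False, True, False], "b": [True, False, False]}
coords = [(k, i) for k, v in none_values.items() for i, val in enumerate(v) if val]
print(coords)  # [('a', 1), ('b', 0)]
```
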
@@ -1980,59 +1963,68 @@ def missing_vals_tbl(data: FrameT | Any) -> GT:
1980
1963
 
1981
1964
  # Use the `row_ranges` list of lists to query, for each column, the proportion of missing
1982
1965
  # values in each 'sector' of the table (a sector is a range of rows)
1983
- if df_lib_name_gt == "polars":
1984
- missing_vals = {
1985
- col: [
1986
- (
1987
- data[(cut_points[i - 1] if i > 0 else 0) : cut_points[i]][col]
1988
- .isnull()
1989
- .sum()
1990
- .to_polars()
1991
- / (cut_points[i] - (cut_points[i - 1] if i > 0 else 0))
1992
- * 100
1993
- if cut_points[i] > (cut_points[i - 1] if i > 0 else 0)
1994
- else 0
1995
- )
1996
- for i in range(len(cut_points))
1997
- ]
1998
- + [
1999
- (
2000
- data[cut_points[-1] : n_rows][col].isnull().sum().to_polars()
2001
- / (n_rows - cut_points[-1])
2002
- * 100
2003
- if n_rows > cut_points[-1]
2004
- else 0
2005
- )
2006
- ]
2007
- for col in data.columns
2008
- }
1966
+ def _calculate_missing_proportions(use_polars_conversion: bool = False):
1967
+ """
1968
+ Calculate missing value proportions for each column and sector.
1969
+
1970
+ Parameters
1971
+ ----------
1972
+ use_polars_conversion
1973
+ If True, use `.to_polars()` for conversions, otherwise use `.to_pandas()`
1974
+ """
1975
+ missing_vals = {}
1976
+ for col in data.columns:
1977
+ col_missing_props = []
1978
+
1979
+ # Calculate missing value proportions for each sector
1980
+ for i in range(len(cut_points)):
1981
+ start_row = cut_points[i - 1] if i > 0 else 0
1982
+ end_row = cut_points[i]
1983
+ sector_size = end_row - start_row
1984
+
1985
+ if sector_size > 0:
1986
+ sector_data = data[start_row:end_row][col]
1987
+ null_sum = sector_data.isnull().sum()
1988
+
1989
+ # Apply the appropriate conversion method
1990
+ if use_polars_conversion:
1991
+ null_sum_converted = null_sum.to_polars()
1992
+ else:
1993
+ null_sum_converted = null_sum.to_pandas()
2009
1994
 
1995
+ missing_prop = (null_sum_converted / sector_size) * 100
1996
+ col_missing_props.append(missing_prop)
1997
+ else:
1998
+ col_missing_props.append(0)
1999
+
2000
+ # Handle the final sector (after last cut point)
2001
+ if n_rows > cut_points[-1]:
2002
+ start_row = cut_points[-1]
2003
+ sector_size = n_rows - start_row
2004
+
2005
+ sector_data = data[start_row:n_rows][col]
2006
+ null_sum = sector_data.isnull().sum()
2007
+
2008
+ # Apply the appropriate conversion method
2009
+ if use_polars_conversion:
2010
+ null_sum_converted = null_sum.to_polars()
2011
+ else:
2012
+ null_sum_converted = null_sum.to_pandas()
2013
+
2014
+ missing_prop = (null_sum_converted / sector_size) * 100
2015
+ col_missing_props.append(missing_prop)
2016
+ else:
2017
+ col_missing_props.append(0) # pragma: no cover
2018
+
2019
+ missing_vals[col] = col_missing_props
2020
+
2021
+ return missing_vals
2022
+
2023
+ # Use the helper function based on the DataFrame library
2024
+ if df_lib_name_gt == "polars":
2025
+ missing_vals = _calculate_missing_proportions(use_polars_conversion=True)
2010
2026
  else:
2011
- missing_vals = {
2012
- col: [
2013
- (
2014
- data[(cut_points[i - 1] if i > 0 else 0) : cut_points[i]][col]
2015
- .isnull()
2016
- .sum()
2017
- .to_pandas()
2018
- / (cut_points[i] - (cut_points[i - 1] if i > 0 else 0))
2019
- * 100
2020
- if cut_points[i] > (cut_points[i - 1] if i > 0 else 0)
2021
- else 0
2022
- )
2023
- for i in range(len(cut_points))
2024
- ]
2025
- + [
2026
- (
2027
- data[cut_points[-1] : n_rows][col].isnull().sum().to_pandas()
2028
- / (n_rows - cut_points[-1])
2029
- * 100
2030
- if n_rows > cut_points[-1]
2031
- else 0
2032
- )
2033
- ]
2034
- for col in data.columns
2035
- }
2027
+ missing_vals = _calculate_missing_proportions(use_polars_conversion=False)
2036
2028
 
2037
2029
  # Pivot the `missing_vals` dictionary to create a table with the missing value proportions
2038
2030
  missing_vals = {
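
The refactor above replaces two near-duplicate dictionary comprehensions with a `_calculate_missing_proportions()` helper that walks row "sectors" and reports the percentage of missing values in each. Below is a hypothetical standalone version of that sector calculation for a single Pandas Series; `sector_missing_pct` is an illustrative name, not part of pointblank.

```python
import pandas as pd


def sector_missing_pct(series: pd.Series, cut_points: list) -> list:
    # Split the rows at the cut points and compute percent-missing per sector
    bounds = [0, *cut_points, len(series)]
    out = []
    for start, end in zip(bounds[:-1], bounds[1:]):
        size = end - start
        pct = float(series.iloc[start:end].isnull().sum()) / size * 100 if size else 0.0
        out.append(pct)
    return out


s = pd.Series([1, None, None, None, 5, 6])
print(sector_missing_pct(s, cut_points=[2, 4]))  # [50.0, 100.0, 0.0]
```
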
@@ -2053,16 +2045,17 @@ def missing_vals_tbl(data: FrameT | Any) -> GT:
2053
2045
  # Get the column names from the table
2054
2046
  col_names = list(data.columns)
2055
2047
 
2056
- # Iterate over the cut points and get the proportion of missing values in each 'sector'
2057
- # for each column
2058
- if "polars" in tbl_type:
2059
- # Polars case
2048
+ # Helper function for DataFrame missing value calculation (Polars/Pandas)
2049
+ def _calculate_missing_proportions_dataframe(is_polars=False):
2050
+ null_method = "is_null" if is_polars else "isnull"
2051
+
2060
2052
  missing_vals = {
2061
2053
  col: [
2062
2054
  (
2063
- data[(cut_points[i - 1] if i > 0 else 0) : cut_points[i]][col]
2064
- .is_null()
2065
- .sum()
2055
+ getattr(
2056
+ data[(cut_points[i - 1] if i > 0 else 0) : cut_points[i]][col],
2057
+ null_method,
2058
+ )().sum()
2066
2059
  / (cut_points[i] - (cut_points[i - 1] if i > 0 else 0))
2067
2060
  * 100
2068
2061
  if cut_points[i] > (cut_points[i - 1] if i > 0 else 0)
@@ -2072,7 +2065,7 @@ def missing_vals_tbl(data: FrameT | Any) -> GT:
2072
2065
  ]
2073
2066
  + [
2074
2067
  (
2075
- data[cut_points[-1] : n_rows][col].is_null().sum()
2068
+ getattr(data[cut_points[-1] : n_rows][col], null_method)().sum()
2076
2069
  / (n_rows - cut_points[-1])
2077
2070
  * 100
2078
2071
  if n_rows > cut_points[-1]
@@ -2082,7 +2075,8 @@ def missing_vals_tbl(data: FrameT | Any) -> GT:
2082
2075
  for col in data.columns
2083
2076
  }
2084
2077
 
2085
- missing_vals = {
2078
+ # Transform to the expected format
2079
+ formatted_missing_vals = {
2086
2080
  "columns": list(missing_vals.keys()),
2087
2081
  **{
2088
2082
  str(i + 1): [missing_vals[col][i] for col in missing_vals.keys()]
@@ -2091,48 +2085,25 @@ def missing_vals_tbl(data: FrameT | Any) -> GT:
2091
2085
  }
2092
2086
 
2093
2087
  # Get a dictionary of counts of missing values in each column
2094
- missing_val_counts = {col: data[col].is_null().sum() for col in data.columns}
2095
-
2096
- if "pandas" in tbl_type:
2097
- missing_vals = {
2098
- col: [
2099
- (
2100
- data[(cut_points[i - 1] if i > 0 else 0) : cut_points[i]][col]
2101
- .isnull()
2102
- .sum()
2103
- / (cut_points[i] - (cut_points[i - 1] if i > 0 else 0))
2104
- * 100
2105
- if cut_points[i] > (cut_points[i - 1] if i > 0 else 0)
2106
- else 0
2107
- )
2108
- for i in range(len(cut_points))
2109
- ]
2110
- + [
2111
- (
2112
- data[cut_points[-1] : n_rows][col].isnull().sum()
2113
- / (n_rows - cut_points[-1])
2114
- * 100
2115
- if n_rows > cut_points[-1]
2116
- else 0
2117
- )
2118
- ]
2119
- for col in data.columns
2088
+ missing_val_counts = {
2089
+ col: getattr(data[col], null_method)().sum() for col in data.columns
2120
2090
  }
2121
2091
 
2122
- # Pivot the `missing_vals` dictionary to create a table with the missing
2123
- # value proportions
2124
- missing_vals = {
2125
- "columns": list(missing_vals.keys()),
2126
- **{
2127
- str(i + 1): [missing_vals[col][i] for col in missing_vals.keys()]
2128
- for i in range(len(cut_points) + 1)
2129
- },
2130
- }
2092
+ return formatted_missing_vals, missing_val_counts
2131
2093
 
2132
- # Get a dictionary of counts of missing values in each column
2133
- missing_val_counts = {col: data[col].isnull().sum() for col in data.columns}
2094
+ # Iterate over the cut points and get the proportion of missing values in each 'sector'
2095
+ # for each column
2096
+ if "polars" in tbl_type:
2097
+ missing_vals, missing_val_counts = _calculate_missing_proportions_dataframe(
2098
+ is_polars=True
2099
+ )
2100
+
2101
+ elif "pandas" in tbl_type:
2102
+ missing_vals, missing_val_counts = _calculate_missing_proportions_dataframe(
2103
+ is_polars=False
2104
+ )
2134
2105
 
2135
- if "pyspark" in tbl_type:
2106
+ elif "pyspark" in tbl_type:
2136
2107
  from pyspark.sql.functions import col as pyspark_col
2137
2108
 
2138
2109
  # PySpark implementation for missing values calculation
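
The consolidated Polars/Pandas branch above picks the right null-check method by name (`is_null` vs. `isnull`) and calls it through `getattr()`. A small, self-contained illustration of that dispatch; `null_counts` is an illustrative helper and assumes nothing else about `missing_vals_tbl()`.

```python
import pandas as pd
import polars as pl


def null_counts(frame, is_polars: bool) -> dict:
    # Polars uses `is_null()`, Pandas uses `isnull()`; dispatch by method name
    method = "is_null" if is_polars else "isnull"
    return {col: int(getattr(frame[col], method)().sum()) for col in frame.columns}


print(null_counts(pd.DataFrame({"x": [1, None]}), is_polars=False))  # {'x': 1}
print(null_counts(pl.DataFrame({"x": [1, None]}), is_polars=True))   # {'x': 1}
```
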
@@ -2164,7 +2135,7 @@ def missing_vals_tbl(data: FrameT | Any) -> GT:
2164
2135
  missing_prop = (null_count / sector_size) * 100
2165
2136
  col_missing_props.append(missing_prop)
2166
2137
  else:
2167
- col_missing_props.append(0)
2138
+ col_missing_props.append(0) # pragma: no cover
2168
2139
 
2169
2140
  # Handle the final sector (after last cut point)
2170
2141
  if n_rows > cut_points[-1]:
@@ -2184,7 +2155,7 @@ def missing_vals_tbl(data: FrameT | Any) -> GT:
2184
2155
  missing_prop = (null_count / sector_size) * 100
2185
2156
  col_missing_props.append(missing_prop)
2186
2157
  else:
2187
- col_missing_props.append(0)
2158
+ col_missing_props.append(0) # pragma: no cover
2188
2159
 
2189
2160
  missing_vals[col_name] = col_missing_props
2190
2161
 
@@ -2623,7 +2594,7 @@ def get_column_count(data: FrameT | Any) -> int:
2623
2594
  except Exception:
2624
2595
  # Fallback for unsupported types
2625
2596
  if "pandas" in str(type(data)):
2626
- return data.shape[1]
2597
+ return data.shape[1] # pragma: no cover
2627
2598
  else:
2628
2599
  raise ValueError("The input table type supplied in `data=` is not supported.")
2629
2600
 
@@ -2793,14 +2764,14 @@ def get_row_count(data: FrameT | Any) -> int:
2793
2764
  if hasattr(df_nw, "shape"):
2794
2765
  return df_nw.shape[0]
2795
2766
  elif hasattr(df_nw, "height"):
2796
- return df_nw.height
2797
- else:
2767
+ return df_nw.height # pragma: no cover
2768
+ else: # pragma: no cover
2798
2769
  raise ValueError("Unable to determine row count from Narwhals DataFrame")
2799
2770
  except Exception:
2800
2771
  # Fallback for types that don't work with Narwhals
2801
- if "pandas" in str(type(data)):
2772
+ if "pandas" in str(type(data)): # pragma: no cover
2802
2773
  return data.shape[0]
2803
- elif "pyspark" in str(type(data)):
2774
+ elif "pyspark" in str(type(data)): # pragma: no cover
2804
2775
  return data.count()
2805
2776
  else:
2806
2777
  raise ValueError("The input table type supplied in `data=` is not supported.")
@@ -3019,7 +2990,7 @@ def connect_to_table(connection_string: str) -> Any:
3019
2990
  # Get list of available tables
3020
2991
  try:
3021
2992
  available_tables = conn.list_tables()
3022
- except Exception:
2993
+ except Exception: # pragma: no cover
3023
2994
  available_tables = []
3024
2995
 
3025
2996
  conn.disconnect()
@@ -3064,7 +3035,7 @@ def connect_to_table(connection_string: str) -> Any:
3064
3035
  }
3065
3036
 
3066
3037
  # Check if this is a missing backend dependency
3067
- for backend, install_cmd in backend_install_map.items():
3038
+ for backend, install_cmd in backend_install_map.items(): # pragma: no cover
3068
3039
  if backend in error_str and ("not found" in error_str or "no module" in error_str):
3069
3040
  raise ConnectionError(
3070
3041
  f"Missing {backend.upper()} backend for Ibis. Install it with:\n"
@@ -3081,7 +3052,7 @@ def connect_to_table(connection_string: str) -> Any:
3081
3052
  ) from e
3082
3053
 
3083
3054
  # Generic connection error
3084
- raise ConnectionError(
3055
+ raise ConnectionError( # pragma: no cover
3085
3056
  f"Failed to connect to database using connection string: {connection_string}\n"
3086
3057
  f"Error: {e}\n\n"
3087
3058
  f"No table specified. Use the format: {connection_string}::TABLE_NAME"
@@ -3090,7 +3061,7 @@ def connect_to_table(connection_string: str) -> Any:
3090
3061
  # Split connection string and table name
3091
3062
  try:
3092
3063
  base_connection, table_name = connection_string.rsplit("::", 1)
3093
- except ValueError:
3064
+ except ValueError: # pragma: no cover
3094
3065
  raise ValueError(f"Invalid connection string format: {connection_string}")
3095
3066
 
3096
3067
  # Connect to database and get table
@@ -3124,7 +3095,7 @@ def connect_to_table(connection_string: str) -> Any:
3124
3095
  # Check if table doesn't exist
3125
3096
  if "table" in error_str and ("not found" in error_str or "does not exist" in error_str):
3126
3097
  # Try to get available tables for helpful message
3127
- try:
3098
+ try: # pragma: no cover
3128
3099
  available_tables = conn.list_tables()
3129
3100
  if available_tables:
3130
3101
  table_list = "\n".join(f" - {table}" for table in available_tables)
@@ -3758,6 +3729,141 @@ class Validate:
3758
3729
 
3759
3730
  self.validation_info = []
3760
3731
 
3732
+ def set_tbl(
3733
+ self,
3734
+ tbl: FrameT | Any,
3735
+ tbl_name: str | None = None,
3736
+ label: str | None = None,
3737
+ ) -> Validate:
3738
+ """
3739
+ Set or replace the table associated with the Validate object.
3740
+
3741
+ This method allows you to replace the table associated with a Validate object with a
3742
+ different (but presumably similar) table. This is useful when you want to apply the same
3743
+ validation plan to multiple tables or when you have a validation workflow defined but want
3744
+ to swap in a different data source.
3745
+
3746
+ Parameters
3747
+ ----------
3748
+ tbl
3749
+ The table to replace the existing table with. This can be any supported table type
3750
+ including DataFrame objects, Ibis table objects, CSV file paths, Parquet file paths,
3751
+ GitHub URLs, or database connection strings. The same table type constraints apply as in
3752
+ the `Validate` constructor.
3753
+ tbl_name
3754
+ An optional name to assign to the new input table object. If no value is provided, the
3755
+ existing table name will be retained.
3756
+ label
3757
+ An optional label for the validation plan. If no value is provided, the existing label
3758
+ will be retained.
3759
+
3760
+ Returns
3761
+ -------
3762
+ Validate
3763
+ A new `Validate` object with the replacement table.
3764
+
3765
+ When to Use
3766
+ -----------
3767
+ The `set_tbl()` method is particularly useful in scenarios where you have:
3768
+
3769
+ - multiple similar tables that need the same validation checks
3770
+ - a template validation workflow that should be applied to different data sources
3771
+ - YAML-defined validations where you want to override the table specified in the YAML
3772
+
3773
+ The `set_tbl()` method creates a copy of the validation object with the new table, so the
3774
+ original validation object remains unchanged. This allows you to reuse validation plans
3775
+ across multiple tables without interference.
3776
+
3777
+ Examples
3778
+ --------
3779
+ ```{python}
3780
+ #| echo: false
3781
+ #| output: false
3782
+ import pointblank as pb
3783
+ pb.config(report_incl_header=False, report_incl_footer=False, preview_incl_header=False)
3784
+ ```
3785
+ We will first create two similar tables for our future validation plans.
3786
+
3787
+ ```{python}
3788
+ import pointblank as pb
3789
+ import polars as pl
3790
+
3791
+ # Create two similar tables
3792
+ table_1 = pl.DataFrame({
3793
+ "x": [1, 2, 3, 4, 5],
3794
+ "y": [5, 4, 3, 2, 1],
3795
+ "z": ["a", "b", "c", "d", "e"]
3796
+ })
3797
+
3798
+ table_2 = pl.DataFrame({
3799
+ "x": [2, 4, 6, 8, 10],
3800
+ "y": [10, 8, 6, 4, 2],
3801
+ "z": ["f", "g", "h", "i", "j"]
3802
+ })
3803
+ ```
3804
+
3805
+ Create a validation plan with the first table.
3806
+
3807
+ ```{python}
3808
+ validation_table_1 = (
3809
+ pb.Validate(
3810
+ data=table_1,
3811
+ tbl_name="Table 1",
3812
+ label="Validation applied to the first table"
3813
+ )
3814
+ .col_vals_gt(columns="x", value=0)
3815
+ .col_vals_lt(columns="y", value=10)
3816
+ )
3817
+ ```
3818
+
3819
+ Now apply the same validation plan to the second table.
3820
+
3821
+ ```{python}
3822
+ validation_table_2 = (
3823
+ validation_table_1
3824
+ .set_tbl(
3825
+ tbl=table_2,
3826
+ tbl_name="Table 2",
3827
+ label="Validation applied to the second table"
3828
+ )
3829
+ )
3830
+ ```
3831
+
3832
+ Here is the interrogation of the first table:
3833
+
3834
+ ```{python}
3835
+ validation_table_1.interrogate()
3836
+ ```
3837
+
3838
+ And the second table:
3839
+
3840
+ ```{python}
3841
+ validation_table_2.interrogate()
3842
+ ```
3843
+ """
3844
+ from copy import deepcopy
3845
+
3846
+ # Create a deep copy of the current Validate object
3847
+ new_validate = deepcopy(self)
3848
+
3849
+ # Process the new table through the centralized data processing pipeline
3850
+ new_validate.data = _process_data(tbl)
3851
+
3852
+ # Update table name if provided, otherwise keep existing
3853
+ if tbl_name is not None:
3854
+ new_validate.tbl_name = tbl_name
3855
+
3856
+ # Update label if provided, otherwise keep existing
3857
+ if label is not None:
3858
+ new_validate.label = label
3859
+
3860
+ # Reset interrogation state since we have a new table, but preserve validation steps
3861
+ new_validate.time_start = None
3862
+ new_validate.time_end = None
3863
+ # Note: We keep validation_info as it contains the defined validation steps
3864
+
3865
+ return new_validate
3866
+
3761
3867
  def _repr_html_(self) -> str:
3762
3868
  return self.get_tabular_report()._repr_html_() # pragma: no cover
3763
3869
 
pointblank/yaml.py CHANGED
@@ -4,6 +4,7 @@ from pathlib import Path
4
4
  from typing import Any, Union
5
5
 
6
6
  import yaml
7
+ from narwhals.typing import FrameT
7
8
 
8
9
  from pointblank._utils import _is_lib_present
9
10
  from pointblank.thresholds import Actions
@@ -749,7 +750,7 @@ class YAMLValidator:
749
750
  return validation
750
751
 
751
752
 
752
- def yaml_interrogate(yaml: Union[str, Path]) -> Validate:
753
+ def yaml_interrogate(yaml: Union[str, Path], set_tbl: Union[FrameT, Any, None] = None) -> Validate:
753
754
  """Execute a YAML-based validation workflow.
754
755
 
755
756
  This is the main entry point for YAML-based validation workflows. It takes YAML configuration
@@ -764,13 +765,20 @@ def yaml_interrogate(yaml: Union[str, Path]) -> Validate:
764
765
  yaml
765
766
  YAML configuration as string or file path. Can be: (1) a YAML string containing the
766
767
  validation configuration, or (2) a Path object or string path to a YAML file.
768
+ set_tbl
769
+ An optional table to override the table specified in the YAML configuration. This allows you
770
+ to apply a YAML-defined validation workflow to a different table than what's specified in
771
+ the configuration. If provided, this table will replace the table defined in the YAML's
772
+ `tbl` field before executing the validation workflow. This can be any supported table type
773
+ including DataFrame objects, Ibis table objects, CSV file paths, Parquet file paths, GitHub
774
+ URLs, or database connection strings.
767
775
 
768
776
  Returns
769
777
  -------
770
778
  Validate
771
- An instance of the `Validate` class that has been configured based on the YAML input.
772
- This object contains the results of the validation steps defined in the YAML configuration.
773
- It includes metadata like table name, label, language, and thresholds if specified.
779
+ An instance of the `Validate` class that has been configured based on the YAML input. This
780
+ object contains the results of the validation steps defined in the YAML configuration. It
781
+ includes metadata like table name, label, language, and thresholds if specified.
774
782
 
775
783
  Raises
776
784
  ------
@@ -875,10 +883,59 @@ def yaml_interrogate(yaml: Union[str, Path]) -> Validate:
875
883
  This approach is particularly useful for storing validation configurations as part of your data
876
884
  pipeline or version control system, allowing you to maintain validation rules alongside your
877
885
  code.
886
+
887
+ ### Using `set_tbl=` to Override the Table
888
+
889
+ The `set_tbl=` parameter allows you to override the table specified in the YAML configuration.
890
+ This is useful when you have a template validation workflow but want to apply it to different
891
+ tables:
892
+
893
+ ```{python}
894
+ import polars as pl
895
+
896
+ # Create a test table with similar structure to small_table
897
+ test_table = pl.DataFrame({
898
+ "date": ["2023-01-01", "2023-01-02", "2023-01-03"],
899
+ "a": [1, 2, 3],
900
+ "b": ["1-abc-123", "2-def-456", "3-ghi-789"],
901
+ "d": [150, 200, 250]
902
+ })
903
+
904
+ # Use the same YAML config but apply it to our test table
905
+ yaml_config = '''
906
+ tbl: small_table # This will be overridden
907
+ tbl_name: Test Table # This name will be used
908
+ steps:
909
+ - col_exists:
910
+ columns: [date, a, b, d]
911
+ - col_vals_gt:
912
+ columns: [d]
913
+ value: 100
914
+ '''
915
+
916
+ # Execute with table override
917
+ result = pb.yaml_interrogate(yaml_config, set_tbl=test_table)
918
+ print(f"Validation applied to: {result.tbl_name}")
919
+ result
920
+ ```
921
+
922
+ This feature makes YAML configurations more reusable and flexible, allowing you to define
923
+ validation logic once and apply it to multiple similar tables.
878
924
  """
879
925
  validator = YAMLValidator()
880
926
  config = validator.load_config(yaml)
881
- return validator.execute_workflow(config)
927
+
928
+ # If `set_tbl=` is provided, we need to build the validation workflow and then use `set_tbl()`
929
+ if set_tbl is not None:
930
+ # First build the validation object without interrogation
931
+ validation = validator.build_validation(config)
932
+ # Then replace the table using set_tbl method
933
+ validation = validation.set_tbl(tbl=set_tbl)
934
+ # Finally interrogate with the new table
935
+ return validation.interrogate()
936
+ else:
937
+ # Standard execution without table override (includes interrogation)
938
+ return validator.execute_workflow(config)
882
939
 
883
940
 
884
941
  def load_yaml_config(file_path: Union[str, Path]) -> dict:
@@ -1453,26 +1510,6 @@ def yaml_to_python(yaml: Union[str, Path]) -> str:
1453
1510
  action_params.append(f"highest_only={value.highest_only}")
1454
1511
  actions_str = "pb.Actions(" + ", ".join(action_params) + ")"
1455
1512
  param_parts.append(f"actions={actions_str}")
1456
- elif isinstance(value, dict):
1457
- action_params = []
1458
- step_action_base = f"steps[{step_index}].{list(step_config.keys())[0]}.actions"
1459
- for action_key, action_value in value.items():
1460
- if action_key == "highest_only":
1461
- action_params.append(f"{action_key}={action_value}")
1462
- else:
1463
- # Check if we have an original expression for this action
1464
- action_expr_path = f"{step_action_base}.{action_key}"
1465
- if action_expr_path in step_expressions:
1466
- action_params.append(
1467
- f"{action_key}={step_expressions[action_expr_path]}"
1468
- )
1469
- elif isinstance(action_value, str):
1470
- action_params.append(f'{action_key}="{action_value}"')
1471
- else:
1472
- # For callables or complex expressions
1473
- action_params.append(f"{action_key}={action_value}")
1474
- actions_str = "pb.Actions(" + ", ".join(action_params) + ")"
1475
- param_parts.append(f"actions={actions_str}")
1476
1513
  else:
1477
1514
  param_parts.append(f"actions={value}")
1478
1515
  elif key == "thresholds":
pointblank-0.12.1.dist-info/METADATA → pointblank-0.13.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pointblank
3
- Version: 0.12.1
3
+ Version: 0.13.0
4
4
  Summary: Find out if your data is what you think it is.
5
5
  Author-email: Richard Iannone <riannone@me.com>
6
6
  License: MIT License
@@ -60,6 +60,12 @@ Requires-Dist: chatlas>=0.3.0; extra == "generate"
60
60
  Requires-Dist: anthropic[bedrock]>=0.45.2; extra == "generate"
61
61
  Requires-Dist: openai>=1.63.0; extra == "generate"
62
62
  Requires-Dist: shiny>=1.3.0; extra == "generate"
63
+ Provides-Extra: mcp
64
+ Requires-Dist: mcp[cli]>=1.10.1; extra == "mcp"
65
+ Requires-Dist: fastmcp>=2.11.3; extra == "mcp"
66
+ Requires-Dist: pytest-asyncio>=1.0.0; extra == "mcp"
67
+ Provides-Extra: excel
68
+ Requires-Dist: openpyxl>=3.0.0; extra == "excel"
63
69
  Provides-Extra: bigquery
64
70
  Requires-Dist: ibis-framework[bigquery]>=9.5.0; extra == "bigquery"
65
71
  Provides-Extra: databricks
@@ -84,6 +90,7 @@ Requires-Dist: quartodoc>=0.8.1; python_version >= "3.9" and extra == "docs"
84
90
  Requires-Dist: pandas>=2.2.3; extra == "docs"
85
91
  Requires-Dist: polars>=1.17.1; extra == "docs"
86
92
  Requires-Dist: pyspark==3.5.6; extra == "docs"
93
+ Requires-Dist: openpyxl>=3.0.0; extra == "docs"
87
94
  Dynamic: license-file
88
95
 
89
96
  <div align="center">
pointblank-0.12.1.dist-info/RECORD → pointblank-0.13.0.dist-info/RECORD CHANGED
@@ -3,7 +3,7 @@ pointblank/_constants.py,sha256=rB8qTnhabwmSQURevHqokC1pp5lfaWMCzhmbMZ0CP8A,8151
3
3
  pointblank/_constants_docs.py,sha256=JBmtt16zTYQ-zaM4ElLExtKs-dKlnN553Ys2ML1Y1C8,2099
4
4
  pointblank/_constants_translations.py,sha256=HXcCYmKoMjoaFv-Ym4UWv3AsIVXik2zDyAy7xvTvv0Y,186710
5
5
  pointblank/_datascan_utils.py,sha256=EMfeabXm_ZsCUKPROB7rFhyOpjtRs8jcnZ_9nBtMyws,1750
6
- pointblank/_interrogation.py,sha256=a0O30kY6GQmeqkAPZqBynFJHsmwFXr6pimpNL2uUPaU,76996
6
+ pointblank/_interrogation.py,sha256=p3qPTgcsYiDEyV9d5pWLzAqz9rU9-IsfmSFV4sWRBNI,76932
7
7
  pointblank/_typing.py,sha256=aItbCbzhbzqjK3lCbL27ltRyXoAH1c3-U6xQdRzg-lU,1594
8
8
  pointblank/_utils.py,sha256=ikgkFomoAEOxaiItHZUo3NTHu0MJHWfKAF_fnX9rRnA,30685
9
9
  pointblank/_utils_check_args.py,sha256=rFEc1nbCN8ftsQQWVjCNWmQ2QmUDxkfgmoJclrZeTLs,5489
@@ -21,9 +21,9 @@ pointblank/schema.py,sha256=vwGF8UKy2riRSQzcwatcI6L0t_6ccdbOayrKonvyodE,45777
21
21
  pointblank/segments.py,sha256=RXp3lPr3FboVseadNqLgIeoMBh_mykrQSFp1WtV41Yg,5570
22
22
  pointblank/tf.py,sha256=8o_8m4i01teulEe3-YYMotSNf3tImjBMInsvdjSAO5Q,8844
23
23
  pointblank/thresholds.py,sha256=mybeLzTVdmN04NLKoV-jiSBXsWknwHO0Gox0ttVN_MU,25766
24
- pointblank/validate.py,sha256=KvnC0UnvVW2mkoWkp1fDIXotuBl7MJeU6_ggp_0yDoo,693082
25
- pointblank/yaml.py,sha256=4DrkOJwCQ3CaXQ7ESNIW72pp-dL1ctlX6ONU30Vh1Fs,57901
26
- pointblank/data/api-docs.txt,sha256=0wXk__xYwgKeS24ZjbaTPFeJ3ZO7AIyMQoFClCcvPTc,529897
24
+ pointblank/validate.py,sha256=py6w239Mh7tbAfXJkanDLARCkWE5EFhTlfvS0KOjnWA,697215
25
+ pointblank/yaml.py,sha256=Sy802CZBOgEZGwbIes8wcXPPt2a5rXO0b3lh9tsLS8w,58966
26
+ pointblank/data/api-docs.txt,sha256=w2nIkIL_fJpXlPR9clogqcgdiv-uHvdSDI8gjkP_mCQ,531711
27
27
  pointblank/data/game_revenue-duckdb.zip,sha256=tKIVx48OGLYGsQPS3h5AjA2Nyq_rfEpLCjBiFUWhagU,35880
28
28
  pointblank/data/game_revenue.zip,sha256=7c9EvHLyi93CHUd4p3dM4CZ-GucFCtXKSPxgLojL32U,33749
29
29
  pointblank/data/global_sales-duckdb.zip,sha256=2ok_cvJ1ZuSkXnw0R6_OkKYRTWhJ-jJEMq2VYsv5fqY,1336390
@@ -33,9 +33,9 @@ pointblank/data/nycflights.zip,sha256=yVjbUaKUz2LydSdF9cABuir0VReHBBgV7shiNWSd0m
33
33
  pointblank/data/polars-api-docs.txt,sha256=KGcS-BOtUs9zgpkWfXD-GFdFh4O_zjdkpX7msHjztLg,198045
34
34
  pointblank/data/small_table-duckdb.zip,sha256=BhTaZ2CRS4-9Z1uVhOU6HggvW3XCar7etMznfENIcOc,2028
35
35
  pointblank/data/small_table.zip,sha256=lmFb90Nb-v5X559Ikjg31YLAXuRyMkD9yLRElkXPMzQ,472
36
- pointblank-0.12.1.dist-info/licenses/LICENSE,sha256=apLF-HWPNU7pT5bmf5KmZpD5Cklpy2u-BN_0xBoRMLY,1081
37
- pointblank-0.12.1.dist-info/METADATA,sha256=1fJY92u1AiJdYggJLaUf0TKbovh3ytcihIdh4PcBEQ8,19242
38
- pointblank-0.12.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
39
- pointblank-0.12.1.dist-info/entry_points.txt,sha256=GqqqOTOH8uZe22wLcvYjzpizqk_j4MNcUo2YM14ryCw,42
40
- pointblank-0.12.1.dist-info/top_level.txt,sha256=-wHrS1SvV8-nhvc3w-PPYs1C1WtEc1pK-eGjubbCCKc,11
41
- pointblank-0.12.1.dist-info/RECORD,,
36
+ pointblank-0.13.0.dist-info/licenses/LICENSE,sha256=apLF-HWPNU7pT5bmf5KmZpD5Cklpy2u-BN_0xBoRMLY,1081
37
+ pointblank-0.13.0.dist-info/METADATA,sha256=A-tNLSbVOz6M27ZVq_ihOQiOdTtEMs3ub8T27kK_DSY,19529
38
+ pointblank-0.13.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
39
+ pointblank-0.13.0.dist-info/entry_points.txt,sha256=GqqqOTOH8uZe22wLcvYjzpizqk_j4MNcUo2YM14ryCw,42
40
+ pointblank-0.13.0.dist-info/top_level.txt,sha256=-wHrS1SvV8-nhvc3w-PPYs1C1WtEc1pK-eGjubbCCKc,11
41
+ pointblank-0.13.0.dist-info/RECORD,,