pointblank 0.13.4__py3-none-any.whl → 0.14.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pointblank/__init__.py +4 -0
- pointblank/_constants.py +54 -0
- pointblank/_constants_translations.py +487 -2
- pointblank/_interrogation.py +182 -11
- pointblank/_utils.py +3 -3
- pointblank/_utils_ai.py +850 -0
- pointblank/cli.py +128 -115
- pointblank/column.py +1 -1
- pointblank/data/api-docs.txt +198 -13
- pointblank/data/validations/README.md +108 -0
- pointblank/data/validations/complex_preprocessing.json +54 -0
- pointblank/data/validations/complex_preprocessing.pkl +0 -0
- pointblank/data/validations/generate_test_files.py +127 -0
- pointblank/data/validations/multiple_steps.json +83 -0
- pointblank/data/validations/multiple_steps.pkl +0 -0
- pointblank/data/validations/narwhals_function.json +28 -0
- pointblank/data/validations/narwhals_function.pkl +0 -0
- pointblank/data/validations/no_preprocessing.json +83 -0
- pointblank/data/validations/no_preprocessing.pkl +0 -0
- pointblank/data/validations/pandas_compatible.json +28 -0
- pointblank/data/validations/pandas_compatible.pkl +0 -0
- pointblank/data/validations/preprocessing_functions.py +46 -0
- pointblank/data/validations/simple_preprocessing.json +57 -0
- pointblank/data/validations/simple_preprocessing.pkl +0 -0
- pointblank/datascan.py +4 -4
- pointblank/scan_profile.py +6 -6
- pointblank/schema.py +8 -82
- pointblank/thresholds.py +1 -1
- pointblank/validate.py +1233 -12
- {pointblank-0.13.4.dist-info → pointblank-0.14.0.dist-info}/METADATA +66 -8
- pointblank-0.14.0.dist-info/RECORD +55 -0
- pointblank-0.13.4.dist-info/RECORD +0 -39
- {pointblank-0.13.4.dist-info → pointblank-0.14.0.dist-info}/WHEEL +0 -0
- {pointblank-0.13.4.dist-info → pointblank-0.14.0.dist-info}/entry_points.txt +0 -0
- {pointblank-0.13.4.dist-info → pointblank-0.14.0.dist-info}/licenses/LICENSE +0 -0
- {pointblank-0.13.4.dist-info → pointblank-0.14.0.dist-info}/top_level.txt +0 -0
pointblank/data/api-docs.txt
CHANGED
@@ -239,7 +239,7 @@ Validate(data: 'FrameT | Any', tbl_name: 'str | None' = None, label: 'str | None
     summary = pb.get_validation_summary()
     if summary["status"] == "CRITICAL":
         send_alert_email(
-            subject=f"CRITICAL validation failures in {summary['
+            subject=f"CRITICAL validation failures in {summary['tbl_name']}",
             body=f"{summary['critical_steps']} steps failed with critical severity."
         )

@@ -287,6 +287,11 @@ Validate(data: 'FrameT | Any', tbl_name: 'str | None' = None, label: 'str | None
 - Japanese (`"ja"`)
 - Korean (`"ko"`)
 - Vietnamese (`"vi"`)
+- Indonesian (`"id"`)
+- Ukrainian (`"uk"`)
+- Hebrew (`"he"`)
+- Thai (`"th"`)
+- Persian (`"fa"`)

 Automatically generated briefs (produced by using `brief=True` or `brief="...{auto}..."`) will
 be written in the selected language. The language setting will also used when generating the
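The new language codes plug into the existing language option on `pb.Validate()`. As a minimal sketch (assuming the `lang=` argument documented in this section of the API docs), briefs can be auto-generated in one of the newly added languages:

```python
import pointblank as pb

# Hedged example: auto-generated step briefs rendered in Ukrainian ("uk"),
# one of the language codes added in 0.14.0
validation = (
    pb.Validate(
        data=pb.load_dataset(dataset="small_table", tbl_type="polars"),
        lang="uk",
        brief=True,
    )
    .col_vals_gt(columns="a", value=0)
    .interrogate()
)

validation
```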
@@ -858,7 +863,7 @@ FinalActions(*args)
     def send_alert():
         summary = pb.get_validation_summary()
         if summary["highest_severity"] == "critical":
-            print(f"ALERT: Critical validation failures found in {summary['
+            print(f"ALERT: Critical validation failures found in {summary['tbl_name']}")

     validation = (
         pb.Validate(
@@ -3186,7 +3191,10 @@ col_vals_in_set(self, columns: 'str | list[str] | Column | ColumnSelector | Colu
     multiple columns are supplied or resolved, there will be a separate validation step
     generated for each column.
 set
-    A
+    A collection of values to compare against. Can be a list of values, a Python Enum class,
+    or a collection containing Enum instances. When an Enum class is provided, all enum
+    values will be used. When a collection contains Enum instances, their values will be
+    extracted automatically.
 pre
     An optional preprocessing function or lambda to apply to the data table during
     interrogation. This function should take a table as input and return a modified table.

@@ -3357,9 +3365,63 @@ col_vals_in_set(self, columns: 'str | list[str] | Column | ColumnSelector | Colu

 The validation table reports two failing test units. The specific failing cases are for the
 column `b` values of `8` and `1`, which are not in the set of `[2, 3, 4, 5, 6]`.
+
+**Using Python Enums**
+
+The `col_vals_in_set()` method also supports Python Enum classes and instances, which can
+make validations more readable and maintainable:
+
+```python
+from enum import Enum
+
+class Color(Enum):
+    RED = "red"
+    GREEN = "green"
+    BLUE = "blue"
+
+# Create a table with color data
+tbl_colors = pl.DataFrame({
+    "product": ["shirt", "pants", "hat", "shoes"],
+    "color": ["red", "blue", "green", "yellow"]
+})
+
+# Validate using an Enum class (all enum values are allowed)
+validation = (
+    pb.Validate(data=tbl_colors)
+    .col_vals_in_set(columns="color", set=Color)
+    .interrogate()
+)
+
+validation
+```
+
+This validation will fail for the `"yellow"` value since it's not in the `Color` enum.
+
+You can also use specific Enum instances or mix them with regular values:
+
+```python
+# Validate using specific Enum instances
+validation = (
+    pb.Validate(data=tbl_colors)
+    .col_vals_in_set(columns="color", set=[Color.RED, Color.BLUE])
+    .interrogate()
+)
+
+# Mix Enum instances with regular values
+validation = (
+    pb.Validate(data=tbl_colors)
+    .col_vals_in_set(columns="color", set=[Color.RED, Color.BLUE, "yellow"])
+    .interrogate()
+)
+
+validation
+```
+
+In this case, the `"green"` value will cause a failing test unit since it's not part of the
+specified set.


-col_vals_not_in_set(self, columns: 'str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals', set: '
+col_vals_not_in_set(self, columns: 'str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals', set: 'Collection[Any]', pre: 'Callable | None' = None, segments: 'SegmentSpec | None' = None, thresholds: 'int | float | bool | tuple | dict | Thresholds' = None, actions: 'Actions | None' = None, brief: 'str | bool | None' = None, active: 'bool' = True) -> 'Validate'

 Validate whether column values are not in a set of values.

@@ -3376,7 +3438,10 @@ col_vals_not_in_set(self, columns: 'str | list[str] | Column | ColumnSelector |
     multiple columns are supplied or resolved, there will be a separate validation step
     generated for each column.
 set
-    A
+    A collection of values to compare against. Can be a list of values, a Python Enum class,
+    or a collection containing Enum instances. When an Enum class is provided, all enum
+    values will be used. When a collection contains Enum instances, their values will be
+    extracted automatically.
 pre
     An optional preprocessing function or lambda to apply to the data table during
     interrogation. This function should take a table as input and return a modified table.

@@ -3548,6 +3613,36 @@ col_vals_not_in_set(self, columns: 'str | list[str] | Column | ColumnSelector |

 The validation table reports two failing test units. The specific failing cases are for the
 column `b` values of `2` and `6`, both of which are in the set of `[2, 3, 4, 5, 6]`.
+
+**Using Python Enums**
+
+Like `col_vals_in_set()`, this method also supports Python Enum classes and instances:
+
+```python
+from enum import Enum
+
+class InvalidStatus(Enum):
+    DELETED = "deleted"
+    ARCHIVED = "archived"
+
+# Create a table with status data
+status_table = pl.DataFrame({
+    "product": ["widget", "gadget", "tool", "device"],
+    "status": ["active", "pending", "deleted", "active"]
+})
+
+# Validate that no values are in the invalid status set
+validation = (
+    pb.Validate(data=status_table)
+    .col_vals_not_in_set(columns="status", set=InvalidStatus)
+    .interrogate()
+)
+
+validation
+```
+
+This `"deleted"` value in the `status` column will fail since it matches one of the invalid
+statuses in the `InvalidStatus` enum.


 col_vals_null(self, columns: 'str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals', pre: 'Callable | None' = None, segments: 'SegmentSpec | None' = None, thresholds: 'int | float | bool | tuple | dict | Thresholds' = None, actions: 'Actions | None' = None, brief: 'str | bool | None' = None, active: 'bool' = True) -> 'Validate'

@@ -3922,7 +4017,7 @@ col_vals_not_null(self, columns: 'str | list[str] | Column | ColumnSelector | Co
 two Null values in column `b`.


-col_vals_regex(self, columns: 'str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals', pattern: 'str', na_pass: 'bool' = False, pre: 'Callable | None' = None, segments: 'SegmentSpec | None' = None, thresholds: 'int | float | bool | tuple | dict | Thresholds' = None, actions: 'Actions | None' = None, brief: 'str | bool | None' = None, active: 'bool' = True) -> 'Validate'
+col_vals_regex(self, columns: 'str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals', pattern: 'str', na_pass: 'bool' = False, inverse: 'bool' = False, pre: 'Callable | None' = None, segments: 'SegmentSpec | None' = None, thresholds: 'int | float | bool | tuple | dict | Thresholds' = None, actions: 'Actions | None' = None, brief: 'str | bool | None' = None, active: 'bool' = True) -> 'Validate'

 Validate whether column values match a regular expression pattern.

@@ -3943,6 +4038,9 @@ col_vals_regex(self, columns: 'str | list[str] | Column | ColumnSelector | Colum
 na_pass
     Should any encountered None, NA, or Null values be considered as passing test units? By
     default, this is `False`. Set to `True` to pass test units with missing values.
+inverse
+    Should the validation step be inverted? If `True`, then the expectation is that column
+    values should *not* match the specified `pattern=` regex.
 pre
     An optional preprocessing function or lambda to apply to the data table during
     interrogation. This function should take a table as input and return a modified table.
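The new `inverse=` flag flips the regex expectation. A minimal, hedged sketch (the table and pattern below are invented for illustration; the behavior follows the parameter description above):

```python
import pointblank as pb
import polars as pl

tbl = pl.DataFrame({"code": ["A-100", "B-200", "tmp-1", "C-300"]})

# With inverse=True, values that *do* match `pattern=` become failing test units,
# so this step asserts that no `code` value starts with "tmp-"
validation = (
    pb.Validate(data=tbl)
    .col_vals_regex(columns="code", pattern=r"^tmp-", inverse=True)
    .interrogate()
)

validation
```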
@@ -5358,13 +5456,17 @@ conjointly(self, *exprs: 'Callable', pre: 'Callable | None' = None, thresholds:
 We can also use preprocessing to filter the data before applying the conjoint validation:

 ```python
+# Define preprocessing function for serialization compatibility
+def filter_by_c_gt_5(df):
+    return df.filter(pl.col("c") > 5)
+
 validation = (
     pb.Validate(data=tbl)
     .conjointly(
         lambda df: pl.col("a") > 2,
         lambda df: pl.col("b") < 7,
         lambda df: pl.col("a") + pl.col("b") < pl.col("c"),
-        pre=
+        pre=filter_by_c_gt_5
     )
     .interrogate()
 )
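Several examples in this release swap `pre=` lambdas for module-level functions "for serialization compatibility", as the comment in the hunk above notes. The underlying Python behavior can be shown in isolation (a standalone sketch, not pointblank API):

```python
import pickle

def filter_rows(df):
    # Placeholder body: any module-level function pickles by its qualified name
    return df

pickle.dumps(filter_rows)  # works

try:
    pickle.dumps(lambda df: df)  # lambdas have no importable name
except pickle.PicklingError as exc:
    print(f"Cannot pickle a lambda: {exc}")
```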
@@ -8249,11 +8351,15 @@ n(self, i: 'int | list[int] | None' = None, scalar: 'bool' = False) -> 'dict[int
     }
 )

+# Define a preprocessing function
+def filter_by_a_gt_1(df):
+    return df.filter(pl.col("a") > 1)
+
 validation = (
     pb.Validate(data=tbl)
     .col_vals_gt(columns="a", value=0)
     .col_exists(columns="b")
-    .col_vals_lt(columns="b", value=9, pre=
+    .col_vals_lt(columns="b", value=9, pre=filter_by_a_gt_1)
     .interrogate()
 )
 ```
@@ -9798,7 +9904,7 @@ validation workflows. The `yaml_interrogate()` function can be used to run a val
 YAML strings or files. The `validate_yaml()` function checks if the YAML configuration
 passes its own validity checks.

-yaml_interrogate(yaml: 'Union[str, Path]', set_tbl: 'Union[FrameT, Any, None]' = None) -> 'Validate'
+yaml_interrogate(yaml: 'Union[str, Path]', set_tbl: 'Union[FrameT, Any, None]' = None, namespaces: 'Optional[Union[Iterable[str], Mapping[str, str]]]' = None) -> 'Validate'
 Execute a YAML-based validation workflow.

 This is the main entry point for YAML-based validation workflows. It takes YAML configuration

@@ -9820,6 +9926,10 @@ Execute a YAML-based validation workflow.
     `tbl` field before executing the validation workflow. This can be any supported table type
     including DataFrame objects, Ibis table objects, CSV file paths, Parquet file paths, GitHub
     URLs, or database connection strings.
+namespaces
+    Optional module namespaces to make available for Python code execution in YAML
+    configurations. Can be a dictionary mapping aliases to module names or a list of module
+    names. See the "Using Namespaces" section below for detailed examples.

 Returns
 -------
@@ -9834,6 +9944,71 @@ Execute a YAML-based validation workflow.
 If the YAML is invalid, malformed, or execution fails. This includes syntax errors, missing
 required fields, unknown validation methods, or data loading failures.

+Using Namespaces
+----------------
+The `namespaces=` parameter enables custom Python modules and functions in YAML configurations.
+This is particularly useful for custom action functions and advanced Python expressions.
+
+**Namespace formats:**
+
+- Dictionary format: `{"alias": "module.name"}` maps aliases to module names
+- List format: `["module.name", "another.module"]` imports modules directly
+
+**Option 1: Inline expressions (no namespaces needed)**
+
+```python
+import pointblank as pb
+
+# Simple inline custom action
+yaml_config = '''
+tbl: small_table
+thresholds:
+  warning: 0.01
+actions:
+  warning:
+    python: "lambda: print('Custom warning triggered')"
+steps:
+  - col_vals_gt:
+      columns: [a]
+      value: 1000
+'''
+
+result = pb.yaml_interrogate(yaml_config)
+result
+```
+
+**Option 2: External functions with namespaces**
+
+```python
+# Define a custom action function
+def my_custom_action():
+    print("Data validation failed: please check your data.")
+
+# Add to current module for demo
+import sys
+sys.modules[__name__].my_custom_action = my_custom_action
+
+# YAML that references the external function
+yaml_config = '''
+tbl: small_table
+thresholds:
+  warning: 0.01
+actions:
+  warning:
+    python: actions.my_custom_action
+steps:
+  - col_vals_gt:
+      columns: [a]
+      value: 1000  # This will fail
+'''
+
+# Use namespaces to make the function available
+result = pb.yaml_interrogate(yaml_config, namespaces={'actions': '__main__'})
+result
+```
+
+This approach enables modular, reusable validation workflows with custom business logic.
+
 Examples
 --------
 For the examples here, we'll use YAML configurations to define validation workflows. Let's start
@@ -11307,6 +11482,18 @@ import pointblank as pb
 import polars as pl
 import narwhals as nw

+# Define preprocessing functions
+def get_median_a(df):
+    """Use a Polars expression to aggregate column `a`."""
+    return df.select(pl.median("a"))
+
+def add_b_length_column(df):
+    """Use Narwhals to add a string length column `b_len`."""
+    return (
+        nw.from_native(df)
+        .with_columns(b_len=nw.col("b").str.len_chars())
+    )
+
 validation = (
     pb.Validate(
         data=pb.load_dataset(dataset="small_table", tbl_type="polars")

@@ -11314,14 +11501,12 @@ validation = (
     .col_vals_between(
         columns="a",
         left=3, right=6,
-        pre=
+        pre=get_median_a
     )
     .col_vals_eq(
         columns="b_len",
         value=9,
-        pre=
-            b_len=nw.col("b").str.len_chars() # by the 'dfn' here
-        )
+        pre=add_b_length_column
     )
     .interrogate()
 )
pointblank/data/validations/README.md
ADDED
@@ -0,0 +1,108 @@
+# Validation Serialization Test Infrastructure
+
+This directory contains test files and utilities for ensuring serialization compatibility of pointblank validation objects across versions.
+
+## Overview
+
+The serialization functionality in pointblank allows validation objects to be saved to disk and reloaded later. To ensure this works correctly across different versions and with various types of preprocessing functions, we maintain a collection of reference validation files for regression testing.
+
+## Files
+
+### Preprocessing Functions (`preprocessing_functions.py`)
+
+Contains preprocessing functions used in validation examples:
+
+- `double_column_a()` - Simple column transformation
+- `add_computed_column()` - Creates computed columns
+- `filter_by_d_gt_100()` - Filtering operations
+- `narwhals_median_transform()` - Cross-backend compatible functions using narwhals
+- `complex_preprocessing()` - Complex multi-step transformations
+- `pandas_compatible_transform()` - Functions that work with both pandas and polars
+
+### Test File Generator (`generate_test_files.py`)
+
+Script that creates reference validation objects with various preprocessing functions:
+
+- Creates test datasets
+- Defines validation objects with different preprocessing scenarios
+- Saves both pickle (`.pkl`) and JSON (`.json`) files
+- Each validation object is interrogated to populate results
+
+### Test Cases (`tests/test_serialization_compat.py`)
+
+Comprehensive tests for serialization functionality located in the main tests directory:
+
+- **Roundtrip testing**: Pickle and unpickle validation objects
+- **Preprocessing preservation**: Verify functions are correctly serialized
+- **Cross-backend compatibility**: Test narwhals functions work after deserialization
+- **Complex workflows**: Multi-step validation with different preprocessing functions
+
+### Generated Files
+
+The following validation files are generated for regression testing:
+
+#### Basic Validation Examples
+
+- `no_preprocessing.pkl/.json` - Control case without preprocessing
+- `simple_preprocessing.pkl/.json` - Basic single-function preprocessing
+
+#### Advanced Validation Examples
+
+- `complex_preprocessing.pkl/.json` - Multi-step transformations
+- `multiple_steps.pkl/.json` - Different preprocessing per validation step
+- `narwhals_function.pkl/.json` - Cross-backend compatible functions
+- `pandas_compatible.pkl/.json` - Functions that work with multiple backends
+
+## Usage
+
+### Running Tests
+
+```bash
+# Run all serialization compatibility tests
+python -m pytest tests/test_serialization_compat.py -v
+
+# Generate new test files (if functions change)
+cd pointblank/data/validations
+python generate_test_files.py
+```
+
+### Adding New Test Cases
+
+1. Add new preprocessing functions to `preprocessing_functions.py`
+2. Update `generate_test_files.py` to create validations using the new functions
+3. Add corresponding test cases in `tests/test_serialization_compat.py`
+4. Regenerate test files: `python generate_test_files.py`
+
+## Version Compatibility
+
+These reference files serve as regression tests to ensure:
+
+- New versions can load validation files created with previous versions
+- Preprocessing functions are correctly preserved across serialization
+- Cross-backend compatibility is maintained
+- Complex workflows continue to work after deserialization
+
+The pickle files are the authoritative test cases, while JSON files provide human-readable versions for debugging.
+
+## Best Practices
+
+### For Preprocessing Functions
+
+- Always use proper function definitions (not lambdas) for serializable functions
+- Import required libraries inside functions for self-contained serialization
+- Use narwhals for cross-backend compatibility when possible
+- Test functions work with both polars and pandas DataFrames
+
+### For Test Coverage
+
+- Include examples of each type of preprocessing function
+- Test both simple and complex multi-step workflows
+- Verify roundtrip serialization (pickle → unpickle → pickle again)
+- Check that deserialized functions produce expected results
+
+### For Maintenance
+
+- Regenerate test files when adding new preprocessing function types
+- Keep test functions focused and well-documented
+- Update tests when validation object structure changes
+- Document any breaking changes that affect serialization compatibility
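The README's best practices for serializable preprocessing functions can be illustrated with a short sketch. The function below is hypothetical (it is not part of `preprocessing_functions.py`, whose bodies are not shown in this diff); it simply follows the stated guidance: a module-level `def`, imports inside the function, and narwhals for cross-backend use:

```python
def add_b_plus_one_column(df):
    """Hypothetical preprocessing function following the README's best practices."""
    # Import inside the function so the pickled function stays self-contained
    import narwhals as nw

    # narwhals keeps the transform backend-agnostic (polars or pandas input)
    return (
        nw.from_native(df)
        .with_columns(b_plus_one=nw.col("b") + 1)
        .to_native()
    )
```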
pointblank/data/validations/complex_preprocessing.json
ADDED
@@ -0,0 +1,54 @@
+[
+  {
+    "i": 1,
+    "i_o": 1,
+    "assertion_type": "col_vals_gt",
+    "column": "a_doubled",
+    "values": 0,
+    "inclusive": null,
+    "na_pass": false,
+    "pre": "def complex_preprocessing(df):\n \"\"\"Complex preprocessing combining multiple operations.\"\"\"\n return (\n df.filter(pl.col(\"a\") > 1)\n .with_columns((pl.col(\"a\") * 2).alias(\"a_doubled\"), (pl.col(\"d\") / 10).alias(\"d_scaled\"))\n .filter(pl.col(\"d_scaled\") > 10)\n )",
+    "segments": null,
+    "thresholds": "Thresholds(warning=None, error=None, critical=None)",
+    "label": null,
+    "brief": null,
+    "active": true,
+    "all_passed": true,
+    "n": 7,
+    "n_passed": 7,
+    "n_failed": 0,
+    "f_passed": 1.0,
+    "f_failed": 0.0,
+    "warning": null,
+    "error": null,
+    "critical": null,
+    "time_processed": "2025-10-02T04:16:44.706+00:00",
+    "proc_duration_s": 0.00161
+  },
+  {
+    "i": 2,
+    "i_o": 2,
+    "assertion_type": "col_vals_gt",
+    "column": "d_scaled",
+    "values": 15,
+    "inclusive": null,
+    "na_pass": false,
+    "pre": "def complex_preprocessing(df):\n \"\"\"Complex preprocessing combining multiple operations.\"\"\"\n return (\n df.filter(pl.col(\"a\") > 1)\n .with_columns((pl.col(\"a\") * 2).alias(\"a_doubled\"), (pl.col(\"d\") / 10).alias(\"d_scaled\"))\n .filter(pl.col(\"d_scaled\") > 10)\n )",
+    "segments": null,
+    "thresholds": "Thresholds(warning=None, error=None, critical=None)",
+    "label": null,
+    "brief": null,
+    "active": true,
+    "all_passed": false,
+    "n": 7,
+    "n_passed": 5,
+    "n_failed": 2,
+    "f_passed": 0.7142857142857143,
+    "f_failed": 0.2857142857142857,
+    "warning": null,
+    "error": null,
+    "critical": null,
+    "time_processed": "2025-10-02T04:16:44.708+00:00",
+    "proc_duration_s": 0.001607
+  }
+]

pointblank/data/validations/complex_preprocessing.pkl
ADDED
Binary file
pointblank/data/validations/generate_test_files.py
ADDED
@@ -0,0 +1,127 @@
+"""
+Generate reference validation files for serialization regression testing.
+
+This script creates validation objects with various preprocessing functions
+and stores them as pickled files in the validations directory. These files
+serve as regression tests to ensure serialization compatibility across versions.
+"""
+
+import pickle
+
+# Add the parent directory to Python path to import pointblank
+import sys
+from pathlib import Path
+
+import polars as pl
+
+sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+
+from preprocessing_functions import (
+    add_computed_column,
+    complex_preprocessing,
+    double_column_a,
+    filter_by_d_gt_100,
+    narwhals_median_transform,
+    pandas_compatible_transform,
+)
+
+import pointblank as pb
+
+
+def create_test_data():
+    """Create a test dataset for validation examples."""
+    return pl.DataFrame(
+        {
+            "a": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+            "b": [10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
+            "c": ["x", "y", "x", "y", "x", "y", "x", "y", "x", "y"],
+            "d": [50, 75, 100, 125, 150, 175, 200, 225, 250, 275],
+        }
+    )
+
+
+def create_validation_examples():
+    """Create various validation objects for testing serialization."""
+    data = create_test_data()
+    validations = {}
+
+    # Basic validation with simple preprocessing
+    validations["simple_preprocessing"] = (
+        pb.Validate(data, tbl_name="test_data")
+        .col_vals_gt("a", value=0, pre=double_column_a)
+        .col_vals_in_set("c", set=["x", "y"])
+    )
+
+    # Validation with complex preprocessing
+    validations["complex_preprocessing"] = (
+        pb.Validate(data, tbl_name="test_data")
+        .col_vals_gt("a_doubled", value=0, pre=complex_preprocessing)
+        .col_vals_gt("d_scaled", value=15, pre=complex_preprocessing)
+    )
+
+    # Validation with narwhals function
+    validations["narwhals_function"] = pb.Validate(data, tbl_name="test_data").col_vals_gt(
+        "a", value=5, pre=narwhals_median_transform
+    )
+
+    # Validation with multiple preprocessing steps
+    validations["multiple_steps"] = (
+        pb.Validate(data, tbl_name="test_data")
+        .col_vals_gt("a", value=2, pre=double_column_a)
+        .col_vals_in_set("c", set=["x", "y"], pre=filter_by_d_gt_100)
+        .col_vals_gt("sum_ab", value=100, pre=add_computed_column)
+    )
+
+    # Validation with pandas-compatible function
+    validations["pandas_compatible"] = pb.Validate(data, tbl_name="test_data").col_vals_gt(
+        "a_plus_b", value=10, pre=pandas_compatible_transform
+    )
+
+    # Basic validation without preprocessing (control case)
+    validations["no_preprocessing"] = (
+        pb.Validate(data, tbl_name="test_data")
+        .col_vals_gt("a", value=0)
+        .col_vals_lt("d", value=300)
+        .col_vals_in_set("c", set=["x", "y"])
+    )
+
+    return validations
+
+
+def save_validation_files(validations, output_dir):
+    """Save validation objects as pickled files."""
+    output_path = Path(output_dir)
+    output_path.mkdir(parents=True, exist_ok=True)
+
+    for name, validation in validations.items():
+        # Interrogate to populate results
+        validation.interrogate()
+
+        # Save the validation object
+        file_path = output_path / f"{name}.pkl"
+        with open(file_path, "wb") as f:
+            pickle.dump(validation, f)
+
+        print(f"Saved {name} validation to {file_path}")
+
+        # Also save as JSON for human readability
+        json_path = output_path / f"{name}.json"
+        try:
+            json_report = validation.get_json_report()
+            with open(json_path, "w") as f:
+                f.write(json_report)
+            print(f"Saved {name} validation JSON to {json_path}")
+        except Exception as e:
+            print(f"Could not save JSON for {name}: {e}")
+
+
+if __name__ == "__main__":
+    # Create validation examples
+    validations = create_validation_examples()
+
+    # Save to the validations directory
+    output_dir = Path(__file__).parent
+    save_validation_files(validations, output_dir)
+
+    print(f"\nCreated {len(validations)} test validation files in {output_dir}")
+    print("These files can be used for regression testing serialization compatibility.")