pointblank 0.13.4__py3-none-any.whl → 0.15.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. pointblank/__init__.py +4 -0
  2. pointblank/_constants.py +117 -0
  3. pointblank/_constants_translations.py +487 -2
  4. pointblank/_interrogation.py +1065 -12
  5. pointblank/_spec_utils.py +1015 -0
  6. pointblank/_utils.py +17 -7
  7. pointblank/_utils_ai.py +875 -0
  8. pointblank/assistant.py +1 -1
  9. pointblank/cli.py +128 -115
  10. pointblank/column.py +1 -1
  11. pointblank/data/api-docs.txt +1838 -130
  12. pointblank/data/validations/README.md +108 -0
  13. pointblank/data/validations/complex_preprocessing.json +54 -0
  14. pointblank/data/validations/complex_preprocessing.pkl +0 -0
  15. pointblank/data/validations/generate_test_files.py +127 -0
  16. pointblank/data/validations/multiple_steps.json +83 -0
  17. pointblank/data/validations/multiple_steps.pkl +0 -0
  18. pointblank/data/validations/narwhals_function.json +28 -0
  19. pointblank/data/validations/narwhals_function.pkl +0 -0
  20. pointblank/data/validations/no_preprocessing.json +83 -0
  21. pointblank/data/validations/no_preprocessing.pkl +0 -0
  22. pointblank/data/validations/pandas_compatible.json +28 -0
  23. pointblank/data/validations/pandas_compatible.pkl +0 -0
  24. pointblank/data/validations/preprocessing_functions.py +46 -0
  25. pointblank/data/validations/simple_preprocessing.json +57 -0
  26. pointblank/data/validations/simple_preprocessing.pkl +0 -0
  27. pointblank/datascan.py +4 -4
  28. pointblank/draft.py +52 -3
  29. pointblank/scan_profile.py +6 -6
  30. pointblank/schema.py +8 -82
  31. pointblank/thresholds.py +1 -1
  32. pointblank/validate.py +3069 -437
  33. {pointblank-0.13.4.dist-info → pointblank-0.15.0.dist-info}/METADATA +67 -8
  34. pointblank-0.15.0.dist-info/RECORD +56 -0
  35. pointblank-0.13.4.dist-info/RECORD +0 -39
  36. {pointblank-0.13.4.dist-info → pointblank-0.15.0.dist-info}/WHEEL +0 -0
  37. {pointblank-0.13.4.dist-info → pointblank-0.15.0.dist-info}/entry_points.txt +0 -0
  38. {pointblank-0.13.4.dist-info → pointblank-0.15.0.dist-info}/licenses/LICENSE +0 -0
  39. {pointblank-0.13.4.dist-info → pointblank-0.15.0.dist-info}/top_level.txt +0 -0
pointblank/data/validations/README.md ADDED
@@ -0,0 +1,108 @@
+ # Validation Serialization Test Infrastructure
+
+ This directory contains test files and utilities for ensuring serialization compatibility of pointblank validation objects across versions.
+
+ ## Overview
+
+ The serialization functionality in pointblank allows validation objects to be saved to disk and reloaded later. To ensure this works correctly across different versions and with various types of preprocessing functions, we maintain a collection of reference validation files for regression testing.
+
+ ## Files
+
+ ### Preprocessing Functions (`preprocessing_functions.py`)
+
+ Contains preprocessing functions used in validation examples:
+
+ - `double_column_a()` - Simple column transformation
+ - `add_computed_column()` - Creates computed columns
+ - `filter_by_d_gt_100()` - Filtering operations
+ - `narwhals_median_transform()` - Cross-backend compatible functions using narwhals
+ - `complex_preprocessing()` - Complex multi-step transformations
+ - `pandas_compatible_transform()` - Functions that work with both pandas and polars
+
+ ### Test File Generator (`generate_test_files.py`)
+
+ Script that creates reference validation objects with various preprocessing functions:
+
+ - Creates test datasets
+ - Defines validation objects with different preprocessing scenarios
+ - Saves both pickle (`.pkl`) and JSON (`.json`) files
+ - Each validation object is interrogated to populate results
+
+ ### Test Cases (`tests/test_serialization_compat.py`)
+
+ Comprehensive tests for serialization functionality located in the main tests directory:
+
+ - **Roundtrip testing**: Pickle and unpickle validation objects
+ - **Preprocessing preservation**: Verify functions are correctly serialized
+ - **Cross-backend compatibility**: Test that narwhals functions work after deserialization
+ - **Complex workflows**: Multi-step validation with different preprocessing functions
+
+ ### Generated Files
+
+ The following validation files are generated for regression testing:
+
+ #### Basic Validation Examples
+
+ - `no_preprocessing.pkl/.json` - Control case without preprocessing
+ - `simple_preprocessing.pkl/.json` - Basic single-function preprocessing
+
+ #### Advanced Validation Examples
+
+ - `complex_preprocessing.pkl/.json` - Multi-step transformations
+ - `multiple_steps.pkl/.json` - Different preprocessing per validation step
+ - `narwhals_function.pkl/.json` - Cross-backend compatible functions
+ - `pandas_compatible.pkl/.json` - Functions that work with multiple backends
+
+ ## Usage
+
+ ### Running Tests
+
+ ```bash
+ # Run all serialization compatibility tests
+ python -m pytest tests/test_serialization_compat.py -v
+
+ # Generate new test files (if functions change)
+ cd pointblank/data/validations
+ python generate_test_files.py
+ ```
+
+ ### Adding New Test Cases
+
+ 1. Add new preprocessing functions to `preprocessing_functions.py`
+ 2. Update `generate_test_files.py` to create validations using the new functions
+ 3. Add corresponding test cases in `tests/test_serialization_compat.py`
+ 4. Regenerate test files: `python generate_test_files.py`
+
+ ## Version Compatibility
+
+ These reference files serve as regression tests to ensure:
+
+ - New versions can load validation files created with previous versions
+ - Preprocessing functions are correctly preserved across serialization
+ - Cross-backend compatibility is maintained
+ - Complex workflows continue to work after deserialization
+
+ The pickle files are the authoritative test cases, while the JSON files provide human-readable versions for debugging.
+
+ ## Best Practices
+
+ ### For Preprocessing Functions
+
+ - Always use proper function definitions (not lambdas) for serializable functions
+ - Import required libraries inside functions for self-contained serialization
+ - Use narwhals for cross-backend compatibility when possible
+ - Test that functions work with both polars and pandas DataFrames
+
+ ### For Test Coverage
+
+ - Include examples of each type of preprocessing function
+ - Test both simple and complex multi-step workflows
+ - Verify roundtrip serialization (pickle → unpickle → pickle again)
+ - Check that deserialized functions produce expected results
+
+ ### For Maintenance
+
+ - Regenerate test files when adding new preprocessing function types
+ - Keep test functions focused and well-documented
+ - Update tests when the validation object structure changes
+ - Document any breaking changes that affect serialization compatibility
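
The roundtrip testing described in the README above can be illustrated with a short, self-contained sketch. It is not part of the package; it only assumes the `pb.Validate` chaining, `interrogate()`, and `get_json_report()` calls that appear in the files added in this diff.

```python
# Minimal roundtrip sketch: build a validation, interrogate it, then pickle
# and unpickle it and confirm the stored results survive the trip.
import pickle

import polars as pl
import pointblank as pb

validation = pb.Validate(
    pl.DataFrame({"a": [1, 2, 3]}), tbl_name="roundtrip_demo"
).col_vals_gt("a", value=0)
validation.interrogate()

restored = pickle.loads(pickle.dumps(validation))

# The JSON report is derived from the stored interrogation results, so an
# identical report after the roundtrip is a reasonable (if coarse) check.
assert restored.get_json_report() == validation.get_json_report()
```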
pointblank/data/validations/complex_preprocessing.json ADDED
@@ -0,0 +1,54 @@
+ [
+ {
+ "i": 1,
+ "i_o": 1,
+ "assertion_type": "col_vals_gt",
+ "column": "a_doubled",
+ "values": 0,
+ "inclusive": null,
+ "na_pass": false,
+ "pre": "def complex_preprocessing(df):\n \"\"\"Complex preprocessing combining multiple operations.\"\"\"\n return (\n df.filter(pl.col(\"a\") > 1)\n .with_columns((pl.col(\"a\") * 2).alias(\"a_doubled\"), (pl.col(\"d\") / 10).alias(\"d_scaled\"))\n .filter(pl.col(\"d_scaled\") > 10)\n )",
+ "segments": null,
+ "thresholds": "Thresholds(warning=None, error=None, critical=None)",
+ "label": null,
+ "brief": null,
+ "active": true,
+ "all_passed": true,
+ "n": 7,
+ "n_passed": 7,
+ "n_failed": 0,
+ "f_passed": 1.0,
+ "f_failed": 0.0,
+ "warning": null,
+ "error": null,
+ "critical": null,
+ "time_processed": "2025-10-02T04:16:44.706+00:00",
+ "proc_duration_s": 0.00161
+ },
+ {
+ "i": 2,
+ "i_o": 2,
+ "assertion_type": "col_vals_gt",
+ "column": "d_scaled",
+ "values": 15,
+ "inclusive": null,
+ "na_pass": false,
+ "pre": "def complex_preprocessing(df):\n \"\"\"Complex preprocessing combining multiple operations.\"\"\"\n return (\n df.filter(pl.col(\"a\") > 1)\n .with_columns((pl.col(\"a\") * 2).alias(\"a_doubled\"), (pl.col(\"d\") / 10).alias(\"d_scaled\"))\n .filter(pl.col(\"d_scaled\") > 10)\n )",
+ "segments": null,
+ "thresholds": "Thresholds(warning=None, error=None, critical=None)",
+ "label": null,
+ "brief": null,
+ "active": true,
+ "all_passed": false,
+ "n": 7,
+ "n_passed": 5,
+ "n_failed": 2,
+ "f_passed": 0.7142857142857143,
+ "f_failed": 0.2857142857142857,
+ "warning": null,
+ "error": null,
+ "critical": null,
+ "time_processed": "2025-10-02T04:16:44.708+00:00",
+ "proc_duration_s": 0.001607
+ }
+ ]
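
As the report above shows, each generated JSON file is a plain list of per-step records (`i`, `assertion_type`, `column`, `n_passed`, `n_failed`, and so on), so it can be inspected directly when debugging. A small sketch, assuming the path is relative to the validations directory:

```python
# Print a one-line summary per validation step from a generated JSON report.
import json
from pathlib import Path

steps = json.loads(Path("complex_preprocessing.json").read_text())
for step in steps:
    print(
        f"step {step['i']}: {step['assertion_type']} on {step['column']} -> "
        f"{step['n_passed']} passed, {step['n_failed']} failed"
    )
```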
pointblank/data/validations/generate_test_files.py ADDED
@@ -0,0 +1,127 @@
+ """
+ Generate reference validation files for serialization regression testing.
+
+ This script creates validation objects with various preprocessing functions
+ and stores them as pickled files in the validations directory. These files
+ serve as regression tests to ensure serialization compatibility across versions.
+ """
+
+ import pickle
+
+ # Add the parent directory to Python path to import pointblank
+ import sys
+ from pathlib import Path
+
+ import polars as pl
+
+ sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+
+ from preprocessing_functions import (
+     add_computed_column,
+     complex_preprocessing,
+     double_column_a,
+     filter_by_d_gt_100,
+     narwhals_median_transform,
+     pandas_compatible_transform,
+ )
+
+ import pointblank as pb
+
+
+ def create_test_data():
+     """Create a test dataset for validation examples."""
+     return pl.DataFrame(
+         {
+             "a": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+             "b": [10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
+             "c": ["x", "y", "x", "y", "x", "y", "x", "y", "x", "y"],
+             "d": [50, 75, 100, 125, 150, 175, 200, 225, 250, 275],
+         }
+     )
+
+
+ def create_validation_examples():
+     """Create various validation objects for testing serialization."""
+     data = create_test_data()
+     validations = {}
+
+     # Basic validation with simple preprocessing
+     validations["simple_preprocessing"] = (
+         pb.Validate(data, tbl_name="test_data")
+         .col_vals_gt("a", value=0, pre=double_column_a)
+         .col_vals_in_set("c", set=["x", "y"])
+     )
+
+     # Validation with complex preprocessing
+     validations["complex_preprocessing"] = (
+         pb.Validate(data, tbl_name="test_data")
+         .col_vals_gt("a_doubled", value=0, pre=complex_preprocessing)
+         .col_vals_gt("d_scaled", value=15, pre=complex_preprocessing)
+     )
+
+     # Validation with narwhals function
+     validations["narwhals_function"] = pb.Validate(data, tbl_name="test_data").col_vals_gt(
+         "a", value=5, pre=narwhals_median_transform
+     )
+
+     # Validation with multiple preprocessing steps
+     validations["multiple_steps"] = (
+         pb.Validate(data, tbl_name="test_data")
+         .col_vals_gt("a", value=2, pre=double_column_a)
+         .col_vals_in_set("c", set=["x", "y"], pre=filter_by_d_gt_100)
+         .col_vals_gt("sum_ab", value=100, pre=add_computed_column)
+     )
+
+     # Validation with pandas-compatible function
+     validations["pandas_compatible"] = pb.Validate(data, tbl_name="test_data").col_vals_gt(
+         "a_plus_b", value=10, pre=pandas_compatible_transform
+     )
+
+     # Basic validation without preprocessing (control case)
+     validations["no_preprocessing"] = (
+         pb.Validate(data, tbl_name="test_data")
+         .col_vals_gt("a", value=0)
+         .col_vals_lt("d", value=300)
+         .col_vals_in_set("c", set=["x", "y"])
+     )
+
+     return validations
+
+
+ def save_validation_files(validations, output_dir):
+     """Save validation objects as pickled files."""
+     output_path = Path(output_dir)
+     output_path.mkdir(parents=True, exist_ok=True)
+
+     for name, validation in validations.items():
+         # Interrogate to populate results
+         validation.interrogate()
+
+         # Save the validation object
+         file_path = output_path / f"{name}.pkl"
+         with open(file_path, "wb") as f:
+             pickle.dump(validation, f)
+
+         print(f"Saved {name} validation to {file_path}")
+
+         # Also save as JSON for human readability
+         json_path = output_path / f"{name}.json"
+         try:
+             json_report = validation.get_json_report()
+             with open(json_path, "w") as f:
+                 f.write(json_report)
+             print(f"Saved {name} validation JSON to {json_path}")
+         except Exception as e:
+             print(f"Could not save JSON for {name}: {e}")
+
+
+ if __name__ == "__main__":
+     # Create validation examples
+     validations = create_validation_examples()
+
+     # Save to the validations directory
+     output_dir = Path(__file__).parent
+     save_validation_files(validations, output_dir)
+
+     print(f"\nCreated {len(validations)} test validation files in {output_dir}")
+     print("These files can be used for regression testing serialization compatibility.")
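
A regression test along the lines of `tests/test_serialization_compat.py` (not shown in this diff) would load these reference files back. A minimal sketch, assuming it runs from the repository root and relies only on the `get_json_report()` accessor used by the generator above:

```python
# Load a reference pickle written by generate_test_files.py and confirm the
# deserialized validation still carries its interrogation results.
import pickle
from pathlib import Path

ref_path = Path("pointblank/data/validations/no_preprocessing.pkl")
with ref_path.open("rb") as f:
    validation = pickle.load(f)

report = validation.get_json_report()
assert '"assertion_type": "col_vals_gt"' in report
```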
pointblank/data/validations/multiple_steps.json ADDED
@@ -0,0 +1,83 @@
+ [
+ {
+ "i": 1,
+ "i_o": 1,
+ "assertion_type": "col_vals_gt",
+ "column": "a",
+ "values": 2,
+ "inclusive": null,
+ "na_pass": false,
+ "pre": "def double_column_a(df):\n \"\"\"Double the values in column 'a'.\"\"\"\n return df.with_columns(pl.col(\"a\") * 2)",
+ "segments": null,
+ "thresholds": "Thresholds(warning=None, error=None, critical=None)",
+ "label": null,
+ "brief": null,
+ "active": true,
+ "all_passed": false,
+ "n": 10,
+ "n_passed": 9,
+ "n_failed": 1,
+ "f_passed": 0.9,
+ "f_failed": 0.1,
+ "warning": null,
+ "error": null,
+ "critical": null,
+ "time_processed": "2025-10-02T04:16:44.712+00:00",
+ "proc_duration_s": 0.00152
+ },
+ {
+ "i": 2,
+ "i_o": 2,
+ "assertion_type": "col_vals_in_set",
+ "column": "c",
+ "values": [
+ "x",
+ "y"
+ ],
+ "inclusive": null,
+ "na_pass": null,
+ "pre": "def filter_by_d_gt_100(df):\n \"\"\"Filter rows where column 'd' is greater than 100.\"\"\"\n return df.filter(pl.col(\"d\") > 100)",
+ "segments": null,
+ "thresholds": "Thresholds(warning=None, error=None, critical=None)",
+ "label": null,
+ "brief": null,
+ "active": true,
+ "all_passed": true,
+ "n": 7,
+ "n_passed": 7,
+ "n_failed": 0,
+ "f_passed": 1.0,
+ "f_failed": 0.0,
+ "warning": null,
+ "error": null,
+ "critical": null,
+ "time_processed": "2025-10-02T04:16:44.713+00:00",
+ "proc_duration_s": 0.001
+ },
+ {
+ "i": 3,
+ "i_o": 3,
+ "assertion_type": "col_vals_gt",
+ "column": "sum_ab",
+ "values": 100,
+ "inclusive": null,
+ "na_pass": false,
+ "pre": "def add_computed_column(df):\n \"\"\"Add a computed column based on existing columns.\"\"\"\n return df.with_columns((pl.col(\"a\") + pl.col(\"b\")).alias(\"sum_ab\"))",
+ "segments": null,
+ "thresholds": "Thresholds(warning=None, error=None, critical=None)",
+ "label": null,
+ "brief": null,
+ "active": true,
+ "all_passed": false,
+ "n": 10,
+ "n_passed": 1,
+ "n_failed": 9,
+ "f_passed": 0.1,
+ "f_failed": 0.9,
+ "warning": null,
+ "error": null,
+ "critical": null,
+ "time_processed": "2025-10-02T04:16:44.714+00:00",
+ "proc_duration_s": 0.001464
+ }
+ ]
pointblank/data/validations/narwhals_function.json ADDED
@@ -0,0 +1,28 @@
+ [
+ {
+ "i": 1,
+ "i_o": 1,
+ "assertion_type": "col_vals_gt",
+ "column": "a",
+ "values": 5,
+ "inclusive": null,
+ "na_pass": false,
+ "pre": "def narwhals_median_transform(df):\n \"\"\"Use narwhals to compute median - cross-backend compatible.\"\"\"\n return nw.from_native(df).select(nw.median(\"a\"), nw.median(\"d\"))",
+ "segments": null,
+ "thresholds": "Thresholds(warning=None, error=None, critical=None)",
+ "label": null,
+ "brief": null,
+ "active": true,
+ "all_passed": true,
+ "n": 1,
+ "n_passed": 1,
+ "n_failed": 0,
+ "f_passed": 1.0,
+ "f_failed": 0.0,
+ "warning": null,
+ "error": null,
+ "critical": null,
+ "time_processed": "2025-10-02T04:16:44.710+00:00",
+ "proc_duration_s": 0.001455
+ }
+ ]
pointblank/data/validations/no_preprocessing.json ADDED
@@ -0,0 +1,83 @@
+ [
+ {
+ "i": 1,
+ "i_o": 1,
+ "assertion_type": "col_vals_gt",
+ "column": "a",
+ "values": 0,
+ "inclusive": null,
+ "na_pass": false,
+ "pre": null,
+ "segments": null,
+ "thresholds": "Thresholds(warning=None, error=None, critical=None)",
+ "label": null,
+ "brief": null,
+ "active": true,
+ "all_passed": true,
+ "n": 10,
+ "n_passed": 10,
+ "n_failed": 0,
+ "f_passed": 1.0,
+ "f_failed": 0.0,
+ "warning": null,
+ "error": null,
+ "critical": null,
+ "time_processed": "2025-10-02T04:16:44.718+00:00",
+ "proc_duration_s": 0.001148
+ },
+ {
+ "i": 2,
+ "i_o": 2,
+ "assertion_type": "col_vals_lt",
+ "column": "d",
+ "values": 300,
+ "inclusive": null,
+ "na_pass": false,
+ "pre": null,
+ "segments": null,
+ "thresholds": "Thresholds(warning=None, error=None, critical=None)",
+ "label": null,
+ "brief": null,
+ "active": true,
+ "all_passed": true,
+ "n": 10,
+ "n_passed": 10,
+ "n_failed": 0,
+ "f_passed": 1.0,
+ "f_failed": 0.0,
+ "warning": null,
+ "error": null,
+ "critical": null,
+ "time_processed": "2025-10-02T04:16:44.719+00:00",
+ "proc_duration_s": 0.001181
+ },
+ {
+ "i": 3,
+ "i_o": 3,
+ "assertion_type": "col_vals_in_set",
+ "column": "c",
+ "values": [
+ "x",
+ "y"
+ ],
+ "inclusive": null,
+ "na_pass": null,
+ "pre": null,
+ "segments": null,
+ "thresholds": "Thresholds(warning=None, error=None, critical=None)",
+ "label": null,
+ "brief": null,
+ "active": true,
+ "all_passed": true,
+ "n": 10,
+ "n_passed": 10,
+ "n_failed": 0,
+ "f_passed": 1.0,
+ "f_failed": 0.0,
+ "warning": null,
+ "error": null,
+ "critical": null,
+ "time_processed": "2025-10-02T04:16:44.720+00:00",
+ "proc_duration_s": 0.000892
+ }
+ ]
pointblank/data/validations/pandas_compatible.json ADDED
@@ -0,0 +1,28 @@
+ [
+ {
+ "i": 1,
+ "i_o": 1,
+ "assertion_type": "col_vals_gt",
+ "column": "a_plus_b",
+ "values": 10,
+ "inclusive": null,
+ "na_pass": false,
+ "pre": "def pandas_compatible_transform(df):\n \"\"\"Transform that works with pandas DataFrames.\"\"\"\n if hasattr(df, \"assign\"): # pandas\n return df.assign(a_plus_b=df[\"a\"] + df.get(\"b\", 0))\n else: # polars or other\n return df.with_columns((pl.col(\"a\") + pl.col(\"b\")).alias(\"a_plus_b\"))",
+ "segments": null,
+ "thresholds": "Thresholds(warning=None, error=None, critical=None)",
+ "label": null,
+ "brief": null,
+ "active": true,
+ "all_passed": true,
+ "n": 10,
+ "n_passed": 10,
+ "n_failed": 0,
+ "f_passed": 1.0,
+ "f_failed": 0.0,
+ "warning": null,
+ "error": null,
+ "critical": null,
+ "time_processed": "2025-10-02T04:16:44.717+00:00",
+ "proc_duration_s": 0.001428
+ }
+ ]
pointblank/data/validations/preprocessing_functions.py ADDED
@@ -0,0 +1,46 @@
+ """
+ Test preprocessing functions for validation serialization examples.
+
+ These functions are used to create validation objects that can be serialized
+ and stored as reference files for regression testing.
+ """
+
+ import narwhals as nw
+ import polars as pl
+
+
+ def double_column_a(df):
+     """Double the values in column 'a'."""
+     return df.with_columns(pl.col("a") * 2)
+
+
+ def add_computed_column(df):
+     """Add a computed column based on existing columns."""
+     return df.with_columns((pl.col("a") + pl.col("b")).alias("sum_ab"))
+
+
+ def filter_by_d_gt_100(df):
+     """Filter rows where column 'd' is greater than 100."""
+     return df.filter(pl.col("d") > 100)
+
+
+ def narwhals_median_transform(df):
+     """Use narwhals to compute median - cross-backend compatible."""
+     return nw.from_native(df).select(nw.median("a"), nw.median("d"))
+
+
+ def complex_preprocessing(df):
+     """Complex preprocessing combining multiple operations."""
+     return (
+         df.filter(pl.col("a") > 1)
+         .with_columns((pl.col("a") * 2).alias("a_doubled"), (pl.col("d") / 10).alias("d_scaled"))
+         .filter(pl.col("d_scaled") > 10)
+     )
+
+
+ def pandas_compatible_transform(df):
+     """Transform that works with pandas DataFrames."""
+     if hasattr(df, "assign"):  # pandas
+         return df.assign(a_plus_b=df["a"] + df.get("b", 0))
+     else:  # polars or other
+         return df.with_columns((pl.col("a") + pl.col("b")).alias("a_plus_b"))
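
The cross-backend intent of these helpers can be checked directly. A minimal sketch, assuming pandas and polars are both installed and the snippet is run from this directory so `preprocessing_functions` is importable (the same import style the generator script uses):

```python
# Apply pandas_compatible_transform to equivalent pandas and polars frames
# and confirm both backends produce the same derived column.
import pandas as pd
import polars as pl

from preprocessing_functions import pandas_compatible_transform

pd_out = pandas_compatible_transform(pd.DataFrame({"a": [1, 2], "b": [3, 4]}))
pl_out = pandas_compatible_transform(pl.DataFrame({"a": [1, 2], "b": [3, 4]}))

assert list(pd_out["a_plus_b"]) == [4, 6]
assert pl_out["a_plus_b"].to_list() == [4, 6]
```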
pointblank/data/validations/simple_preprocessing.json ADDED
@@ -0,0 +1,57 @@
+ [
+ {
+ "i": 1,
+ "i_o": 1,
+ "assertion_type": "col_vals_gt",
+ "column": "a",
+ "values": 0,
+ "inclusive": null,
+ "na_pass": false,
+ "pre": "def double_column_a(df):\n \"\"\"Double the values in column 'a'.\"\"\"\n return df.with_columns(pl.col(\"a\") * 2)",
+ "segments": null,
+ "thresholds": "Thresholds(warning=None, error=None, critical=None)",
+ "label": null,
+ "brief": null,
+ "active": true,
+ "all_passed": true,
+ "n": 10,
+ "n_passed": 10,
+ "n_failed": 0,
+ "f_passed": 1.0,
+ "f_failed": 0.0,
+ "warning": null,
+ "error": null,
+ "critical": null,
+ "time_processed": "2025-10-02T04:16:44.702+00:00",
+ "proc_duration_s": 0.00387
+ },
+ {
+ "i": 2,
+ "i_o": 2,
+ "assertion_type": "col_vals_in_set",
+ "column": "c",
+ "values": [
+ "x",
+ "y"
+ ],
+ "inclusive": null,
+ "na_pass": null,
+ "pre": null,
+ "segments": null,
+ "thresholds": "Thresholds(warning=None, error=None, critical=None)",
+ "label": null,
+ "brief": null,
+ "active": true,
+ "all_passed": true,
+ "n": 10,
+ "n_passed": 10,
+ "n_failed": 0,
+ "f_passed": 1.0,
+ "f_failed": 0.0,
+ "warning": null,
+ "error": null,
+ "critical": null,
+ "time_processed": "2025-10-02T04:16:44.703+00:00",
+ "proc_duration_s": 0.000983
+ }
+ ]
pointblank/datascan.py CHANGED
@@ -143,17 +143,17 @@ class DataScan:
          for conv_method in valid_conversion_methods:
              try:
                  valid_native = getattr(ibis_native, conv_method)()
-             except (NotImplementedError, ImportError, ModuleNotFoundError):
-                 continue
+             except (NotImplementedError, ImportError, ModuleNotFoundError):  # pragma: no cover
+                 continue  # pragma: no cover
              break
-         else:
+         else:  # pragma: no cover
              msg = (
                  "To use `ibis` as input, you must have one of arrow, pandas, polars or numpy "
                  "available in the process. Until `ibis` is fully supported by Narwhals, this is "
                  "necessary. Additionally, the data must be collected in order to calculate some "
                  "structural statistics, which may be performance detrimental."
              )
-             raise ImportError(msg)
+             raise ImportError(msg)  # pragma: no cover
          as_native = nw.from_native(valid_native)
  
          self.nw_data: Frame = nw.from_native(as_native)