pointblank 0.13.4__py3-none-any.whl → 0.15.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pointblank/__init__.py +4 -0
- pointblank/_constants.py +117 -0
- pointblank/_constants_translations.py +487 -2
- pointblank/_interrogation.py +1065 -12
- pointblank/_spec_utils.py +1015 -0
- pointblank/_utils.py +17 -7
- pointblank/_utils_ai.py +875 -0
- pointblank/assistant.py +1 -1
- pointblank/cli.py +128 -115
- pointblank/column.py +1 -1
- pointblank/data/api-docs.txt +1838 -130
- pointblank/data/validations/README.md +108 -0
- pointblank/data/validations/complex_preprocessing.json +54 -0
- pointblank/data/validations/complex_preprocessing.pkl +0 -0
- pointblank/data/validations/generate_test_files.py +127 -0
- pointblank/data/validations/multiple_steps.json +83 -0
- pointblank/data/validations/multiple_steps.pkl +0 -0
- pointblank/data/validations/narwhals_function.json +28 -0
- pointblank/data/validations/narwhals_function.pkl +0 -0
- pointblank/data/validations/no_preprocessing.json +83 -0
- pointblank/data/validations/no_preprocessing.pkl +0 -0
- pointblank/data/validations/pandas_compatible.json +28 -0
- pointblank/data/validations/pandas_compatible.pkl +0 -0
- pointblank/data/validations/preprocessing_functions.py +46 -0
- pointblank/data/validations/simple_preprocessing.json +57 -0
- pointblank/data/validations/simple_preprocessing.pkl +0 -0
- pointblank/datascan.py +4 -4
- pointblank/draft.py +52 -3
- pointblank/scan_profile.py +6 -6
- pointblank/schema.py +8 -82
- pointblank/thresholds.py +1 -1
- pointblank/validate.py +3069 -437
- {pointblank-0.13.4.dist-info → pointblank-0.15.0.dist-info}/METADATA +67 -8
- pointblank-0.15.0.dist-info/RECORD +56 -0
- pointblank-0.13.4.dist-info/RECORD +0 -39
- {pointblank-0.13.4.dist-info → pointblank-0.15.0.dist-info}/WHEEL +0 -0
- {pointblank-0.13.4.dist-info → pointblank-0.15.0.dist-info}/entry_points.txt +0 -0
- {pointblank-0.13.4.dist-info → pointblank-0.15.0.dist-info}/licenses/LICENSE +0 -0
- {pointblank-0.13.4.dist-info → pointblank-0.15.0.dist-info}/top_level.txt +0 -0
pointblank/_utils.py
CHANGED
|
@@ -102,7 +102,7 @@ def _get_tbl_type(data: FrameT | Any) -> str:
|
|
|
102
102
|
if "read_parquet" in tbl_name:
|
|
103
103
|
return "parquet"
|
|
104
104
|
|
|
105
|
-
else:
|
|
105
|
+
else: # pragma: no cover
|
|
106
106
|
return "duckdb"
|
|
107
107
|
|
|
108
108
|
return backend
|
|
@@ -274,10 +274,10 @@ def _copy_dataframe(df):
|
|
|
274
274
|
import copy
|
|
275
275
|
|
|
276
276
|
return copy.deepcopy(df)
|
|
277
|
-
except Exception:
|
|
277
|
+
except Exception: # pragma: no cover
|
|
278
278
|
# If all else fails, return the original DataFrame
|
|
279
279
|
# This is better than crashing the validation
|
|
280
|
-
return df
|
|
280
|
+
return df # pragma: no cover
|
|
281
281
|
|
|
282
282
|
|
|
283
283
|
def _convert_to_narwhals(df: FrameT) -> nw.DataFrame:
|
|
@@ -670,18 +670,23 @@ def _get_api_text() -> str:
|
|
|
670
670
|
"Validate.col_vals_outside",
|
|
671
671
|
"Validate.col_vals_in_set",
|
|
672
672
|
"Validate.col_vals_not_in_set",
|
|
673
|
+
"Validate.col_vals_increasing",
|
|
674
|
+
"Validate.col_vals_decreasing",
|
|
673
675
|
"Validate.col_vals_null",
|
|
674
676
|
"Validate.col_vals_not_null",
|
|
675
677
|
"Validate.col_vals_regex",
|
|
678
|
+
"Validate.col_vals_within_spec",
|
|
676
679
|
"Validate.col_vals_expr",
|
|
677
|
-
"Validate.col_exists",
|
|
678
680
|
"Validate.rows_distinct",
|
|
679
681
|
"Validate.rows_complete",
|
|
682
|
+
"Validate.col_exists",
|
|
680
683
|
"Validate.col_schema_match",
|
|
681
684
|
"Validate.row_count_match",
|
|
682
685
|
"Validate.col_count_match",
|
|
686
|
+
"Validate.tbl_match",
|
|
683
687
|
"Validate.conjointly",
|
|
684
688
|
"Validate.specially",
|
|
689
|
+
"Validate.prompt",
|
|
685
690
|
]
|
|
686
691
|
|
|
687
692
|
column_selection_exported = [
|
|
@@ -702,6 +707,7 @@ def _get_api_text() -> str:
|
|
|
702
707
|
|
|
703
708
|
interrogation_exported = [
|
|
704
709
|
"Validate.interrogate",
|
|
710
|
+
"Validate.set_tbl",
|
|
705
711
|
"Validate.get_tabular_report",
|
|
706
712
|
"Validate.get_step_report",
|
|
707
713
|
"Validate.get_json_report",
|
|
@@ -735,6 +741,7 @@ def _get_api_text() -> str:
|
|
|
735
741
|
yaml_exported = [
|
|
736
742
|
"yaml_interrogate",
|
|
737
743
|
"validate_yaml",
|
|
744
|
+
"yaml_to_python",
|
|
738
745
|
]
|
|
739
746
|
|
|
740
747
|
utility_exported = [
|
|
@@ -742,6 +749,8 @@ def _get_api_text() -> str:
|
|
|
742
749
|
"get_row_count",
|
|
743
750
|
"get_action_metadata",
|
|
744
751
|
"get_validation_summary",
|
|
752
|
+
"write_file",
|
|
753
|
+
"read_file",
|
|
745
754
|
"config",
|
|
746
755
|
]
|
|
747
756
|
|
|
@@ -786,9 +795,10 @@ datasets included in the package can be accessed via the `load_dataset()` functi
|
|
|
786
795
|
the `assistant()` function to get help with Pointblank."""
|
|
787
796
|
|
|
788
797
|
yaml_desc = """The *YAML* group contains functions that allow for the use of YAML to orchestrate
|
|
789
|
-
validation workflows. The `yaml_interrogate()` function can be used to run a validation workflow
|
|
790
|
-
YAML strings or files. The `validate_yaml()` function checks if the YAML configuration
|
|
791
|
-
|
|
798
|
+
validation workflows. The `yaml_interrogate()` function can be used to run a validation workflow
|
|
799
|
+
from YAML strings or files. The `validate_yaml()` function checks if the YAML configuration passes
|
|
800
|
+
its own validity checks. The `yaml_to_python()` function converts YAML configuration to equivalent
|
|
801
|
+
Python code."""
|
|
792
802
|
|
|
793
803
|
utility_desc = """The Utility Functions group contains functions that are useful for accessing
|
|
794
804
|
metadata about the target data. Use `get_column_count()` or `get_row_count()` to get the number of
|