pointblank 0.12.1__py3-none-any.whl → 0.12.2__py3-none-any.whl

This diff compares the contents of two publicly available versions of a package, each released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -9798,7 +9798,7 @@ validation workflows. The `yaml_interrogate()` function can be used to run a val
9798
9798
  YAML strings or files. The `validate_yaml()` function checks if the YAML configuration
9799
9799
  passes its own validity checks.
9800
9800
 
9801
- yaml_interrogate(yaml: 'Union[str, Path]') -> 'Validate'
9801
+ yaml_interrogate(yaml: 'Union[str, Path]', set_tbl: 'Union[FrameT, Any, None]' = None) -> 'Validate'
9802
9802
  Execute a YAML-based validation workflow.
9803
9803
 
9804
9804
  This is the main entry point for YAML-based validation workflows. It takes YAML configuration
@@ -9813,13 +9813,20 @@ Execute a YAML-based validation workflow.
9813
9813
  yaml
9814
9814
  YAML configuration as string or file path. Can be: (1) a YAML string containing the
9815
9815
  validation configuration, or (2) a Path object or string path to a YAML file.
9816
+ set_tbl
9817
+ An optional table to override the table specified in the YAML configuration. This allows you
9818
+ to apply a YAML-defined validation workflow to a different table than what's specified in
9819
+ the configuration. If provided, this table will replace the table defined in the YAML's
9820
+ `tbl` field before executing the validation workflow. This can be any supported table type
9821
+ including DataFrame objects, Ibis table objects, CSV file paths, Parquet file paths, GitHub
9822
+ URLs, or database connection strings.
9816
9823
 
9817
9824
  Returns
9818
9825
  -------
9819
9826
  Validate
9820
- An instance of the `Validate` class that has been configured based on the YAML input.
9821
- This object contains the results of the validation steps defined in the YAML configuration.
9822
- It includes metadata like table name, label, language, and thresholds if specified.
9827
+ An instance of the `Validate` class that has been configured based on the YAML input. This
9828
+ object contains the results of the validation steps defined in the YAML configuration. It
9829
+ includes metadata like table name, label, language, and thresholds if specified.
9823
9830
 
9824
9831
  Raises
9825
9832
  ------
@@ -9918,6 +9925,44 @@ Execute a YAML-based validation workflow.
9918
9925
  This approach is particularly useful for storing validation configurations as part of your data
9919
9926
  pipeline or version control system, allowing you to maintain validation rules alongside your
9920
9927
  code.
9928
+
9929
+ ### Using `set_tbl=` to Override the Table
9930
+
9931
+ The `set_tbl=` parameter allows you to override the table specified in the YAML configuration.
9932
+ This is useful when you have a template validation workflow but want to apply it to different
9933
+ tables:
9934
+
9935
+ ```python
9936
+ import polars as pl
9937
+
9938
+ # Create a test table with similar structure to small_table
9939
+ test_table = pl.DataFrame({
9940
+ "date": ["2023-01-01", "2023-01-02", "2023-01-03"],
9941
+ "a": [1, 2, 3],
9942
+ "b": ["1-abc-123", "2-def-456", "3-ghi-789"],
9943
+ "d": [150, 200, 250]
9944
+ })
9945
+
9946
+ # Use the same YAML config but apply it to our test table
9947
+ yaml_config = '''
9948
+ tbl: small_table # This will be overridden
9949
+ tbl_name: Test Table # This name will be used
9950
+ steps:
9951
+ - col_exists:
9952
+ columns: [date, a, b, d]
9953
+ - col_vals_gt:
9954
+ columns: [d]
9955
+ value: 100
9956
+ '''
9957
+
9958
+ # Execute with table override
9959
+ result = pb.yaml_interrogate(yaml_config, set_tbl=test_table)
9960
+ print(f"Validation applied to: {result.tbl_name}")
9961
+ result
9962
+ ```
9963
+
9964
+ This feature makes YAML configurations more reusable and flexible, allowing you to define
9965
+ validation logic once and apply it to multiple similar tables.
9921
9966
 
9922
9967
 
9923
9968
  validate_yaml(yaml: 'Union[str, Path]') -> 'None'
pointblank/validate.py CHANGED
@@ -3758,6 +3758,141 @@ class Validate:
3758
3758
 
3759
3759
  self.validation_info = []
3760
3760
 
3761
+ def set_tbl(
3762
+ self,
3763
+ tbl: FrameT | Any,
3764
+ tbl_name: str | None = None,
3765
+ label: str | None = None,
3766
+ ) -> Validate:
3767
+ """
3768
+ Set or replace the table associated with the Validate object.
3769
+
3770
+ This method allows you to replace the table associated with a Validate object with a
3771
+ different (but presumably similar) table. This is useful when you want to apply the same
3772
+ validation plan to multiple tables or when you have a validation workflow defined but want
3773
+ to swap in a different data source.
3774
+
3775
+ Parameters
3776
+ ----------
3777
+ tbl
3778
+ The table to replace the existing table with. This can be any supported table type
3779
+ including DataFrame objects, Ibis table objects, CSV file paths, Parquet file paths,
3780
+ GitHub URLs, or database connection strings. The same table type constraints apply as in
3781
+ the `Validate` constructor.
3782
+ tbl_name
3783
+ An optional name to assign to the new input table object. If no value is provided, the
3784
+ existing table name will be retained.
3785
+ label
3786
+ An optional label for the validation plan. If no value is provided, the existing label
3787
+ will be retained.
3788
+
3789
+ Returns
3790
+ -------
3791
+ Validate
3792
+ A new `Validate` object with the replacement table.
3793
+
3794
+ When to Use
3795
+ -----------
3796
+ The `set_tbl()` method is particularly useful in scenarios where you have:
3797
+
3798
+ - multiple similar tables that need the same validation checks
3799
+ - a template validation workflow that should be applied to different data sources
3800
+ - YAML-defined validations where you want to override the table specified in the YAML
3801
+
3802
+ The `set_tbl()` method creates a copy of the validation object with the new table, so the
3803
+ original validation object remains unchanged. This allows you to reuse validation plans
3804
+ across multiple tables without interference.
3805
+
3806
+ Examples
3807
+ --------
3808
+ ```{python}
3809
+ #| echo: false
3810
+ #| output: false
3811
+ import pointblank as pb
3812
+ pb.config(report_incl_header=False, report_incl_footer=False, preview_incl_header=False)
3813
+ ```
3814
+ We will first create two similar tables for our future validation plans.
3815
+
3816
+ ```{python}
3817
+ import pointblank as pb
3818
+ import polars as pl
3819
+
3820
+ # Create two similar tables
3821
+ table_1 = pl.DataFrame({
3822
+ "x": [1, 2, 3, 4, 5],
3823
+ "y": [5, 4, 3, 2, 1],
3824
+ "z": ["a", "b", "c", "d", "e"]
3825
+ })
3826
+
3827
+ table_2 = pl.DataFrame({
3828
+ "x": [2, 4, 6, 8, 10],
3829
+ "y": [10, 8, 6, 4, 2],
3830
+ "z": ["f", "g", "h", "i", "j"]
3831
+ })
3832
+ ```
3833
+
3834
+ Create a validation plan with the first table.
3835
+
3836
+ ```{python}
3837
+ validation_table_1 = (
3838
+ pb.Validate(
3839
+ data=table_1,
3840
+ tbl_name="Table 1",
3841
+ label="Validation applied to the first table"
3842
+ )
3843
+ .col_vals_gt(columns="x", value=0)
3844
+ .col_vals_lt(columns="y", value=10)
3845
+ )
3846
+ ```
3847
+
3848
+ Now apply the same validation plan to the second table.
3849
+
3850
+ ```{python}
3851
+ validation_table_2 = (
3852
+ validation_table_1
3853
+ .set_tbl(
3854
+ tbl=table_2,
3855
+ tbl_name="Table 2",
3856
+ label="Validation applied to the second table"
3857
+ )
3858
+ )
3859
+ ```
3860
+
3861
+ Here is the interrogation of the first table:
3862
+
3863
+ ```{python}
3864
+ validation_table_1.interrogate()
3865
+ ```
3866
+
3867
+ And the second table:
3868
+
3869
+ ```{python}
3870
+ validation_table_2.interrogate()
3871
+ ```
3872
+ """
3873
+ from copy import deepcopy
3874
+
3875
+ # Create a deep copy of the current Validate object
3876
+ new_validate = deepcopy(self)
3877
+
3878
+ # Process the new table through the centralized data processing pipeline
3879
+ new_validate.data = _process_data(tbl)
3880
+
3881
+ # Update table name if provided, otherwise keep existing
3882
+ if tbl_name is not None:
3883
+ new_validate.tbl_name = tbl_name
3884
+
3885
+ # Update label if provided, otherwise keep existing
3886
+ if label is not None:
3887
+ new_validate.label = label
3888
+
3889
+ # Reset interrogation state since we have a new table, but preserve validation steps
3890
+ new_validate.time_start = None
3891
+ new_validate.time_end = None
3892
+ # Note: We keep validation_info as it contains the defined validation steps
3893
+
3894
+ return new_validate
3895
+
3761
3896
  def _repr_html_(self) -> str:
3762
3897
  return self.get_tabular_report()._repr_html_() # pragma: no cover
3763
3898
 
pointblank/yaml.py CHANGED
@@ -4,6 +4,7 @@ from pathlib import Path
4
4
  from typing import Any, Union
5
5
 
6
6
  import yaml
7
+ from narwhals.typing import FrameT
7
8
 
8
9
  from pointblank._utils import _is_lib_present
9
10
  from pointblank.thresholds import Actions
@@ -749,7 +750,7 @@ class YAMLValidator:
749
750
  return validation
750
751
 
751
752
 
752
- def yaml_interrogate(yaml: Union[str, Path]) -> Validate:
753
+ def yaml_interrogate(yaml: Union[str, Path], set_tbl: Union[FrameT, Any, None] = None) -> Validate:
753
754
  """Execute a YAML-based validation workflow.
754
755
 
755
756
  This is the main entry point for YAML-based validation workflows. It takes YAML configuration
@@ -764,13 +765,20 @@ def yaml_interrogate(yaml: Union[str, Path]) -> Validate:
764
765
  yaml
765
766
  YAML configuration as string or file path. Can be: (1) a YAML string containing the
766
767
  validation configuration, or (2) a Path object or string path to a YAML file.
768
+ set_tbl
769
+ An optional table to override the table specified in the YAML configuration. This allows you
770
+ to apply a YAML-defined validation workflow to a different table than what's specified in
771
+ the configuration. If provided, this table will replace the table defined in the YAML's
772
+ `tbl` field before executing the validation workflow. This can be any supported table type
773
+ including DataFrame objects, Ibis table objects, CSV file paths, Parquet file paths, GitHub
774
+ URLs, or database connection strings.
767
775
 
768
776
  Returns
769
777
  -------
770
778
  Validate
771
- An instance of the `Validate` class that has been configured based on the YAML input.
772
- This object contains the results of the validation steps defined in the YAML configuration.
773
- It includes metadata like table name, label, language, and thresholds if specified.
779
+ An instance of the `Validate` class that has been configured based on the YAML input. This
780
+ object contains the results of the validation steps defined in the YAML configuration. It
781
+ includes metadata like table name, label, language, and thresholds if specified.
774
782
 
775
783
  Raises
776
784
  ------
@@ -875,10 +883,59 @@ def yaml_interrogate(yaml: Union[str, Path]) -> Validate:
875
883
  This approach is particularly useful for storing validation configurations as part of your data
876
884
  pipeline or version control system, allowing you to maintain validation rules alongside your
877
885
  code.
886
+
887
+ ### Using `set_tbl=` to Override the Table
888
+
889
+ The `set_tbl=` parameter allows you to override the table specified in the YAML configuration.
890
+ This is useful when you have a template validation workflow but want to apply it to different
891
+ tables:
892
+
893
+ ```{python}
894
+ import polars as pl
895
+
896
+ # Create a test table with similar structure to small_table
897
+ test_table = pl.DataFrame({
898
+ "date": ["2023-01-01", "2023-01-02", "2023-01-03"],
899
+ "a": [1, 2, 3],
900
+ "b": ["1-abc-123", "2-def-456", "3-ghi-789"],
901
+ "d": [150, 200, 250]
902
+ })
903
+
904
+ # Use the same YAML config but apply it to our test table
905
+ yaml_config = '''
906
+ tbl: small_table # This will be overridden
907
+ tbl_name: Test Table # This name will be used
908
+ steps:
909
+ - col_exists:
910
+ columns: [date, a, b, d]
911
+ - col_vals_gt:
912
+ columns: [d]
913
+ value: 100
914
+ '''
915
+
916
+ # Execute with table override
917
+ result = pb.yaml_interrogate(yaml_config, set_tbl=test_table)
918
+ print(f"Validation applied to: {result.tbl_name}")
919
+ result
920
+ ```
921
+
922
+ This feature makes YAML configurations more reusable and flexible, allowing you to define
923
+ validation logic once and apply it to multiple similar tables.
878
924
  """
879
925
  validator = YAMLValidator()
880
926
  config = validator.load_config(yaml)
881
- return validator.execute_workflow(config)
927
+
928
+ # If `set_tbl=` is provided, we need to build the validation workflow and then use `set_tbl()`
929
+ if set_tbl is not None:
930
+ # First build the validation object without interrogation
931
+ validation = validator.build_validation(config)
932
+ # Then replace the table using set_tbl method
933
+ validation = validation.set_tbl(tbl=set_tbl)
934
+ # Finally interrogate with the new table
935
+ return validation.interrogate()
936
+ else:
937
+ # Standard execution without table override (includes interrogation)
938
+ return validator.execute_workflow(config)
882
939
 
883
940
 
884
941
  def load_yaml_config(file_path: Union[str, Path]) -> dict:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pointblank
3
- Version: 0.12.1
3
+ Version: 0.12.2
4
4
  Summary: Find out if your data is what you think it is.
5
5
  Author-email: Richard Iannone <riannone@me.com>
6
6
  License: MIT License
@@ -21,9 +21,9 @@ pointblank/schema.py,sha256=vwGF8UKy2riRSQzcwatcI6L0t_6ccdbOayrKonvyodE,45777
21
21
  pointblank/segments.py,sha256=RXp3lPr3FboVseadNqLgIeoMBh_mykrQSFp1WtV41Yg,5570
22
22
  pointblank/tf.py,sha256=8o_8m4i01teulEe3-YYMotSNf3tImjBMInsvdjSAO5Q,8844
23
23
  pointblank/thresholds.py,sha256=mybeLzTVdmN04NLKoV-jiSBXsWknwHO0Gox0ttVN_MU,25766
24
- pointblank/validate.py,sha256=KvnC0UnvVW2mkoWkp1fDIXotuBl7MJeU6_ggp_0yDoo,693082
25
- pointblank/yaml.py,sha256=4DrkOJwCQ3CaXQ7ESNIW72pp-dL1ctlX6ONU30Vh1Fs,57901
26
- pointblank/data/api-docs.txt,sha256=0wXk__xYwgKeS24ZjbaTPFeJ3ZO7AIyMQoFClCcvPTc,529897
24
+ pointblank/validate.py,sha256=9-lzWSvNVo2JDcXVAiMINPbyg1Xgy0SMmwW8IfKstJs,697641
25
+ pointblank/yaml.py,sha256=v3uCrs4Hzt-9cAmGxiFJIU8A9XiRQsN7U3OBFa6_hUs,60287
26
+ pointblank/data/api-docs.txt,sha256=w2nIkIL_fJpXlPR9clogqcgdiv-uHvdSDI8gjkP_mCQ,531711
27
27
  pointblank/data/game_revenue-duckdb.zip,sha256=tKIVx48OGLYGsQPS3h5AjA2Nyq_rfEpLCjBiFUWhagU,35880
28
28
  pointblank/data/game_revenue.zip,sha256=7c9EvHLyi93CHUd4p3dM4CZ-GucFCtXKSPxgLojL32U,33749
29
29
  pointblank/data/global_sales-duckdb.zip,sha256=2ok_cvJ1ZuSkXnw0R6_OkKYRTWhJ-jJEMq2VYsv5fqY,1336390
@@ -33,9 +33,9 @@ pointblank/data/nycflights.zip,sha256=yVjbUaKUz2LydSdF9cABuir0VReHBBgV7shiNWSd0m
33
33
  pointblank/data/polars-api-docs.txt,sha256=KGcS-BOtUs9zgpkWfXD-GFdFh4O_zjdkpX7msHjztLg,198045
34
34
  pointblank/data/small_table-duckdb.zip,sha256=BhTaZ2CRS4-9Z1uVhOU6HggvW3XCar7etMznfENIcOc,2028
35
35
  pointblank/data/small_table.zip,sha256=lmFb90Nb-v5X559Ikjg31YLAXuRyMkD9yLRElkXPMzQ,472
36
- pointblank-0.12.1.dist-info/licenses/LICENSE,sha256=apLF-HWPNU7pT5bmf5KmZpD5Cklpy2u-BN_0xBoRMLY,1081
37
- pointblank-0.12.1.dist-info/METADATA,sha256=1fJY92u1AiJdYggJLaUf0TKbovh3ytcihIdh4PcBEQ8,19242
38
- pointblank-0.12.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
39
- pointblank-0.12.1.dist-info/entry_points.txt,sha256=GqqqOTOH8uZe22wLcvYjzpizqk_j4MNcUo2YM14ryCw,42
40
- pointblank-0.12.1.dist-info/top_level.txt,sha256=-wHrS1SvV8-nhvc3w-PPYs1C1WtEc1pK-eGjubbCCKc,11
41
- pointblank-0.12.1.dist-info/RECORD,,
36
+ pointblank-0.12.2.dist-info/licenses/LICENSE,sha256=apLF-HWPNU7pT5bmf5KmZpD5Cklpy2u-BN_0xBoRMLY,1081
37
+ pointblank-0.12.2.dist-info/METADATA,sha256=yF9JixS22t9DR-S8mXMv3uyXmkACVb1LQMoHck2bMYs,19242
38
+ pointblank-0.12.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
39
+ pointblank-0.12.2.dist-info/entry_points.txt,sha256=GqqqOTOH8uZe22wLcvYjzpizqk_j4MNcUo2YM14ryCw,42
40
+ pointblank-0.12.2.dist-info/top_level.txt,sha256=-wHrS1SvV8-nhvc3w-PPYs1C1WtEc1pK-eGjubbCCKc,11
41
+ pointblank-0.12.2.dist-info/RECORD,,