pointblank 0.8.5__py3-none-any.whl → 0.8.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4171,6 +4171,201 @@ col_count_match(self, count: 'int | FrameT | Any', inverse: 'bool' = False, pre:
4171
4171
  columns in the target table. So, the single test unit passed.
4172
4172
 
4173
4173
 
4174
+ conjointly(self, *exprs: 'Callable', pre: 'Callable | None' = None, thresholds: 'int | float | bool | tuple | dict | Thresholds' = None, actions: 'Actions | None' = None, brief: 'str | bool | None' = None, active: 'bool' = True) -> 'Validate'
4175
+
4176
+ Perform multiple row-wise validations for joint validity.
4177
+
4178
+ The `conjointly()` validation method checks whether each row in the table passes multiple
4179
+ validation conditions simultaneously. This enables compound validation logic where a test
4180
+ unit (typically a row) must satisfy all specified conditions to pass the validation.
4181
+
4182
+ This method accepts multiple validation expressions as callables, which should return
4183
+ boolean expressions when applied to the data. You can use lambdas that incorporate
4184
+ Polars/Pandas/Ibis expressions (based on the target table type) or create more complex
4185
+ validation functions. The validation will operate over the number of test units that is
4186
+ equal to the number of rows in the table (determined after any `pre=` mutation has been
4187
+ applied).
4188
+
4189
+ Parameters
4190
+ ----------
4191
+ *exprs
4192
+ Multiple validation expressions provided as callable functions. Each callable should
4193
+ accept a table as its single argument and return a boolean expression or Series/Column
4194
+ that evaluates to boolean values for each row.
4195
+ pre
4196
+ An optional preprocessing function or lambda to apply to the data table during
4197
+ interrogation. This function should take a table as input and return a modified table.
4198
+ Have a look at the *Preprocessing* section for more information on how to use this
4199
+ argument.
4200
+ thresholds
4201
+ Set threshold failure levels for reporting and reacting to exceedances of the levels.
4202
+ The thresholds are set at the step level and will override any global thresholds set in
4203
+ `Validate(thresholds=...)`. The default is `None`, which means that no thresholds will
4204
+ be set locally and global thresholds (if any) will take effect. Look at the *Thresholds*
4205
+ section for information on how to set threshold levels.
4206
+ actions
4207
+ Optional actions to take when the validation step meets or exceeds any set threshold
4208
+ levels. If provided, the [`Actions`](`pointblank.Actions`) class should be used to
4209
+ define the actions.
4210
+ brief
4211
+ An optional brief description of the validation step that will be displayed in the
4212
+ reporting table. You can use the templating elements like `"{step}"` to insert
4213
+ the step number, or `"{auto}"` to include an automatically generated brief. If `True`
4214
+ the entire brief will be automatically generated. If `None` (the default) then there
4215
+ won't be a brief.
4216
+ active
4217
+ A boolean value indicating whether the validation step should be active. Using `False`
4218
+ will make the validation step inactive (still reporting its presence and keeping indexes
4219
+ for the steps unchanged).
4220
+
4221
+ Returns
4222
+ -------
4223
+ Validate
4224
+ The `Validate` object with the added validation step.
4225
+
4226
+ Preprocessing
4227
+ -------------
4228
+ The `pre=` argument allows for a preprocessing function or lambda to be applied to the data
4229
+ table during interrogation. This function should take a table as input and return a modified
4230
+ table. This is useful for performing any necessary transformations or filtering on the data
4231
+ before the validation step is applied.
4232
+
4233
+ The preprocessing function can be any callable that takes a table as input and returns a
4234
+ modified table. For example, you could use a lambda function to filter the table based on
4235
+ certain criteria or to apply a transformation to the data. Regarding the lifetime of the
4236
+ transformed table, it only exists during the validation step and is not stored in the
4237
+ `Validate` object or used in subsequent validation steps.
4238
+
4239
+ Thresholds
4240
+ ----------
4241
+ The `thresholds=` parameter is used to set the failure-condition levels for the validation
4242
+ step. If they are set here at the step level, these thresholds will override any thresholds
4243
+ set at the global level in `Validate(thresholds=...)`.
4244
+
4245
+ There are three threshold levels: 'warning', 'error', and 'critical'. The threshold values
4246
+ can either be set as the proportion of test units failing (a value between `0` and `1`),
4247
+ or, the absolute number of failing test units (as an integer that's `1` or greater).
4248
+
4249
+ Thresholds can be defined using one of these input schemes:
4250
+
4251
+ 1. use the [`Thresholds`](`pointblank.Thresholds`) class (the most direct way to create
4252
+ thresholds)
4253
+ 2. provide a tuple of 1-3 values, where position `0` is the 'warning' level, position `1` is
4254
+ the 'error' level, and position `2` is the 'critical' level
4255
+ 3. create a dictionary of 1-3 value entries; the valid keys are 'warning', 'error', and
4256
+ 'critical'
4257
+ 4. a single integer/float value denoting absolute number or fraction of failing test units
4258
+ for the 'warning' level only
4259
+
4260
+ If the number of failing test units exceeds set thresholds, the validation step will be
4261
+ marked as 'warning', 'error', or 'critical'. Not all of the threshold levels need to be
4262
+ set; you're free to set any combination of them.
4263
+
4264
+ Aside from reporting failure conditions, thresholds can be used to determine the actions to
4265
+ take for each level of failure (using the `actions=` parameter).
4266
+
4267
+ Examples
4268
+ --------
4269
+ For the examples here, we'll use a simple Polars DataFrame with three numeric columns (`a`,
4270
+ `b`, and `c`). The table is shown below:
4271
+
4272
+ ```python
4273
+ import pointblank as pb
4274
+ import polars as pl
4275
+
4276
+ tbl = pl.DataFrame(
4277
+ {
4278
+ "a": [5, 7, 1, 3, 9, 4],
4279
+ "b": [6, 3, 0, 5, 8, 2],
4280
+ "c": [10, 4, 8, 9, 10, 5],
4281
+ }
4282
+ )
4283
+
4284
+ pb.preview(tbl)
4285
+ ```
4286
+
4287
+ Let's validate that the values in each row satisfy multiple conditions simultaneously:
4288
+
4289
+ 1. Column `a` should be greater than 2
4290
+ 2. Column `b` should be less than 7
4291
+ 3. The sum of `a` and `b` should be less than the value in column `c`
4292
+
4293
+ We'll use `conjointly()` to check all these conditions together:
4294
+
4295
+ ```python
4296
+ validation = (
4297
+ pb.Validate(data=tbl)
4298
+ .conjointly(
4299
+ lambda df: pl.col("a") > 2,
4300
+ lambda df: pl.col("b") < 7,
4301
+ lambda df: pl.col("a") + pl.col("b") < pl.col("c")
4302
+ )
4303
+ .interrogate()
4304
+ )
4305
+
4306
+ validation
4307
+ ```
4308
+
4309
+ The validation table shows that not all rows satisfy all three conditions together. For a
4310
+ row to pass the conjoint validation, all three conditions must be true for that row.
4311
+
4312
+ We can also use preprocessing to filter the data before applying the conjoint validation:
4313
+
4314
+ ```python
4315
+ validation = (
4316
+ pb.Validate(data=tbl)
4317
+ .conjointly(
4318
+ lambda df: pl.col("a") > 2,
4319
+ lambda df: pl.col("b") < 7,
4320
+ lambda df: pl.col("a") + pl.col("b") < pl.col("c"),
4321
+ pre=lambda df: df.filter(pl.col("c") > 5)
4322
+ )
4323
+ .interrogate()
4324
+ )
4325
+
4326
+ validation
4327
+ ```
4328
+
4329
+ This allows for more complex validation scenarios where the data is first prepared and then
4330
+ validated against multiple conditions simultaneously.
4331
+
4332
+ Or, you can use the backend-agnostic column expression helper
4333
+ [`expr_col()`](`pointblank.expr_col`) to write expressions that work across different table
4334
+ backends:
4335
+
4336
+ ```python
4337
+ tbl = pl.DataFrame(
4338
+ {
4339
+ "a": [5, 7, 1, 3, 9, 4],
4340
+ "b": [6, 3, 0, 5, 8, 2],
4341
+ "c": [10, 4, 8, 9, 10, 5],
4342
+ }
4343
+ )
4344
+
4345
+ # Using backend-agnostic syntax with expr_col()
4346
+ validation = (
4347
+ pb.Validate(data=tbl)
4348
+ .conjointly(
4349
+ lambda df: pb.expr_col("a") > 2,
4350
+ lambda df: pb.expr_col("b") < 7,
4351
+ lambda df: pb.expr_col("a") + pb.expr_col("b") < pb.expr_col("c")
4352
+ )
4353
+ .interrogate()
4354
+ )
4355
+
4356
+ validation
4357
+ ```
4358
+
4359
+ Using [`expr_col()`](`pointblank.expr_col`) allows your validation code to work consistently
4360
+ across Pandas, Polars, and Ibis table backends without changes, making your validation
4361
+ pipelines more portable.
4362
+
4363
+ See Also
4364
+ --------
4365
+ Look at the documentation of the [`expr_col()`](`pointblank.expr_col`) function for more
4366
+ information on how to use it with different table backends.
4367
+
4368
+
4174
4369
 
4175
4370
  ## The Column Selection family
4176
4371
 
@@ -4195,18 +4390,20 @@ col(exprs: 'str | ColumnSelector | ColumnSelectorNarwhals') -> 'Column | ColumnL
4195
4390
  [`interrogate()`](`pointblank.Validate.interrogate`) is called), Pointblank will then check that
4196
4391
  the column exists in the input table.
4197
4392
 
4393
+ For creating expressions to use with the `conjointly()` validation method, use the
4394
+ [`expr_col()`](`pointblank.expr_col`) function instead.
4395
+
4198
4396
  Parameters
4199
4397
  ----------
4200
4398
  exprs
4201
4399
  Either the name of a single column in the target table, provided as a string, or, an
4202
4400
  expression involving column selector functions (e.g., `starts_with("a")`,
4203
- `ends_with("e") | starts_with("a")`, etc.). Please read the documentation for further
4204
- details on which input forms are valid depending on the context.
4401
+ `ends_with("e") | starts_with("a")`, etc.).
4205
4402
 
4206
4403
  Returns
4207
4404
  -------
4208
- Column
4209
- A `Column` object representing the column.
4405
+ Column | ColumnLiteral | ColumnSelectorNarwhals
4406
+ A column object or expression representing the column reference.
4210
4407
 
4211
4408
  Usage with the `columns=` Argument
4212
4409
  -----------------------------------
@@ -4450,6 +4647,11 @@ col(exprs: 'str | ColumnSelector | ColumnSelectorNarwhals') -> 'Column | ColumnL
4450
4647
  [`matches()`](`pointblank.matches`) column selector functions from Narwhals, combined with the
4451
4648
  `&` operator. This is necessary to specify the set of columns that are numeric *and* match the
4452
4649
  text `"2023"` or `"2024"`.
4650
+
4651
+ See Also
4652
+ --------
4653
+ Create a column expression for use in `conjointly()` validation with the
4654
+ [`expr_col()`](`pointblank.expr_col`) function.
4453
4655
 
4454
4656
 
4455
4657
  starts_with(text: 'str', case_sensitive: 'bool' = False) -> 'StartsWith'
@@ -5474,6 +5676,69 @@ last_n(n: 'int', offset: 'int' = 0) -> 'LastN'
5474
5676
  `paid_2022`, and `paid_2024`.
5475
5677
 
5476
5678
 
5679
+ expr_col(column_name: 'str') -> 'ColumnExpression'
5680
+
5681
+ Create a column expression for use in `conjointly()` validation.
5682
+
5683
+ This function returns a ColumnExpression object that supports operations like `>`, `<`, `+`,
5684
+ etc. for use in [`conjointly()`](`pointblank.Validate.conjointly`) validation expressions.
5685
+
5686
+ Parameters
5687
+ ----------
5688
+ column_name
5689
+ The name of the column to reference.
5690
+
5691
+ Returns
5692
+ -------
5693
+ ColumnExpression
5694
+ A column expression that can be used in comparisons and operations.
5695
+
5696
+ Examples
5697
+ --------
5698
+ Let's say we have a table with three columns: `a`, `b`, and `c`. We want to validate that:
5699
+
5700
+ - The values in column `a` are greater than `2`.
5701
+ - The values in column `b` are less than `7`.
5702
+ - The sum of columns `a` and `b` is less than the values in column `c`.
5703
+
5704
+ We can use the `expr_col()` function to create a column expression for each of these conditions.
5705
+
5706
+ ```python
5707
+ import pointblank as pb
5708
+ import polars as pl
5709
+
5710
+ tbl = pl.DataFrame(
5711
+ {
5712
+ "a": [5, 7, 1, 3, 9, 4],
5713
+ "b": [6, 3, 0, 5, 8, 2],
5714
+ "c": [10, 4, 8, 9, 10, 5],
5715
+ }
5716
+ )
5717
+
5718
+ # Using expr_col() to create backend-agnostic validation expressions
5719
+ validation = (
5720
+ pb.Validate(data=tbl)
5721
+ .conjointly(
5722
+ lambda df: pb.expr_col("a") > 2,
5723
+ lambda df: pb.expr_col("b") < 7,
5724
+ lambda df: pb.expr_col("a") + pb.expr_col("b") < pb.expr_col("c")
5725
+ )
5726
+ .interrogate()
5727
+ )
5728
+
5729
+ validation
5730
+ ```
5731
+
5732
+ The above code creates a validation object that checks the specified conditions using the
5733
+ `expr_col()` function. The resulting validation table will show whether each condition was
5734
+ satisfied for each row in the table.
5735
+
5736
+ See Also
5737
+ --------
5738
+ The [`conjointly()`](`pointblank.Validate.conjointly`) validation method, which is where this
5739
+ function should be used.
5740
+
5741
+
5477
5742
 
5478
5743
  ## The Interrogation and Reporting family
5479
5744
 
@@ -5916,6 +6181,7 @@ get_data_extracts(self, i: 'int | list[int] | None' = None, frame: 'bool' = Fals
5916
6181
  - [`col_vals_null()`](`pointblank.Validate.col_vals_null`)
5917
6182
  - [`col_vals_not_null()`](`pointblank.Validate.col_vals_not_null`)
5918
6183
  - [`col_vals_regex()`](`pointblank.Validate.col_vals_regex`)
6184
+ - [`rows_distinct()`](`pointblank.Validate.rows_distinct`)
5919
6185
 
5920
6186
  An extracted row means that a test unit failed for that row in the validation step. The
5921
6187
  extracted rows are a subset of the original table and are useful for further analysis or for
pointblank/validate.py CHANGED
@@ -52,6 +52,7 @@ from pointblank._interrogation import (
52
52
  ColValsCompareTwo,
53
53
  ColValsExpr,
54
54
  ColValsRegex,
55
+ ConjointlyValidation,
55
56
  NumberOfTestUnits,
56
57
  RowCountMatch,
57
58
  RowsDistinct,
@@ -86,6 +87,8 @@ from pointblank.thresholds import (
86
87
  )
87
88
 
88
89
  if TYPE_CHECKING:
90
+ from collections.abc import Collection
91
+
89
92
  from pointblank._typing import AbsoluteBounds, Tolerance
90
93
 
91
94
  __all__ = [
@@ -4310,7 +4313,7 @@ class Validate:
4310
4313
  def col_vals_in_set(
4311
4314
  self,
4312
4315
  columns: str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals,
4313
- set: list[float | int],
4316
+ set: Collection[Any],
4314
4317
  pre: Callable | None = None,
4315
4318
  thresholds: int | float | bool | tuple | dict | Thresholds = None,
4316
4319
  actions: Actions | None = None,
@@ -4470,7 +4473,13 @@ class Validate:
4470
4473
  assertion_type = _get_fn_name()
4471
4474
 
4472
4475
  _check_column(column=columns)
4473
- _check_set_types(set=set)
4476
+
4477
+ for val in set:
4478
+ if val is None:
4479
+ continue
4480
+ if not isinstance(val, (float, int, str)):
4481
+ raise ValueError("`set=` must be a list of floats, integers, or strings.")
4482
+
4474
4483
  _check_pre(pre=pre)
4475
4484
  _check_thresholds(thresholds=thresholds)
4476
4485
  _check_boolean_input(param=active, param_name="active")
@@ -6462,6 +6471,250 @@ class Validate:
6462
6471
 
6463
6472
  return self
6464
6473
 
6474
+ def conjointly(
6475
+ self,
6476
+ *exprs: Callable,
6477
+ pre: Callable | None = None,
6478
+ thresholds: int | float | bool | tuple | dict | Thresholds = None,
6479
+ actions: Actions | None = None,
6480
+ brief: str | bool | None = None,
6481
+ active: bool = True,
6482
+ ) -> Validate:
6483
+ """
6484
+ Perform multiple row-wise validations for joint validity.
6485
+
6486
+ The `conjointly()` validation method checks whether each row in the table passes multiple
6487
+ validation conditions simultaneously. This enables compound validation logic where a test
6488
+ unit (typically a row) must satisfy all specified conditions to pass the validation.
6489
+
6490
+ This method accepts multiple validation expressions as callables, which should return
6491
+ boolean expressions when applied to the data. You can use lambdas that incorporate
6492
+ Polars/Pandas/Ibis expressions (based on the target table type) or create more complex
6493
+ validation functions. The validation will operate over the number of test units that is
6494
+ equal to the number of rows in the table (determined after any `pre=` mutation has been
6495
+ applied).
6496
+
6497
+ Parameters
6498
+ ----------
6499
+ *exprs
6500
+ Multiple validation expressions provided as callable functions. Each callable should
6501
+ accept a table as its single argument and return a boolean expression or Series/Column
6502
+ that evaluates to boolean values for each row.
6503
+ pre
6504
+ An optional preprocessing function or lambda to apply to the data table during
6505
+ interrogation. This function should take a table as input and return a modified table.
6506
+ Have a look at the *Preprocessing* section for more information on how to use this
6507
+ argument.
6508
+ thresholds
6509
+ Set threshold failure levels for reporting and reacting to exceedances of the levels.
6510
+ The thresholds are set at the step level and will override any global thresholds set in
6511
+ `Validate(thresholds=...)`. The default is `None`, which means that no thresholds will
6512
+ be set locally and global thresholds (if any) will take effect. Look at the *Thresholds*
6513
+ section for information on how to set threshold levels.
6514
+ actions
6515
+ Optional actions to take when the validation step meets or exceeds any set threshold
6516
+ levels. If provided, the [`Actions`](`pointblank.Actions`) class should be used to
6517
+ define the actions.
6518
+ brief
6519
+ An optional brief description of the validation step that will be displayed in the
6520
+ reporting table. You can use the templating elements like `"{step}"` to insert
6521
+ the step number, or `"{auto}"` to include an automatically generated brief. If `True`
6522
+ the entire brief will be automatically generated. If `None` (the default) then there
6523
+ won't be a brief.
6524
+ active
6525
+ A boolean value indicating whether the validation step should be active. Using `False`
6526
+ will make the validation step inactive (still reporting its presence and keeping indexes
6527
+ for the steps unchanged).
6528
+
6529
+ Returns
6530
+ -------
6531
+ Validate
6532
+ The `Validate` object with the added validation step.
6533
+
6534
+ Preprocessing
6535
+ -------------
6536
+ The `pre=` argument allows for a preprocessing function or lambda to be applied to the data
6537
+ table during interrogation. This function should take a table as input and return a modified
6538
+ table. This is useful for performing any necessary transformations or filtering on the data
6539
+ before the validation step is applied.
6540
+
6541
+ The preprocessing function can be any callable that takes a table as input and returns a
6542
+ modified table. For example, you could use a lambda function to filter the table based on
6543
+ certain criteria or to apply a transformation to the data. Regarding the lifetime of the
6544
+ transformed table, it only exists during the validation step and is not stored in the
6545
+ `Validate` object or used in subsequent validation steps.
6546
+
6547
+ Thresholds
6548
+ ----------
6549
+ The `thresholds=` parameter is used to set the failure-condition levels for the validation
6550
+ step. If they are set here at the step level, these thresholds will override any thresholds
6551
+ set at the global level in `Validate(thresholds=...)`.
6552
+
6553
+ There are three threshold levels: 'warning', 'error', and 'critical'. The threshold values
6554
+ can either be set as the proportion of test units failing (a value between `0` and `1`),
6555
+ or, the absolute number of failing test units (as an integer that's `1` or greater).
6556
+
6557
+ Thresholds can be defined using one of these input schemes:
6558
+
6559
+ 1. use the [`Thresholds`](`pointblank.Thresholds`) class (the most direct way to create
6560
+ thresholds)
6561
+ 2. provide a tuple of 1-3 values, where position `0` is the 'warning' level, position `1` is
6562
+ the 'error' level, and position `2` is the 'critical' level
6563
+ 3. create a dictionary of 1-3 value entries; the valid keys are 'warning', 'error', and
6564
+ 'critical'
6565
+ 4. a single integer/float value denoting absolute number or fraction of failing test units
6566
+ for the 'warning' level only
6567
+
6568
+ If the number of failing test units exceeds set thresholds, the validation step will be
6569
+ marked as 'warning', 'error', or 'critical'. Not all of the threshold levels need to be
6570
+ set; you're free to set any combination of them.
6571
+
6572
+ Aside from reporting failure conditions, thresholds can be used to determine the actions to
6573
+ take for each level of failure (using the `actions=` parameter).
6574
+
6575
+ Examples
6576
+ --------
6577
+ ```{python}
6578
+ #| echo: false
6579
+ #| output: false
6580
+ import pointblank as pb
6581
+ pb.config(report_incl_header=False, report_incl_footer=False, preview_incl_header=False)
6582
+ ```
6583
+ For the examples here, we'll use a simple Polars DataFrame with three numeric columns (`a`,
6584
+ `b`, and `c`). The table is shown below:
6585
+
6586
+ ```{python}
6587
+ import pointblank as pb
6588
+ import polars as pl
6589
+
6590
+ tbl = pl.DataFrame(
6591
+ {
6592
+ "a": [5, 7, 1, 3, 9, 4],
6593
+ "b": [6, 3, 0, 5, 8, 2],
6594
+ "c": [10, 4, 8, 9, 10, 5],
6595
+ }
6596
+ )
6597
+
6598
+ pb.preview(tbl)
6599
+ ```
6600
+
6601
+ Let's validate that the values in each row satisfy multiple conditions simultaneously:
6602
+
6603
+ 1. Column `a` should be greater than 2
6604
+ 2. Column `b` should be less than 7
6605
+ 3. The sum of `a` and `b` should be less than the value in column `c`
6606
+
6607
+ We'll use `conjointly()` to check all these conditions together:
6608
+
6609
+ ```{python}
6610
+ validation = (
6611
+ pb.Validate(data=tbl)
6612
+ .conjointly(
6613
+ lambda df: pl.col("a") > 2,
6614
+ lambda df: pl.col("b") < 7,
6615
+ lambda df: pl.col("a") + pl.col("b") < pl.col("c")
6616
+ )
6617
+ .interrogate()
6618
+ )
6619
+
6620
+ validation
6621
+ ```
6622
+
6623
+ The validation table shows that not all rows satisfy all three conditions together. For a
6624
+ row to pass the conjoint validation, all three conditions must be true for that row.
6625
+
6626
+ We can also use preprocessing to filter the data before applying the conjoint validation:
6627
+
6628
+ ```{python}
6629
+ validation = (
6630
+ pb.Validate(data=tbl)
6631
+ .conjointly(
6632
+ lambda df: pl.col("a") > 2,
6633
+ lambda df: pl.col("b") < 7,
6634
+ lambda df: pl.col("a") + pl.col("b") < pl.col("c"),
6635
+ pre=lambda df: df.filter(pl.col("c") > 5)
6636
+ )
6637
+ .interrogate()
6638
+ )
6639
+
6640
+ validation
6641
+ ```
6642
+
6643
+ This allows for more complex validation scenarios where the data is first prepared and then
6644
+ validated against multiple conditions simultaneously.
6645
+
6646
+ Or, you can use the backend-agnostic column expression helper
6647
+ [`expr_col()`](`pointblank.expr_col`) to write expressions that work across different table
6648
+ backends:
6649
+
6650
+ ```{python}
6651
+ tbl = pl.DataFrame(
6652
+ {
6653
+ "a": [5, 7, 1, 3, 9, 4],
6654
+ "b": [6, 3, 0, 5, 8, 2],
6655
+ "c": [10, 4, 8, 9, 10, 5],
6656
+ }
6657
+ )
6658
+
6659
+ # Using backend-agnostic syntax with expr_col()
6660
+ validation = (
6661
+ pb.Validate(data=tbl)
6662
+ .conjointly(
6663
+ lambda df: pb.expr_col("a") > 2,
6664
+ lambda df: pb.expr_col("b") < 7,
6665
+ lambda df: pb.expr_col("a") + pb.expr_col("b") < pb.expr_col("c")
6666
+ )
6667
+ .interrogate()
6668
+ )
6669
+
6670
+ validation
6671
+ ```
6672
+
6673
+ Using [`expr_col()`](`pointblank.expr_col`) allows your validation code to work consistently
6674
+ across Pandas, Polars, and Ibis table backends without changes, making your validation
6675
+ pipelines more portable.
6676
+
6677
+ See Also
6678
+ --------
6679
+ Look at the documentation of the [`expr_col()`](`pointblank.expr_col`) function for more
6680
+ information on how to use it with different table backends.
6681
+ """
6682
+
6683
+ assertion_type = _get_fn_name()
6684
+
6685
+ if len(exprs) == 0:
6686
+ raise ValueError("At least one validation expression must be provided")
6687
+
6688
+ _check_pre(pre=pre)
6689
+ _check_thresholds(thresholds=thresholds)
6690
+ _check_boolean_input(param=active, param_name="active")
6691
+
6692
+ # Determine threshold to use (global or local) and normalize a local `thresholds=` value
6693
+ thresholds = (
6694
+ self.thresholds if thresholds is None else _normalize_thresholds_creation(thresholds)
6695
+ )
6696
+
6697
+ # Determine brief to use (global or local) and transform any shorthands of `brief=`
6698
+ brief = self.brief if brief is None else _transform_auto_brief(brief=brief)
6699
+
6700
+ # Package the validation expressions for later evaluation
6701
+ values = {"expressions": exprs}
6702
+
6703
+ val_info = _ValidationInfo(
6704
+ assertion_type=assertion_type,
6705
+ column=None, # This is a rowwise validation, not specific to any column
6706
+ values=values,
6707
+ pre=pre,
6708
+ thresholds=thresholds,
6709
+ actions=actions,
6710
+ brief=brief,
6711
+ active=active,
6712
+ )
6713
+
6714
+ self._add_validation(validation_info=val_info)
6715
+
6716
+ return self
6717
+
6465
6718
  def interrogate(
6466
6719
  self,
6467
6720
  collect_extracts: bool = True,
@@ -6841,6 +7094,14 @@ class Validate:
6841
7094
 
6842
7095
  results_tbl = None
6843
7096
 
7097
+ if assertion_category == "CONJOINTLY":
7098
+ results_tbl = ConjointlyValidation(
7099
+ data_tbl=data_tbl_step,
7100
+ expressions=value["expressions"],
7101
+ threshold=threshold,
7102
+ tbl_type=tbl_type,
7103
+ ).get_test_results()
7104
+
6844
7105
  if assertion_category not in [
6845
7106
  "COL_EXISTS_HAS_TYPE",
6846
7107
  "COL_SCHEMA_MATCH",
@@ -6849,9 +7110,18 @@ class Validate:
6849
7110
  ]:
6850
7111
  # Extract the `pb_is_good_` column from the table as a results list
6851
7112
  if tbl_type in IBIS_BACKENDS:
6852
- results_list = (
6853
- results_tbl.select("pb_is_good_").to_pandas()["pb_is_good_"].to_list()
6854
- )
7113
+ # Select the DataFrame library to use for getting the results list
7114
+ df_lib = _select_df_lib(preference="polars")
7115
+ df_lib_name = df_lib.__name__
7116
+
7117
+ if df_lib_name == "pandas":
7118
+ results_list = (
7119
+ results_tbl.select("pb_is_good_").to_pandas()["pb_is_good_"].to_list()
7120
+ )
7121
+ else:
7122
+ results_list = (
7123
+ results_tbl.select("pb_is_good_").to_polars()["pb_is_good_"].to_list()
7124
+ )
6855
7125
 
6856
7126
  else:
6857
7127
  results_list = nw.from_native(results_tbl)["pb_is_good_"].to_list()
@@ -8384,6 +8654,7 @@ class Validate:
8384
8654
  # Do we have a DataFrame library to work with?
8385
8655
  _check_any_df_lib(method_used="get_tabular_report")
8386
8656
 
8657
+ # Select the DataFrame library
8387
8658
  df_lib = _select_df_lib(preference="polars")
8388
8659
 
8389
8660
  # Get information on the input data table
@@ -8613,6 +8884,9 @@ class Validate:
8613
8884
  else:
8614
8885
  # With a column subset list, format with commas between the column names
8615
8886
  columns_upd.append(", ".join(column))
8887
+
8888
+ elif assertion_type[i] in ["conjointly"]:
8889
+ columns_upd.append("")
8616
8890
  else:
8617
8891
  columns_upd.append(str(column))
8618
8892
 
@@ -8684,6 +8958,9 @@ class Validate:
8684
8958
 
8685
8959
  values_upd.append(str(count))
8686
8960
 
8961
+ elif assertion_type[i] in ["conjointly"]:
8962
+ values_upd.append("COLUMN EXPR")
8963
+
8687
8964
  # If the assertion type is not recognized, add the value as a string
8688
8965
  else:
8689
8966
  values_upd.append(str(value))
@@ -9970,6 +10247,9 @@ def _create_autobrief_or_failure_text(
9970
10247
  for_failure=for_failure,
9971
10248
  )
9972
10249
 
10250
+ if assertion_type == "conjointly":
10251
+ return _create_text_conjointly(lang=lang, for_failure=for_failure)
10252
+
9973
10253
  return None # pragma: no cover
9974
10254
 
9975
10255
 
@@ -10144,6 +10424,12 @@ def _create_text_col_count_match(lang: str, value: int, for_failure: bool = Fals
10144
10424
  return EXPECT_FAIL_TEXT[f"col_count_match_n_{type_}_text"][lang].format(values_text=values_text)
10145
10425
 
10146
10426
 
10427
+ def _create_text_conjointly(lang: str, for_failure: bool = False) -> str:
10428
+ type_ = _expect_failure_type(for_failure=for_failure)
10429
+
10430
+ return EXPECT_FAIL_TEXT[f"conjointly_{type_}_text"][lang]
10431
+
10432
+
10147
10433
  def _prep_column_text(column: str | list[str]) -> str:
10148
10434
  if isinstance(column, list):
10149
10435
  return "`" + str(column[0]) + "`"