pointblank 0.8.5__py3-none-any.whl → 0.8.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4171,6 +4171,201 @@ col_count_match(self, count: 'int | FrameT | Any', inverse: 'bool' = False, pre:
  columns in the target table. So, the single test unit passed.
 
 
+ conjointly(self, *exprs: 'Callable', pre: 'Callable | None' = None, thresholds: 'int | float | bool | tuple | dict | Thresholds' = None, actions: 'Actions | None' = None, brief: 'str | bool | None' = None, active: 'bool' = True) -> 'Validate'
+
+ Perform multiple row-wise validations for joint validity.
+
+ The `conjointly()` validation method checks whether each row in the table passes multiple
+ validation conditions simultaneously. This enables compound validation logic where a test
+ unit (typically a row) must satisfy all specified conditions to pass the validation.
+
+ This method accepts multiple validation expressions as callables, which should return
+ boolean expressions when applied to the data. You can use lambdas that incorporate
+ Polars/Pandas/Ibis expressions (based on the target table type) or create more complex
+ validation functions. The validation will operate over the number of test units that is
+ equal to the number of rows in the table (determined after any `pre=` mutation has been
+ applied).
+
+ Parameters
+ ----------
+ *exprs
+     Multiple validation expressions provided as callable functions. Each callable should
+     accept a table as its single argument and return a boolean expression or Series/Column
+     that evaluates to boolean values for each row.
+ pre
+     An optional preprocessing function or lambda to apply to the data table during
+     interrogation. This function should take a table as input and return a modified table.
+     Have a look at the *Preprocessing* section for more information on how to use this
+     argument.
+ thresholds
+     Set threshold failure levels for reporting and reacting to exceedances of the levels.
+     The thresholds are set at the step level and will override any global thresholds set in
+     `Validate(thresholds=...)`. The default is `None`, which means that no thresholds will
+     be set locally and global thresholds (if any) will take effect. Look at the *Thresholds*
+     section for information on how to set threshold levels.
+ actions
+     Optional actions to take when the validation step meets or exceeds any set threshold
+     levels. If provided, the [`Actions`](`pointblank.Actions`) class should be used to
+     define the actions.
+ brief
+     An optional brief description of the validation step that will be displayed in the
+     reporting table. You can use templating elements like `"{step}"` to insert
+     the step number, or `"{auto}"` to include an automatically generated brief. If `True`,
+     the entire brief will be automatically generated. If `None` (the default) then there
+     won't be a brief.
+ active
+     A boolean value indicating whether the validation step should be active. Using `False`
+     will make the validation step inactive (still reporting its presence and keeping indexes
+     for the steps unchanged).
+
+ Returns
+ -------
+ Validate
+     The `Validate` object with the added validation step.
+
+ Preprocessing
+ -------------
+ The `pre=` argument allows for a preprocessing function or lambda to be applied to the data
+ table during interrogation. This function should take a table as input and return a modified
+ table. This is useful for performing any necessary transformations or filtering on the data
+ before the validation step is applied.
+
+ The preprocessing function can be any callable that takes a table as input and returns a
+ modified table. For example, you could use a lambda function to filter the table based on
+ certain criteria or to apply a transformation to the data. Regarding the lifetime of the
+ transformed table: it only exists during the validation step and is not stored in the
+ `Validate` object or used in subsequent validation steps.
+
4239
+ Thresholds
+ ----------
+ The `thresholds=` parameter is used to set the failure-condition levels for the validation
+ step. If they are set here at the step level, these thresholds will override any thresholds
+ set at the global level in `Validate(thresholds=...)`.
+
+ There are three threshold levels: 'warning', 'error', and 'critical'. The threshold values
+ can either be set as a proportion of failing test units (a value between `0` and `1`) or as
+ the absolute number of failing test units (an integer that's `1` or greater).
+
+ Thresholds can be defined using one of these input schemes:
+
+ 1. use the [`Thresholds`](`pointblank.Thresholds`) class (the most direct way to create
+ thresholds)
+ 2. provide a tuple of 1-3 values, where position `0` is the 'warning' level, position `1` is
+ the 'error' level, and position `2` is the 'critical' level
+ 3. create a dictionary of 1-3 value entries; the valid keys are 'warning', 'error', and
+ 'critical'
+ 4. a single integer/float value denoting the absolute number or fraction of failing test units
+ for the 'warning' level only
+
+ If the number of failing test units exceeds a set threshold, the validation step will be
+ marked as 'warning', 'error', or 'critical'. Not all threshold levels need to be set; you're
+ free to set any combination of them.
+
+ Aside from reporting failure conditions, thresholds can be used to determine the actions to
+ take for each level of failure (using the `actions=` parameter).
+
4267
+ Examples
+ --------
+ For the examples here, we'll use a simple Polars DataFrame with three numeric columns (`a`,
+ `b`, and `c`). The table is shown below:
+
+ ```python
+ import pointblank as pb
+ import polars as pl
+
+ tbl = pl.DataFrame(
+     {
+         "a": [5, 7, 1, 3, 9, 4],
+         "b": [6, 3, 0, 5, 8, 2],
+         "c": [10, 4, 8, 9, 10, 5],
+     }
+ )
+
+ pb.preview(tbl)
+ ```
+
+ Let's validate that the values in each row satisfy multiple conditions simultaneously:
+
+ 1. Column `a` should be greater than 2
+ 2. Column `b` should be less than 7
+ 3. The sum of `a` and `b` should be less than the value in column `c`
+
+ We'll use `conjointly()` to check all these conditions together:
+
+ ```python
+ validation = (
+     pb.Validate(data=tbl)
+     .conjointly(
+         lambda df: pl.col("a") > 2,
+         lambda df: pl.col("b") < 7,
+         lambda df: pl.col("a") + pl.col("b") < pl.col("c")
+     )
+     .interrogate()
+ )
+
+ validation
+ ```
+
+ The validation table shows that not all rows satisfy all three conditions together. For a
+ row to pass the conjoint validation, all three conditions must be true for that row.
+
+ We can also use preprocessing to filter the data before applying the conjoint validation:
+
+ ```python
+ validation = (
+     pb.Validate(data=tbl)
+     .conjointly(
+         lambda df: pl.col("a") > 2,
+         lambda df: pl.col("b") < 7,
+         lambda df: pl.col("a") + pl.col("b") < pl.col("c"),
+         pre=lambda df: df.filter(pl.col("c") > 5)
+     )
+     .interrogate()
+ )
+
+ validation
+ ```
+
+ This allows for more complex validation scenarios where the data is first prepared and then
+ validated against multiple conditions simultaneously.
+
+ Or, you can use the backend-agnostic column expression helper
+ [`expr_col()`](`pointblank.expr_col`) to write expressions that work across different table
+ backends:
+
+ ```python
+ tbl = pl.DataFrame(
+     {
+         "a": [5, 7, 1, 3, 9, 4],
+         "b": [6, 3, 0, 5, 8, 2],
+         "c": [10, 4, 8, 9, 10, 5],
+     }
+ )
+
+ # Using backend-agnostic syntax with expr_col()
+ validation = (
+     pb.Validate(data=tbl)
+     .conjointly(
+         lambda df: pb.expr_col("a") > 2,
+         lambda df: pb.expr_col("b") < 7,
+         lambda df: pb.expr_col("a") + pb.expr_col("b") < pb.expr_col("c")
+     )
+     .interrogate()
+ )
+
+ validation
+ ```
+
+ Using [`expr_col()`](`pointblank.expr_col`) allows your validation code to work consistently
+ across Pandas, Polars, and Ibis table backends without changes, making your validation
+ pipelines more portable.
+
+ See Also
+ --------
+ Look at the documentation of the [`expr_col()`](`pointblank.expr_col`) function for more
+ information on how to use it with different table backends.
+
+
 
  ## The Column Selection family
 
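The examples in the new `conjointly()` docstring above don't exercise the `thresholds=` or `actions=` parameters it documents. As a minimal sketch of those options (not taken from the package docs; it assumes `Actions` accepts a plain string message for the 'warning' level), step-level thresholds and an action could be attached like this:

```python
import pointblank as pb
import polars as pl

tbl = pl.DataFrame(
    {
        "a": [5, 7, 1, 3, 9, 4],
        "b": [6, 3, 0, 5, 8, 2],
        "c": [10, 4, 8, 9, 10, 5],
    }
)

validation = (
    pb.Validate(data=tbl)
    .conjointly(
        lambda df: pl.col("a") > 2,
        lambda df: pl.col("b") < 7,
        # tuple scheme: 'warning' at 1 failing row, 'error' at 20%, 'critical' at 50%
        thresholds=(1, 0.2, 0.5),
        # surface a message once the 'warning' threshold is met or exceeded
        actions=pb.Actions(warning="Some rows failed the conjoint checks."),
    )
    .interrogate()
)

validation
```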
@@ -4195,18 +4390,20 @@ col(exprs: 'str | ColumnSelector | ColumnSelectorNarwhals') -> 'Column | ColumnL
  [`interrogate()`](`pointblank.Validate.interrogate`) is called), Pointblank will then check that
  the column exists in the input table.
 
+ For creating expressions to use with the `conjointly()` validation method, use the
+ [`expr_col()`](`pointblank.expr_col`) function instead.
+
  Parameters
  ----------
  exprs
      Either the name of a single column in the target table, provided as a string, or, an
      expression involving column selector functions (e.g., `starts_with("a")`,
-     `ends_with("e") | starts_with("a")`, etc.). Please read the documentation for further
-     details on which input forms are valid depending on the context.
+     `ends_with("e") | starts_with("a")`, etc.).
 
  Returns
  -------
- Column
-     A `Column` object representing the column.
+ Column | ColumnLiteral | ColumnSelectorNarwhals
+     A column object or expression representing the column reference.
 
  Usage with the `columns=` Argument
  ----------------------------------
@@ -4450,6 +4647,11 @@ col(exprs: 'str | ColumnSelector | ColumnSelectorNarwhals') -> 'Column | ColumnL
  [`matches()`](`pointblank.matches`) column selector functions from Narwhals, combined with the
  `&` operator. This is necessary to specify the set of columns that are numeric *and* match the
  text `"2023"` or `"2024"`.
+
+ See Also
+ --------
+ Create a column expression for use in `conjointly()` validation with the
+ [`expr_col()`](`pointblank.expr_col`) function.
 
 
  starts_with(text: 'str', case_sensitive: 'bool' = False) -> 'StartsWith'
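Tying the `col()` notes above to a concrete call, here is a brief sketch (with made-up column names) of passing a selector expression to the `columns=` argument of a validation step:

```python
import pointblank as pb
import polars as pl

tbl = pl.DataFrame({"a_1": [1, 2, 3], "a_2": [4, 5, 6], "b_1": [7, 8, 9]})

validation = (
    pb.Validate(data=tbl)
    # one step definition that resolves to every column starting with "a"
    .col_vals_gt(columns=pb.col(pb.starts_with("a")), value=0)
    .interrogate()
)

validation
```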
@@ -5474,6 +5676,69 @@ last_n(n: 'int', offset: 'int' = 0) -> 'LastN'
  `paid_2022`, and `paid_2024`.
 
 
+ expr_col(column_name: 'str') -> 'ColumnExpression'
+
+ Create a column expression for use in `conjointly()` validation.
+
+ This function returns a ColumnExpression object that supports operations like `>`, `<`, `+`,
+ etc. for use in [`conjointly()`](`pointblank.Validate.conjointly`) validation expressions.
+
+ Parameters
+ ----------
+ column_name
+     The name of the column to reference.
+
+ Returns
+ -------
+ ColumnExpression
+     A column expression that can be used in comparisons and operations.
+
+ Examples
+ --------
+ Let's say we have a table with three columns: `a`, `b`, and `c`. We want to validate that:
+
+ - The values in column `a` are greater than `2`.
+ - The values in column `b` are less than `7`.
+ - The sum of columns `a` and `b` is less than the values in column `c`.
+
+ We can use the `expr_col()` function to create a column expression for each of these conditions.
+
+ ```python
+ import pointblank as pb
+ import polars as pl
+
+ tbl = pl.DataFrame(
+     {
+         "a": [5, 7, 1, 3, 9, 4],
+         "b": [6, 3, 0, 5, 8, 2],
+         "c": [10, 4, 8, 9, 10, 5],
+     }
+ )
+
+ # Using expr_col() to create backend-agnostic validation expressions
+ validation = (
+     pb.Validate(data=tbl)
+     .conjointly(
+         lambda df: pb.expr_col("a") > 2,
+         lambda df: pb.expr_col("b") < 7,
+         lambda df: pb.expr_col("a") + pb.expr_col("b") < pb.expr_col("c")
+     )
+     .interrogate()
+ )
+
+ validation
+ ```
+
+ The above code creates a validation object that checks the specified conditions using the
+ `expr_col()` function. The resulting validation table will show whether each condition was
+ satisfied for each row in the table.
+
+ See Also
+ --------
+ The [`conjointly()`](`pointblank.Validate.conjointly`) validation method, which is where this
+ function should be used.
+
+
 
  ## The Interrogation and Reporting family
 
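The `expr_col()` entry above states that the same expressions work across Pandas, Polars, and Ibis backends. As a small illustrative sketch of that claim (assuming pandas is installed), the identical lambdas can be pointed at a pandas DataFrame without rewriting them:

```python
import pandas as pd
import pointblank as pb

tbl_pd = pd.DataFrame(
    {
        "a": [5, 7, 1, 3, 9, 4],
        "b": [6, 3, 0, 5, 8, 2],
        "c": [10, 4, 8, 9, 10, 5],
    }
)

validation = (
    pb.Validate(data=tbl_pd)
    .conjointly(
        lambda df: pb.expr_col("a") > 2,
        lambda df: pb.expr_col("b") < 7,
        lambda df: pb.expr_col("a") + pb.expr_col("b") < pb.expr_col("c"),
    )
    .interrogate()
)

validation
```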
@@ -5916,6 +6181,7 @@ get_data_extracts(self, i: 'int | list[int] | None' = None, frame: 'bool' = Fals
  - [`col_vals_null()`](`pointblank.Validate.col_vals_null`)
  - [`col_vals_not_null()`](`pointblank.Validate.col_vals_not_null`)
  - [`col_vals_regex()`](`pointblank.Validate.col_vals_regex`)
+ - [`rows_distinct()`](`pointblank.Validate.rows_distinct`)
 
  An extracted row means that a test unit failed for that row in the validation step. The
  extracted rows are a subset of the original table and are useful for further analysis or for
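With `rows_distinct()` now added to this list, its failing rows can be pulled back with `get_data_extracts()`. A short sketch (table and values are hypothetical):

```python
import pointblank as pb
import polars as pl

tbl = pl.DataFrame({"x": [1, 2, 2, 3], "y": ["a", "b", "b", "c"]})

validation = (
    pb.Validate(data=tbl)
    .rows_distinct()
    .interrogate()
)

# Extract the duplicated (failing) rows from step 1 as a DataFrame
validation.get_data_extracts(i=1, frame=True)
```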
pointblank/validate.py CHANGED
@@ -52,6 +52,7 @@ from pointblank._interrogation import (
  ColValsCompareTwo,
  ColValsExpr,
  ColValsRegex,
+ ConjointlyValidation,
  NumberOfTestUnits,
  RowCountMatch,
  RowsDistinct,
@@ -6462,6 +6463,250 @@ class Validate:
 
          return self
 
+     def conjointly(
+         self,
+         *exprs: Callable,
+         pre: Callable | None = None,
+         thresholds: int | float | bool | tuple | dict | Thresholds = None,
+         actions: Actions | None = None,
+         brief: str | bool | None = None,
+         active: bool = True,
+     ) -> Validate:
+         """
+         Perform multiple row-wise validations for joint validity.
+
+         The `conjointly()` validation method checks whether each row in the table passes multiple
+         validation conditions simultaneously. This enables compound validation logic where a test
+         unit (typically a row) must satisfy all specified conditions to pass the validation.
+
+         This method accepts multiple validation expressions as callables, which should return
+         boolean expressions when applied to the data. You can use lambdas that incorporate
+         Polars/Pandas/Ibis expressions (based on the target table type) or create more complex
+         validation functions. The validation will operate over the number of test units that is
+         equal to the number of rows in the table (determined after any `pre=` mutation has been
+         applied).
+
+         Parameters
+         ----------
+         *exprs
+             Multiple validation expressions provided as callable functions. Each callable should
+             accept a table as its single argument and return a boolean expression or Series/Column
+             that evaluates to boolean values for each row.
+         pre
+             An optional preprocessing function or lambda to apply to the data table during
+             interrogation. This function should take a table as input and return a modified table.
+             Have a look at the *Preprocessing* section for more information on how to use this
+             argument.
+         thresholds
+             Set threshold failure levels for reporting and reacting to exceedances of the levels.
+             The thresholds are set at the step level and will override any global thresholds set in
+             `Validate(thresholds=...)`. The default is `None`, which means that no thresholds will
+             be set locally and global thresholds (if any) will take effect. Look at the *Thresholds*
+             section for information on how to set threshold levels.
+         actions
+             Optional actions to take when the validation step meets or exceeds any set threshold
+             levels. If provided, the [`Actions`](`pointblank.Actions`) class should be used to
+             define the actions.
+         brief
+             An optional brief description of the validation step that will be displayed in the
+             reporting table. You can use templating elements like `"{step}"` to insert
+             the step number, or `"{auto}"` to include an automatically generated brief. If `True`,
+             the entire brief will be automatically generated. If `None` (the default) then there
+             won't be a brief.
+         active
+             A boolean value indicating whether the validation step should be active. Using `False`
+             will make the validation step inactive (still reporting its presence and keeping indexes
+             for the steps unchanged).
+
+         Returns
+         -------
+         Validate
+             The `Validate` object with the added validation step.
+
+         Preprocessing
+         -------------
+         The `pre=` argument allows for a preprocessing function or lambda to be applied to the data
+         table during interrogation. This function should take a table as input and return a modified
+         table. This is useful for performing any necessary transformations or filtering on the data
+         before the validation step is applied.
+
+         The preprocessing function can be any callable that takes a table as input and returns a
+         modified table. For example, you could use a lambda function to filter the table based on
+         certain criteria or to apply a transformation to the data. Regarding the lifetime of the
+         transformed table: it only exists during the validation step and is not stored in the
+         `Validate` object or used in subsequent validation steps.
+
+         Thresholds
+         ----------
+         The `thresholds=` parameter is used to set the failure-condition levels for the validation
+         step. If they are set here at the step level, these thresholds will override any thresholds
+         set at the global level in `Validate(thresholds=...)`.
+
+         There are three threshold levels: 'warning', 'error', and 'critical'. The threshold values
+         can either be set as a proportion of failing test units (a value between `0` and `1`) or as
+         the absolute number of failing test units (an integer that's `1` or greater).
+
+         Thresholds can be defined using one of these input schemes:
+
+         1. use the [`Thresholds`](`pointblank.Thresholds`) class (the most direct way to create
+         thresholds)
+         2. provide a tuple of 1-3 values, where position `0` is the 'warning' level, position `1` is
+         the 'error' level, and position `2` is the 'critical' level
+         3. create a dictionary of 1-3 value entries; the valid keys are 'warning', 'error', and
+         'critical'
+         4. a single integer/float value denoting the absolute number or fraction of failing test units
+         for the 'warning' level only
+
+         If the number of failing test units exceeds a set threshold, the validation step will be
+         marked as 'warning', 'error', or 'critical'. Not all threshold levels need to be set; you're
+         free to set any combination of them.
+
+         Aside from reporting failure conditions, thresholds can be used to determine the actions to
+         take for each level of failure (using the `actions=` parameter).
+
+         Examples
+         --------
+         ```{python}
+         #| echo: false
+         #| output: false
+         import pointblank as pb
+         pb.config(report_incl_header=False, report_incl_footer=False, preview_incl_header=False)
+         ```
+         For the examples here, we'll use a simple Polars DataFrame with three numeric columns (`a`,
+         `b`, and `c`). The table is shown below:
+
+         ```{python}
+         import pointblank as pb
+         import polars as pl
+
+         tbl = pl.DataFrame(
+             {
+                 "a": [5, 7, 1, 3, 9, 4],
+                 "b": [6, 3, 0, 5, 8, 2],
+                 "c": [10, 4, 8, 9, 10, 5],
+             }
+         )
+
+         pb.preview(tbl)
+         ```
+
+         Let's validate that the values in each row satisfy multiple conditions simultaneously:
+
+         1. Column `a` should be greater than 2
+         2. Column `b` should be less than 7
+         3. The sum of `a` and `b` should be less than the value in column `c`
+
+         We'll use `conjointly()` to check all these conditions together:
+
+         ```{python}
+         validation = (
+             pb.Validate(data=tbl)
+             .conjointly(
+                 lambda df: pl.col("a") > 2,
+                 lambda df: pl.col("b") < 7,
+                 lambda df: pl.col("a") + pl.col("b") < pl.col("c")
+             )
+             .interrogate()
+         )
+
+         validation
+         ```
+
+         The validation table shows that not all rows satisfy all three conditions together. For a
+         row to pass the conjoint validation, all three conditions must be true for that row.
+
+         We can also use preprocessing to filter the data before applying the conjoint validation:
+
+         ```{python}
+         validation = (
+             pb.Validate(data=tbl)
+             .conjointly(
+                 lambda df: pl.col("a") > 2,
+                 lambda df: pl.col("b") < 7,
+                 lambda df: pl.col("a") + pl.col("b") < pl.col("c"),
+                 pre=lambda df: df.filter(pl.col("c") > 5)
+             )
+             .interrogate()
+         )
+
+         validation
+         ```
+
+         This allows for more complex validation scenarios where the data is first prepared and then
+         validated against multiple conditions simultaneously.
+
+         Or, you can use the backend-agnostic column expression helper
+         [`expr_col()`](`pointblank.expr_col`) to write expressions that work across different table
+         backends:
+
+         ```{python}
+         tbl = pl.DataFrame(
+             {
+                 "a": [5, 7, 1, 3, 9, 4],
+                 "b": [6, 3, 0, 5, 8, 2],
+                 "c": [10, 4, 8, 9, 10, 5],
+             }
+         )
+
+         # Using backend-agnostic syntax with expr_col()
+         validation = (
+             pb.Validate(data=tbl)
+             .conjointly(
+                 lambda df: pb.expr_col("a") > 2,
+                 lambda df: pb.expr_col("b") < 7,
+                 lambda df: pb.expr_col("a") + pb.expr_col("b") < pb.expr_col("c")
+             )
+             .interrogate()
+         )
+
+         validation
+         ```
+
+         Using [`expr_col()`](`pointblank.expr_col`) allows your validation code to work consistently
+         across Pandas, Polars, and Ibis table backends without changes, making your validation
+         pipelines more portable.
+
+         See Also
+         --------
+         Look at the documentation of the [`expr_col()`](`pointblank.expr_col`) function for more
+         information on how to use it with different table backends.
+         """
+
+         assertion_type = _get_fn_name()
+
+         if len(exprs) == 0:
+             raise ValueError("At least one validation expression must be provided")
+
+         _check_pre(pre=pre)
+         _check_thresholds(thresholds=thresholds)
+         _check_boolean_input(param=active, param_name="active")
+
+         # Determine threshold to use (global or local) and normalize a local `thresholds=` value
+         thresholds = (
+             self.thresholds if thresholds is None else _normalize_thresholds_creation(thresholds)
+         )
+
+         # Determine brief to use (global or local) and transform any shorthands of `brief=`
+         brief = self.brief if brief is None else _transform_auto_brief(brief=brief)
+
+         # Package the validation expressions for later evaluation
+         values = {"expressions": exprs}
+
+         val_info = _ValidationInfo(
+             assertion_type=assertion_type,
+             column=None,  # This is a rowwise validation, not specific to any column
+             values=values,
+             pre=pre,
+             thresholds=thresholds,
+             actions=actions,
+             brief=brief,
+             active=active,
+         )
+
+         self._add_validation(validation_info=val_info)
+
+         return self
+
 
      def interrogate(
          self,
@@ -6841,6 +7086,14 @@ class Validate:
 
          results_tbl = None
 
+         if assertion_category == "CONJOINTLY":
+             results_tbl = ConjointlyValidation(
+                 data_tbl=data_tbl_step,
+                 expressions=value["expressions"],
+                 threshold=threshold,
+                 tbl_type=tbl_type,
+             ).get_test_results()
+
          if assertion_category not in [
              "COL_EXISTS_HAS_TYPE",
              "COL_SCHEMA_MATCH",
@@ -6849,9 +7102,18 @@ class Validate:
          ]:
              # Extract the `pb_is_good_` column from the table as a results list
              if tbl_type in IBIS_BACKENDS:
-                 results_list = (
-                     results_tbl.select("pb_is_good_").to_pandas()["pb_is_good_"].to_list()
-                 )
+                 # Select the DataFrame library to use for getting the results list
+                 df_lib = _select_df_lib(preference="polars")
+                 df_lib_name = df_lib.__name__
+
+                 if df_lib_name == "pandas":
+                     results_list = (
+                         results_tbl.select("pb_is_good_").to_pandas()["pb_is_good_"].to_list()
+                     )
+                 else:
+                     results_list = (
+                         results_tbl.select("pb_is_good_").to_polars()["pb_is_good_"].to_list()
+                     )
 
              else:
                  results_list = nw.from_native(results_tbl)["pb_is_good_"].to_list()
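The change above materializes Ibis-backed results with whichever DataFrame library is available, preferring Polars. A standalone sketch of the same dispatch idea, with a plain try/except standing in for the internal `_select_df_lib()` helper:

```python
def pb_is_good_values(results_tbl) -> list:
    # `results_tbl` is expected to be an Ibis table expression with a boolean
    # `pb_is_good_` column. Prefer Polars for materializing it; fall back to
    # pandas when Polars isn't installed (illustrative stand-in for the real logic).
    try:
        import polars  # noqa: F401
        return results_tbl.select("pb_is_good_").to_polars()["pb_is_good_"].to_list()
    except ImportError:
        return results_tbl.select("pb_is_good_").to_pandas()["pb_is_good_"].to_list()
```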
@@ -8384,6 +8646,7 @@ class Validate:
          # Do we have a DataFrame library to work with?
          _check_any_df_lib(method_used="get_tabular_report")
 
+         # Select the DataFrame library
          df_lib = _select_df_lib(preference="polars")
 
          # Get information on the input data table
@@ -8613,6 +8876,9 @@ class Validate:
              else:
                  # With a column subset list, format with commas between the column names
                  columns_upd.append(", ".join(column))
+
+         elif assertion_type[i] in ["conjointly"]:
+             columns_upd.append("")
          else:
              columns_upd.append(str(column))
 
@@ -8684,6 +8950,9 @@ class Validate:
 
              values_upd.append(str(count))
 
+         elif assertion_type[i] in ["conjointly"]:
+             values_upd.append("COLUMN EXPR")
+
          # If the assertion type is not recognized, add the value as a string
          else:
              values_upd.append(str(value))
@@ -9970,6 +10239,9 @@ def _create_autobrief_or_failure_text(
          for_failure=for_failure,
      )
 
+     if assertion_type == "conjointly":
+         return _create_text_conjointly(lang=lang, for_failure=for_failure)
+
      return None  # pragma: no cover
 
 
@@ -10144,6 +10416,12 @@ def _create_text_col_count_match(lang: str, value: int, for_failure: bool = Fals
      return EXPECT_FAIL_TEXT[f"col_count_match_n_{type_}_text"][lang].format(values_text=values_text)
 
 
+ def _create_text_conjointly(lang: str, for_failure: bool = False) -> str:
+     type_ = _expect_failure_type(for_failure=for_failure)
+
+     return EXPECT_FAIL_TEXT[f"conjointly_{type_}_text"][lang]
+
+
  def _prep_column_text(column: str | list[str]) -> str:
      if isinstance(column, list):
          return "`" + str(column[0]) + "`"
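`_create_text_conjointly()` reads its strings from the `EXPECT_FAIL_TEXT` lookup, which is not part of this diff, so the shape of that table is only implied here. A hypothetical sketch of the pattern (key names, wording, and the values returned by `_expect_failure_type()` are invented for illustration):

```python
# Hypothetical stand-in for pointblank's EXPECT_FAIL_TEXT constant; the real entries
# follow the f"conjointly_{type_}_text" key pattern used above, but their exact
# wording and languages are not shown in this diff.
EXPECT_FAIL_TEXT = {
    "conjointly_expectation_text": {"en": "Expect that all supplied expressions hold jointly for each row."},
    "conjointly_failure_text": {"en": "Exceedance of failed test units where joint validity was expected."},
}


def _create_text_conjointly(lang: str, for_failure: bool = False) -> str:
    # Assumes _expect_failure_type() distinguishes 'expectation' vs. 'failure' text
    type_ = "failure" if for_failure else "expectation"
    return EXPECT_FAIL_TEXT[f"conjointly_{type_}_text"][lang]
```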