pointblank 0.9.1__py3-none-any.whl → 0.9.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pointblank/_constants.py CHANGED
@@ -44,6 +44,7 @@ ASSERTION_TYPE_METHOD_MAP = {
44
44
  "row_count_match": "row_count_match",
45
45
  "col_count_match": "col_count_match",
46
46
  "conjointly": "conjointly",
47
+ "specially": "specially",
47
48
  }
48
49
 
49
50
  METHOD_CATEGORY_MAP = {
@@ -69,6 +70,7 @@ METHOD_CATEGORY_MAP = {
69
70
  "row_count_match": "ROW_COUNT_MATCH",
70
71
  "col_count_match": "COL_COUNT_MATCH",
71
72
  "conjointly": "CONJOINTLY",
73
+ "specially": "SPECIALLY",
72
74
  }
73
75
 
74
76
  COMPARISON_OPERATORS = {
@@ -455,6 +457,18 @@ SVG_ICONS_FOR_ASSERTION_TYPES = {
455
457
  <path d="M51.8485976,12 L15.5758703,12 C13.9986329,12 12.712234,13.2863989 12.712234,14.8636364 L12.712234,51.1363636 C12.712234,52.7136011 13.9986329,54 15.5758703,54 L51.8485976,54 C53.4258351,54 54.712234,52.7136011 54.712234,51.1363636 L54.712234,14.8636364 C54.712234,13.2863989 53.4258351,12 51.8485976,12 Z M37.072234,44 L20.272234,44 L20.272234,42 L37.072234,42 L37.072234,44 Z M37.072234,34 L20.272234,34 L20.272234,32 L37.072234,32 L37.072234,34 Z M37.072234,24 L20.272234,24 L20.272234,22 L37.072234,22 L37.072234,24 Z M47.9233279,41.773438 L45.5706719,45.773438 C45.4427029,45.996094 45.239265,46.148438 45.0095779,46.1875 C44.9702029,46.195313 44.9275469,46.199219 44.88489,46.199219 C44.70114,46.199219 44.5206719,46.128906 44.373015,45.992188 L42.1877029,43.992188 C41.8202029,43.65625 41.7512969,43.027344 42.033484,42.589844 C42.3156719,42.152344 42.8439529,42.070313 43.2114529,42.40625 L44.697859,43.769531 L46.548484,40.625 C46.814265,40.171875 47.335984,40.0625 47.716609,40.378906 C48.097234,40.695313 48.189109,41.320313 47.9233279,41.773438 Z M47.9233279,31.773438 L45.5706719,35.773438 C45.4427029,35.996094 45.239265,36.148438 45.0095779,36.1875 C44.9702029,36.195313 44.9275469,36.199219 44.88489,36.199219 C44.70114,36.199219 44.5206719,36.128906 44.373015,35.992188 L42.1877029,33.992188 C41.8202029,33.65625 41.7512969,33.027344 42.033484,32.589844 C42.3156719,32.152344 42.8439529,32.070313 43.2114529,32.40625 L44.697859,33.769531 L46.548484,30.628906 C46.814265,30.175781 47.335984,30.0625 47.716609,30.382813 C48.097234,30.699219 48.189109,31.320313 47.9233279,31.773438 Z M47.9233279,21.773438 L45.5706719,25.773438 C45.4427029,25.996094 45.239265,26.148438 45.0095779,26.1875 C44.9702029,26.195313 44.9275469,26.199219 44.88489,26.199219 C44.70114,26.199219 44.5206719,26.128906 44.373015,25.992188 L42.1877029,23.992188 C41.8202029,23.65625 41.7512969,23.027344 42.033484,22.589844 C42.3156719,22.152344 42.8439529,22.070313 43.2114529,22.40625 
L44.697859,23.769531 L46.548484,20.625 C46.814265,20.171875 47.335984,20.0625 47.716609,20.378906 C48.097234,20.699219 48.189109,21.320313 47.9233279,21.773438 Z" id="conjoint" fill="#000000" fill-rule="nonzero"></path>
456
458
  </g>
457
459
  </g>
460
+ </svg>""",
461
+ "specially": """<?xml version="1.0" encoding="UTF-8"?>
462
+ <svg width="67px" height="67px" viewBox="0 0 67 67" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
463
+ <title>specially</title>
464
+ <g id="All-Icons" stroke="none" stroke-width="1" fill="none" fill-rule="evenodd">
465
+ <g id="specially" transform="translate(0.000000, 0.206897)">
466
+ <path d="M56.712234,1 C59.1975153,1 61.4475153,2.00735931 63.076195,3.63603897 C64.7048747,5.26471863 65.712234,7.51471863 65.712234,10 L65.712234,10 L65.712234,65 L10.712234,65 C8.22695259,65 5.97695259,63.9926407 4.34827294,62.363961 C2.71959328,60.7352814 1.71223397,58.4852814 1.71223397,56 L1.71223397,56 L1.71223397,10 C1.71223397,7.51471863 2.71959328,5.26471863 4.34827294,3.63603897 C5.97695259,2.00735931 8.22695259,1 10.712234,1 L10.712234,1 Z" id="rectangle" stroke="#000000" stroke-width="2" fill="#FFFFFF"></path>
467
+ <g id="star" transform="translate(8.500000, 8.500000)" fill="#000000" fill-rule="nonzero">
468
+ <path d="M25,0 C24.5874484,0 24.2174517,0.254002847 24.068359,0.6386719 L17.902344,16.535156 L0.94921875,17.400391 C0.536124409,17.4213013 0.17852458,17.6943523 0.0495446395,18.0873515 C-0.0794353012,18.4803507 0.046820452,18.9122002 0.3671875,19.173828 L13.568359,29.966797 L9.2324219,46.34375 C9.12646963,46.7428009 9.27663058,47.1659433 9.61042698,47.4089402 C9.94422338,47.651937 10.3930345,47.664834 10.740234,47.441406 L25,38.289062 L39.259766,47.441406 C39.6069655,47.6648339 40.0557766,47.6519369 40.3895729,47.4089401 C40.7233693,47.1659432 40.8735302,46.7428009 40.767578,46.34375 L36.431641,29.966797 L49.632812,19.173828 C49.953179,18.9122002 50.0794348,18.4803507 49.9504549,18.0873516 C49.821475,17.6943524 49.4638753,17.4213014 49.050781,17.400391 L32.097656,16.535156 L25.931641,0.6386719 C25.7825483,0.254002847 25.4125516,0 25,0 Z M25,3.7636719 L30.466797,17.861328 C30.609689,18.2291416 30.9554962,18.4785515 31.349609,18.498047 L46.359375,19.265625 L34.667969,28.826172 C34.3646054,29.0742114 34.2340493,29.4765679 34.333984,29.855469 L38.175781,44.369141 L25.541016,36.257812 C25.2114789,36.0458536 24.7885211,36.0458536 24.458984,36.257812 L11.824219,44.369141 L15.666016,29.855469 C15.7659507,29.4765679 15.6353946,29.0742114 15.332031,28.826172 L3.640625,19.265625 L18.650391,18.498047 C19.0445038,18.4785515 19.390311,18.2291416 19.533203,17.861328 L25,3.7636719 Z" id="Shape"></path>
469
+ </g>
470
+ </g>
471
+ </g>
458
472
  </svg>""",
459
473
  }
460
474
 
@@ -1160,6 +1160,60 @@ EXPECT_FAIL_TEXT = {
1160
1160
  "hi": "असफल परीक्षण इकाइयों की अधिकता जहां संयुक्त 'पास' इकाइयां होनी चाहिए थीं।",
1161
1161
  "el": "Υπέρβαση αποτυχημένων μονάδων δοκιμής όπου θα έπρεπε να υπάρχουν κοινές μονάδες 'επιτυχίας'.",
1162
1162
  },
1163
+ "specially_expectation_text": {
1164
+ "en": "Expect that special testing with a given function yields agreement.",
1165
+ "fr": "On s'attend à ce que les tests spéciaux avec une fonction donnée produisent un accord.",
1166
+ "de": "Erwarten Sie, dass spezielle Tests mit einer bestimmten Funktion Übereinstimmung ergeben.",
1167
+ "it": "Aspettati che i test speciali con una funzione data producano accordo.",
1168
+ "es": "Se espera que las pruebas especiales con una función dada produzcan concordancia.",
1169
+ "pt": "Espera-se que testes especiais com uma função dada produzam concordância.",
1170
+ "ro": "Se așteaptă ca testarea specială cu o funcție dată să producă acord.",
1171
+ "tr": "Belirli bir fonksiyonla özel testlerin uyum sağlamasını bekleyin.",
1172
+ "zh-Hans": "预期使用给定函数的特殊测试会产生一致结果。",
1173
+ "zh-Hant": "預期使用給定函數的特殊測試會產生一致結果。",
1174
+ "ja": "指定された関数による特別なテストが一致することを期待します。",
1175
+ "ko": "주어진 함수로 특수 테스트를 수행하면 일치함을 기대합니다.",
1176
+ "vi": "Kỳ vọng rằng kiểm tra đặc biệt với một hàm đã cho sẽ cho kết quả phù hợp.",
1177
+ "ru": "Ожидайте, что специальное тестирование с заданной функцией дает согласие.",
1178
+ "cs": "Očekává se, že speciální testování s danou funkcí přinese shodu.",
1179
+ "pl": "Oczekuj, że specjalne testowanie z użyciem danej funkcji przyniesie zgodność.",
1180
+ "da": "Forvent at speciel test med en given funktion giver overensstemmelse.",
1181
+ "sv": "Förvänta dig att speciell testning med en given funktion ger överensstämmelse.",
1182
+ "nb": "Forvent at spesiell testing med en gitt funksjon gir samsvar.",
1183
+ "nl": "Verwacht dat speciale tests met een gegeven functie overeenstemming opleveren.",
1184
+ "fi": "Odota, että erityinen testaus annetulla funktiolla tuottaa yhdenmukaisuuden.",
1185
+ "is": "Væntir þess að sérstök prófun með gefnu falli leiði til samræmis.",
1186
+ "ar": "توقع أن الاختبار الخاص بدالة معينة يؤدي إلى التوافق.",
1187
+ "hi": "अपेक्षा है कि दिए गए फ़ंक्शन के साथ विशेष परीक्षण सहमति प्रदान करेगा।",
1188
+ "el": "Αναμένεται ότι ο ειδικός έλεγχος με μια δεδομένη συνάρτηση αποδίδει συμφωνία.",
1189
+ },
1190
+ "specially_failure_text": {
1191
+ "en": "Exceedance of failed test units when performing specialized testing with a given function.",
1192
+ "fr": "Dépassement des unités de test ayant échoué lors de l'exécution de tests spécialisés avec une fonction donnée.",
1193
+ "de": "Überschreitung fehlgeschlagener Testeinheiten bei der Durchführung spezialisierter Tests mit einer bestimmten Funktion.",
1194
+ "it": "Superamento delle unità di test fallite durante l'esecuzione di test specializzati con una funzione data.",
1195
+ "es": "Se superó el número de unidades de prueba fallidas al realizar pruebas especializadas con una función dada.",
1196
+ "pt": "Excedeu o número de unidades de teste com falha ao realizar testes especializados com uma função dada.",
1197
+ "ro": "Depășirea unităților de test eșuate la efectuarea testării specializate cu o funcție dată.",
1198
+ "tr": "Belirli bir fonksiyonla özel testler yapılırken başarısız test birimlerinin aşılması.",
1199
+ "zh-Hans": "使用给定函数进行专门测试时,失败的测试单元数量超标。",
1200
+ "zh-Hant": "使用給定函數進行專門測試時,失敗的測試單元數量超標。",
1201
+ "ja": "指定された関数を使用した特殊テスト実行時のテスト単位の失敗の超過。",
1202
+ "ko": "주어진 함수로 특수 테스트를 수행할 때 실패한 테스트 단위 초과.",
1203
+ "vi": "Vượt quá số đơn vị kiểm tra thất bại khi thực hiện kiểm tra chuyên biệt với một hàm đã cho.",
1204
+ "ru": "Превышение неудачных тестовых единиц при выполнении специализированного тестирования с заданной функцией.",
1205
+ "cs": "Překročení počtu neúspěšných testovacích jednotek při provádění specializovaného testování s danou funkcí.",
1206
+ "pl": "Przekroczenie nieudanych jednostek testowych podczas przeprowadzania specjalistycznych testów z daną funkcją.",
1207
+ "da": "Overskridelse af fejlslagne testenheder ved udførelse af specialiseret test med en given funktion.",
1208
+ "sv": "Överskrider antalet misslyckade testenheter vid utförande av specialiserad testning med en given funktion.",
1209
+ "nb": "Overskridelse av mislykkede testenheter ved utførelse av spesialisert testing med en gitt funksjon.",
1210
+ "nl": "Overschrijding van mislukte testeenheden bij het uitvoeren van gespecialiseerde tests met een gegeven functie.",
1211
+ "fi": "Epäonnistuneiden testiyksiköiden ylitys suoritettaessa erikoistestejä annetulla funktiolla.",
1212
+ "is": "Of mörg misheppnuð próf við framkvæmd sérhæfðra prófana með gefnu falli.",
1213
+ "ar": "تجاوز وحدات الاختبار الفاشلة عند إجراء اختبار متخصص بدالة معينة.",
1214
+ "hi": "दिए गए फ़ंक्शन के साथ विशेष परीक्षण करते समय असफल परीक्षण इकाइयों की अधिकता।",
1215
+ "el": "Υπέρβαση αποτυχημένων μονάδων δοκιμής κατά την εκτέλεση εξειδικευμένων ελέγχων με μια δεδομένη συνάρτηση.",
1216
+ },
1163
1217
  }
1164
1218
 
1165
1219
 
@@ -2248,6 +2248,107 @@ class ConjointlyValidation:
2248
2248
  return results_tbl
2249
2249
 
2250
2250
 
2251
class SpeciallyValidation:
    """
    Run a user-supplied callable as a validation step and normalize what it returns.

    Parameters
    ----------
    data_tbl
        The table handed to the callable when the callable accepts an argument.
    expression
        The callable holding the custom validation logic.
    threshold
        Stored on the instance as-is; it is not read by the methods of this class.
    tbl_type
        The table type. When `None` or `"local"`, the type is detected from `data_tbl` with
        `_get_tbl_type()`; any other value is stored unchanged.
    """

    def __init__(self, data_tbl, expression, threshold, tbl_type):
        self.data_tbl = data_tbl
        self.expression = expression
        self.threshold = threshold

        # Detect the table type only when the caller did not supply a specific one
        if tbl_type in (None, "local"):
            self.tbl_type = _get_tbl_type(data=data_tbl)
        else:
            self.tbl_type = tbl_type

    # The annotation is quoted so that it is never evaluated at definition time
    def get_test_results(self) -> "object | list[bool]":
        """
        Evaluate the expression to get either a list of booleans or a results table.

        The callable is invoked without arguments when it declares no parameters; otherwise the
        data table is passed as its first argument. Any parameters after the first must be
        optional (have a default, or be `*args`/`**kwargs`), which lets users configure their
        function through default parameters.

        Returns
        -------
        object | list[bool]
            - `[result]` when the callable returns a single `bool`
            - the list itself when the callable returns a list containing only `bool` values
            - the returned table when the callable returns a pandas, Polars, or Ibis object; if
              the table's last column is boolean, that column is renamed to `pb_is_good_`

        Raises
        ------
        ValueError
            If the callable requires more than one argument.
        TypeError
            If the callable returns a list with non-boolean items, or an unsupported type.
        NotImplementedError
            If a returned table has a type that is not handled here.
        """

        import inspect

        expression = self.expression
        params = list(inspect.signature(expression).parameters.values())

        if not params:
            # No parameters: call without arguments
            result = expression()
        else:
            # Only the first parameter receives the data table, so every later parameter has to
            # be optional; a second *required* parameter matches neither allowed signature
            variadic_kinds = (
                inspect.Parameter.VAR_POSITIONAL,
                inspect.Parameter.VAR_KEYWORD,
            )
            extra_required = [
                p
                for p in params[1:]
                if p.default is inspect.Parameter.empty and p.kind not in variadic_kinds
            ]

            if extra_required:
                names = [p.name for p in params]
                raise ValueError(
                    f"The function provided to 'specially()' should have either no parameters or a "
                    f"single 'data' parameter, but it has {len(names)} parameters: {names}"
                )

            result = expression(self.data_tbl)

        # Determine if the object is a table by inspecting the string version of its type; this
        # avoids importing optional table libraries just to run `isinstance()` checks
        result_type = str(type(result))

        if any(lib in result_type for lib in ("pandas", "polars", "ibis")):
            return self._label_boolean_column(result)

        if isinstance(result, bool):
            # A single boolean is a single test unit
            return [result]

        if isinstance(result, list):
            if all(isinstance(x, bool) for x in result):
                return result

            raise TypeError("The result is not a list of booleans.")

        # Neither a table, a boolean, nor a list
        raise TypeError(  # pragma: no cover
            "The result is not a DataFrame or a list of booleans."
        )

    @staticmethod
    def _label_boolean_column(result):
        """Return `result` with its last column renamed to `pb_is_good_` if that column is boolean."""

        tbl_type = _get_tbl_type(data=result)

        if "pandas" in tbl_type:
            import pandas as pd

            last_col = result.iloc[:, -1]

            if last_col.dtype == bool or pd.api.types.is_bool_dtype(last_col):
                # Rebind instead of renaming in place so that the object returned by the user's
                # function is not mutated (this matches the Polars and Ibis branches)
                result = result.rename(columns={result.columns[-1]: "pb_is_good_"})

        elif "polars" in tbl_type:
            import polars as pl

            last_col_name = result.columns[-1]

            if result.schema[last_col_name] == pl.Boolean:
                result = result.rename({last_col_name: "pb_is_good_"})

        elif tbl_type in IBIS_BACKENDS:
            last_col_name = result.columns[-1]

            # The Ibis dtype is compared through its string form
            if str(result.schema()[last_col_name]) == "boolean":
                result = result.rename(pb_is_good_=last_col_name)

        else:  # pragma: no cover
            raise NotImplementedError(f"Support for {tbl_type} is not yet implemented")

        return result
2350
+
2351
+
2251
2352
  @dataclass
2252
2353
  class NumberOfTestUnits:
2253
2354
  """
pointblank/_typing.py CHANGED
@@ -1,26 +1,37 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import TypeAlias
4
-
5
- ## Absolute bounds, ie. plus or minus
6
- AbsoluteBounds: TypeAlias = tuple[int, int]
7
-
8
- ## Relative bounds, ie. plus or minus some percent
9
- RelativeBounds: TypeAlias = tuple[float, float]
10
-
11
- ## Tolerance afforded to some check
12
- Tolerance: TypeAlias = int | float | AbsoluteBounds | RelativeBounds
13
-
14
- ## Types for data segmentation
15
-
16
- ## Value(s) that can be used in a segment tuple
17
- SegmentValue: TypeAlias = str | list[str]
18
-
19
- ## (column, value(s)) format for segments
20
- SegmentTuple: TypeAlias = tuple[str, SegmentValue]
21
-
22
- ## Individual segment item (string or tuple)
23
- SegmentItem: TypeAlias = str | SegmentTuple
24
-
25
- ## Full segment specification options
26
- SegmentSpec: TypeAlias = str | SegmentTuple | list[SegmentItem]
3
import sys
from typing import List, Tuple, Union

# `TypeAlias` only exists in `typing` from Python 3.10 onward, so the explicit alias annotations
# are applied there and plain assignments are used on older interpreters
if sys.version_info >= (3, 10):
    from typing import TypeAlias

    # Python 3.10+ style type aliases
    AbsoluteBounds: TypeAlias = Tuple[int, int]
    RelativeBounds: TypeAlias = Tuple[float, float]
    Tolerance: TypeAlias = Union[int, float, AbsoluteBounds, RelativeBounds]
    SegmentValue: TypeAlias = Union[str, List[str]]
    SegmentTuple: TypeAlias = Tuple[str, SegmentValue]
    SegmentItem: TypeAlias = Union[str, SegmentTuple]
    SegmentSpec: TypeAlias = Union[str, SegmentTuple, List[SegmentItem]]
else:
    # Python 3.8 and 3.9 compatible type aliases
    AbsoluteBounds = Tuple[int, int]
    RelativeBounds = Tuple[float, float]
    Tolerance = Union[int, float, AbsoluteBounds, RelativeBounds]
    SegmentValue = Union[str, List[str]]
    SegmentTuple = Tuple[str, SegmentValue]
    SegmentItem = Union[str, SegmentTuple]
    SegmentSpec = Union[str, SegmentTuple, List[SegmentItem]]

# Descriptions attached to the aliases for better IDE support
_ALIAS_DOCS = (
    (AbsoluteBounds, "Absolute bounds (i.e., plus or minus)"),
    (RelativeBounds, "Relative bounds (i.e., plus or minus some percent)"),
    (Tolerance, "Tolerance (i.e., the allowed deviation)"),
    (SegmentValue, "Value(s) that can be used in a segment tuple"),
    (SegmentTuple, "(column, value(s)) format for segments"),
    (SegmentItem, "Individual segment item (string or tuple)"),
    (
        SegmentSpec,
        "Full segment specification options (i.e., all options for segment specification)",
    ),
)

for _alias, _doc in _ALIAS_DOCS:
    try:
        _alias.__doc__ = _doc
    except (AttributeError, TypeError):
        # Writing attributes on typing alias objects was never a documented feature, and newer
        # interpreters reject it for `Union` objects. The docstrings are purely informational,
        # so a failure to attach one must not break importing this module.
        pass

del _alias, _doc
pointblank/_utils.py CHANGED
@@ -490,6 +490,7 @@ def _get_api_text() -> str:
490
490
  "Validate.row_count_match",
491
491
  "Validate.col_count_match",
492
492
  "Validate.conjointly",
493
+ "Validate.specially",
493
494
  ]
494
495
 
495
496
  column_selection_exported = [
pointblank/actions.py CHANGED
@@ -225,7 +225,7 @@ def send_slack_notification(
225
225
  validation
226
226
  ```
227
227
 
228
- By placing the `notify_slack` function in the `Validate(actions=Actions(critical=))` argument,
228
+ By placing the `notify_slack()` function in the `Validate(actions=Actions(critical=))` argument,
229
229
  you can ensure that the notification is sent whenever the 'critical' threshold is reached (as
230
230
  set here, when 15% or more of the test units fail). The notification will include information
231
231
  about the validation step that triggered the alert.
@@ -255,7 +255,7 @@ def send_slack_notification(
255
255
  )
256
256
  ```
257
257
 
258
- In this case, the same `notify_slack` function is used, but it is placed in
258
+ In this case, the same `notify_slack()` function is used, but it is placed in
259
259
  `Validate(final_actions=FinalActions())`. This results in the summary notification being sent
260
260
  after all validation steps are completed, regardless of whether any steps failed or not.
261
261
 
@@ -708,8 +708,9 @@ FinalActions(*args)
708
708
  In this example, the `send_alert()` function is defined to check the validation summary for
709
709
  critical failures. If any are found, an alert message is printed to the console. The function is
710
710
  passed to the `FinalActions` class, which ensures it will be executed after all validation steps
711
- are complete. Note that we used the `get_validation_summary()` function to retrieve the summary
712
- of the validation results to help craft the alert message.
711
+ are complete. Note that we used the
712
+ [`get_validation_summary()`](`pointblank.get_validation_summary`) function to retrieve the
713
+ summary of the validation results to help craft the alert message.
713
714
 
714
715
  Multiple final actions can be provided in a sequence. They will be executed in the order they
715
716
  are specified after all validation steps have completed:
@@ -5177,6 +5178,306 @@ conjointly(self, *exprs: 'Callable', pre: 'Callable | None' = None, thresholds:
5177
5178
  information on how to use it with different table backends.
5178
5179
 
5179
5180
 
5181
+ specially(self, expr: 'Callable', pre: 'Callable | None' = None, thresholds: 'int | float | bool | tuple | dict | Thresholds' = None, actions: 'Actions | None' = None, brief: 'str | bool | None' = None, active: 'bool' = True) -> 'Validate'
5182
+
5183
+ Perform a specialized validation with customized logic.
5184
+
5185
+ The `specially()` validation method allows for the creation of specialized validation
5186
+ expressions that can be used to validate specific conditions or logic in the data. This
5187
+ method provides maximum flexibility by accepting a custom callable that encapsulates
5188
+ your validation logic.
5189
+
5190
+ The callable function can have one of two signatures:
5191
+
5192
+ - a function accepting a single parameter (the data table): `def validate(data): ...`
5193
+ - a function with no parameters: `def validate(): ...`
5194
+
5195
+ The second form is particularly useful for environment validations that don't need to
5196
+ inspect the data table.
5197
+
5198
+ The callable function must ultimately return one of:
5199
+
5200
+ 1. a single boolean value or boolean list
5201
+ 2. a table where the final column contains boolean values (column name is unimportant)
5202
+
5203
+ The validation will operate over the number of test units that is equal to the number of
5204
+ rows in the data table (if returning a table with boolean values). If returning a scalar
5205
+ boolean value, the validation will operate over a single test unit. For a return of a list
5206
+ of boolean values, the length of the list constitutes the number of test units.
5207
+
5208
+ Parameters
5209
+ ----------
5210
+ expr
5211
+ A callable function that defines the specialized validation logic. This function should:
5212
+ (1) accept the target data table as its single argument (though it may ignore it), or
5213
+ (2) take no parameters at all (for environment validations). The function must
5214
+ ultimately return boolean values representing validation results. Design your function
5215
+ to incorporate any custom parameters directly within the function itself using closure
5216
+ variables or default parameters.
5217
+ pre
5218
+ An optional preprocessing function or lambda to apply to the data table during
5219
+ interrogation. This function should take a table as input and return a modified table.
5220
+ Have a look at the *Preprocessing* section for more information on how to use this
5221
+ argument.
5222
+ thresholds
5223
+ Set threshold failure levels for reporting and reacting to exceedances of the levels.
5224
+ The thresholds are set at the step level and will override any global thresholds set in
5225
+ `Validate(thresholds=...)`. The default is `None`, which means that no thresholds will
5226
+ be set locally and global thresholds (if any) will take effect. Look at the *Thresholds*
5227
+ section for information on how to set threshold levels.
5228
+ actions
5229
+ Optional actions to take when the validation step meets or exceeds any set threshold
5230
+ levels. If provided, the [`Actions`](`pointblank.Actions`) class should be used to
5231
+ define the actions.
5232
+ brief
5233
+ An optional brief description of the validation step that will be displayed in the
5234
+ reporting table. You can use the templating elements like `"{step}"` to insert
5235
+ the step number, or `"{auto}"` to include an automatically generated brief. If `True`
5236
+ the entire brief will be automatically generated. If `None` (the default) then there
5237
+ won't be a brief.
5238
+ active
5239
+ A boolean value indicating whether the validation step should be active. Using `False`
5240
+ will make the validation step inactive (still reporting its presence and keeping indexes
5241
+ for the steps unchanged).
5242
+
5243
+ Returns
5244
+ -------
5245
+ Validate
5246
+ The `Validate` object with the added validation step.
5247
+
5248
+ Preprocessing
5249
+ -------------
5250
+ The `pre=` argument allows for a preprocessing function or lambda to be applied to the data
5251
+ table during interrogation. This function should take a table as input and return a modified
5252
+ table. This is useful for performing any necessary transformations or filtering on the data
5253
+ before the validation step is applied.
5254
+
5255
+ The preprocessing function can be any callable that takes a table as input and returns a
5256
+ modified table. For example, you could use a lambda function to filter the table based on
5257
+ certain criteria or to apply a transformation to the data. Regarding the lifetime of the
5258
+ transformed table, it only exists during the validation step and is not stored in the
5259
+ `Validate` object or used in subsequent validation steps.
5260
+
5261
+ Thresholds
5262
+ ----------
5263
+ The `thresholds=` parameter is used to set the failure-condition levels for the validation
5264
+ step. If they are set here at the step level, these thresholds will override any thresholds
5265
+ set at the global level in `Validate(thresholds=...)`.
5266
+
5267
+ There are three threshold levels: 'warning', 'error', and 'critical'. The threshold values
5268
+ can either be set as a proportion failing of all test units (a value between `0` and `1`),
5269
+ or the absolute number of failing test units (as an integer that's `1` or greater).
5270
+
5271
+ Thresholds can be defined using one of these input schemes:
5272
+
5273
+ 1. use the [`Thresholds`](`pointblank.Thresholds`) class (the most direct way to create
5274
+ thresholds)
5275
+ 2. provide a tuple of 1-3 values, where position `0` is the 'warning' level, position `1` is
5276
+ the 'error' level, and position `2` is the 'critical' level
5277
+ 3. create a dictionary of 1-3 value entries; the valid keys are 'warning', 'error', and
5278
+ 'critical'
5279
+ 4. a single integer/float value denoting absolute number or fraction of failing test units
5280
+ for the 'warning' level only
5281
+
5282
+ If the number of failing test units exceeds set thresholds, the validation step will be
5283
+ marked as 'warning', 'error', or 'critical'. Not all of the threshold levels need to be
5284
+ set; you're free to set any combination of them.
5285
+
5286
+ Aside from reporting failure conditions, thresholds can be used to determine the actions to
5287
+ take for each level of failure (using the `actions=` parameter).
5288
+
5289
+ Examples
5290
+ --------
5291
+ The `specially()` method offers maximum flexibility for validation, allowing you to create
5292
+ custom validation logic that fits your specific needs. The following examples demonstrate
5293
+ different patterns and use cases for this powerful validation approach.
5294
+
5295
+ ### Simple validation with direct table access
5296
+
5297
+ This example shows the most straightforward use case where we create a function that
5298
+ directly checks if the sum of two columns is positive.
5299
+
5300
+ ```python
5301
+ import pointblank as pb
5302
+ import polars as pl
5303
+
5304
+ simple_tbl = pl.DataFrame({
5305
+ "a": [5, 7, 1, 3, 9, 4],
5306
+ "b": [6, 3, 0, 5, 8, 2]
5307
+ })
5308
+
5309
+ # Simple function that validates directly on the table
5310
+ def validate_sum_positive(data):
5311
+ return data.select(pl.col("a") + pl.col("b") > 0)
5312
+
5313
+ (
5314
+ pb.Validate(data=simple_tbl)
5315
+ .specially(expr=validate_sum_positive)
5316
+ .interrogate()
5317
+ )
5318
+ ```
5319
+
5320
+ The function returns a Polars DataFrame with a single boolean column indicating whether
5321
+ the sum of columns `a` and `b` is positive for each row. Each row in the resulting DataFrame
5322
+ is a distinct test unit. This pattern works well for simple validations where you don't need
5323
+ configurable parameters.
5324
+
5325
+ ### Advanced validation with closure variables for parameters
5326
+
5327
+ When you need to make your validation configurable, you can use the function factory pattern
5328
+ (also known as closures) to create parameterized validations:
5329
+
5330
+ ```python
5331
+ # Create a parameterized validation function using closures
5332
+ def make_column_ratio_validator(col1, col2, min_ratio):
5333
+ def validate_column_ratio(data):
5334
+ return data.select((pl.col(col1) / pl.col(col2)) > min_ratio)
5335
+ return validate_column_ratio
5336
+
5337
+ (
5338
+ pb.Validate(data=simple_tbl)
5339
+ .specially(
5340
+ expr=make_column_ratio_validator(col1="a", col2="b", min_ratio=0.5)
5341
+ )
5342
+ .interrogate()
5343
+ )
5344
+ ```
5345
+
5346
+ This approach allows you to create reusable validation functions that can be configured with
5347
+ different parameters without modifying the function itself.
5348
+
5349
+ ### Validation function returning a list of booleans
5350
+
5351
+ This example demonstrates how to create a validation function that returns a list of boolean
5352
+ values, where each element represents a separate test unit:
5353
+
5354
+ ```python
5355
+ import pointblank as pb
5356
+ import polars as pl
5357
+ import random
5358
+
5359
+ # Create sample data
5360
+ transaction_tbl = pl.DataFrame({
5361
+ "transaction_id": [f"TX{i:04d}" for i in range(1, 11)],
5362
+ "amount": [120.50, 85.25, 50.00, 240.75, 35.20, 150.00, 85.25, 65.00, 210.75, 90.50],
5363
+ "category": ["food", "shopping", "entertainment", "travel", "utilities",
5364
+ "food", "shopping", "entertainment", "travel", "utilities"]
5365
+ })
5366
+
5367
+ # Define a validation function that returns a list of booleans
5368
+ def validate_transaction_rules(data):
5369
+ # Create a list to store individual test results
5370
+ test_results = []
5371
+
5372
+ # Check each row individually against multiple business rules
5373
+ for row in data.iter_rows(named=True):
5374
+ # Rule: transaction IDs must start with "TX" and be 6 chars long
5375
+ valid_id = row["transaction_id"].startswith("TX") and len(row["transaction_id"]) == 6
5376
+
5377
+ # Rule: Amounts must be appropriate for their category
5378
+ valid_amount = True
5379
+ if row["category"] == "food" and (row["amount"] < 10 or row["amount"] > 200):
5380
+ valid_amount = False
5381
+ elif row["category"] == "utilities" and (row["amount"] < 20 or row["amount"] > 300):
5382
+ valid_amount = False
5383
+ elif row["category"] == "entertainment" and row["amount"] > 100:
5384
+ valid_amount = False
5385
+
5386
+ # A transaction passes if it satisfies both rules
5387
+ test_results.append(valid_id and valid_amount)
5388
+
5389
+ return test_results
5390
+
5391
+ (
5392
+ pb.Validate(data=transaction_tbl)
5393
+ .specially(
5394
+ expr=validate_transaction_rules,
5395
+ brief="Validate transaction IDs and amounts by category."
5396
+ )
5397
+ .interrogate()
5398
+ )
5399
+ ```
5400
+
5401
+ This example shows how to create a validation function that applies multiple business rules
5402
+ to each row and returns a list of boolean results. Each boolean in the list represents a
5403
+ separate test unit, and a test unit passes only if all rules are satisfied for a given row.
5404
+
5405
+ The function iterates through each row in the data table, checking:
5406
+
5407
+ 1. if transaction IDs follow the required format
5408
+ 2. if transaction amounts are appropriate for their respective categories
5409
+
5410
+ This approach is powerful when you need to apply complex, conditional logic that can't be
5411
+ easily expressed using the built-in validation functions.
5412
+
5413
+ ### Table-level validation returning a single boolean
5414
+
5415
+ Sometimes you need to validate properties of the entire table rather than row-by-row. In
5416
+ these cases, your function can return a single boolean value:
5417
+
5418
+ ```python
5419
+ def validate_table_properties(data):
5420
+ # Check if table has at least one row with column 'a' > 10
5421
+ has_large_values = data.filter(pl.col("a") > 10).height > 0
5422
+
5423
+ # Check if mean of column 'b' is positive
5424
+ has_positive_mean = data.select(pl.mean("b")).item() > 0
5425
+
5426
+ # Return a single boolean for the entire table
5427
+ return has_large_values and has_positive_mean
5428
+
5429
+ (
5430
+ pb.Validate(data=simple_tbl)
5431
+ .specially(expr=validate_table_properties)
5432
+ .interrogate()
5433
+ )
5434
+ ```
5435
+
5436
+ This example demonstrates how to perform multiple checks on the table as a whole and combine
5437
+ them into a single validation result.
5438
+
5439
+ ### Environment validation that doesn't use the data table
5440
+
5441
+ The `specially()` validation method can even be used to validate aspects of your environment
5442
+ that are completely independent of the data:
5443
+
5444
+ ```python
5445
+ def validate_pointblank_version():
5446
+ try:
5447
+ import importlib.metadata
5448
+ version = importlib.metadata.version("pointblank")
5449
+ version_parts = version.split(".")
5450
+
5451
+ # Get major and minor components regardless of how many parts there are
5452
+ major = int(version_parts[0])
5453
+ minor = int(version_parts[1])
5454
+
5455
+ # Check both major and minor components for version `0.9+`
5456
+ return (major > 0) or (major == 0 and minor >= 9)
5457
+
5458
+ except Exception as e:
5459
+ # More specific error handling could be added here
5460
+ print(f"Version check failed: {e}")
5461
+ return False
5462
+
5463
+ (
5464
+ pb.Validate(data=simple_tbl)
5465
+ .specially(
5466
+ expr=validate_pointblank_version,
5467
+ brief="Check Pointblank version `>=0.9.0`."
5468
+ )
5469
+ .interrogate()
5470
+ )
5471
+ ```
5472
+
5473
+ This pattern shows how to validate external dependencies or environment conditions as part
5474
+ of your validation workflow. Notice that the function doesn't take any parameters at all,
5475
+ which makes it cleaner when the validation doesn't need to access the data table.
5476
+
5477
+ By combining these patterns, you can create sophisticated validation workflows that address
5478
+ virtually any data quality requirement in your organization.
5479
+
5480
+
5180
5481
 
5181
5482
  ## The Column Selection family
5182
5483
 
@@ -9160,7 +9461,7 @@ send_slack_notification(webhook_url: 'str | None' = None, step_msg: 'str | None'
9160
9461
  validation
9161
9462
  ```
9162
9463
 
9163
- By placing the `notify_slack` function in the `Validate(actions=Actions(critical=))` argument,
9464
+ By passing the `notify_slack()` function via `Validate(actions=Actions(critical=))`,
9164
9465
  you can ensure that the notification is sent whenever the 'critical' threshold is reached (as
9165
9466
  set here, when 15% or more of the test units fail). The notification will include information
9166
9467
  about the validation step that triggered the alert.
@@ -9190,7 +9491,7 @@ send_slack_notification(webhook_url: 'str | None' = None, step_msg: 'str | None'
9190
9491
  )
9191
9492
  ```
9192
9493
 
9193
- In this case, the same `notify_slack` function is used, but it is placed in
9494
+ In this case, the same `notify_slack()` function is used, but it is placed in
9194
9495
  `Validate(final_actions=FinalActions())`. This results in the summary notification being sent
9195
9496
  after all validation steps are completed, regardless of whether any steps failed or not.
9196
9497
 
Binary file
Binary file
pointblank/thresholds.py CHANGED
@@ -574,8 +574,9 @@ class FinalActions:
574
574
  In this example, the `send_alert()` function is defined to check the validation summary for
575
575
  critical failures. If any are found, an alert message is printed to the console. The function is
576
576
  passed to the `FinalActions` class, which ensures it will be executed after all validation steps
577
- are complete. Note that we used the `get_validation_summary()` function to retrieve the summary
578
- of the validation results to help craft the alert message.
577
+ are complete. Note that we used the
578
+ [`get_validation_summary()`](`pointblank.get_validation_summary`) function to retrieve the
579
+ summary of the validation results to help craft the alert message.
579
580
 
580
581
  Multiple final actions can be provided in a sequence. They will be executed in the order they
581
582
  are specified after all validation steps have completed: