PyPI - pointblank - Versions diffs - 0.9.1__py3-none-any.whl → 0.9.4__py3-none-any.whl - Mend

pointblank 0.9.1py3-none-any.whl → 0.9.4py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

pointblank/_constants.py +14 -0
pointblank/_constants_translations.py +54 -0
pointblank/_interrogation.py +101 -0
pointblank/_typing.py +35 -24
pointblank/_utils.py +1 -0
pointblank/actions.py +2 -2
pointblank/data/api-docs.txt +305 -4
pointblank/data/global_sales-duckdb.zip +0 -0
pointblank/data/global_sales.zip +0 -0
pointblank/thresholds.py +3 -2
pointblank/validate.py +461 -20
{pointblank-0.9.1.dist-info → pointblank-0.9.4.dist-info}/METADATA +1 -1
{pointblank-0.9.1.dist-info → pointblank-0.9.4.dist-info}/RECORD +16 -14
{pointblank-0.9.1.dist-info → pointblank-0.9.4.dist-info}/WHEEL +1 -1
{pointblank-0.9.1.dist-info → pointblank-0.9.4.dist-info}/licenses/LICENSE +0 -0
{pointblank-0.9.1.dist-info → pointblank-0.9.4.dist-info}/top_level.txt +0 -0

pointblank/_constants.py CHANGED Viewed

@@ -44,6 +44,7 @@ ASSERTION_TYPE_METHOD_MAP = {
     "row_count_match": "row_count_match",
     "col_count_match": "col_count_match",
     "conjointly": "conjointly",
+    "specially": "specially",
 }
 METHOD_CATEGORY_MAP = {
@@ -69,6 +70,7 @@ METHOD_CATEGORY_MAP = {
     "row_count_match": "ROW_COUNT_MATCH",
     "col_count_match": "COL_COUNT_MATCH",
     "conjointly": "CONJOINTLY",
+    "specially": "SPECIALLY",
 }
 COMPARISON_OPERATORS = {
@@ -455,6 +457,18 @@ SVG_ICONS_FOR_ASSERTION_TYPES = {
             <path d="M51.8485976,12 L15.5758703,12 C13.9986329,12 12.712234,13.2863989 12.712234,14.8636364 L12.712234,51.1363636 C12.712234,52.7136011 13.9986329,54 15.5758703,54 L51.8485976,54 C53.4258351,54 54.712234,52.7136011 54.712234,51.1363636 L54.712234,14.8636364 C54.712234,13.2863989 53.4258351,12 51.8485976,12 Z M37.072234,44 L20.272234,44 L20.272234,42 L37.072234,42 L37.072234,44 Z M37.072234,34 L20.272234,34 L20.272234,32 L37.072234,32 L37.072234,34 Z M37.072234,24 L20.272234,24 L20.272234,22 L37.072234,22 L37.072234,24 Z M47.9233279,41.773438 L45.5706719,45.773438 C45.4427029,45.996094 45.239265,46.148438 45.0095779,46.1875 C44.9702029,46.195313 44.9275469,46.199219 44.88489,46.199219 C44.70114,46.199219 44.5206719,46.128906 44.373015,45.992188 L42.1877029,43.992188 C41.8202029,43.65625 41.7512969,43.027344 42.033484,42.589844 C42.3156719,42.152344 42.8439529,42.070313 43.2114529,42.40625 L44.697859,43.769531 L46.548484,40.625 C46.814265,40.171875 47.335984,40.0625 47.716609,40.378906 C48.097234,40.695313 48.189109,41.320313 47.9233279,41.773438 Z M47.9233279,31.773438 L45.5706719,35.773438 C45.4427029,35.996094 45.239265,36.148438 45.0095779,36.1875 C44.9702029,36.195313 44.9275469,36.199219 44.88489,36.199219 C44.70114,36.199219 44.5206719,36.128906 44.373015,35.992188 L42.1877029,33.992188 C41.8202029,33.65625 41.7512969,33.027344 42.033484,32.589844 C42.3156719,32.152344 42.8439529,32.070313 43.2114529,32.40625 L44.697859,33.769531 L46.548484,30.628906 C46.814265,30.175781 47.335984,30.0625 47.716609,30.382813 C48.097234,30.699219 48.189109,31.320313 47.9233279,31.773438 Z M47.9233279,21.773438 L45.5706719,25.773438 C45.4427029,25.996094 45.239265,26.148438 45.0095779,26.1875 C44.9702029,26.195313 44.9275469,26.199219 44.88489,26.199219 C44.70114,26.199219 44.5206719,26.128906 44.373015,25.992188 L42.1877029,23.992188 C41.8202029,23.65625 41.7512969,23.027344 42.033484,22.589844 C42.3156719,22.152344 42.8439529,22.070313 43.2114529,22.40625 
L44.697859,23.769531 L46.548484,20.625 C46.814265,20.171875 47.335984,20.0625 47.716609,20.378906 C48.097234,20.699219 48.189109,21.320313 47.9233279,21.773438 Z" id="conjoint" fill="#000000" fill-rule="nonzero"></path>
         </g>
     </g>
+</svg>""",
+    "specially": """<?xml version="1.0" encoding="UTF-8"?>
+<svg width="67px" height="67px" viewBox="0 0 67 67" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+    <title>specially</title>
+    <g id="All-Icons" stroke="none" stroke-width="1" fill="none" fill-rule="evenodd">
+        <g id="specially" transform="translate(0.000000, 0.206897)">
+            <path d="M56.712234,1 C59.1975153,1 61.4475153,2.00735931 63.076195,3.63603897 C64.7048747,5.26471863 65.712234,7.51471863 65.712234,10 L65.712234,10 L65.712234,65 L10.712234,65 C8.22695259,65 5.97695259,63.9926407 4.34827294,62.363961 C2.71959328,60.7352814 1.71223397,58.4852814 1.71223397,56 L1.71223397,56 L1.71223397,10 C1.71223397,7.51471863 2.71959328,5.26471863 4.34827294,3.63603897 C5.97695259,2.00735931 8.22695259,1 10.712234,1 L10.712234,1 Z" id="rectangle" stroke="#000000" stroke-width="2" fill="#FFFFFF"></path>
+            <g id="star" transform="translate(8.500000, 8.500000)" fill="#000000" fill-rule="nonzero">
+                <path d="M25,0 C24.5874484,0 24.2174517,0.254002847 24.068359,0.6386719 L17.902344,16.535156 L0.94921875,17.400391 C0.536124409,17.4213013 0.17852458,17.6943523 0.0495446395,18.0873515 C-0.0794353012,18.4803507 0.046820452,18.9122002 0.3671875,19.173828 L13.568359,29.966797 L9.2324219,46.34375 C9.12646963,46.7428009 9.27663058,47.1659433 9.61042698,47.4089402 C9.94422338,47.651937 10.3930345,47.664834 10.740234,47.441406 L25,38.289062 L39.259766,47.441406 C39.6069655,47.6648339 40.0557766,47.6519369 40.3895729,47.4089401 C40.7233693,47.1659432 40.8735302,46.7428009 40.767578,46.34375 L36.431641,29.966797 L49.632812,19.173828 C49.953179,18.9122002 50.0794348,18.4803507 49.9504549,18.0873516 C49.821475,17.6943524 49.4638753,17.4213014 49.050781,17.400391 L32.097656,16.535156 L25.931641,0.6386719 C25.7825483,0.254002847 25.4125516,0 25,0 Z M25,3.7636719 L30.466797,17.861328 C30.609689,18.2291416 30.9554962,18.4785515 31.349609,18.498047 L46.359375,19.265625 L34.667969,28.826172 C34.3646054,29.0742114 34.2340493,29.4765679 34.333984,29.855469 L38.175781,44.369141 L25.541016,36.257812 C25.2114789,36.0458536 24.7885211,36.0458536 24.458984,36.257812 L11.824219,44.369141 L15.666016,29.855469 C15.7659507,29.4765679 15.6353946,29.0742114 15.332031,28.826172 L3.640625,19.265625 L18.650391,18.498047 C19.0445038,18.4785515 19.390311,18.2291416 19.533203,17.861328 L25,3.7636719 Z" id="Shape"></path>
+            </g>
+        </g>
+    </g>
 </svg>""",
 }

pointblank/_constants_translations.py CHANGED Viewed

@@ -1160,6 +1160,60 @@ EXPECT_FAIL_TEXT = {
         "hi": "असफल परीक्षण इकाइयों की अधिकता जहां संयुक्त 'पास' इकाइयां होनी चाहिए थीं।",
         "el": "Υπέρβαση αποτυχημένων μονάδων δοκιμής όπου θα έπρεπε να υπάρχουν κοινές μονάδες 'επιτυχίας'.",
     },
+    "specially_expectation_text": {
+        "en": "Expect that special testing with a given function yields agreement.",
+        "fr": "On s'attend à ce que les tests spéciaux avec une fonction donnée produisent un accord.",
+        "de": "Erwarten Sie, dass spezielle Tests mit einer bestimmten Funktion Übereinstimmung ergeben.",
+        "it": "Aspettati che i test speciali con una funzione data producano accordo.",
+        "es": "Se espera que las pruebas especiales con una función dada produzcan concordancia.",
+        "pt": "Espera-se que testes especiais com uma função dada produzam concordância.",
+        "ro": "Se așteaptă ca testarea specială cu o funcție dată să producă acord.",
+        "tr": "Belirli bir fonksiyonla özel testlerin uyum sağlamasını bekleyin.",
+        "zh-Hans": "预期使用给定函数的特殊测试会产生一致结果。",
+        "zh-Hant": "預期使用給定函數的特殊測試會產生一致結果。",
+        "ja": "指定された関数による特別なテストが一致することを期待します。",
+        "ko": "주어진 함수로 특수 테스트를 수행하면 일치함을 기대합니다.",
+        "vi": "Kỳ vọng rằng kiểm tra đặc biệt với một hàm đã cho sẽ cho kết quả phù hợp.",
+        "ru": "Ожидайте, что специальное тестирование с заданной функцией дает согласие.",
+        "cs": "Očekává se, že speciální testování s danou funkcí přinese shodu.",
+        "pl": "Oczekuj, że specjalne testowanie z użyciem danej funkcji przyniesie zgodność.",
+        "da": "Forvent at speciel test med en given funktion giver overensstemmelse.",
+        "sv": "Förvänta dig att speciell testning med en given funktion ger överensstämmelse.",
+        "nb": "Forvent at spesiell testing med en gitt funksjon gir samsvar.",
+        "nl": "Verwacht dat speciale tests met een gegeven functie overeenstemming opleveren.",
+        "fi": "Odota, että erityinen testaus annetulla funktiolla tuottaa yhdenmukaisuuden.",
+        "is": "Væntir þess að sérstök prófun með gefnu falli leiði til samræmis.",
+        "ar": "توقع أن الاختبار الخاص بدالة معينة يؤدي إلى التوافق.",
+        "hi": "अपेक्षा है कि दिए गए फ़ंक्शन के साथ विशेष परीक्षण सहमति प्रदान करेगा।",
+        "el": "Αναμένεται ότι ο ειδικός έλεγχος με μια δεδομένη συνάρτηση αποδίδει συμφωνία.",
+    },
+    "specially_failure_text": {
+        "en": "Exceedance of failed test units when performing specialized testing with a given function.",
+        "fr": "Dépassement des unités de test ayant échoué lors de l'exécution de tests spécialisés avec une fonction donnée.",
+        "de": "Überschreitung fehlgeschlagener Testeinheiten bei der Durchführung spezialisierter Tests mit einer bestimmten Funktion.",
+        "it": "Superamento delle unità di test fallite durante l'esecuzione di test specializzati con una funzione data.",
+        "es": "Se superó el número de unidades de prueba fallidas al realizar pruebas especializadas con una función dada.",
+        "pt": "Excedeu o número de unidades de teste com falha ao realizar testes especializados com uma função dada.",
+        "ro": "Depășirea unităților de test eșuate la efectuarea testării specializate cu o funcție dată.",
+        "tr": "Belirli bir fonksiyonla özel testler yapılırken başarısız test birimlerinin aşılması.",
+        "zh-Hans": "使用给定函数进行专门测试时，失败的测试单元数量超标。",
+        "zh-Hant": "使用給定函數進行專門測試時，失敗的測試單元數量超標。",
+        "ja": "指定された関数を使用した特殊テスト実行時のテスト単位の失敗の超過。",
+        "ko": "주어진 함수로 특수 테스트를 수행할 때 실패한 테스트 단위 초과.",
+        "vi": "Vượt quá số đơn vị kiểm tra thất bại khi thực hiện kiểm tra chuyên biệt với một hàm đã cho.",
+        "ru": "Превышение неудачных тестовых единиц при выполнении специализированного тестирования с заданной функцией.",
+        "cs": "Překročení počtu neúspěšných testovacích jednotek při provádění specializovaného testování s danou funkcí.",
+        "pl": "Przekroczenie nieudanych jednostek testowych podczas przeprowadzania specjalistycznych testów z daną funkcją.",
+        "da": "Overskridelse af fejlslagne testenheder ved udførelse af specialiseret test med en given funktion.",
+        "sv": "Överskrider antalet misslyckade testenheter vid utförande av specialiserad testning med en given funktion.",
+        "nb": "Overskridelse av mislykkede testenheter ved utførelse av spesialisert testing med en gitt funksjon.",
+        "nl": "Overschrijding van mislukte testeenheden bij het uitvoeren van gespecialiseerde tests met een gegeven functie.",
+        "fi": "Epäonnistuneiden testiyksiköiden ylitys suoritettaessa erikoistestejä annetulla funktiolla.",
+        "is": "Of mörg misheppnuð próf við framkvæmd sérhæfðra prófana með gefnu falli.",
+        "ar": "تجاوز وحدات الاختبار الفاشلة عند إجراء اختبار متخصص بدالة معينة.",
+        "hi": "दिए गए फ़ंक्शन के साथ विशेष परीक्षण करते समय असफल परीक्षण इकाइयों की अधिकता।",
+        "el": "Υπέρβαση αποτυχημένων μονάδων δοκιμής κατά την εκτέλεση εξειδικευμένων ελέγχων με μια δεδομένη συνάρτηση.",
+    },
 }

pointblank/_interrogation.py CHANGED Viewed

@@ -2248,6 +2248,107 @@ class ConjointlyValidation:
         return results_tbl
+def _is_bool_scalar(value: object) -> bool:
+    """Return `True` if `value` is a Python `bool` or a NumPy boolean scalar."""
+    if isinstance(value, bool):
+        return True
+    # NumPy is not imported here, so its boolean scalar is recognized by type name instead
+    # (the scalar type is named `bool_` in NumPy 1.x and `bool` in NumPy 2.x)
+    value_type = type(value)
+    return value_type.__module__ == "numpy" and value_type.__name__ in ("bool", "bool_")
+class SpeciallyValidation:
+    """
+    Run a user-supplied callable and normalize what it returns for a `specially()` step.
+    Parameters
+    ----------
+    data_tbl
+        The table handed to the callable when it accepts a data argument.
+    expression
+        The callable holding the custom validation logic.
+    threshold
+        Stored on the instance as `threshold`; it is not used by this class itself.
+    tbl_type
+        The table type. When it is `None` or `"local"` the type is detected from `data_tbl`
+        with `_get_tbl_type()`; any other value is stored as given.
+    """
+    def __init__(self, data_tbl, expression, threshold, tbl_type):
+        self.data_tbl = data_tbl
+        self.expression = expression
+        self.threshold = threshold
+        # Detect the table type only when the caller did not supply a specific one
+        if tbl_type in (None, "local"):
+            self.tbl_type = _get_tbl_type(data=data_tbl)
+        else:
+            self.tbl_type = tbl_type
+    def get_test_results(self) -> list[bool] | object:
+        """
+        Call the expression and return either a list of booleans or a results table.
+        The expression is called with no arguments when it declares no parameters. It is called
+        with the data table when it declares a single parameter, or when its first parameter is a
+        required positional one and every other parameter is optional (has a default, or is
+        `*args`/`**kwargs`).
+        Returns
+        -------
+        list[bool] | object
+            A list with one boolean per test unit (a scalar boolean becomes a one-item list), or
+            the table returned by the expression. If the last column of that table is boolean,
+            the column is renamed to `pb_is_good_`.
+        Raises
+        ------
+        ValueError
+            If the parameters of the expression fit neither supported calling convention.
+        TypeError
+            If the expression returns something other than a table, a boolean, or a list of
+            booleans.
+        NotImplementedError
+            If the returned table is of a type that is not handled here.
+        """
+        import inspect
+        expression = self.expression
+        parameters = list(inspect.signature(expression).parameters.values())
+        empty = inspect.Parameter.empty
+        positional_kinds = (
+            inspect.Parameter.POSITIONAL_ONLY,
+            inspect.Parameter.POSITIONAL_OR_KEYWORD,
+        )
+        variadic_kinds = (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD)
+        # Execute the function based on its signature
+        if not parameters:
+            # No parameters: call without arguments
+            result = expression()
+        elif len(parameters) == 1:
+            # One parameter: pass the data table
+            result = expression(self.data_tbl)
+        elif (
+            parameters[0].kind in positional_kinds
+            and parameters[0].default is empty
+            and all(p.kind in variadic_kinds or p.default is not empty for p in parameters[1:])
+        ):
+            # A required leading `data` parameter followed only by optional ones: the extra
+            # parameters carry configuration through their defaults, so only the table is passed
+            result = expression(self.data_tbl)
+        else:
+            # Anything else doesn't match an allowed signature
+            params = [p.name for p in parameters]
+            raise ValueError(
+                f"The function provided to 'specially()' should have either no parameters or a "
+                f"single 'data' parameter, but it has {len(params)} parameters: {params}"
+            )
+        # Determine if the object is a DataFrame by inspecting the string version of its type
+        result_type = str(type(result))
+        if any(library in result_type for library in ("pandas", "polars", "ibis")):
+            # Get the type of the table
+            tbl_type = _get_tbl_type(data=result)
+            # NOTE(review): a table whose last column is not boolean is returned unchanged (no
+            # `pb_is_good_` column); confirm that downstream code reports this clearly
+            if "pandas" in tbl_type:
+                import pandas as pd
+                if pd.api.types.is_bool_dtype(result.iloc[:, -1]):
+                    # Rename into a new frame rather than in place: the expression may have
+                    # returned the caller's own table, which must not be modified
+                    result = result.rename(columns={result.columns[-1]: "pb_is_good_"})
+            elif "polars" in tbl_type:
+                import polars as pl
+                last_col_name = result.columns[-1]
+                if result.schema[last_col_name] == pl.Boolean:
+                    result = result.rename({last_col_name: "pb_is_good_"})
+            elif tbl_type in IBIS_BACKENDS:
+                last_col_name = result.columns[-1]
+                if str(result.schema()[last_col_name]) == "boolean":
+                    result = result.rename(pb_is_good_=last_col_name)
+            else:  # pragma: no cover
+                raise NotImplementedError(f"Support for {tbl_type} is not yet implemented")
+            return result
+        if _is_bool_scalar(result):
+            # A single boolean is a single test unit
+            return [bool(result)]
+        if isinstance(result, list):
+            if all(_is_bool_scalar(item) for item in result):
+                # Normalize NumPy boolean scalars to plain `bool` values
+                return [bool(item) for item in result]
+            raise TypeError("The result is not a list of booleans.")
+        # Neither a table, a boolean, nor a list
+        raise TypeError("The result is not a DataFrame or a list of booleans.")  # pragma: no cover
 @dataclass
 class NumberOfTestUnits:
     """

pointblank/_typing.py CHANGED Viewed

@@ -1,26 +1,37 @@
 from __future__ import annotations
-from typing import TypeAlias
-## Absolute bounds, ie. plus or minus
-AbsoluteBounds: TypeAlias = tuple[int, int]
-## Relative bounds, ie. plus or minus some percent
-RelativeBounds: TypeAlias = tuple[float, float]
-## Tolerance afforded to some check
-Tolerance: TypeAlias = int | float | AbsoluteBounds | RelativeBounds
-## Types for data segmentation
-## Value(s) that can be used in a segment tuple
-SegmentValue: TypeAlias = str | list[str]
-## (column, value(s)) format for segments
-SegmentTuple: TypeAlias = tuple[str, SegmentValue]
-## Individual segment item (string or tuple)
-SegmentItem: TypeAlias = str | SegmentTuple
-## Full segment specification options
-SegmentSpec: TypeAlias = str | SegmentTuple | list[SegmentItem]
+import sys
+from typing import List, Tuple, Union
+# Type aliases shared across the package. The two branches define the same aliases; the only
+# difference is the explicit `TypeAlias` marker, which `typing` provides from Python 3.10 on.
+if sys.version_info >= (3, 10):
+    from typing import TypeAlias
+    # Python 3.10+ style type aliases
+    AbsoluteBounds: TypeAlias = Tuple[int, int]
+    RelativeBounds: TypeAlias = Tuple[float, float]
+    Tolerance: TypeAlias = Union[int, float, AbsoluteBounds, RelativeBounds]
+    SegmentValue: TypeAlias = Union[str, List[str]]
+    SegmentTuple: TypeAlias = Tuple[str, SegmentValue]
+    SegmentItem: TypeAlias = Union[str, SegmentTuple]
+    SegmentSpec: TypeAlias = Union[str, SegmentTuple, List[SegmentItem]]
+else:
+    # Python 3.8 and 3.9 compatible type aliases
+    AbsoluteBounds = Tuple[int, int]
+    RelativeBounds = Tuple[float, float]
+    Tolerance = Union[int, float, AbsoluteBounds, RelativeBounds]
+    SegmentValue = Union[str, List[str]]
+    SegmentTuple = Tuple[str, SegmentValue]
+    SegmentItem = Union[str, SegmentTuple]
+    SegmentSpec = Union[str, SegmentTuple, List[SegmentItem]]
+def _set_alias_doc(alias: object, text: str) -> None:
+    """Attach `text` as `alias.__doc__` where the runtime allows it; otherwise do nothing."""
+    try:
+        alias.__doc__ = text
+    except (AttributeError, TypeError):
+        # Setting attributes on alias objects was never a documented feature, and Python 3.14+
+        # rejects it for `Union[...]` objects. The docstrings are only an IDE nicety, so a
+        # refusal must not stop this module from importing.
+        pass
+# Add docstrings for better IDE support
+_set_alias_doc(AbsoluteBounds, "Absolute bounds (i.e., plus or minus)")
+_set_alias_doc(RelativeBounds, "Relative bounds (i.e., plus or minus some percent)")
+_set_alias_doc(Tolerance, "Tolerance (i.e., the allowed deviation)")
+_set_alias_doc(SegmentValue, "Value(s) that can be used in a segment tuple")
+_set_alias_doc(SegmentTuple, "(column, value(s)) format for segments")
+_set_alias_doc(SegmentItem, "Individual segment item (string or tuple)")
+_set_alias_doc(
+    SegmentSpec,
+    "Full segment specification options (i.e., all options for segment specification)",
+)

pointblank/_utils.py CHANGED Viewed

@@ -490,6 +490,7 @@ def _get_api_text() -> str:
         "Validate.row_count_match",
         "Validate.col_count_match",
         "Validate.conjointly",
+        "Validate.specially",
     ]
     column_selection_exported = [

pointblank/actions.py CHANGED Viewed

@@ -225,7 +225,7 @@ def send_slack_notification(
     validation
     ```
-    By placing the `notify_slack` function in the `Validate(actions=Actions(critical=))` argument,
+    By placing the `notify_slack()` function in the `Validate(actions=Actions(critical=))` argument,
     you can ensure that the notification is sent whenever the 'critical' threshold is reached (as
     set here, when 15% or more of the test units fail). The notification will include information
     about the validation step that triggered the alert.
@@ -255,7 +255,7 @@ def send_slack_notification(
     )
     ```
-    In this case, the same `notify_slack` function is used, but it is placed in
+    In this case, the same `notify_slack()` function is used, but it is placed in
     `Validate(final_actions=FinalActions())`. This results in the summary notification being sent
     after all validation steps are completed, regardless of whether any steps failed or not.

pointblank/data/api-docs.txt CHANGED Viewed

@@ -708,8 +708,9 @@ FinalActions(*args)
     In this example, the `send_alert()` function is defined to check the validation summary for
     critical failures. If any are found, an alert message is printed to the console. The function is
     passed to the `FinalActions` class, which ensures it will be executed after all validation steps
-    are complete. Note that we used the `get_validation_summary()` function to retrieve the summary
-    of the validation results to help craft the alert message.
+    are complete. Note that we used the
+    [`get_validation_summary()`](`pointblank.get_validation_summary`) function to retrieve the
+    summary of the validation results to help craft the alert message.
     Multiple final actions can be provided in a sequence. They will be executed in the order they
     are specified after all validation steps have completed:
@@ -5177,6 +5178,306 @@ conjointly(self, *exprs: 'Callable', pre: 'Callable | None' = None, thresholds:
         information on how to use it with different table backends.
+specially(self, expr: 'Callable', pre: 'Callable | None' = None, thresholds: 'int | float | bool | tuple | dict | Thresholds' = None, actions: 'Actions | None' = None, brief: 'str | bool | None' = None, active: 'bool' = True) -> 'Validate'
+        Perform a specialized validation with customized logic.
+        The `specially()` validation method allows for the creation of specialized validation
+        expressions that can be used to validate specific conditions or logic in the data. This
+        method provides maximum flexibility by accepting a custom callable that encapsulates
+        your validation logic.
+        The callable function can have one of two signatures:
+        - a function accepting a single parameter (the data table): `def validate(data): ...`
+        - a function with no parameters: `def validate(): ...`
+        The second form is particularly useful for environment validations that don't need to
+        inspect the data table.
+        The callable function must ultimately return one of:
+        1. a single boolean value or boolean list
+        2. a table where the final column contains boolean values (column name is unimportant)
+        The validation will operate over the number of test units that is equal to the number of
+        rows in the data table (if returning a table with boolean values). If returning a scalar
+        boolean value, the validation will operate over a single test unit. For a return of a list
+        of boolean values, the length of the list constitutes the number of test units.
+        Parameters
+        ----------
+        expr
+            A callable function that defines the specialized validation logic. This function should:
+            (1) accept the target data table as its single argument (though it may ignore it), or
+            (2) take no parameters at all (for environment validations). The function must
+            ultimately return boolean values representing validation results. Design your function
+            to incorporate any custom parameters directly within the function itself using closure
+            variables or default parameters.
+        pre
+            An optional preprocessing function or lambda to apply to the data table during
+            interrogation. This function should take a table as input and return a modified table.
+            Have a look at the *Preprocessing* section for more information on how to use this
+            argument.
+        thresholds
+            Set threshold failure levels for reporting and reacting to exceedances of the levels.
+            The thresholds are set at the step level and will override any global thresholds set in
+            `Validate(thresholds=...)`. The default is `None`, which means that no thresholds will
+            be set locally and global thresholds (if any) will take effect. Look at the *Thresholds*
+            section for information on how to set threshold levels.
+        actions
+            Optional actions to take when the validation step meets or exceeds any set threshold
+            levels. If provided, the [`Actions`](`pointblank.Actions`) class should be used to
+            define the actions.
+        brief
+            An optional brief description of the validation step that will be displayed in the
+            reporting table. You can use the templating elements like `"{step}"` to insert
+            the step number, or `"{auto}"` to include an automatically generated brief. If `True`
+            the entire brief will be automatically generated. If `None` (the default) then there
+            won't be a brief.
+        active
+            A boolean value indicating whether the validation step should be active. Using `False`
+            will make the validation step inactive (still reporting its presence and keeping indexes
+            for the steps unchanged).
+        Returns
+        -------
+        Validate
+            The `Validate` object with the added validation step.
+        Preprocessing
+        -------------
+        The `pre=` argument allows for a preprocessing function or lambda to be applied to the data
+        table during interrogation. This function should take a table as input and return a modified
+        table. This is useful for performing any necessary transformations or filtering on the data
+        before the validation step is applied.
+        The preprocessing function can be any callable that takes a table as input and returns a
+        modified table. For example, you could use a lambda function to filter the table based on
+        certain criteria or to apply a transformation to the data. Regarding the lifetime of the
+        transformed table, it only exists during the validation step and is not stored in the
+        `Validate` object or used in subsequent validation steps.
+        Thresholds
+        ----------
+        The `thresholds=` parameter is used to set the failure-condition levels for the validation
+        step. If they are set here at the step level, these thresholds will override any thresholds
+        set at the global level in `Validate(thresholds=...)`.
+        There are three threshold levels: 'warning', 'error', and 'critical'. The threshold values
+        can either be set as a proportion failing of all test units (a value between `0` and `1`),
+        or, the absolute number of failing test units (as an integer that's `1` or greater).
+        Thresholds can be defined using one of these input schemes:
+        1. use the [`Thresholds`](`pointblank.Thresholds`) class (the most direct way to create
+        thresholds)
+        2. provide a tuple of 1-3 values, where position `0` is the 'warning' level, position `1` is
+        the 'error' level, and position `2` is the 'critical' level
+        3. create a dictionary of 1-3 value entries; the valid keys are 'warning', 'error', and
+        'critical'
+        4. a single integer/float value denoting absolute number or fraction of failing test units
+        for the 'warning' level only
+        If the number of failing test units exceeds set thresholds, the validation step will be
+        marked as 'warning', 'error', or 'critical'. Not all of the threshold levels need to be
+        set; you're free to set any combination of them.
+        Aside from reporting failure conditions, thresholds can be used to determine the actions to
+        take for each level of failure (using the `actions=` parameter).
+        Examples
+        --------
+        The `specially()` method offers maximum flexibility for validation, allowing you to create
+        custom validation logic that fits your specific needs. The following examples demonstrate
+        different patterns and use cases for this powerful validation approach.
+        ### Simple validation with direct table access
+        This example shows the most straightforward use case where we create a function that
+        directly checks if the sum of two columns is positive.
+        ```python
+        import pointblank as pb
+        import polars as pl
+        simple_tbl = pl.DataFrame({
+            "a": [5, 7, 1, 3, 9, 4],
+            "b": [6, 3, 0, 5, 8, 2]
+        })
+        # Simple function that validates directly on the table
+        def validate_sum_positive(data):
+            return data.select(pl.col("a") + pl.col("b") > 0)
+        (
+            pb.Validate(data=simple_tbl)
+            .specially(expr=validate_sum_positive)
+            .interrogate()
+        )
+        ```
+        The function returns a Polars DataFrame with a single boolean column indicating whether
+        the sum of columns `a` and `b` is positive for each row. Each row in the resulting DataFrame
+        is a distinct test unit. This pattern works well for simple validations where you don't need
+        configurable parameters.
+        ### Advanced validation with closure variables for parameters
+        When you need to make your validation configurable, you can use the function factory pattern
+        (also known as closures) to create parameterized validations:
+        ```python
+        # Create a parameterized validation function using closures
+        def make_column_ratio_validator(col1, col2, min_ratio):
+            def validate_column_ratio(data):
+                return data.select((pl.col(col1) / pl.col(col2)) > min_ratio)
+            return validate_column_ratio
+        (
+            pb.Validate(data=simple_tbl)
+            .specially(
+                expr=make_column_ratio_validator(col1="a", col2="b", min_ratio=0.5)
+            )
+            .interrogate()
+        )
+        ```
+        This approach allows you to create reusable validation functions that can be configured with
+        different parameters without modifying the function itself.
+        ### Validation function returning a list of booleans
+        This example demonstrates how to create a validation function that returns a list of boolean
+        values, where each element represents a separate test unit:
+        ```python
+        import pointblank as pb
+        import polars as pl
+        import random
+        # Create sample data
+        transaction_tbl = pl.DataFrame({
+            "transaction_id": [f"TX{i:04d}" for i in range(1, 11)],
+            "amount": [120.50, 85.25, 50.00, 240.75, 35.20, 150.00, 85.25, 65.00, 210.75, 90.50],
+            "category": ["food", "shopping", "entertainment", "travel", "utilities",
+                        "food", "shopping", "entertainment", "travel", "utilities"]
+        })
+        # Define a validation function that returns a list of booleans
+        def validate_transaction_rules(data):
+            # Create a list to store individual test results
+            test_results = []
+            # Check each row individually against multiple business rules
+            for row in data.iter_rows(named=True):
+                # Rule: transaction IDs must start with "TX" and be 6 chars long
+                valid_id = row["transaction_id"].startswith("TX") and len(row["transaction_id"]) == 6
+                # Rule: Amounts must be appropriate for their category
+                valid_amount = True
+                if row["category"] == "food" and (row["amount"] < 10 or row["amount"] > 200):
+                    valid_amount = False
+                elif row["category"] == "utilities" and (row["amount"] < 20 or row["amount"] > 300):
+                    valid_amount = False
+                elif row["category"] == "entertainment" and row["amount"] > 100:
+                    valid_amount = False
+                # A transaction passes if it satisfies both rules
+                test_results.append(valid_id and valid_amount)
+            return test_results
+        (
+            pb.Validate(data=transaction_tbl)
+            .specially(
+                expr=validate_transaction_rules,
+                brief="Validate transaction IDs and amounts by category."
+            )
+            .interrogate()
+        )
+        ```
+        This example shows how to create a validation function that applies multiple business rules
+        to each row and returns a list of boolean results. Each boolean in the list represents a
+        separate test unit, and a test unit passes only if all rules are satisfied for a given row.
+        The function iterates through each row in the data table, checking:
+        1. if transaction IDs follow the required format
+        2. if transaction amounts are appropriate for their respective categories
+        This approach is powerful when you need to apply complex, conditional logic that can't be
+        easily expressed using the built-in validation functions.
+        ### Table-level validation returning a single boolean
+        Sometimes you need to validate properties of the entire table rather than row-by-row. In
+        these cases, your function can return a single boolean value:
+        ```python
+        def validate_table_properties(data):
+            # Check if table has at least one row with column 'a' > 10
+            has_large_values = data.filter(pl.col("a") > 10).height > 0
+            # Check if mean of column 'b' is positive
+            has_positive_mean = data.select(pl.mean("b")).item() > 0
+            # Return a single boolean for the entire table
+            return has_large_values and has_positive_mean
+        (
+            pb.Validate(data=simple_tbl)
+            .specially(expr=validate_table_properties)
+            .interrogate()
+        )
+        ```
+        This example demonstrates how to perform multiple checks on the table as a whole and combine
+        them into a single validation result.
+        ### Environment validation that doesn't use the data table
+        The `specially()` validation method can even be used to validate aspects of your environment
+        that are completely independent of the data:
+        ```python
+        def validate_pointblank_version():
+            try:
+                import importlib.metadata
+                version = importlib.metadata.version("pointblank")
+                version_parts = version.split(".")
+                # Get major and minor components regardless of how many parts there are
+                major = int(version_parts[0])
+                minor = int(version_parts[1])
+                # Check both major and minor components for version `0.9+`
+                return (major > 0) or (major == 0 and minor >= 9)
+            except Exception as e:
+                # More specific error handling could be added here
+                print(f"Version check failed: {e}")
+                return False
+        (
+            pb.Validate(data=simple_tbl)
+            .specially(
+                expr=validate_pointblank_version,
+                brief="Check Pointblank version `>=0.9.0`."
+            )
+            .interrogate()
+        )
+        ```
+        This pattern shows how to validate external dependencies or environment conditions as part
+        of your validation workflow. Notice that the function doesn't take any parameters at all,
+        which makes it cleaner when the validation doesn't need to access the data table.
+        By combining these patterns, you can create sophisticated validation workflows that address
+        virtually any data quality requirement in your organization.
 ## The Column Selection family
@@ -9160,7 +9461,7 @@ send_slack_notification(webhook_url: 'str | None' = None, step_msg: 'str | None'
     validation
     ```
-    By placing the `notify_slack` function in the `Validate(actions=Actions(critical=))` argument,
+    By placing the `notify_slack()` function in the `Validate(actions=Actions(critical=))` argument,
     you can ensure that the notification is sent whenever the 'critical' threshold is reached (as
     set here, when 15% or more of the test units fail). The notification will include information
     about the validation step that triggered the alert.
@@ -9190,7 +9491,7 @@ send_slack_notification(webhook_url: 'str | None' = None, step_msg: 'str | None'
     )
     ```
-    In this case, the same `notify_slack` function is used, but it is placed in
+    In this case, the same `notify_slack()` function is used, but it is placed in
     `Validate(final_actions=FinalActions())`. This results in the summary notification being sent
     after all validation steps are completed, regardless of whether any steps failed or not.

pointblank/data/global_sales-duckdb.zip ADDED Viewed

Binary file

pointblank/data/global_sales.zip ADDED Viewed

Binary file

pointblank/thresholds.py CHANGED Viewed

@@ -574,8 +574,9 @@ class FinalActions:
     In this example, the `send_alert()` function is defined to check the validation summary for
     critical failures. If any are found, an alert message is printed to the console. The function is
     passed to the `FinalActions` class, which ensures it will be executed after all validation steps
-    are complete. Note that we used the `get_validation_summary()` function to retrieve the summary
-    of the validation results to help craft the alert message.
+    are complete. Note that we used the
+    [`get_validation_summary()`](`pointblank.get_validation_summary`) function to retrieve the
+    summary of the validation results to help craft the alert message.
     Multiple final actions can be provided in a sequence. They will be executed in the order they
     are specified after all validation steps have completed:

pointblank 0.9.1__py3-none-any.whl → 0.9.4__py3-none-any.whl

pointblank 0.9.1-py3-none-any.whl → 0.9.4-py3-none-any.whl