PyPI - pointblank - Versions diffs - 0.8.5__py3-none-any.whl → 0.8.7__py3-none-any.whl - Mend

pointblank 0.8.5__py3-none-any.whl → 0.8.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

pointblank/__init__.py +2 -0
pointblank/_constants.py +13 -0
pointblank/_constants_translations.py +54 -0
pointblank/_interrogation.py +192 -4
pointblank/_utils.py +2 -0
pointblank/column.py +352 -4
pointblank/data/api-docs.txt +270 -4
pointblank/validate.py +291 -5
pointblank-0.8.7.dist-info/METADATA +323 -0
{pointblank-0.8.5.dist-info → pointblank-0.8.7.dist-info}/RECORD +13 -13
{pointblank-0.8.5.dist-info → pointblank-0.8.7.dist-info}/WHEEL +1 -1
pointblank-0.8.5.dist-info/METADATA +0 -269
{pointblank-0.8.5.dist-info → pointblank-0.8.7.dist-info}/licenses/LICENSE +0 -0
{pointblank-0.8.5.dist-info → pointblank-0.8.7.dist-info}/top_level.txt +0 -0

pointblank/__init__.py CHANGED Viewed

@@ -16,6 +16,7 @@ from pointblank.column import (
     contains,
     ends_with,
     everything,
+    expr_col,
     first_n,
     last_n,
     matches,
@@ -49,6 +50,7 @@ __all__ = [
     "DataScan",
     "DraftValidation",
     "col",
+    "expr_col",
     "col_summary_tbl",
     "starts_with",
     "ends_with",

pointblank/_constants.py CHANGED Viewed

@@ -42,6 +42,7 @@ ASSERTION_TYPE_METHOD_MAP = {
     "col_schema_match": "col_schema_match",
     "row_count_match": "row_count_match",
     "col_count_match": "col_count_match",
+    "conjointly": "conjointly",
 }
 METHOD_CATEGORY_MAP = {
@@ -65,6 +66,7 @@ METHOD_CATEGORY_MAP = {
     "col_schema_match": "COL_SCHEMA_MATCH",
     "row_count_match": "ROW_COUNT_MATCH",
     "col_count_match": "COL_COUNT_MATCH",
+    "conjointly": "CONJOINTLY",
 }
 COMPARISON_OPERATORS = {
@@ -99,6 +101,7 @@ ROW_BASED_VALIDATION_TYPES = [
     "col_vals_regex",
     "col_vals_null",
     "col_vals_not_null",
+    "conjointly",
 ]
 IBIS_BACKENDS = [
@@ -426,6 +429,16 @@ SVG_ICONS_FOR_ASSERTION_TYPES = {
             <path d="M22.1682701,14.6021863 L22.1682701,17.9472433 L25.5133271,17.9472433 C26.8395904,17.9472433 28.0482531,18.7508414 28.544785,19.9856375 L31.2103774,26.623485 L20.4957415,51.3978137 L24.1543977,51.3978137 L32.9351724,31.0138724 L39.4684869,47.2687589 C40.4680837,49.7644859 42.8984767,51.3978137 45.5836693,51.3978137 L48.9287264,51.3978137 L48.9287264,48.0527567 L45.5836693,48.0527567 C44.2508732,48.0527567 43.0487433,47.2491586 42.5522114,46.0143625 L31.6285095,18.7312411 C30.6289128,16.2355157 28.1985198,14.6021863 25.5133271,14.6021863 L22.1682701,14.6021863 Z" id="lambda" fill="#000000" fill-rule="nonzero"></path>
         </g>
     </g>
+</svg>""",
+    "conjointly": """<?xml version="1.0" encoding="UTF-8"?>
+<svg width="67px" height="67px" viewBox="0 0 67 67" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+    <title>conjointly</title>
+    <g id="All-Icons" stroke="none" stroke-width="1" fill="none" fill-rule="evenodd">
+        <g id="conjointly" transform="translate(0.000000, 0.241379)">
+            <path d="M56.712234,1 C59.1975153,1 61.4475153,2.00735931 63.076195,3.63603897 C64.7048747,5.26471863 65.712234,7.51471863 65.712234,10 L65.712234,10 L65.712234,65 L10.712234,65 C8.22695259,65 5.97695259,63.9926407 4.34827294,62.363961 C2.71959328,60.7352814 1.71223397,58.4852814 1.71223397,56 L1.71223397,56 L1.71223397,10 C1.71223397,7.51471863 2.71959328,5.26471863 4.34827294,3.63603897 C5.97695259,2.00735931 8.22695259,1 10.712234,1 L10.712234,1 Z" id="rectangle" stroke="#000000" stroke-width="2" fill="#FFFFFF"></path>
+            <path d="M51.8485976,12 L15.5758703,12 C13.9986329,12 12.712234,13.2863989 12.712234,14.8636364 L12.712234,51.1363636 C12.712234,52.7136011 13.9986329,54 15.5758703,54 L51.8485976,54 C53.4258351,54 54.712234,52.7136011 54.712234,51.1363636 L54.712234,14.8636364 C54.712234,13.2863989 53.4258351,12 51.8485976,12 Z M37.072234,44 L20.272234,44 L20.272234,42 L37.072234,42 L37.072234,44 Z M37.072234,34 L20.272234,34 L20.272234,32 L37.072234,32 L37.072234,34 Z M37.072234,24 L20.272234,24 L20.272234,22 L37.072234,22 L37.072234,24 Z M47.9233279,41.773438 L45.5706719,45.773438 C45.4427029,45.996094 45.239265,46.148438 45.0095779,46.1875 C44.9702029,46.195313 44.9275469,46.199219 44.88489,46.199219 C44.70114,46.199219 44.5206719,46.128906 44.373015,45.992188 L42.1877029,43.992188 C41.8202029,43.65625 41.7512969,43.027344 42.033484,42.589844 C42.3156719,42.152344 42.8439529,42.070313 43.2114529,42.40625 L44.697859,43.769531 L46.548484,40.625 C46.814265,40.171875 47.335984,40.0625 47.716609,40.378906 C48.097234,40.695313 48.189109,41.320313 47.9233279,41.773438 Z M47.9233279,31.773438 L45.5706719,35.773438 C45.4427029,35.996094 45.239265,36.148438 45.0095779,36.1875 C44.9702029,36.195313 44.9275469,36.199219 44.88489,36.199219 C44.70114,36.199219 44.5206719,36.128906 44.373015,35.992188 L42.1877029,33.992188 C41.8202029,33.65625 41.7512969,33.027344 42.033484,32.589844 C42.3156719,32.152344 42.8439529,32.070313 43.2114529,32.40625 L44.697859,33.769531 L46.548484,30.628906 C46.814265,30.175781 47.335984,30.0625 47.716609,30.382813 C48.097234,30.699219 48.189109,31.320313 47.9233279,31.773438 Z M47.9233279,21.773438 L45.5706719,25.773438 C45.4427029,25.996094 45.239265,26.148438 45.0095779,26.1875 C44.9702029,26.195313 44.9275469,26.199219 44.88489,26.199219 C44.70114,26.199219 44.5206719,26.128906 44.373015,25.992188 L42.1877029,23.992188 C41.8202029,23.65625 41.7512969,23.027344 42.033484,22.589844 C42.3156719,22.152344 42.8439529,22.070313 43.2114529,22.40625 
L44.697859,23.769531 L46.548484,20.625 C46.814265,20.171875 47.335984,20.0625 47.716609,20.378906 C48.097234,20.699219 48.189109,21.320313 47.9233279,21.773438 Z" id="conjoint" fill="#000000" fill-rule="nonzero"></path>
+        </g>
+    </g>
 </svg>""",
 }

pointblank/_constants_translations.py CHANGED Viewed

@@ -998,6 +998,60 @@ EXPECT_FAIL_TEXT = {
         "hi": "स्तंभ संख्या {values_text} से मेल नहीं खाती।",
         "el": "Ο αριθμός στηλών δεν ταίριαζε με το {values_text}.",
     },
+    "conjointly_expectation_text": {
+        "en": "Expect conjoint 'pass' units across all expressions.",
+        "fr": "On s'attend à des unités 'réussite' conjointes sur toutes les expressions.",
+        "de": "Erwarten Sie gemeinsame 'Bestanden'-Einheiten über alle Ausdrücke hinweg.",
+        "it": "Aspettatevi unità 'pass' congiunte su tutte le espressioni.",
+        "es": "Se esperan unidades 'aprobadas' conjuntas en todas las expresiones.",
+        "pt": "Espera-se unidades 'aprovadas' conjuntas em todas as expressões.",
+        "ro": "Se așteaptă unități 'pass' comune în toate expresiile.",
+        "tr": "Tüm ifadeler boyunca birleşik 'geçen' birimler bekleyin.",
+        "zh-Hans": "预期所有表达式中都有共同的'通过'单元。",
+        "zh-Hant": "預期所有表達式中都有共同的'通過'單元。",
+        "ja": "全ての表現にわたって結合された'合格'ユニットを期待します。",
+        "ko": "모든 표현식에 걸쳐 결합된 '통과' 단위를 기대합니다.",
+        "vi": "Kỳ vọng các đơn vị 'đạt' kết hợp trên tất cả các biểu thức.",
+        "ru": "Ожидайте совместные единицы 'прохождения' по всем выражениям.",
+        "cs": "Očekávejte společné 'úspěšné' jednotky ve všech výrazech.",
+        "pl": "Oczekuj wspólnych jednostek 'zaliczonych' we wszystkich wyrażeniach.",
+        "da": "Forvent fælles 'godkendte' enheder på tværs af alle udtryk.",
+        "sv": "Förvänta dig gemensamma 'godkända' enheter över alla uttryck.",
+        "nb": "Forvent felles 'godkjente' enheter på tvers av alle uttrykk.",
+        "nl": "Verwacht gezamenlijke 'geslaagde' eenheden over alle expressies.",
+        "fi": "Odota yhteisiä 'läpäisseitä' yksiköitä kaikissa lausekkeissa.",
+        "is": "Væntir sameiginlegra 'staðinna' eininga yfir allar segðir.",
+        "ar": "توقع وحدات 'ناجحة' مشتركة عبر جميع التعبيرات.",
+        "hi": "सभी अभिव्यक्तियों में संयुक्त 'पास' इकाइयों की अपेक्षा करें।",
+        "el": "Αναμένονται κοινές μονάδες 'επιτυχίας' σε όλες τις εκφράσεις.",
+    },
+    "conjointly_failure_text": {
+        "en": "Exceedance of failed test units where there should have been conjoint 'pass' units.",
+        "fr": "Dépassement des unités de test ayant échoué où il aurait dû y avoir des unités 'réussite' conjointes.",
+        "de": "Überschreitung fehlgeschlagener Testeinheiten, bei denen es gemeinsame 'Bestanden'-Einheiten hätte geben sollen.",
+        "it": "Superamento di unità di test fallite dove ci sarebbero dovute essere unità 'pass' congiunte.",
+        "es": "Se superó el número de unidades de prueba fallidas donde debería haber habido unidades 'aprobadas' conjuntas.",
+        "pt": "Excedeu o número de unidades de teste com falha onde deveria haver unidades 'aprovadas' conjuntas.",
+        "ro": "Depășirea unităților de test eșuate unde ar fi trebuit să existe unități 'pass' comune.",
+        "tr": "Birleşik 'geçen' birimler olması gereken yerlerde başarısız test birimlerinin aşılması.",
+        "zh-Hans": "错误过多，其中应当有共同的'通过'单元。",
+        "zh-Hant": "錯誤過多，其中應該有共同的'通過'單元。",
+        "ja": "結合された'合格'ユニットがあるはずの場所でのテスト単位の失敗の超過。",
+        "ko": "결합된 '통과' 단위가 있어야 했던 실패한 테스트 단위 초과.",
+        "vi": "Vượt quá số đơn vị kiểm tra thất bại trong đó đáng lẽ phải có các đơn vị 'đạt' kết hợp.",
+        "ru": "Превышение неудачных тестовых единиц, где должны были быть совместные единицы 'прохождения'.",
+        "cs": "Překročení počtu neúspěšných testovacích jednotek, kde měly být společné 'úspěšné' jednotky.",
+        "pl": "Przekroczenie nieudanych jednostek testowych, gdzie powinny były być wspólne jednostki 'zaliczone'.",
+        "da": "Overskridelse af fejlslagne testenheder, hvor der skulle have været fælles 'godkendte' enheder.",
+        "sv": "Överskrider antalet misslyckade testenheter där det borde ha funnits gemensamma 'godkända' enheter.",
+        "nb": "Overskridelse av mislykkede testenheter hvor det skulle ha vært felles 'godkjente' enheter.",
+        "nl": "Overschrijding van mislukte testeenheden waar gezamenlijke 'geslaagde' eenheden hadden moeten zijn.",
+        "fi": "Epäonnistuneiden testiyksiköiden ylitys, joissa olisi pitänyt olla yhteisiä 'läpäisseitä' yksiköitä.",
+        "is": "Of mörg misheppnuð próf þar sem hefðu átt að vera sameiginlegar 'staðnar' einingar.",
+        "ar": "تجاوز وحدات الاختبار الفاشلة حيث كان يجب أن تكون هناك وحدات 'ناجحة' مشتركة.",
+        "hi": "असफल परीक्षण इकाइयों की अधिकता जहां संयुक्त 'पास' इकाइयां होनी चाहिए थीं।",
+        "el": "Υπέρβαση αποτυχημένων μονάδων δοκιμής όπου θα έπρεπε να υπάρχουν κοινές μονάδες 'επιτυχίας'.",
+    },
 }

pointblank/_interrogation.py CHANGED Viewed

@@ -1089,14 +1089,20 @@ class Interrogator:
     def isin(self) -> FrameT | Any:
         # Ibis backends ---------------------------------------------
+        can_be_null: bool = None in self.set
         if self.tbl_type in IBIS_BACKENDS:
-            return self.x.mutate(pb_is_good_=self.x[self.column].isin(self.set))
+            base_expr = self.x[self.column].isin(self.set)
+            if can_be_null:
+                base_expr = base_expr | self.x[self.column].isnull()
+            return self.x.mutate(pb_is_good_=base_expr)
         # Local backends (Narwhals) ---------------------------------
+        base_expr: nw.Expr = nw.col(self.column).is_in(self.set)
+        if can_be_null:
+            base_expr = base_expr | nw.col(self.column).is_null()
-        return self.x.with_columns(
-            pb_is_good_=nw.col(self.column).is_in(self.set),
-        ).to_native()
+        return self.x.with_columns(pb_is_good_=base_expr).to_native()
     def notin(self) -> FrameT | Any:
         # Ibis backends ---------------------------------------------
@@ -1977,6 +1983,188 @@ class ColCountMatch:
         return self.test_unit_res
+class ConjointlyValidation:
+    def __init__(self, data_tbl, expressions, threshold, tbl_type):
+        self.data_tbl = data_tbl
+        self.expressions = expressions
+        self.threshold = threshold
+        # Detect the table type
+        if tbl_type in (None, "local"):
+            # Detect the table type using _get_tbl_type()
+            self.tbl_type = _get_tbl_type(data=data_tbl)
+        else:
+            self.tbl_type = tbl_type
+    def get_test_results(self):
+        """Evaluate all expressions and combine them conjointly."""
+        if "polars" in self.tbl_type:
+            return self._get_polars_results()
+        elif "pandas" in self.tbl_type:
+            return self._get_pandas_results()
+        elif "duckdb" in self.tbl_type or "ibis" in self.tbl_type:
+            return self._get_ibis_results()
+        else:  # pragma: no cover
+            raise NotImplementedError(f"Support for {self.tbl_type} is not yet implemented")
+    def _get_polars_results(self):
+        """Process expressions for Polars DataFrames."""
+        import polars as pl
+        polars_expressions = []
+        for expr_fn in self.expressions:
+            try:
+                # First try direct evaluation with native Polars expressions
+                expr_result = expr_fn(self.data_tbl)
+                if isinstance(expr_result, pl.Expr):
+                    polars_expressions.append(expr_result)
+                else:
+                    raise TypeError("Not a valid Polars expression")
+            except Exception as e:
+                try:
+                    # Try to get a ColumnExpression
+                    col_expr = expr_fn(None)
+                    if hasattr(col_expr, "to_polars_expr"):
+                        polars_expr = col_expr.to_polars_expr()
+                        polars_expressions.append(polars_expr)
+                    else:  # pragma: no cover
+                        raise TypeError(f"Cannot convert {type(col_expr)} to Polars expression")
+                except Exception as e:  # pragma: no cover
+                    print(f"Error evaluating expression: {e}")
+        # Combine results with AND logic
+        if polars_expressions:
+            final_result = polars_expressions[0]
+            for expr in polars_expressions[1:]:
+                final_result = final_result & expr
+            # Create results table with boolean column
+            results_tbl = self.data_tbl.with_columns(pb_is_good_=final_result)
+            return results_tbl
+        # Default case
+        results_tbl = self.data_tbl.with_columns(pb_is_good_=pl.lit(True))  # pragma: no cover
+        return results_tbl  # pragma: no cover
+    def _get_pandas_results(self):
+        """Process expressions for pandas DataFrames."""
+        import pandas as pd
+        pandas_series = []
+        for expr_fn in self.expressions:
+            try:
+                # First try direct evaluation with pandas DataFrame
+                expr_result = expr_fn(self.data_tbl)
+                # Check that it's a pandas Series with bool dtype
+                if isinstance(expr_result, pd.Series):
+                    if expr_result.dtype == bool or pd.api.types.is_bool_dtype(expr_result):
+                        pandas_series.append(expr_result)
+                    else:  # pragma: no cover
+                        raise TypeError(
+                            f"Expression returned Series of type {expr_result.dtype}, expected bool"
+                        )
+                else:  # pragma: no cover
+                    raise TypeError(f"Expression returned {type(expr_result)}, expected pd.Series")
+            except Exception as e:
+                try:
+                    # Try as a ColumnExpression (for pb.expr_col style)
+                    col_expr = expr_fn(None)
+                    if hasattr(col_expr, "to_pandas_expr"):
+                        # Watch for NotImplementedError here and re-raise it
+                        try:
+                            pandas_expr = col_expr.to_pandas_expr(self.data_tbl)
+                            pandas_series.append(pandas_expr)
+                        except NotImplementedError as nie:  # pragma: no cover
+                            # Re-raise NotImplementedError with the original message
+                            raise NotImplementedError(str(nie))
+                    else:  # pragma: no cover
+                        raise TypeError(f"Cannot convert {type(col_expr)} to pandas Series")
+                except NotImplementedError as nie:  # pragma: no cover
+                    # Re-raise NotImplementedError
+                    raise NotImplementedError(str(nie))
+                except Exception as nested_e:  # pragma: no cover
+                    print(f"Error evaluating pandas expression: {e} -> {nested_e}")
+        # Combine results with AND logic
+        if pandas_series:
+            final_result = pandas_series[0]
+            for series in pandas_series[1:]:
+                final_result = final_result & series
+            # Create results table with boolean column
+            results_tbl = self.data_tbl.copy()
+            results_tbl["pb_is_good_"] = final_result
+            return results_tbl
+        # Default case
+        results_tbl = self.data_tbl.copy()  # pragma: no cover
+        results_tbl["pb_is_good_"] = pd.Series(  # pragma: no cover
+            [True] * len(self.data_tbl), index=self.data_tbl.index
+        )
+        return results_tbl  # pragma: no cover
+    def _get_ibis_results(self):
+        """Process expressions for Ibis tables (including DuckDB)."""
+        import ibis
+        ibis_expressions = []
+        for expr_fn in self.expressions:
+            # Strategy 1: Try direct evaluation with native Ibis expressions
+            try:
+                expr_result = expr_fn(self.data_tbl)
+                # Check if it's a valid Ibis expression
+                if hasattr(expr_result, "_ibis_expr"):  # pragma: no cover
+                    ibis_expressions.append(expr_result)
+                    continue  # Skip to next expression if this worked
+            except Exception:  # pragma: no cover
+                pass  # Silently continue to Strategy 2
+            # Strategy 2: Try with ColumnExpression
+            try:  # pragma: no cover
+                # Skip this strategy if we don't have an expr_col implementation
+                if not hasattr(self, "to_ibis_expr"):
+                    continue
+                col_expr = expr_fn(None)
+                # Skip if we got None
+                if col_expr is None:
+                    continue
+                # Convert ColumnExpression to Ibis expression
+                if hasattr(col_expr, "to_ibis_expr"):
+                    ibis_expr = col_expr.to_ibis_expr(self.data_tbl)
+                    ibis_expressions.append(ibis_expr)
+            except Exception:  # pragma: no cover
+                # Silent failure - we already tried both strategies
+                pass
+        # Combine expressions
+        if ibis_expressions:  # pragma: no cover
+            try:
+                final_result = ibis_expressions[0]
+                for expr in ibis_expressions[1:]:
+                    final_result = final_result & expr
+                # Create results table with boolean column
+                results_tbl = self.data_tbl.mutate(pb_is_good_=final_result)
+                return results_tbl
+            except Exception as e:
+                print(f"Error combining Ibis expressions: {e}")
+        # Default case
+        results_tbl = self.data_tbl.mutate(pb_is_good_=ibis.literal(True))
+        return results_tbl
 @dataclass
 class NumberOfTestUnits:
     """

pointblank/_utils.py CHANGED Viewed

@@ -488,6 +488,7 @@ def _get_api_text() -> str:
         "Validate.col_schema_match",
         "Validate.row_count_match",
         "Validate.col_count_match",
+        "Validate.conjointly",
     ]
     column_selection_exported = [
@@ -499,6 +500,7 @@ def _get_api_text() -> str:
         "everything",
         "first_n",
         "last_n",
+        "expr_col",
     ]
     interrogation_exported = [

pointblank 0.8.5__py3-none-any.whl → 0.8.7__py3-none-any.whl

pointblank 0.8.5__py3-none-any.whl → 0.8.7__py3-none-any.whl