pointblank 0.8.5__py3-none-any.whl → 0.8.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pointblank/__init__.py CHANGED
@@ -16,6 +16,7 @@ from pointblank.column import (
16
16
  contains,
17
17
  ends_with,
18
18
  everything,
19
+ expr_col,
19
20
  first_n,
20
21
  last_n,
21
22
  matches,
@@ -49,6 +50,7 @@ __all__ = [
49
50
  "DataScan",
50
51
  "DraftValidation",
51
52
  "col",
53
+ "expr_col",
52
54
  "col_summary_tbl",
53
55
  "starts_with",
54
56
  "ends_with",
pointblank/_constants.py CHANGED
@@ -42,6 +42,7 @@ ASSERTION_TYPE_METHOD_MAP = {
42
42
  "col_schema_match": "col_schema_match",
43
43
  "row_count_match": "row_count_match",
44
44
  "col_count_match": "col_count_match",
45
+ "conjointly": "conjointly",
45
46
  }
46
47
 
47
48
  METHOD_CATEGORY_MAP = {
@@ -65,6 +66,7 @@ METHOD_CATEGORY_MAP = {
65
66
  "col_schema_match": "COL_SCHEMA_MATCH",
66
67
  "row_count_match": "ROW_COUNT_MATCH",
67
68
  "col_count_match": "COL_COUNT_MATCH",
69
+ "conjointly": "CONJOINTLY",
68
70
  }
69
71
 
70
72
  COMPARISON_OPERATORS = {
@@ -99,6 +101,7 @@ ROW_BASED_VALIDATION_TYPES = [
99
101
  "col_vals_regex",
100
102
  "col_vals_null",
101
103
  "col_vals_not_null",
104
+ "conjointly",
102
105
  ]
103
106
 
104
107
  IBIS_BACKENDS = [
@@ -426,6 +429,16 @@ SVG_ICONS_FOR_ASSERTION_TYPES = {
426
429
  <path d="M22.1682701,14.6021863 L22.1682701,17.9472433 L25.5133271,17.9472433 C26.8395904,17.9472433 28.0482531,18.7508414 28.544785,19.9856375 L31.2103774,26.623485 L20.4957415,51.3978137 L24.1543977,51.3978137 L32.9351724,31.0138724 L39.4684869,47.2687589 C40.4680837,49.7644859 42.8984767,51.3978137 45.5836693,51.3978137 L48.9287264,51.3978137 L48.9287264,48.0527567 L45.5836693,48.0527567 C44.2508732,48.0527567 43.0487433,47.2491586 42.5522114,46.0143625 L31.6285095,18.7312411 C30.6289128,16.2355157 28.1985198,14.6021863 25.5133271,14.6021863 L22.1682701,14.6021863 Z" id="lambda" fill="#000000" fill-rule="nonzero"></path>
427
430
  </g>
428
431
  </g>
432
+ </svg>""",
433
+ "conjointly": """<?xml version="1.0" encoding="UTF-8"?>
434
+ <svg width="67px" height="67px" viewBox="0 0 67 67" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
435
+ <title>conjointly</title>
436
+ <g id="All-Icons" stroke="none" stroke-width="1" fill="none" fill-rule="evenodd">
437
+ <g id="conjointly" transform="translate(0.000000, 0.241379)">
438
+ <path d="M56.712234,1 C59.1975153,1 61.4475153,2.00735931 63.076195,3.63603897 C64.7048747,5.26471863 65.712234,7.51471863 65.712234,10 L65.712234,10 L65.712234,65 L10.712234,65 C8.22695259,65 5.97695259,63.9926407 4.34827294,62.363961 C2.71959328,60.7352814 1.71223397,58.4852814 1.71223397,56 L1.71223397,56 L1.71223397,10 C1.71223397,7.51471863 2.71959328,5.26471863 4.34827294,3.63603897 C5.97695259,2.00735931 8.22695259,1 10.712234,1 L10.712234,1 Z" id="rectangle" stroke="#000000" stroke-width="2" fill="#FFFFFF"></path>
439
+ <path d="M51.8485976,12 L15.5758703,12 C13.9986329,12 12.712234,13.2863989 12.712234,14.8636364 L12.712234,51.1363636 C12.712234,52.7136011 13.9986329,54 15.5758703,54 L51.8485976,54 C53.4258351,54 54.712234,52.7136011 54.712234,51.1363636 L54.712234,14.8636364 C54.712234,13.2863989 53.4258351,12 51.8485976,12 Z M37.072234,44 L20.272234,44 L20.272234,42 L37.072234,42 L37.072234,44 Z M37.072234,34 L20.272234,34 L20.272234,32 L37.072234,32 L37.072234,34 Z M37.072234,24 L20.272234,24 L20.272234,22 L37.072234,22 L37.072234,24 Z M47.9233279,41.773438 L45.5706719,45.773438 C45.4427029,45.996094 45.239265,46.148438 45.0095779,46.1875 C44.9702029,46.195313 44.9275469,46.199219 44.88489,46.199219 C44.70114,46.199219 44.5206719,46.128906 44.373015,45.992188 L42.1877029,43.992188 C41.8202029,43.65625 41.7512969,43.027344 42.033484,42.589844 C42.3156719,42.152344 42.8439529,42.070313 43.2114529,42.40625 L44.697859,43.769531 L46.548484,40.625 C46.814265,40.171875 47.335984,40.0625 47.716609,40.378906 C48.097234,40.695313 48.189109,41.320313 47.9233279,41.773438 Z M47.9233279,31.773438 L45.5706719,35.773438 C45.4427029,35.996094 45.239265,36.148438 45.0095779,36.1875 C44.9702029,36.195313 44.9275469,36.199219 44.88489,36.199219 C44.70114,36.199219 44.5206719,36.128906 44.373015,35.992188 L42.1877029,33.992188 C41.8202029,33.65625 41.7512969,33.027344 42.033484,32.589844 C42.3156719,32.152344 42.8439529,32.070313 43.2114529,32.40625 L44.697859,33.769531 L46.548484,30.628906 C46.814265,30.175781 47.335984,30.0625 47.716609,30.382813 C48.097234,30.699219 48.189109,31.320313 47.9233279,31.773438 Z M47.9233279,21.773438 L45.5706719,25.773438 C45.4427029,25.996094 45.239265,26.148438 45.0095779,26.1875 C44.9702029,26.195313 44.9275469,26.199219 44.88489,26.199219 C44.70114,26.199219 44.5206719,26.128906 44.373015,25.992188 L42.1877029,23.992188 C41.8202029,23.65625 41.7512969,23.027344 42.033484,22.589844 C42.3156719,22.152344 42.8439529,22.070313 43.2114529,22.40625 
L44.697859,23.769531 L46.548484,20.625 C46.814265,20.171875 47.335984,20.0625 47.716609,20.378906 C48.097234,20.699219 48.189109,21.320313 47.9233279,21.773438 Z" id="conjoint" fill="#000000" fill-rule="nonzero"></path>
440
+ </g>
441
+ </g>
429
442
  </svg>""",
430
443
  }
431
444
 
@@ -998,6 +998,60 @@ EXPECT_FAIL_TEXT = {
998
998
  "hi": "स्तंभ संख्या {values_text} से मेल नहीं खाती।",
999
999
  "el": "Ο αριθμός στηλών δεν ταίριαζε με το {values_text}.",
1000
1000
  },
1001
+ "conjointly_expectation_text": {
1002
+ "en": "Expect conjoint 'pass' units across all expressions.",
1003
+ "fr": "On s'attend à des unités 'réussite' conjointes sur toutes les expressions.",
1004
+ "de": "Erwarten Sie gemeinsame 'Bestanden'-Einheiten über alle Ausdrücke hinweg.",
1005
+ "it": "Aspettatevi unità 'pass' congiunte su tutte le espressioni.",
1006
+ "es": "Se esperan unidades 'aprobadas' conjuntas en todas las expresiones.",
1007
+ "pt": "Espera-se unidades 'aprovadas' conjuntas em todas as expressões.",
1008
+ "ro": "Se așteaptă unități 'pass' comune în toate expresiile.",
1009
+ "tr": "Tüm ifadeler boyunca birleşik 'geçen' birimler bekleyin.",
1010
+ "zh-Hans": "预期所有表达式中都有共同的'通过'单元。",
1011
+ "zh-Hant": "預期所有表達式中都有共同的'通過'單元。",
1012
+ "ja": "全ての表現にわたって結合された'合格'ユニットを期待します。",
1013
+ "ko": "모든 표현식에 걸쳐 결합된 '통과' 단위를 기대합니다.",
1014
+ "vi": "Kỳ vọng các đơn vị 'đạt' kết hợp trên tất cả các biểu thức.",
1015
+ "ru": "Ожидайте совместные единицы 'прохождения' по всем выражениям.",
1016
+ "cs": "Očekávejte společné 'úspěšné' jednotky ve všech výrazech.",
1017
+ "pl": "Oczekuj wspólnych jednostek 'zaliczonych' we wszystkich wyrażeniach.",
1018
+ "da": "Forvent fælles 'godkendte' enheder på tværs af alle udtryk.",
1019
+ "sv": "Förvänta dig gemensamma 'godkända' enheter över alla uttryck.",
1020
+ "nb": "Forvent felles 'godkjente' enheter på tvers av alle uttrykk.",
1021
+ "nl": "Verwacht gezamenlijke 'geslaagde' eenheden over alle expressies.",
1022
+ "fi": "Odota yhteisiä 'läpäisseitä' yksiköitä kaikissa lausekkeissa.",
1023
+ "is": "Væntir sameiginlegra 'staðinna' eininga yfir allar segðir.",
1024
+ "ar": "توقع وحدات 'ناجحة' مشتركة عبر جميع التعبيرات.",
1025
+ "hi": "सभी अभिव्यक्तियों में संयुक्त 'पास' इकाइयों की अपेक्षा करें।",
1026
+ "el": "Αναμένονται κοινές μονάδες 'επιτυχίας' σε όλες τις εκφράσεις.",
1027
+ },
1028
+ "conjointly_failure_text": {
1029
+ "en": "Exceedance of failed test units where there should have been conjoint 'pass' units.",
1030
+ "fr": "Dépassement des unités de test ayant échoué où il aurait dû y avoir des unités 'réussite' conjointes.",
1031
+ "de": "Überschreitung fehlgeschlagener Testeinheiten, bei denen es gemeinsame 'Bestanden'-Einheiten hätte geben sollen.",
1032
+ "it": "Superamento di unità di test fallite dove ci sarebbero dovute essere unità 'pass' congiunte.",
1033
+ "es": "Se superó el número de unidades de prueba fallidas donde debería haber habido unidades 'aprobadas' conjuntas.",
1034
+ "pt": "Excedeu o número de unidades de teste com falha onde deveria haver unidades 'aprovadas' conjuntas.",
1035
+ "ro": "Depășirea unităților de test eșuate unde ar fi trebuit să existe unități 'pass' comune.",
1036
+ "tr": "Birleşik 'geçen' birimler olması gereken yerlerde başarısız test birimlerinin aşılması.",
1037
+ "zh-Hans": "错误过多,其中应当有共同的'通过'单元。",
1038
+ "zh-Hant": "錯誤過多,其中應該有共同的'通過'單元。",
1039
+ "ja": "結合された'合格'ユニットがあるはずの場所でのテスト単位の失敗の超過。",
1040
+ "ko": "결합된 '통과' 단위가 있어야 했던 실패한 테스트 단위 초과.",
1041
+ "vi": "Vượt quá số đơn vị kiểm tra thất bại trong đó đáng lẽ phải có các đơn vị 'đạt' kết hợp.",
1042
+ "ru": "Превышение неудачных тестовых единиц, где должны были быть совместные единицы 'прохождения'.",
1043
+ "cs": "Překročení počtu neúspěšných testovacích jednotek, kde měly být společné 'úspěšné' jednotky.",
1044
+ "pl": "Przekroczenie nieudanych jednostek testowych, gdzie powinny były być wspólne jednostki 'zaliczone'.",
1045
+ "da": "Overskridelse af fejlslagne testenheder, hvor der skulle have været fælles 'godkendte' enheder.",
1046
+ "sv": "Överskrider antalet misslyckade testenheter där det borde ha funnits gemensamma 'godkända' enheter.",
1047
+ "nb": "Overskridelse av mislykkede testenheter hvor det skulle ha vært felles 'godkjente' enheter.",
1048
+ "nl": "Overschrijding van mislukte testeenheden waar gezamenlijke 'geslaagde' eenheden hadden moeten zijn.",
1049
+ "fi": "Epäonnistuneiden testiyksiköiden ylitys, joissa olisi pitänyt olla yhteisiä 'läpäisseitä' yksiköitä.",
1050
+ "is": "Of mörg misheppnuð próf þar sem hefðu átt að vera sameiginlegar 'staðnar' einingar.",
1051
+ "ar": "تجاوز وحدات الاختبار الفاشلة حيث كان يجب أن تكون هناك وحدات 'ناجحة' مشتركة.",
1052
+ "hi": "असफल परीक्षण इकाइयों की अधिकता जहां संयुक्त 'पास' इकाइयां होनी चाहिए थीं।",
1053
+ "el": "Υπέρβαση αποτυχημένων μονάδων δοκιμής όπου θα έπρεπε να υπάρχουν κοινές μονάδες 'επιτυχίας'.",
1054
+ },
1001
1055
  }
1002
1056
 
1003
1057
 
@@ -1089,14 +1089,20 @@ class Interrogator:
1089
1089
  def isin(self) -> FrameT | Any:
1090
1090
  # Ibis backends ---------------------------------------------
1091
1091
 
1092
+ can_be_null: bool = None in self.set
1093
+
1092
1094
  if self.tbl_type in IBIS_BACKENDS:
1093
- return self.x.mutate(pb_is_good_=self.x[self.column].isin(self.set))
1095
+ base_expr = self.x[self.column].isin(self.set)
1096
+ if can_be_null:
1097
+ base_expr = base_expr | self.x[self.column].isnull()
1098
+ return self.x.mutate(pb_is_good_=base_expr)
1094
1099
 
1095
1100
  # Local backends (Narwhals) ---------------------------------
1101
+ base_expr: nw.Expr = nw.col(self.column).is_in(self.set)
1102
+ if can_be_null:
1103
+ base_expr = base_expr | nw.col(self.column).is_null()
1096
1104
 
1097
- return self.x.with_columns(
1098
- pb_is_good_=nw.col(self.column).is_in(self.set),
1099
- ).to_native()
1105
+ return self.x.with_columns(pb_is_good_=base_expr).to_native()
1100
1106
 
1101
1107
  def notin(self) -> FrameT | Any:
1102
1108
  # Ibis backends ---------------------------------------------
@@ -1977,6 +1983,188 @@ class ColCountMatch:
1977
1983
  return self.test_unit_res
1978
1984
 
1979
1985
 
1986
class ConjointlyValidation:
    """
    Evaluate several row-wise expressions and AND them into one pass/fail column.

    Each item in `expressions` is a callable. It is first called with the data
    table and expected to return a backend-native boolean expression (or, for
    pandas, a boolean Series). If that does not work, the callable is invoked
    with `None` and the returned object is asked to convert itself through
    `to_polars_expr()`, `to_pandas_expr(tbl)` or `to_ibis_expr(tbl)`.

    Parameters
    ----------
    data_tbl
        The table to evaluate the expressions against.
    expressions
        Iterable of callables as described above.
    threshold
        Stored on the instance; not used by any method of this class.
    tbl_type
        Backend name. `None` or `"local"` triggers detection through
        `_get_tbl_type()`.
    """

    def __init__(self, data_tbl, expressions, threshold, tbl_type):
        self.data_tbl = data_tbl
        self.expressions = expressions
        self.threshold = threshold

        # `None` / "local" mean the backend has not been resolved yet
        if tbl_type in (None, "local"):
            self.tbl_type = _get_tbl_type(data=data_tbl)
        else:
            self.tbl_type = tbl_type

    def get_test_results(self):
        """
        Evaluate all expressions and combine them conjointly.

        Returns
        -------
        The data table with an added boolean `pb_is_good_` column.

        Raises
        ------
        NotImplementedError
            If `self.tbl_type` is not a Polars, pandas, DuckDB or Ibis type.
        """

        if "polars" in self.tbl_type:
            return self._get_polars_results()
        elif "pandas" in self.tbl_type:
            return self._get_pandas_results()
        elif "duckdb" in self.tbl_type or "ibis" in self.tbl_type:
            return self._get_ibis_results()
        else:  # pragma: no cover
            raise NotImplementedError(f"Support for {self.tbl_type} is not yet implemented")

    @staticmethod
    def _combine_with_and(parts):
        """Fold a non-empty list of boolean expressions/Series together with `&`."""
        combined = parts[0]
        for part in parts[1:]:
            combined = combined & part
        return combined

    @staticmethod
    def _is_ibis_expr(obj):
        """Return True when `obj` looks like a native boolean Ibis expression."""
        # Retained from the original check so objects exposing this marker still pass
        if hasattr(obj, "_ibis_expr"):
            return True
        try:
            import ibis.expr.types as ir
        except ImportError:  # pragma: no cover
            return False
        return isinstance(obj, ir.BooleanValue)

    def _get_polars_results(self):
        """Process expressions for Polars DataFrames."""
        import polars as pl

        polars_expressions = []

        for expr_fn in self.expressions:
            try:
                # First try direct evaluation with native Polars expressions
                expr_result = expr_fn(self.data_tbl)
                if not isinstance(expr_result, pl.Expr):
                    raise TypeError("Not a valid Polars expression")
                polars_expressions.append(expr_result)
            except Exception:
                try:
                    # Fall back to an object that can convert itself
                    col_expr = expr_fn(None)
                    if not hasattr(col_expr, "to_polars_expr"):  # pragma: no cover
                        raise TypeError(f"Cannot convert {type(col_expr)} to Polars expression")
                    polars_expressions.append(col_expr.to_polars_expr())
                except Exception as e:  # pragma: no cover
                    # Best effort: report and leave this expression out
                    print(f"Error evaluating expression: {e}")

        if polars_expressions:
            final_result = self._combine_with_and(polars_expressions)
            return self.data_tbl.with_columns(pb_is_good_=final_result)

        # Default case: nothing usable, so every row is marked as passing
        return self.data_tbl.with_columns(pb_is_good_=pl.lit(True))  # pragma: no cover

    def _get_pandas_results(self):
        """
        Process expressions for pandas DataFrames.

        Raises
        ------
        NotImplementedError
            Propagated unchanged when the fallback conversion raises it.
        """
        import pandas as pd

        pandas_series = []

        for expr_fn in self.expressions:
            try:
                # First try direct evaluation with the pandas DataFrame
                expr_result = expr_fn(self.data_tbl)

                if not isinstance(expr_result, pd.Series):
                    raise TypeError(f"Expression returned {type(expr_result)}, expected pd.Series")
                if not pd.api.types.is_bool_dtype(expr_result):  # pragma: no cover
                    raise TypeError(
                        f"Expression returned Series of type {expr_result.dtype}, expected bool"
                    )
                pandas_series.append(expr_result)

            except Exception as e:
                try:
                    # Fall back to an object that can convert itself
                    col_expr = expr_fn(None)
                    if not hasattr(col_expr, "to_pandas_expr"):  # pragma: no cover
                        raise TypeError(f"Cannot convert {type(col_expr)} to pandas Series")
                    pandas_series.append(col_expr.to_pandas_expr(self.data_tbl))
                except NotImplementedError:  # pragma: no cover
                    # Let the caller see this one with its original traceback
                    raise
                except Exception as nested_e:  # pragma: no cover
                    # Best effort: report and leave this expression out
                    print(f"Error evaluating pandas expression: {e} -> {nested_e}")

        # Work on a copy so the caller's DataFrame is never mutated
        results_tbl = self.data_tbl.copy()

        if pandas_series:
            results_tbl["pb_is_good_"] = self._combine_with_and(pandas_series)
            return results_tbl

        # Default case: nothing usable, so every row is marked as passing
        results_tbl["pb_is_good_"] = pd.Series(  # pragma: no cover
            [True] * len(self.data_tbl), index=self.data_tbl.index
        )
        return results_tbl  # pragma: no cover

    def _get_ibis_results(self):
        """Process expressions for Ibis tables (including DuckDB)."""
        ibis_expressions = []

        for expr_fn in self.expressions:
            # Strategy 1: direct evaluation yielding a native Ibis expression
            try:
                expr_result = expr_fn(self.data_tbl)
                if self._is_ibis_expr(expr_result):
                    ibis_expressions.append(expr_result)
                    continue
            except Exception:
                # Deliberate probe: a failure here just means "try Strategy 2"
                pass

            # Strategy 2: an object that converts itself via `to_ibis_expr()`.
            # The capability is checked on the returned object; checking it on
            # `self` (which never has that method) would skip this strategy
            # for every expression.
            try:
                col_expr = expr_fn(None)
                if col_expr is not None and hasattr(col_expr, "to_ibis_expr"):
                    ibis_expressions.append(col_expr.to_ibis_expr(self.data_tbl))
            except Exception:  # pragma: no cover
                # Both strategies failed; leave this expression out
                pass

        if ibis_expressions:
            try:
                final_result = self._combine_with_and(ibis_expressions)
                return self.data_tbl.mutate(pb_is_good_=final_result)
            except Exception as e:  # pragma: no cover
                print(f"Error combining Ibis expressions: {e}")

        # Default case: nothing usable, so every row is marked as passing.
        # Ibis is imported only here because no other branch needs the module.
        import ibis

        return self.data_tbl.mutate(pb_is_good_=ibis.literal(True))
2166
+
2167
+
1980
2168
  @dataclass
1981
2169
  class NumberOfTestUnits:
1982
2170
  """
pointblank/_utils.py CHANGED
@@ -488,6 +488,7 @@ def _get_api_text() -> str:
488
488
  "Validate.col_schema_match",
489
489
  "Validate.row_count_match",
490
490
  "Validate.col_count_match",
491
+ "Validate.conjointly",
491
492
  ]
492
493
 
493
494
  column_selection_exported = [
@@ -499,6 +500,7 @@ def _get_api_text() -> str:
499
500
  "everything",
500
501
  "first_n",
501
502
  "last_n",
503
+ "expr_col",
502
504
  ]
503
505
 
504
506
  interrogation_exported = [