pointblank 0.8.5__py3-none-any.whl → 0.8.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pointblank/column.py CHANGED
@@ -15,6 +15,7 @@ __all__ = [
15
15
  "everything",
16
16
  "first_n",
17
17
  "last_n",
18
+ "expr_col",
18
19
  ]
19
20
 
20
21
 
@@ -234,18 +235,20 @@ def col(
234
235
  [`interrogate()`](`pointblank.Validate.interrogate`) is called), Pointblank will then check that
235
236
  the column exists in the input table.
236
237
 
238
+ For creating expressions to use with the `conjointly()` validation method, use the
239
+ [`expr_col()`](`pointblank.expr_col`) function instead.
240
+
237
241
  Parameters
238
242
  ----------
239
243
  exprs
240
244
  Either the name of a single column in the target table, provided as a string, or, an
241
245
  expression involving column selector functions (e.g., `starts_with("a")`,
242
- `ends_with("e") | starts_with("a")`, etc.). Please read the documentation for further
243
- details on which input forms are valid depending on the context.
246
+ `ends_with("e") | starts_with("a")`, etc.).
244
247
 
245
248
  Returns
246
249
  -------
247
- Column
248
- A `Column` object representing the column.
250
+ Column | ColumnLiteral | ColumnSelectorNarwhals:
251
+ A column object or expression representing the column reference.
249
252
 
250
253
  Usage with the `columns=` Argument
251
254
  -----------------------------------
@@ -496,6 +499,11 @@ def col(
496
499
  [`matches()`](`pointblank.matches`) column selector functions from Narwhals, combined with the
497
500
  `&` operator. This is necessary to specify the set of columns that are numeric *and* match the
498
501
  text `"2023"` or `"2024"`.
502
+
503
+ See Also
504
+ --------
505
+ Create a column expression for use in `conjointly()` validation with the
506
+ [`expr_col()`](`pointblank.expr_col`) function.
499
507
  """
500
508
  if isinstance(exprs, str):
501
509
  return ColumnLiteral(exprs=exprs)
@@ -1590,3 +1598,343 @@ def last_n(n: int, offset: int = 0) -> LastN:
1590
1598
  `paid_2022`, and `paid_2024`.
1591
1599
  """
1592
1600
  return LastN(n=n, offset=offset)
1601
+
1602
+
1603
class ColumnExpression:
    """
    A backend-agnostic column expression for use in `conjointly()` validation.

    Instances form a small expression tree. A leaf node carries only `column_name`; every other
    node carries an `operation` name plus a `left` operand and (for binary operations) a `right`
    operand. Trees are built with the overloaded Python operators (`>`, `<`, `==`, `!=`, `>=`,
    `<=`, `+`, `-`, `*`, `/`, `&`, `|`) and the `is_null()` / `is_not_null()` methods, and are
    turned into backend-specific objects by `to_polars_expr()`, `to_pandas_expr()`, and
    `to_ibis_expr()`.

    Notes
    -----
    `__eq__` and `__ne__` return new `ColumnExpression` nodes rather than booleans, so the result
    of `a == b` is always truthy and must not be used in an `if` test. Because `__eq__` is
    overridden without `__hash__`, instances are unhashable.
    """

    # Binary operations understood by every backend converter. Keeping them in one table
    # guarantees that the Polars, Pandas, and Ibis paths support exactly the same operators.
    _BINARY_OPS = {
        "gt": lambda lhs, rhs: lhs > rhs,
        "lt": lambda lhs, rhs: lhs < rhs,
        "eq": lambda lhs, rhs: lhs == rhs,
        "ne": lambda lhs, rhs: lhs != rhs,
        "ge": lambda lhs, rhs: lhs >= rhs,
        "le": lambda lhs, rhs: lhs <= rhs,
        "add": lambda lhs, rhs: lhs + rhs,
        "sub": lambda lhs, rhs: lhs - rhs,
        "mul": lambda lhs, rhs: lhs * rhs,
        "div": lambda lhs, rhs: lhs / rhs,
        "and": lambda lhs, rhs: lhs & rhs,
        "or": lambda lhs, rhs: lhs | rhs,
    }

    def __init__(self, column_name=None, operation=None, left=None, right=None):
        """
        Create an expression node.

        Parameters
        ----------
        column_name
            Name of the referenced column (set on leaf nodes).
        operation
            Name of the operation (`"gt"`, `"lt"`, `"add"`, `"is_null"`, etc.), or `None` for a
            plain column reference.
        left
            Left operand: a `ColumnExpression` or a literal value.
        right
            Right operand: a `ColumnExpression`, a string, a literal value, or `None` for unary
            operations. How a string is interpreted depends on the backend converter.
        """
        self.column_name = column_name
        self.operation = operation
        self.left = left
        self.right = right

    def _apply_binary(self, left_expr, right_expr):
        """
        Apply this node's binary operation to two already-resolved operands.

        Raises
        ------
        ValueError
            If `self.operation` is not one of the supported binary operations.
        """
        # The `isinstance` guard keeps unhashable or non-string operation values on the
        # `ValueError` path instead of failing inside the dictionary lookup.
        func = self._BINARY_OPS.get(self.operation) if isinstance(self.operation, str) else None
        if func is None:
            raise ValueError(f"Unsupported operation: {self.operation}")
        return func(left_expr, right_expr)

    def to_polars_expr(self):
        """
        Convert this expression to a Polars expression.

        A string used as the right operand is always treated as a column name.

        Raises
        ------
        ValueError
            If the node has neither an operation nor a column name, or if the operation is not
            supported.
        """
        import polars as pl

        if self.operation is None:
            # Leaf node: a plain column reference
            if self.column_name is not None:
                return pl.col(self.column_name)
            # This shouldn't happen in normal use
            raise ValueError("Invalid expression state: No operation or column name")

        # Unary null checks only use the left operand
        if self.operation in ("is_null", "is_not_null"):
            operand = self.left
            if isinstance(operand, ColumnExpression):
                operand = operand.to_polars_expr()
            return operand.is_null() if self.operation == "is_null" else operand.is_not_null()

        # Resolve the left operand: own column, nested expression, or literal value
        if self.left is None and self.column_name is not None:
            left_expr = pl.col(self.column_name)
        elif isinstance(self.left, ColumnExpression):
            left_expr = self.left.to_polars_expr()
        else:
            left_expr = self.left

        # Resolve the right operand: nested expression, column name, or literal value
        if isinstance(self.right, ColumnExpression):
            right_expr = self.right.to_polars_expr()
        elif isinstance(self.right, str):
            right_expr = pl.col(self.right)
        else:
            right_expr = self.right

        return self._apply_binary(left_expr, right_expr)

    def to_pandas_expr(self, df):
        """
        Evaluate this expression against a Pandas DataFrame.

        A string operand is replaced by the column of that name only when it is present in
        `df.columns`; otherwise it is used as a literal value.

        Parameters
        ----------
        df
            The DataFrame whose columns the expression refers to.

        Raises
        ------
        NotImplementedError
            If the expression is an `is_null()` or `is_not_null()` check.
        ValueError
            If the operation is not supported.
        """
        # Null checks are deliberately rejected for this backend
        if self.operation == "is_null":
            raise NotImplementedError(
                "is_null() is not supported with pandas DataFrames. "
                "Please use native pandas syntax with pd.isna() instead: "
                "lambda df: pd.isna(df['column_name'])"
            )

        if self.operation == "is_not_null":
            raise NotImplementedError(
                "is_not_null() is not supported with pandas DataFrames. "
                "Please use native pandas syntax with ~pd.isna() instead: "
                "lambda df: ~pd.isna(df['column_name'])"
            )

        # Leaf node: a plain column reference
        if self.operation is None and self.column_name is not None:
            return df[self.column_name]

        left_expr = self.left
        if isinstance(left_expr, ColumnExpression):
            left_expr = left_expr.to_pandas_expr(df)
        elif isinstance(left_expr, str) and left_expr in df.columns:
            left_expr = df[left_expr]

        right_expr = self.right
        if isinstance(right_expr, ColumnExpression):
            right_expr = right_expr.to_pandas_expr(df)
        elif isinstance(right_expr, str) and right_expr in df.columns:
            right_expr = df[right_expr]

        # Includes "and" / "or", so expressions built with `&` and `|` evaluate here just as
        # they do in the other backend converters.
        return self._apply_binary(left_expr, right_expr)

    def to_ibis_expr(self, table):
        """
        Convert this expression to an Ibis expression.

        A string used as the right operand is treated as a column name only when it is present
        in `table.columns`; otherwise it is used as a literal value.

        Parameters
        ----------
        table
            The table whose columns the expression refers to; it must support `table[name]` and
            expose `.columns`.

        Raises
        ------
        ValueError
            If the node has neither an operation nor a column name, or if the operation is not
            supported.
        """
        if self.operation is None:
            # Leaf node: a plain column reference
            if self.column_name is not None:
                return table[self.column_name]
            # This shouldn't happen in normal use
            raise ValueError("Invalid expression state: No operation or column name")

        # Unary null checks only use the left operand
        if self.operation in ("is_null", "is_not_null"):
            operand = self.left
            if isinstance(operand, ColumnExpression):
                operand = operand.to_ibis_expr(table)
            return operand.isnull() if self.operation == "is_null" else ~operand.isnull()

        # Resolve the left operand: own column, nested expression, or literal value
        if self.left is None and self.column_name is not None:
            left_expr = table[self.column_name]
        elif isinstance(self.left, ColumnExpression):
            left_expr = self.left.to_ibis_expr(table)
        else:
            left_expr = self.left

        # Resolve the right operand: nested expression, existing column name, or literal value
        if isinstance(self.right, ColumnExpression):
            right_expr = self.right.to_ibis_expr(table)
        elif isinstance(self.right, str) and self.right in table.columns:
            right_expr = table[self.right]
        else:
            right_expr = self.right

        return self._apply_binary(left_expr, right_expr)

    def __gt__(self, other):
        """Build a `self > other` expression."""
        return ColumnExpression(operation="gt", left=self, right=other)

    def __lt__(self, other):
        """Build a `self < other` expression."""
        return ColumnExpression(operation="lt", left=self, right=other)

    def __eq__(self, other):
        """Build a `self == other` expression (returns an expression, not a bool)."""
        return ColumnExpression(operation="eq", left=self, right=other)

    def __ne__(self, other):
        """Build a `self != other` expression (returns an expression, not a bool)."""
        return ColumnExpression(operation="ne", left=self, right=other)

    def __ge__(self, other):
        """Build a `self >= other` expression."""
        return ColumnExpression(operation="ge", left=self, right=other)

    def __le__(self, other):
        """Build a `self <= other` expression."""
        return ColumnExpression(operation="le", left=self, right=other)

    def __add__(self, other):
        """Build a `self + other` expression."""
        return ColumnExpression(operation="add", left=self, right=other)

    def __radd__(self, other):
        """Build an `other + self` expression for a non-expression left operand."""
        return ColumnExpression(operation="add", left=other, right=self)

    def __sub__(self, other):
        """Build a `self - other` expression."""
        return ColumnExpression(operation="sub", left=self, right=other)

    def __rsub__(self, other):
        """Build an `other - self` expression for a non-expression left operand."""
        return ColumnExpression(operation="sub", left=other, right=self)

    def __mul__(self, other):
        """Build a `self * other` expression."""
        return ColumnExpression(operation="mul", left=self, right=other)

    def __rmul__(self, other):
        """Build an `other * self` expression for a non-expression left operand."""
        return ColumnExpression(operation="mul", left=other, right=self)

    def __truediv__(self, other):
        """Build a `self / other` expression."""
        return ColumnExpression(operation="div", left=self, right=other)

    def __rtruediv__(self, other):
        """Build an `other / self` expression for a non-expression left operand."""
        return ColumnExpression(operation="div", left=other, right=self)

    def is_null(self):
        """Check if values are null."""
        return ColumnExpression(operation="is_null", left=self, right=None)

    def is_not_null(self):
        """Check if values are not null."""
        return ColumnExpression(operation="is_not_null", left=self, right=None)

    def __or__(self, other):
        """Logical OR operation."""
        return ColumnExpression(operation="or", left=self, right=other)

    def __ror__(self, other):
        """Logical OR operation for a non-expression left operand."""
        return ColumnExpression(operation="or", left=other, right=self)

    def __and__(self, other):
        """Logical AND operation."""
        return ColumnExpression(operation="and", left=self, right=other)

    def __rand__(self, other):
        """Logical AND operation for a non-expression left operand."""
        return ColumnExpression(operation="and", left=other, right=self)
1870
+
1871
+
1872
def expr_col(column_name: str) -> ColumnExpression:
    """
    Build a column expression for `conjointly()` validation steps.

    The returned `ColumnExpression` refers to a single column by name and can be combined with
    operators such as `>`, `<`, and `+` to form the expressions passed to
    [`conjointly()`](`pointblank.Validate.conjointly`).

    Parameters
    ----------
    column_name
        The name of the column to reference.

    Returns
    -------
    ColumnExpression
        A column expression that can be used in comparisons and operations.

    Examples
    --------
    ```{python}
    #| echo: false
    #| output: false
    import pointblank as pb
    pb.config(report_incl_header=False, report_incl_footer=False, preview_incl_header=False)
    ```
    Suppose a table has three columns, `a`, `b`, and `c`, and we want to check that:

    - The values in column `a` are greater than `2`.
    - The values in column `b` are less than `7`.
    - The sum of columns `a` and `b` is less than the values in column `c`.

    Each condition can be written as a column expression with `expr_col()`.

    ```{python}
    import pointblank as pb
    import polars as pl

    tbl = pl.DataFrame(
        {
            "a": [5, 7, 1, 3, 9, 4],
            "b": [6, 3, 0, 5, 8, 2],
            "c": [10, 4, 8, 9, 10, 5],
        }
    )

    # Using expr_col() to create backend-agnostic validation expressions
    validation = (
        pb.Validate(data=tbl)
        .conjointly(
            lambda df: pb.expr_col("a") > 2,
            lambda df: pb.expr_col("b") < 7,
            lambda df: pb.expr_col("a") + pb.expr_col("b") < pb.expr_col("c")
        )
        .interrogate()
    )

    validation
    ```

    This builds a validation object that checks the three conditions expressed with
    `expr_col()`. The validation table that results shows whether each condition held for each
    row of the table.

    See Also
    --------
    The [`conjointly()`](`pointblank.Validate.conjointly`) validation method, which is where this
    function should be used.
    """
    # A leaf expression: only the column name is set, no operation or operands
    column_reference = ColumnExpression(column_name=column_name)
    return column_reference