pointblank 0.8.4__py3-none-any.whl → 0.8.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pointblank/__init__.py +2 -0
- pointblank/_constants.py +13 -0
- pointblank/_constants_translations.py +216 -0
- pointblank/_interrogation.py +182 -0
- pointblank/_utils.py +2 -0
- pointblank/column.py +352 -4
- pointblank/data/api-docs.txt +270 -4
- pointblank/validate.py +462 -5
- pointblank-0.8.6.dist-info/METADATA +312 -0
- {pointblank-0.8.4.dist-info → pointblank-0.8.6.dist-info}/RECORD +13 -13
- pointblank-0.8.4.dist-info/METADATA +0 -269
- {pointblank-0.8.4.dist-info → pointblank-0.8.6.dist-info}/WHEEL +0 -0
- {pointblank-0.8.4.dist-info → pointblank-0.8.6.dist-info}/licenses/LICENSE +0 -0
- {pointblank-0.8.4.dist-info → pointblank-0.8.6.dist-info}/top_level.txt +0 -0
pointblank/column.py
CHANGED
|
@@ -15,6 +15,7 @@ __all__ = [
|
|
|
15
15
|
"everything",
|
|
16
16
|
"first_n",
|
|
17
17
|
"last_n",
|
|
18
|
+
"expr_col",
|
|
18
19
|
]
|
|
19
20
|
|
|
20
21
|
|
|
@@ -234,18 +235,20 @@ def col(
|
|
|
234
235
|
[`interrogate()`](`pointblank.Validate.interrogate`) is called), Pointblank will then check that
|
|
235
236
|
the column exists in the input table.
|
|
236
237
|
|
|
238
|
+
For creating expressions to use with the `conjointly()` validation method, use the
|
|
239
|
+
[`expr_col()`](`pointblank.expr_col`) function instead.
|
|
240
|
+
|
|
237
241
|
Parameters
|
|
238
242
|
----------
|
|
239
243
|
exprs
|
|
240
244
|
Either the name of a single column in the target table, provided as a string, or, an
|
|
241
245
|
expression involving column selector functions (e.g., `starts_with("a")`,
|
|
242
|
-
`ends_with("e") | starts_with("a")`, etc.).
|
|
243
|
-
details on which input forms are valid depending on the context.
|
|
246
|
+
`ends_with("e") | starts_with("a")`, etc.).
|
|
244
247
|
|
|
245
248
|
Returns
|
|
246
249
|
-------
|
|
247
|
-
Column
|
|
248
|
-
A
|
|
250
|
+
Column | ColumnLiteral | ColumnSelectorNarwhals:
|
|
251
|
+
A column object or expression representing the column reference.
|
|
249
252
|
|
|
250
253
|
Usage with the `columns=` Argument
|
|
251
254
|
-----------------------------------
|
|
@@ -496,6 +499,11 @@ def col(
|
|
|
496
499
|
[`matches()`](`pointblank.matches`) column selector functions from Narwhals, combined with the
|
|
497
500
|
`&` operator. This is necessary to specify the set of columns that are numeric *and* match the
|
|
498
501
|
text `"2023"` or `"2024"`.
|
|
502
|
+
|
|
503
|
+
See Also
|
|
504
|
+
--------
|
|
505
|
+
Create a column expression for use in `conjointly()` validation with the
|
|
506
|
+
[`expr_col()`](`pointblank.expr_col`) function.
|
|
499
507
|
"""
|
|
500
508
|
if isinstance(exprs, str):
|
|
501
509
|
return ColumnLiteral(exprs=exprs)
|
|
@@ -1590,3 +1598,343 @@ def last_n(n: int, offset: int = 0) -> LastN:
|
|
|
1590
1598
|
`paid_2022`, and `paid_2024`.
|
|
1591
1599
|
"""
|
|
1592
1600
|
return LastN(n=n, offset=offset)
|
|
1601
|
+
|
|
1602
|
+
|
|
1603
|
+
class ColumnExpression:
    """
    A class representing a column expression for use in conjointly() validation.

    Instances form a small expression tree. A leaf node carries only `column_name`; every other
    node carries an `operation` together with a `left` operand and (for binary operations) a
    `right` operand. The tree is built with Python operators (`>`, `<`, `==`, `!=`, `>=`, `<=`,
    `+`, `-`, `*`, `/`, `&`, `|`) plus the `is_null()` / `is_not_null()` methods, and is later
    translated to a backend-specific expression by `to_polars_expr()`, `to_pandas_expr()` or
    `to_ibis_expr()`.

    Notes
    -----
    - `==` and `!=` are overloaded to build expression nodes, so comparing two instances does
      not yield a boolean, and (as for any class defining `__eq__` without `__hash__`) instances
      are not hashable.
    - A string used as an operand is interpreted as a column name: always for the right operand
      with Polars, and only when it names an existing column with Pandas and Ibis.
    """

    # Single source of truth for the binary operations understood by every backend. The
    # operators are applied to whatever objects the backend produced (Polars expressions,
    # Pandas Series, Ibis expressions or plain literals), so one table serves all three.
    _BINARY_OPS = {
        "gt": lambda lhs, rhs: lhs > rhs,
        "lt": lambda lhs, rhs: lhs < rhs,
        "eq": lambda lhs, rhs: lhs == rhs,
        "ne": lambda lhs, rhs: lhs != rhs,
        "ge": lambda lhs, rhs: lhs >= rhs,
        "le": lambda lhs, rhs: lhs <= rhs,
        "add": lambda lhs, rhs: lhs + rhs,
        "sub": lambda lhs, rhs: lhs - rhs,
        "mul": lambda lhs, rhs: lhs * rhs,
        "div": lambda lhs, rhs: lhs / rhs,
        "and": lambda lhs, rhs: lhs & rhs,
        "or": lambda lhs, rhs: lhs | rhs,
    }

    def __init__(self, column_name=None, operation=None, left=None, right=None):
        self.column_name = column_name  # Name of the column (for leaf nodes)
        self.operation = operation  # Operation type (gt, lt, add, etc.)
        self.left = left  # Left operand (ColumnExpression or None for column reference)
        self.right = right  # Right operand (ColumnExpression, value, or None)

    def _apply_binary(self, left_expr, right_expr):
        """
        Apply this node's binary operation to two already-resolved operands.

        Raises
        ------
        ValueError
            If `self.operation` is not one of the keys of `_BINARY_OPS`.
        """
        try:
            operator_fn = self._BINARY_OPS[self.operation]
        except (KeyError, TypeError):
            # TypeError covers an unhashable `operation`; both cases are simply "unsupported"
            raise ValueError(f"Unsupported operation: {self.operation}") from None
        return operator_fn(left_expr, right_expr)

    def to_polars_expr(self):
        """
        Convert this expression to a Polars expression.

        Raises
        ------
        ValueError
            If the node has neither an operation nor a column name, or if the operation is not
            supported.
        """
        import polars as pl

        # Base case: simple column reference
        if self.operation is None and self.column_name is not None:
            return pl.col(self.column_name)

        # Handle unary operations like is_null
        if self.operation in ("is_null", "is_not_null"):
            left_expr = self.left
            if isinstance(left_expr, ColumnExpression):
                left_expr = left_expr.to_polars_expr()
            if self.operation == "is_null":
                return left_expr.is_null()
            return left_expr.is_not_null()

        if self.operation is None:
            # This shouldn't happen in normal use
            raise ValueError("Invalid expression state: No operation or column name")

        # Get the left operand
        if self.left is None and self.column_name is not None:
            # Column name as left operand
            left_expr = pl.col(self.column_name)  # pragma: no cover
        elif isinstance(self.left, ColumnExpression):
            # Nested expression as left operand
            left_expr = self.left.to_polars_expr()  # pragma: no cover
        else:
            # Literal value as left operand
            left_expr = self.left  # pragma: no cover

        # Get the right operand
        if isinstance(self.right, ColumnExpression):
            # Nested expression as right operand
            right_expr = self.right.to_polars_expr()  # pragma: no cover
        elif isinstance(self.right, str):
            # Column name as right operand (no table is available here to check membership,
            # so every string is taken to be a column name)
            right_expr = pl.col(self.right)  # pragma: no cover
        else:
            # Literal value as right operand
            right_expr = self.right  # pragma: no cover

        # Apply the operation
        return self._apply_binary(left_expr, right_expr)

    def to_pandas_expr(self, df):
        """
        Evaluate this expression against a Pandas DataFrame.

        Parameters
        ----------
        df
            The Pandas DataFrame whose columns the expression refers to.

        Returns
        -------
        The result of applying the expression to the columns of `df` (a boolean Series for
        comparison and logical operations).

        Raises
        ------
        NotImplementedError
            If the expression uses `is_null()` or `is_not_null()`, which are not supported for
            Pandas DataFrames.
        ValueError
            If the operation is missing or not supported.
        """

        # Handle is_null as a special case - but raise an error
        if self.operation == "is_null":
            raise NotImplementedError(
                "is_null() is not supported with pandas DataFrames. "
                "Please use native pandas syntax with pd.isna() instead: "
                "lambda df: pd.isna(df['column_name'])"
            )

        if self.operation == "is_not_null":
            raise NotImplementedError(
                "is_not_null() is not supported with pandas DataFrames. "
                "Please use native pandas syntax with ~pd.isna() instead: "
                "lambda df: ~pd.isna(df['column_name'])"
            )

        # Base case: simple column reference
        if self.operation is None and self.column_name is not None:
            return df[self.column_name]

        # For other operations, recursively process operands
        left_expr = self.left
        if isinstance(left_expr, ColumnExpression):
            left_expr = left_expr.to_pandas_expr(df)
        elif isinstance(left_expr, str) and left_expr in df.columns:  # pragma: no cover
            left_expr = df[left_expr]

        right_expr = self.right
        if isinstance(right_expr, ColumnExpression):
            right_expr = right_expr.to_pandas_expr(df)
        elif isinstance(right_expr, str) and right_expr in df.columns:  # pragma: no cover
            right_expr = df[right_expr]

        # Apply the operation. The shared table includes "and"/"or", so expressions built with
        # `&` and `|` evaluate here exactly as they do for the Polars and Ibis backends.
        return self._apply_binary(left_expr, right_expr)

    def to_ibis_expr(self, table):
        """
        Convert this expression to an Ibis expression.

        Parameters
        ----------
        table
            The Ibis table whose columns the expression refers to.

        Raises
        ------
        ValueError
            If the node has neither an operation nor a column name, or if the operation is not
            supported.
        """

        # Base case: simple column reference
        if self.operation is None and self.column_name is not None:
            return table[self.column_name]

        # Handle unary operations
        if self.operation in ("is_null", "is_not_null"):
            left_expr = self.left
            if isinstance(left_expr, ColumnExpression):
                left_expr = left_expr.to_ibis_expr(table)
            if self.operation == "is_null":
                return left_expr.isnull()
            return ~left_expr.isnull()

        if self.operation is None:
            # This shouldn't happen in normal use
            raise ValueError("Invalid expression state: No operation or column name")

        # Get the left operand
        if self.left is None and self.column_name is not None:
            # Column name as left operand
            left_expr = table[self.column_name]  # pragma: no cover
        elif isinstance(self.left, ColumnExpression):
            # Nested expression as left operand
            left_expr = self.left.to_ibis_expr(table)  # pragma: no cover
        else:
            # Literal value as left operand
            left_expr = self.left  # pragma: no cover

        # Get the right operand
        if isinstance(self.right, ColumnExpression):
            # Nested expression as right operand
            right_expr = self.right.to_ibis_expr(table)  # pragma: no cover
        elif isinstance(self.right, str) and self.right in table.columns:
            # Column name as right operand
            right_expr = table[self.right]  # pragma: no cover
        else:
            # Literal value as right operand
            right_expr = self.right  # pragma: no cover

        # Apply the operation
        return self._apply_binary(left_expr, right_expr)

    def __gt__(self, other):
        return ColumnExpression(operation="gt", left=self, right=other)

    def __lt__(self, other):
        return ColumnExpression(operation="lt", left=self, right=other)

    def __eq__(self, other):
        return ColumnExpression(operation="eq", left=self, right=other)

    def __ne__(self, other):
        return ColumnExpression(operation="ne", left=self, right=other)

    def __ge__(self, other):
        return ColumnExpression(operation="ge", left=self, right=other)

    def __le__(self, other):
        return ColumnExpression(operation="le", left=self, right=other)

    def __add__(self, other):
        return ColumnExpression(operation="add", left=self, right=other)

    def __sub__(self, other):
        return ColumnExpression(operation="sub", left=self, right=other)

    def __mul__(self, other):
        return ColumnExpression(operation="mul", left=self, right=other)

    def __truediv__(self, other):
        return ColumnExpression(operation="div", left=self, right=other)

    # Reflected arithmetic: lets a plain value appear on the left, e.g. `10 - expr_col("a")`.
    # The value is stored as the left operand, which every backend treats as a literal.
    def __radd__(self, other):
        return ColumnExpression(operation="add", left=other, right=self)

    def __rsub__(self, other):
        return ColumnExpression(operation="sub", left=other, right=self)

    def __rmul__(self, other):
        return ColumnExpression(operation="mul", left=other, right=self)

    def __rtruediv__(self, other):
        return ColumnExpression(operation="div", left=other, right=self)

    def is_null(self):
        """Check if values are null."""
        return ColumnExpression(operation="is_null", left=self, right=None)

    def is_not_null(self):
        """Check if values are not null."""
        return ColumnExpression(operation="is_not_null", left=self, right=None)

    def __or__(self, other):
        """Logical OR operation."""
        return ColumnExpression(operation="or", left=self, right=other)

    def __and__(self, other):
        """Logical AND operation."""
        return ColumnExpression(operation="and", left=self, right=other)
|
|
1870
|
+
|
|
1871
|
+
|
|
1872
|
+
def expr_col(column_name: str) -> ColumnExpression:
    """
    Create a column expression for use in `conjointly()` validation.

    The object returned by this function refers to a single column by name and can be combined
    with operators such as `>`, `<`, and `+` to build the expressions passed to the
    [`conjointly()`](`pointblank.Validate.conjointly`) validation method.

    Parameters
    ----------
    column_name
        The name of the column to reference.

    Returns
    -------
    ColumnExpression
        A column expression that can be used in comparisons and operations.

    Examples
    --------
    ```{python}
    #| echo: false
    #| output: false
    import pointblank as pb
    pb.config(report_incl_header=False, report_incl_footer=False, preview_incl_header=False)
    ```
    Suppose a table has the three columns `a`, `b`, and `c`, and we want to validate that:

    - The values in column `a` are greater than `2`.
    - The values in column `b` are less than `7`.
    - The sum of columns `a` and `b` is less than the values in column `c`.

    Each of these conditions can be written as a column expression built with `expr_col()`.

    ```{python}
    import pointblank as pb
    import polars as pl

    tbl = pl.DataFrame(
        {
            "a": [5, 7, 1, 3, 9, 4],
            "b": [6, 3, 0, 5, 8, 2],
            "c": [10, 4, 8, 9, 10, 5],
        }
    )

    # Using expr_col() to create backend-agnostic validation expressions
    validation = (
        pb.Validate(data=tbl)
        .conjointly(
            lambda df: pb.expr_col("a") > 2,
            lambda df: pb.expr_col("b") < 7,
            lambda df: pb.expr_col("a") + pb.expr_col("b") < pb.expr_col("c")
        )
        .interrogate()
    )

    validation
    ```

    The code above builds a validation in which every condition is expressed through
    `expr_col()`. The resulting validation table reports the outcome of checking these
    conditions against the rows of the table.

    See Also
    --------
    The [`conjointly()`](`pointblank.Validate.conjointly`) validation method, which is where this
    function should be used.
    """
    # A leaf node: only the column name is set, no operation or operands
    column_reference = ColumnExpression(column_name=column_name)
    return column_reference
|