pointblank 0.12.2__py3-none-any.whl → 0.13.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pointblank/_interrogation.py +11 -17
- pointblank/validate.py +110 -139
- pointblank/yaml.py +0 -20
- {pointblank-0.12.2.dist-info → pointblank-0.13.1.dist-info}/METADATA +8 -1
- {pointblank-0.12.2.dist-info → pointblank-0.13.1.dist-info}/RECORD +9 -9
- {pointblank-0.12.2.dist-info → pointblank-0.13.1.dist-info}/WHEEL +0 -0
- {pointblank-0.12.2.dist-info → pointblank-0.13.1.dist-info}/entry_points.txt +0 -0
- {pointblank-0.12.2.dist-info → pointblank-0.13.1.dist-info}/licenses/LICENSE +0 -0
- {pointblank-0.12.2.dist-info → pointblank-0.13.1.dist-info}/top_level.txt +0 -0
pointblank/_interrogation.py
CHANGED
@@ -1388,6 +1388,17 @@ class RowsDistinct:
     def get_test_results(self):
         return self.test_unit_res
 
+    def test(self):
+        # Get the number of failing test units by counting instances of `False` in the `pb_is_good_`
+        # column and then determine if the test passes overall by comparing the number of failing
+        # test units to the threshold for failing test units
+
+        results_list = nw.from_native(self.test_unit_res)["pb_is_good_"].to_list()
+
+        return _threshold_check(
+            failing_test_units=results_list.count(False), threshold=self.threshold
+        )
+
 
 @dataclass
 class RowsComplete:
@@ -2029,23 +2040,6 @@ def _column_has_null_values(table: FrameT, column: str) -> bool:
     return True
 
 
-def _check_nulls_across_columns_ibis(table, columns_subset):
-    # Get all column names from the table
-    column_names = columns_subset if columns_subset else table.columns
-
-    # Build the expression by combining each column's isnull() with OR operations
-    null_expr = functools.reduce(
-        lambda acc, col: acc | table[col].isnull() if acc is not None else table[col].isnull(),
-        column_names,
-        None,
-    )
-
-    # Add the expression as a new column to the table
-    result = table.mutate(_any_is_null_=null_expr)
-
-    return result
-
-
 def _check_nulls_across_columns_nw(table, columns_subset):
     # Get all column names from the table
     column_names = columns_subset if columns_subset else table.columns
pointblank/validate.py
CHANGED
@@ -740,9 +740,9 @@ def _process_data(data: FrameT | Any) -> FrameT | Any:
     """
     Centralized data processing pipeline that handles all supported input types.
 
-    This function consolidates the data processing pipeline used across multiple
-
-
+    This function consolidates the data processing pipeline used across multiple classes and
+    functions in Pointblank. It processes data through a consistent sequence of transformations to
+    handle different data source types.
 
     The processing order is important:
 
@@ -829,7 +829,9 @@ def _process_github_url(data: FrameT | Any) -> FrameT | Any:
     # Parse the URL to check if it's a GitHub URL
     try:
         parsed = urlparse(data)
-    except
+    except ValueError:
+        # urlparse can raise ValueError for malformed URLs (e.g., invalid IPv6)
+        # Return original data as it's likely not a GitHub URL we can process
         return data
 
     # Check if it's a GitHub URL (standard or raw)
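For context on the new except ValueError branch: urllib.parse.urlparse() tolerates most malformed input, but it does raise ValueError in a few cases, such as an unterminated IPv6 bracket, which is what this fallback now catches:

from urllib.parse import urlparse

urlparse("https://github.com/user/repo/blob/main/data.csv")  # parses without error
try:
    urlparse("http://[::1")  # unterminated IPv6 literal
except ValueError as err:
    print(err)  # Invalid IPv6 URL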
@@ -881,13 +883,10 @@ def _process_github_url(data: FrameT | Any) -> FrameT | Any:
             else:  # .parquet
                 return _process_parquet_input(tmp_file_path)
 
-        except Exception:
+        except Exception:  # pragma: no cover
             # If download or processing fails, return original data
             return data
 
-    except Exception as e:
-        raise RuntimeError(f"Failed to download or process GitHub file from {raw_url}: {e}") from e
-
 
 def _process_connection_string(data: FrameT | Any) -> FrameT | Any:
     """
@@ -943,8 +942,7 @@ def _process_csv_input(data: FrameT | Any) -> FrameT | Any:
     if not csv_path.exists():
         raise FileNotFoundError(f"CSV file not found: {csv_path}")
 
-    # Determine which library to use for reading CSV
-    # Prefer Polars, fallback to Pandas
+    # Determine which library to use for reading CSV: prefer Polars but fallback to Pandas
     if _is_lib_present(lib_name="polars"):
         try:
             import polars as pl
@@ -956,7 +954,7 @@ def _process_csv_input(data: FrameT | Any) -> FrameT | Any:
                 import pandas as pd
 
                 return pd.read_csv(csv_path)
-            else:
+            else:  # pragma: no cover
                 raise RuntimeError(
                     f"Failed to read CSV file with Polars: {e}. "
                     "Pandas is not available as fallback."
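The CSV reader keeps its "prefer Polars, fall back to Pandas" strategy; the change above only folds the comment into one line and excludes the no-fallback error branch from coverage. A simplified sketch of the selection logic (using stdlib importlib rather than pointblank's _is_lib_present() helper):

import importlib.util

def read_csv_any(path: str):
    # Prefer Polars when it is installed; otherwise fall back to Pandas
    if importlib.util.find_spec("polars") is not None:
        import polars as pl
        return pl.read_csv(path)
    import pandas as pd
    return pd.read_csv(path)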
@@ -1093,7 +1091,7 @@ def _process_parquet_input(data: FrameT | Any) -> FrameT | Any:
                 # Multiple files: concatenate them
                 dfs = [pd.read_parquet(path) for path in parquet_paths]
                 return pd.concat(dfs, ignore_index=True)
-            else:
+            else:  # pragma: no cover
                 raise RuntimeError(
                     f"Failed to read Parquet file(s) with Polars: {e}. "
                     "Pandas is not available as fallback."
@@ -1615,24 +1613,9 @@ def _generate_display_table(
     # This is used to highlight these values in the table
     if df_lib_name_gt == "polars":
         none_values = {k: data[k].is_null().to_list() for k in col_names}
-    elif df_lib_name_gt == "pyspark":
-        # For PySpark, check if data has been converted to pandas already
-        if hasattr(data, "isnull"):
-            # Data has been converted to pandas
-            none_values = {k: data[k].isnull() for k in col_names}
-        else:
-            # Data is still a PySpark DataFrame - use narwhals
-            import narwhals as nw
-
-            df_nw = nw.from_native(data)
-            none_values = {}
-            for col in col_names:
-                # Get null mask, collect to pandas, then convert to list
-                null_mask = (
-                    df_nw.select(nw.col(col).is_null()).collect().to_pandas().iloc[:, 0].tolist()
-                )
-                none_values[col] = null_mask
     else:
+        # PySpark data has been converted to Pandas by this point so the 'isnull()'
+        # method can be used
         none_values = {k: data[k].isnull() for k in col_names}
 
     none_values = [(k, i) for k, v in none_values.items() for i, val in enumerate(v) if val]
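The simplification above works because PySpark inputs have already been converted to Pandas by the time null masks are computed, leaving just two APIs to support. For reference, the two null-mask calls behave identically on equivalent series:

import pandas as pd
import polars as pl

pd_series = pd.Series([1.0, None, 3.0])
pl_series = pl.Series([1.0, None, 3.0])

print(pd_series.isnull().tolist())    # [False, True, False]
print(pl_series.is_null().to_list())  # [False, True, False]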
@@ -1980,59 +1963,68 @@ def missing_vals_tbl(data: FrameT | Any) -> GT:
 
     # Use the `row_ranges` list of lists to query, for each column, the proportion of missing
     # values in each 'sector' of the table (a sector is a range of rows)
-
-
-
-
-
-
-
-
-
-
-
-
-                    )
-                    for i in range(len(cut_points))
-                ]
-                + [
-                    (
-                        data[cut_points[-1] : n_rows][col].isnull().sum().to_polars()
-                        / (n_rows - cut_points[-1])
-                        * 100
-                        if n_rows > cut_points[-1]
-                        else 0
-                    )
-                ]
-                for col in data.columns
-            }
+    def _calculate_missing_proportions(use_polars_conversion: bool = False):
+        """
+        Calculate missing value proportions for each column and sector.
+
+        Parameters
+        ----------
+        use_polars_conversion
+            If True, use `.to_polars()` for conversions, otherwise use `.to_pandas()`
+        """
+        missing_vals = {}
+        for col in data.columns:
+            col_missing_props = []
 
+            # Calculate missing value proportions for each sector
+            for i in range(len(cut_points)):
+                start_row = cut_points[i - 1] if i > 0 else 0
+                end_row = cut_points[i]
+                sector_size = end_row - start_row
+
+                if sector_size > 0:
+                    sector_data = data[start_row:end_row][col]
+                    null_sum = sector_data.isnull().sum()
+
+                    # Apply the appropriate conversion method
+                    if use_polars_conversion:
+                        null_sum_converted = null_sum.to_polars()
+                    else:
+                        null_sum_converted = null_sum.to_pandas()
+
+                    missing_prop = (null_sum_converted / sector_size) * 100
+                    col_missing_props.append(missing_prop)
+                else:
+                    col_missing_props.append(0)
+
+            # Handle the final sector (after last cut point)
+            if n_rows > cut_points[-1]:
+                start_row = cut_points[-1]
+                sector_size = n_rows - start_row
+
+                sector_data = data[start_row:n_rows][col]
+                null_sum = sector_data.isnull().sum()
+
+                # Apply the appropriate conversion method
+                if use_polars_conversion:
+                    null_sum_converted = null_sum.to_polars()
+                else:
+                    null_sum_converted = null_sum.to_pandas()
+
+                missing_prop = (null_sum_converted / sector_size) * 100
+                col_missing_props.append(missing_prop)
+            else:
+                col_missing_props.append(0)  # pragma: no cover
+
+            missing_vals[col] = col_missing_props
+
+        return missing_vals
+
+    # Use the helper function based on the DataFrame library
+    if df_lib_name_gt == "polars":
+        missing_vals = _calculate_missing_proportions(use_polars_conversion=True)
     else:
-        missing_vals = {
-            col: [
-                (
-                    data[(cut_points[i - 1] if i > 0 else 0) : cut_points[i]][col]
-                    .isnull()
-                    .sum()
-                    .to_pandas()
-                    / (cut_points[i] - (cut_points[i - 1] if i > 0 else 0))
-                    * 100
-                    if cut_points[i] > (cut_points[i - 1] if i > 0 else 0)
-                    else 0
-                )
-                for i in range(len(cut_points))
-            ]
-            + [
-                (
-                    data[cut_points[-1] : n_rows][col].isnull().sum().to_pandas()
-                    / (n_rows - cut_points[-1])
-                    * 100
-                    if n_rows > cut_points[-1]
-                    else 0
-                )
-            ]
-            for col in data.columns
-        }
+        missing_vals = _calculate_missing_proportions(use_polars_conversion=False)
 
     # Pivot the `missing_vals` dictionary to create a table with the missing value proportions
     missing_vals = {
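The new _calculate_missing_proportions() helper replaces two near-identical dict comprehensions with one loop over row "sectors". A self-contained sketch of the sector arithmetic on a plain Python list (the real helper slices DataFrames and converts null sums via .to_polars() or .to_pandas()):

def sector_missing_pct(values, cut_points):
    # Percentage of missing values in each sector [start, end)
    bounds = [0, *cut_points, len(values)]
    return [
        (sum(v is None for v in values[start:end]) / (end - start)) * 100 if end > start else 0
        for start, end in zip(bounds, bounds[1:])
    ]

print(sector_missing_pct([1, None, 3, None, None, 6], cut_points=[2, 4]))
# [50.0, 50.0, 50.0]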
@@ -2053,16 +2045,17 @@ def missing_vals_tbl(data: FrameT | Any) -> GT:
     # Get the column names from the table
     col_names = list(data.columns)
 
-    #
-
-
-
+    # Helper function for DataFrame missing value calculation (Polars/Pandas)
+    def _calculate_missing_proportions_dataframe(is_polars=False):
+        null_method = "is_null" if is_polars else "isnull"
+
         missing_vals = {
             col: [
                 (
-
-
-
+                    getattr(
+                        data[(cut_points[i - 1] if i > 0 else 0) : cut_points[i]][col],
+                        null_method,
+                    )().sum()
                     / (cut_points[i] - (cut_points[i - 1] if i > 0 else 0))
                     * 100
                     if cut_points[i] > (cut_points[i - 1] if i > 0 else 0)
@@ -2072,7 +2065,7 @@ def missing_vals_tbl(data: FrameT | Any) -> GT:
                 ]
                 + [
                     (
-                        data[cut_points[-1] : n_rows][col]
+                        getattr(data[cut_points[-1] : n_rows][col], null_method)().sum()
                         / (n_rows - cut_points[-1])
                         * 100
                         if n_rows > cut_points[-1]
@@ -2082,7 +2075,8 @@ def missing_vals_tbl(data: FrameT | Any) -> GT:
                 for col in data.columns
             }
 
-        missing_vals = {
+        # Transform to the expected format
+        formatted_missing_vals = {
             "columns": list(missing_vals.keys()),
             **{
                 str(i + 1): [missing_vals[col][i] for col in missing_vals.keys()]
@@ -2091,48 +2085,25 @@ def missing_vals_tbl(data: FrameT | Any) -> GT:
             }
 
         # Get a dictionary of counts of missing values in each column
-        missing_val_counts = {
-
-        if "pandas" in tbl_type:
-            missing_vals = {
-                col: [
-                    (
-                        data[(cut_points[i - 1] if i > 0 else 0) : cut_points[i]][col]
-                        .isnull()
-                        .sum()
-                        / (cut_points[i] - (cut_points[i - 1] if i > 0 else 0))
-                        * 100
-                        if cut_points[i] > (cut_points[i - 1] if i > 0 else 0)
-                        else 0
-                    )
-                    for i in range(len(cut_points))
-                ]
-                + [
-                    (
-                        data[cut_points[-1] : n_rows][col].isnull().sum()
-                        / (n_rows - cut_points[-1])
-                        * 100
-                        if n_rows > cut_points[-1]
-                        else 0
-                    )
-                ]
-                for col in data.columns
+        missing_val_counts = {
+            col: getattr(data[col], null_method)().sum() for col in data.columns
         }
 
-
-        # value proportions
-        missing_vals = {
-            "columns": list(missing_vals.keys()),
-            **{
-                str(i + 1): [missing_vals[col][i] for col in missing_vals.keys()]
-                for i in range(len(cut_points) + 1)
-            },
-        }
+        return formatted_missing_vals, missing_val_counts
 
-
-
+    # Iterate over the cut points and get the proportion of missing values in each 'sector'
+    # for each column
+    if "polars" in tbl_type:
+        missing_vals, missing_val_counts = _calculate_missing_proportions_dataframe(
+            is_polars=True
+        )
 
-
+    elif "pandas" in tbl_type:
+        missing_vals, missing_val_counts = _calculate_missing_proportions_dataframe(
+            is_polars=False
+        )
+
+    elif "pyspark" in tbl_type:
         from pyspark.sql.functions import col as pyspark_col
 
         # PySpark implementation for missing values calculation
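The second helper, _calculate_missing_proportions_dataframe(), collapses the Polars and Pandas branches by resolving the null-test method name at runtime and dispatching with getattr(). The core trick in isolation:

import pandas as pd
import polars as pl

def null_count(series, is_polars: bool) -> int:
    # "is_null" is the Polars spelling, "isnull" the Pandas one
    null_method = "is_null" if is_polars else "isnull"
    return int(getattr(series, null_method)().sum())

print(null_count(pd.Series([1, None, 3]), is_polars=False))  # 1
print(null_count(pl.Series([1, None, 3]), is_polars=True))   # 1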
@@ -2164,7 +2135,7 @@ def missing_vals_tbl(data: FrameT | Any) -> GT:
                     missing_prop = (null_count / sector_size) * 100
                     col_missing_props.append(missing_prop)
                 else:
-                    col_missing_props.append(0)
+                    col_missing_props.append(0)  # pragma: no cover
 
             # Handle the final sector (after last cut point)
             if n_rows > cut_points[-1]:
@@ -2184,7 +2155,7 @@ def missing_vals_tbl(data: FrameT | Any) -> GT:
                     missing_prop = (null_count / sector_size) * 100
                     col_missing_props.append(missing_prop)
                 else:
-                    col_missing_props.append(0)
+                    col_missing_props.append(0)  # pragma: no cover
 
             missing_vals[col_name] = col_missing_props
 
@@ -2623,7 +2594,7 @@ def get_column_count(data: FrameT | Any) -> int:
     except Exception:
         # Fallback for unsupported types
         if "pandas" in str(type(data)):
-            return data.shape[1]
+            return data.shape[1]  # pragma: no cover
         else:
             raise ValueError("The input table type supplied in `data=` is not supported.")
 
@@ -2793,14 +2764,14 @@ def get_row_count(data: FrameT | Any) -> int:
         if hasattr(df_nw, "shape"):
             return df_nw.shape[0]
         elif hasattr(df_nw, "height"):
-            return df_nw.height
-        else:
+            return df_nw.height  # pragma: no cover
+        else:  # pragma: no cover
             raise ValueError("Unable to determine row count from Narwhals DataFrame")
     except Exception:
         # Fallback for types that don't work with Narwhals
-        if "pandas" in str(type(data)):
+        if "pandas" in str(type(data)):  # pragma: no cover
            return data.shape[0]
-        elif "pyspark" in str(type(data)):
+        elif "pyspark" in str(type(data)):  # pragma: no cover
            return data.count()
        else:
            raise ValueError("The input table type supplied in `data=` is not supported.")
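Many hunks in this release only append "# pragma: no cover" to defensive branches. That comment is a coverage.py directive: the marked line (or the block it opens) is excluded from coverage measurement, so hard-to-reach fallbacks do not depress the reported figure. A contrived illustration:

def row_count(data):
    if hasattr(data, "shape"):
        return data.shape[0]
    else:  # pragma: no cover
        # Fallback that ordinary test fixtures never reach; excluded from coverage
        raise ValueError("Unable to determine row count")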
@@ -3019,7 +2990,7 @@ def connect_to_table(connection_string: str) -> Any:
     # Get list of available tables
     try:
         available_tables = conn.list_tables()
-    except Exception:
+    except Exception:  # pragma: no cover
         available_tables = []
 
     conn.disconnect()
@@ -3064,7 +3035,7 @@ def connect_to_table(connection_string: str) -> Any:
     }
 
     # Check if this is a missing backend dependency
-    for backend, install_cmd in backend_install_map.items():
+    for backend, install_cmd in backend_install_map.items():  # pragma: no cover
         if backend in error_str and ("not found" in error_str or "no module" in error_str):
             raise ConnectionError(
                 f"Missing {backend.upper()} backend for Ibis. Install it with:\n"
@@ -3081,7 +3052,7 @@ def connect_to_table(connection_string: str) -> Any:
         ) from e
 
     # Generic connection error
-    raise ConnectionError(
+    raise ConnectionError(  # pragma: no cover
         f"Failed to connect to database using connection string: {connection_string}\n"
         f"Error: {e}\n\n"
         f"No table specified. Use the format: {connection_string}::TABLE_NAME"
@@ -3090,7 +3061,7 @@ def connect_to_table(connection_string: str) -> Any:
     # Split connection string and table name
     try:
         base_connection, table_name = connection_string.rsplit("::", 1)
-    except ValueError:
+    except ValueError:  # pragma: no cover
         raise ValueError(f"Invalid connection string format: {connection_string}")
 
     # Connect to database and get table
@@ -3124,7 +3095,7 @@ def connect_to_table(connection_string: str) -> Any:
     # Check if table doesn't exist
     if "table" in error_str and ("not found" in error_str or "does not exist" in error_str):
         # Try to get available tables for helpful message
-        try:
+        try:  # pragma: no cover
             available_tables = conn.list_tables()
             if available_tables:
                 table_list = "\n".join(f" - {table}" for table in available_tables)
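connect_to_table() continues to expect table-qualified connection strings of the form backend-url::TABLE_NAME, splitting on the last "::". A sketch of that parsing step (the backend URL below is illustrative, not taken from the diff):

def split_connection_string(connection_string: str) -> tuple[str, str]:
    # rsplit keeps any earlier "::" inside the backend URL intact
    base_connection, table_name = connection_string.rsplit("::", 1)
    return base_connection, table_name

print(split_connection_string("duckdb:///data/game_revenue.ddb::game_revenue"))
# ('duckdb:///data/game_revenue.ddb', 'game_revenue')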
pointblank/yaml.py
CHANGED
@@ -1510,26 +1510,6 @@ def yaml_to_python(yaml: Union[str, Path]) -> str:
                     action_params.append(f"highest_only={value.highest_only}")
                 actions_str = "pb.Actions(" + ", ".join(action_params) + ")"
                 param_parts.append(f"actions={actions_str}")
-            elif isinstance(value, dict):
-                action_params = []
-                step_action_base = f"steps[{step_index}].{list(step_config.keys())[0]}.actions"
-                for action_key, action_value in value.items():
-                    if action_key == "highest_only":
-                        action_params.append(f"{action_key}={action_value}")
-                    else:
-                        # Check if we have an original expression for this action
-                        action_expr_path = f"{step_action_base}.{action_key}"
-                        if action_expr_path in step_expressions:
-                            action_params.append(
-                                f"{action_key}={step_expressions[action_expr_path]}"
-                            )
-                        elif isinstance(action_value, str):
-                            action_params.append(f'{action_key}="{action_value}"')
-                        else:
-                            # For callables or complex expressions
-                            action_params.append(f"{action_key}={action_value}")
-                actions_str = "pb.Actions(" + ", ".join(action_params) + ")"
-                param_parts.append(f"actions={actions_str}")
             else:
                 param_parts.append(f"actions={value}")
             elif key == "thresholds":
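The deleted elif isinstance(value, dict) branch duplicated the string-building logic that yaml_to_python() uses elsewhere to render validation actions as Python source. A minimal sketch of that code-generation pattern (the function name here is hypothetical):

def render_actions_call(actions: dict) -> str:
    # Render an actions mapping as a pb.Actions(...) source string
    params = []
    for key, value in actions.items():
        if isinstance(value, str):
            params.append(f'{key}="{value}"')
        else:
            # Booleans, numbers, callables, and prebuilt expressions pass through as-is
            params.append(f"{key}={value}")
    return "pb.Actions(" + ", ".join(params) + ")"

print(render_actions_call({"warn": "Warning!", "highest_only": True}))
# pb.Actions(warn="Warning!", highest_only=True)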
{pointblank-0.12.2.dist-info → pointblank-0.13.1.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pointblank
-Version: 0.12.2
+Version: 0.13.1
 Summary: Find out if your data is what you think it is.
 Author-email: Richard Iannone <riannone@me.com>
 License: MIT License
@@ -60,6 +60,12 @@ Requires-Dist: chatlas>=0.3.0; extra == "generate"
 Requires-Dist: anthropic[bedrock]>=0.45.2; extra == "generate"
 Requires-Dist: openai>=1.63.0; extra == "generate"
 Requires-Dist: shiny>=1.3.0; extra == "generate"
+Provides-Extra: mcp
+Requires-Dist: mcp[cli]>=1.10.1; extra == "mcp"
+Requires-Dist: fastmcp>=2.11.3; extra == "mcp"
+Requires-Dist: pytest-asyncio>=1.0.0; extra == "mcp"
+Provides-Extra: excel
+Requires-Dist: openpyxl>=3.0.0; extra == "excel"
 Provides-Extra: bigquery
 Requires-Dist: ibis-framework[bigquery]>=9.5.0; extra == "bigquery"
 Provides-Extra: databricks
@@ -84,6 +90,7 @@ Requires-Dist: quartodoc>=0.8.1; python_version >= "3.9" and extra == "docs"
 Requires-Dist: pandas>=2.2.3; extra == "docs"
 Requires-Dist: polars>=1.17.1; extra == "docs"
 Requires-Dist: pyspark==3.5.6; extra == "docs"
+Requires-Dist: openpyxl>=3.0.0; extra == "docs"
 Dynamic: license-file
 
 <div align="center">
{pointblank-0.12.2.dist-info → pointblank-0.13.1.dist-info}/RECORD
CHANGED

@@ -3,7 +3,7 @@ pointblank/_constants.py,sha256=rB8qTnhabwmSQURevHqokC1pp5lfaWMCzhmbMZ0CP8A,8151
 pointblank/_constants_docs.py,sha256=JBmtt16zTYQ-zaM4ElLExtKs-dKlnN553Ys2ML1Y1C8,2099
 pointblank/_constants_translations.py,sha256=HXcCYmKoMjoaFv-Ym4UWv3AsIVXik2zDyAy7xvTvv0Y,186710
 pointblank/_datascan_utils.py,sha256=EMfeabXm_ZsCUKPROB7rFhyOpjtRs8jcnZ_9nBtMyws,1750
-pointblank/_interrogation.py,sha256=
+pointblank/_interrogation.py,sha256=p3qPTgcsYiDEyV9d5pWLzAqz9rU9-IsfmSFV4sWRBNI,76932
 pointblank/_typing.py,sha256=aItbCbzhbzqjK3lCbL27ltRyXoAH1c3-U6xQdRzg-lU,1594
 pointblank/_utils.py,sha256=ikgkFomoAEOxaiItHZUo3NTHu0MJHWfKAF_fnX9rRnA,30685
 pointblank/_utils_check_args.py,sha256=rFEc1nbCN8ftsQQWVjCNWmQ2QmUDxkfgmoJclrZeTLs,5489
@@ -21,8 +21,8 @@ pointblank/schema.py,sha256=vwGF8UKy2riRSQzcwatcI6L0t_6ccdbOayrKonvyodE,45777
 pointblank/segments.py,sha256=RXp3lPr3FboVseadNqLgIeoMBh_mykrQSFp1WtV41Yg,5570
 pointblank/tf.py,sha256=8o_8m4i01teulEe3-YYMotSNf3tImjBMInsvdjSAO5Q,8844
 pointblank/thresholds.py,sha256=mybeLzTVdmN04NLKoV-jiSBXsWknwHO0Gox0ttVN_MU,25766
-pointblank/validate.py,sha256=
-pointblank/yaml.py,sha256=
+pointblank/validate.py,sha256=py6w239Mh7tbAfXJkanDLARCkWE5EFhTlfvS0KOjnWA,697215
+pointblank/yaml.py,sha256=Sy802CZBOgEZGwbIes8wcXPPt2a5rXO0b3lh9tsLS8w,58966
 pointblank/data/api-docs.txt,sha256=w2nIkIL_fJpXlPR9clogqcgdiv-uHvdSDI8gjkP_mCQ,531711
 pointblank/data/game_revenue-duckdb.zip,sha256=tKIVx48OGLYGsQPS3h5AjA2Nyq_rfEpLCjBiFUWhagU,35880
 pointblank/data/game_revenue.zip,sha256=7c9EvHLyi93CHUd4p3dM4CZ-GucFCtXKSPxgLojL32U,33749
@@ -33,9 +33,9 @@ pointblank/data/nycflights.zip,sha256=yVjbUaKUz2LydSdF9cABuir0VReHBBgV7shiNWSd0m
 pointblank/data/polars-api-docs.txt,sha256=KGcS-BOtUs9zgpkWfXD-GFdFh4O_zjdkpX7msHjztLg,198045
 pointblank/data/small_table-duckdb.zip,sha256=BhTaZ2CRS4-9Z1uVhOU6HggvW3XCar7etMznfENIcOc,2028
 pointblank/data/small_table.zip,sha256=lmFb90Nb-v5X559Ikjg31YLAXuRyMkD9yLRElkXPMzQ,472
-pointblank-0.12.2.dist-info/licenses/LICENSE,sha256=
-pointblank-0.12.2.dist-info/METADATA,sha256=
-pointblank-0.12.2.dist-info/WHEEL,sha256=
-pointblank-0.12.2.dist-info/entry_points.txt,sha256=
-pointblank-0.12.2.dist-info/top_level.txt,sha256=
-pointblank-0.12.2.dist-info/RECORD,,
+pointblank-0.13.1.dist-info/licenses/LICENSE,sha256=apLF-HWPNU7pT5bmf5KmZpD5Cklpy2u-BN_0xBoRMLY,1081
+pointblank-0.13.1.dist-info/METADATA,sha256=nuywCyQQooecMF-WiWVnM-AHN2kCL0roQgWi5s2hSwU,19529
+pointblank-0.13.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+pointblank-0.13.1.dist-info/entry_points.txt,sha256=GqqqOTOH8uZe22wLcvYjzpizqk_j4MNcUo2YM14ryCw,42
+pointblank-0.13.1.dist-info/top_level.txt,sha256=-wHrS1SvV8-nhvc3w-PPYs1C1WtEc1pK-eGjubbCCKc,11
+pointblank-0.13.1.dist-info/RECORD,,
{pointblank-0.12.2.dist-info → pointblank-0.13.1.dist-info}/WHEEL
File without changes

{pointblank-0.12.2.dist-info → pointblank-0.13.1.dist-info}/entry_points.txt
File without changes

{pointblank-0.12.2.dist-info → pointblank-0.13.1.dist-info}/licenses/LICENSE
File without changes

{pointblank-0.12.2.dist-info → pointblank-0.13.1.dist-info}/top_level.txt
File without changes