pointblank 0.13.2__py3-none-any.whl → 0.13.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pointblank/validate.py +203 -148
- pointblank/yaml.py +154 -44
- {pointblank-0.13.2.dist-info → pointblank-0.13.3.dist-info}/METADATA +2 -2
- {pointblank-0.13.2.dist-info → pointblank-0.13.3.dist-info}/RECORD +8 -8
- {pointblank-0.13.2.dist-info → pointblank-0.13.3.dist-info}/WHEEL +0 -0
- {pointblank-0.13.2.dist-info → pointblank-0.13.3.dist-info}/entry_points.txt +0 -0
- {pointblank-0.13.2.dist-info → pointblank-0.13.3.dist-info}/licenses/LICENSE +0 -0
- {pointblank-0.13.2.dist-info → pointblank-0.13.3.dist-info}/top_level.txt +0 -0
pointblank/validate.py
CHANGED
|
@@ -2006,9 +2006,9 @@ def missing_vals_tbl(data: FrameT | Any) -> GT:
|
|
|
2006
2006
|
|
|
2007
2007
|
# Apply the appropriate conversion method
|
|
2008
2008
|
if use_polars_conversion:
|
|
2009
|
-
null_sum_converted = null_sum.to_polars()
|
|
2009
|
+
null_sum_converted = null_sum.to_polars() # pragma: no cover
|
|
2010
2010
|
else:
|
|
2011
|
-
null_sum_converted = null_sum.to_pandas()
|
|
2011
|
+
null_sum_converted = null_sum.to_pandas() # pragma: no cover
|
|
2012
2012
|
|
|
2013
2013
|
missing_prop = (null_sum_converted / sector_size) * 100
|
|
2014
2014
|
col_missing_props.append(missing_prop)
|
|
@@ -2025,9 +2025,9 @@ def missing_vals_tbl(data: FrameT | Any) -> GT:
|
|
|
2025
2025
|
|
|
2026
2026
|
# Apply the appropriate conversion method
|
|
2027
2027
|
if use_polars_conversion:
|
|
2028
|
-
null_sum_converted = null_sum.to_polars()
|
|
2028
|
+
null_sum_converted = null_sum.to_polars() # pragma: no cover
|
|
2029
2029
|
else:
|
|
2030
|
-
null_sum_converted = null_sum.to_pandas()
|
|
2030
|
+
null_sum_converted = null_sum.to_pandas() # pragma: no cover
|
|
2031
2031
|
|
|
2032
2032
|
missing_prop = (null_sum_converted / sector_size) * 100
|
|
2033
2033
|
col_missing_props.append(missing_prop)
|
|
@@ -2040,9 +2040,13 @@ def missing_vals_tbl(data: FrameT | Any) -> GT:
|
|
|
2040
2040
|
|
|
2041
2041
|
# Use the helper function based on the DataFrame library
|
|
2042
2042
|
if df_lib_name_gt == "polars":
|
|
2043
|
-
missing_vals = _calculate_missing_proportions(
|
|
2043
|
+
missing_vals = _calculate_missing_proportions(
|
|
2044
|
+
use_polars_conversion=True
|
|
2045
|
+
) # pragma: no cover
|
|
2044
2046
|
else:
|
|
2045
|
-
missing_vals = _calculate_missing_proportions(
|
|
2047
|
+
missing_vals = _calculate_missing_proportions(
|
|
2048
|
+
use_polars_conversion=False
|
|
2049
|
+
) # pragma: no cover
|
|
2046
2050
|
|
|
2047
2051
|
# Pivot the `missing_vals` dictionary to create a table with the missing value proportions
|
|
2048
2052
|
missing_vals = {
|
|
@@ -2055,9 +2059,13 @@ def missing_vals_tbl(data: FrameT | Any) -> GT:
|
|
|
2055
2059
|
|
|
2056
2060
|
# Get a dictionary of counts of missing values in each column
|
|
2057
2061
|
if df_lib_name_gt == "polars":
|
|
2058
|
-
missing_val_counts = {
|
|
2062
|
+
missing_val_counts = {
|
|
2063
|
+
col: data[col].isnull().sum().to_polars() for col in data.columns
|
|
2064
|
+
} # pragma: no cover
|
|
2059
2065
|
else:
|
|
2060
|
-
missing_val_counts = {
|
|
2066
|
+
missing_val_counts = {
|
|
2067
|
+
col: data[col].isnull().sum().to_pandas() for col in data.columns
|
|
2068
|
+
} # pragma: no cover
|
|
2061
2069
|
|
|
2062
2070
|
if pl_pb_tbl:
|
|
2063
2071
|
# Get the column names from the table
|
|
@@ -2429,10 +2437,10 @@ def _get_column_names_safe(data: Any) -> list[str]:
|
|
|
2429
2437
|
if hasattr(df_nw, "collect_schema"):
|
|
2430
2438
|
return list(df_nw.collect_schema().keys())
|
|
2431
2439
|
else:
|
|
2432
|
-
return list(df_nw.columns)
|
|
2433
|
-
except Exception:
|
|
2440
|
+
return list(df_nw.columns) # pragma: no cover
|
|
2441
|
+
except Exception: # pragma: no cover
|
|
2434
2442
|
# Fallback to direct column access
|
|
2435
|
-
return list(data.columns)
|
|
2443
|
+
return list(data.columns) # pragma: no cover
|
|
2436
2444
|
|
|
2437
2445
|
|
|
2438
2446
|
def _get_column_names(data: FrameT | Any, ibis_tbl: bool, df_lib_name_gt: str) -> list[str]:
|
|
@@ -2633,7 +2641,7 @@ def get_column_count(data: FrameT | Any) -> int:
|
|
|
2633
2641
|
if hasattr(df_nw, "collect_schema"):
|
|
2634
2642
|
return len(df_nw.collect_schema())
|
|
2635
2643
|
else:
|
|
2636
|
-
return len(df_nw.columns)
|
|
2644
|
+
return len(df_nw.columns) # pragma: no cover
|
|
2637
2645
|
except Exception:
|
|
2638
2646
|
# Fallback for unsupported types
|
|
2639
2647
|
if "pandas" in str(type(data)):
|
|
@@ -2806,11 +2814,11 @@ def get_row_count(data: FrameT | Any) -> int:
|
|
|
2806
2814
|
# Try different ways to get row count
|
|
2807
2815
|
if hasattr(df_nw, "shape"):
|
|
2808
2816
|
return df_nw.shape[0]
|
|
2809
|
-
elif hasattr(df_nw, "height"):
|
|
2817
|
+
elif hasattr(df_nw, "height"): # pragma: no cover
|
|
2810
2818
|
return df_nw.height # pragma: no cover
|
|
2811
2819
|
else: # pragma: no cover
|
|
2812
2820
|
raise ValueError("Unable to determine row count from Narwhals DataFrame")
|
|
2813
|
-
except Exception:
|
|
2821
|
+
except Exception: # pragma: no cover
|
|
2814
2822
|
# Fallback for types that don't work with Narwhals
|
|
2815
2823
|
if "pandas" in str(type(data)): # pragma: no cover
|
|
2816
2824
|
return data.shape[0]
|
|
@@ -8401,8 +8409,8 @@ class Validate:
|
|
|
8401
8409
|
self.thresholds if thresholds is None else _normalize_thresholds_creation(thresholds)
|
|
8402
8410
|
)
|
|
8403
8411
|
|
|
8404
|
-
if columns_subset is not None and isinstance(columns_subset, str):
|
|
8405
|
-
columns_subset = [columns_subset]
|
|
8412
|
+
if columns_subset is not None and isinstance(columns_subset, str): # pragma: no cover
|
|
8413
|
+
columns_subset = [columns_subset] # pragma: no cover
|
|
8406
8414
|
|
|
8407
8415
|
# TODO: incorporate Column object
|
|
8408
8416
|
|
|
@@ -10096,7 +10104,9 @@ class Validate:
|
|
|
10096
10104
|
)
|
|
10097
10105
|
|
|
10098
10106
|
else:
|
|
10099
|
-
raise ValueError(
|
|
10107
|
+
raise ValueError(
|
|
10108
|
+
f"Unknown assertion type: {assertion_type}"
|
|
10109
|
+
) # pragma: no cover
|
|
10100
10110
|
|
|
10101
10111
|
except Exception as e:
|
|
10102
10112
|
# Only catch specific data quality comparison errors, not programming errors
|
|
@@ -10111,14 +10121,18 @@ class Validate:
|
|
|
10111
10121
|
or ("dtype" in error_msg and "compare" in error_msg)
|
|
10112
10122
|
)
|
|
10113
10123
|
|
|
10114
|
-
if is_comparison_error:
|
|
10124
|
+
if is_comparison_error: # pragma: no cover
|
|
10115
10125
|
# If data quality comparison fails, mark the validation as having an eval_error
|
|
10116
|
-
validation.eval_error = True
|
|
10117
|
-
end_time = datetime.datetime.now(datetime.timezone.utc)
|
|
10118
|
-
validation.proc_duration_s = (
|
|
10119
|
-
|
|
10120
|
-
|
|
10121
|
-
|
|
10126
|
+
validation.eval_error = True # pragma: no cover
|
|
10127
|
+
end_time = datetime.datetime.now(datetime.timezone.utc) # pragma: no cover
|
|
10128
|
+
validation.proc_duration_s = (
|
|
10129
|
+
end_time - start_time
|
|
10130
|
+
).total_seconds() # pragma: no cover
|
|
10131
|
+
validation.time_processed = end_time.isoformat(
|
|
10132
|
+
timespec="milliseconds"
|
|
10133
|
+
) # pragma: no cover
|
|
10134
|
+
validation.active = False # pragma: no cover
|
|
10135
|
+
continue # pragma: no cover
|
|
10122
10136
|
else:
|
|
10123
10137
|
# For other errors (like missing columns), let them propagate
|
|
10124
10138
|
raise
|
|
@@ -10363,32 +10377,46 @@ class Validate:
|
|
|
10363
10377
|
except AttributeError:
|
|
10364
10378
|
# For LazyFrames without sample method, collect first then sample
|
|
10365
10379
|
validation_extract_native = validation_extract_nw.collect().to_native()
|
|
10366
|
-
if hasattr(validation_extract_native, "sample"):
|
|
10380
|
+
if hasattr(validation_extract_native, "sample"): # pragma: no cover
|
|
10367
10381
|
# PySpark DataFrame has sample method
|
|
10368
|
-
validation_extract_native =
|
|
10369
|
-
|
|
10370
|
-
|
|
10371
|
-
|
|
10382
|
+
validation_extract_native = (
|
|
10383
|
+
validation_extract_native.sample( # pragma: no cover
|
|
10384
|
+
fraction=min(
|
|
10385
|
+
1.0, sample_n / validation_extract_native.count()
|
|
10386
|
+
) # pragma: no cover
|
|
10387
|
+
).limit(sample_n)
|
|
10388
|
+
) # pragma: no cover
|
|
10389
|
+
validation_extract_nw = nw.from_native(
|
|
10390
|
+
validation_extract_native
|
|
10391
|
+
) # pragma: no cover
|
|
10372
10392
|
else:
|
|
10373
10393
|
# Fallback: just take first n rows after collecting
|
|
10374
|
-
validation_extract_nw = validation_extract_nw.collect().head(
|
|
10394
|
+
validation_extract_nw = validation_extract_nw.collect().head(
|
|
10395
|
+
sample_n
|
|
10396
|
+
) # pragma: no cover
|
|
10375
10397
|
elif sample_frac is not None:
|
|
10376
10398
|
try:
|
|
10377
10399
|
validation_extract_nw = validation_extract_nw.sample(fraction=sample_frac)
|
|
10378
|
-
except AttributeError:
|
|
10400
|
+
except AttributeError: # pragma: no cover
|
|
10379
10401
|
# For LazyFrames without sample method, collect first then sample
|
|
10380
|
-
validation_extract_native =
|
|
10381
|
-
|
|
10402
|
+
validation_extract_native = (
|
|
10403
|
+
validation_extract_nw.collect().to_native()
|
|
10404
|
+
) # pragma: no cover
|
|
10405
|
+
if hasattr(validation_extract_native, "sample"): # pragma: no cover
|
|
10382
10406
|
# PySpark DataFrame has sample method
|
|
10383
10407
|
validation_extract_native = validation_extract_native.sample(
|
|
10384
10408
|
fraction=sample_frac
|
|
10385
|
-
)
|
|
10386
|
-
validation_extract_nw = nw.from_native(
|
|
10409
|
+
) # pragma: no cover
|
|
10410
|
+
validation_extract_nw = nw.from_native(
|
|
10411
|
+
validation_extract_native
|
|
10412
|
+
) # pragma: no cover
|
|
10387
10413
|
else:
|
|
10388
10414
|
# Fallback: use fraction to calculate head size
|
|
10389
|
-
collected = validation_extract_nw.collect()
|
|
10390
|
-
sample_size = max(
|
|
10391
|
-
|
|
10415
|
+
collected = validation_extract_nw.collect() # pragma: no cover
|
|
10416
|
+
sample_size = max(
|
|
10417
|
+
1, int(len(collected) * sample_frac)
|
|
10418
|
+
) # pragma: no cover
|
|
10419
|
+
validation_extract_nw = collected.head(sample_size) # pragma: no cover
|
|
10392
10420
|
|
|
10393
10421
|
# Ensure a limit is set on the number of rows to extract
|
|
10394
10422
|
try:
|
|
@@ -10398,9 +10426,9 @@ class Validate:
|
|
|
10398
10426
|
# For LazyFrames, collect to get length (or use a reasonable default)
|
|
10399
10427
|
try:
|
|
10400
10428
|
extract_length = len(validation_extract_nw.collect())
|
|
10401
|
-
except Exception:
|
|
10429
|
+
except Exception: # pragma: no cover
|
|
10402
10430
|
# If collection fails, apply limit anyway as a safety measure
|
|
10403
|
-
extract_length = extract_limit + 1 #
|
|
10431
|
+
extract_length = extract_limit + 1 # pragma: no cover
|
|
10404
10432
|
|
|
10405
10433
|
if extract_length > extract_limit:
|
|
10406
10434
|
validation_extract_nw = validation_extract_nw.head(extract_limit)
|
|
@@ -12065,10 +12093,12 @@ class Validate:
|
|
|
12065
12093
|
try:
|
|
12066
12094
|
# Try without order_by first (for DataFrames)
|
|
12067
12095
|
data_nw = data_nw.with_row_index(name=index_name)
|
|
12068
|
-
except TypeError:
|
|
12096
|
+
except TypeError: # pragma: no cover
|
|
12069
12097
|
# LazyFrames require order_by parameter - use first column for ordering
|
|
12070
|
-
first_col = data_nw.columns[0]
|
|
12071
|
-
data_nw = data_nw.with_row_index(
|
|
12098
|
+
first_col = data_nw.columns[0] # pragma: no cover
|
|
12099
|
+
data_nw = data_nw.with_row_index(
|
|
12100
|
+
name=index_name, order_by=first_col
|
|
12101
|
+
) # pragma: no cover
|
|
12072
12102
|
|
|
12073
12103
|
# Get all validation step result tables and join together the `pb_is_good_` columns
|
|
12074
12104
|
# ensuring that the columns are named uniquely (e.g., `pb_is_good_1`, `pb_is_good_2`, ...)
|
|
@@ -12080,10 +12110,12 @@ class Validate:
|
|
|
12080
12110
|
try:
|
|
12081
12111
|
# Try without order_by first (for DataFrames)
|
|
12082
12112
|
results_tbl = results_tbl.with_row_index(name=index_name)
|
|
12083
|
-
except TypeError:
|
|
12113
|
+
except TypeError: # pragma: no cover
|
|
12084
12114
|
# LazyFrames require order_by parameter - use first column for ordering
|
|
12085
|
-
first_col = results_tbl.columns[0]
|
|
12086
|
-
results_tbl = results_tbl.with_row_index(
|
|
12115
|
+
first_col = results_tbl.columns[0] # pragma: no cover
|
|
12116
|
+
results_tbl = results_tbl.with_row_index(
|
|
12117
|
+
name=index_name, order_by=first_col
|
|
12118
|
+
) # pragma: no cover
|
|
12087
12119
|
|
|
12088
12120
|
# Add numerical suffix to the `pb_is_good_` column to make it unique
|
|
12089
12121
|
results_tbl = results_tbl.select([index_name, "pb_is_good_"]).rename(
|
|
@@ -12215,15 +12247,15 @@ class Validate:
|
|
|
12215
12247
|
# If the table is a Polars one, determine if it's a LazyFrame
|
|
12216
12248
|
if tbl_info == "polars":
|
|
12217
12249
|
if _is_lazy_frame(self.data):
|
|
12218
|
-
tbl_info = "polars-lazy"
|
|
12250
|
+
tbl_info = "polars-lazy" # pragma: no cover
|
|
12219
12251
|
|
|
12220
12252
|
# Determine if the input table is a Narwhals DF
|
|
12221
12253
|
if _is_narwhals_table(self.data):
|
|
12222
12254
|
# Determine if the Narwhals table is a LazyFrame
|
|
12223
|
-
if _is_lazy_frame(self.data):
|
|
12224
|
-
tbl_info = "narwhals-lazy"
|
|
12255
|
+
if _is_lazy_frame(self.data): # pragma: no cover
|
|
12256
|
+
tbl_info = "narwhals-lazy" # pragma: no cover
|
|
12225
12257
|
else:
|
|
12226
|
-
tbl_info = "narwhals"
|
|
12258
|
+
tbl_info = "narwhals" # pragma: no cover
|
|
12227
12259
|
|
|
12228
12260
|
# Get the thresholds object
|
|
12229
12261
|
thresholds = self.thresholds
|
|
@@ -12388,7 +12420,7 @@ class Validate:
|
|
|
12388
12420
|
if lang in RTL_LANGUAGES:
|
|
12389
12421
|
gt_tbl = gt_tbl.tab_style(
|
|
12390
12422
|
style=style.css("direction: rtl;"), locations=loc.source_notes()
|
|
12391
|
-
)
|
|
12423
|
+
) # pragma: no cover
|
|
12392
12424
|
|
|
12393
12425
|
if incl_header:
|
|
12394
12426
|
gt_tbl = gt_tbl.tab_header(title=html(title_text), subtitle=html(combined_subtitle))
|
|
@@ -12705,9 +12737,11 @@ class Validate:
|
|
|
12705
12737
|
# Get the number of rows in the extract (safe for LazyFrames)
|
|
12706
12738
|
try:
|
|
12707
12739
|
n_rows = len(extract_nw)
|
|
12708
|
-
except TypeError:
|
|
12740
|
+
except TypeError: # pragma: no cover
|
|
12709
12741
|
# For LazyFrames, collect() first to get length
|
|
12710
|
-
n_rows =
|
|
12742
|
+
n_rows = (
|
|
12743
|
+
len(extract_nw.collect()) if hasattr(extract_nw, "collect") else 0
|
|
12744
|
+
) # pragma: no cover
|
|
12711
12745
|
|
|
12712
12746
|
# If the number of rows is zero, then produce an em dash then go to the next iteration
|
|
12713
12747
|
if n_rows == 0:
|
|
@@ -12715,7 +12749,7 @@ class Validate:
|
|
|
12715
12749
|
continue
|
|
12716
12750
|
|
|
12717
12751
|
# Write the CSV text (ensure LazyFrames are collected first)
|
|
12718
|
-
if hasattr(extract_nw, "collect"):
|
|
12752
|
+
if hasattr(extract_nw, "collect"): # pragma: no cover
|
|
12719
12753
|
extract_nw = extract_nw.collect()
|
|
12720
12754
|
csv_text = extract_nw.write_csv()
|
|
12721
12755
|
|
|
@@ -13217,7 +13251,7 @@ class Validate:
|
|
|
13217
13251
|
elif isinstance(column, list):
|
|
13218
13252
|
column_position = [list(self.data.columns).index(col) + 1 for col in column]
|
|
13219
13253
|
else:
|
|
13220
|
-
column_position = None
|
|
13254
|
+
column_position = None # pragma: no cover
|
|
13221
13255
|
else:
|
|
13222
13256
|
column_position = None
|
|
13223
13257
|
|
|
@@ -13309,7 +13343,7 @@ class Validate:
|
|
|
13309
13343
|
)
|
|
13310
13344
|
|
|
13311
13345
|
else:
|
|
13312
|
-
step_report = None
|
|
13346
|
+
step_report = None # pragma: no cover
|
|
13313
13347
|
|
|
13314
13348
|
return step_report
|
|
13315
13349
|
|
|
@@ -13797,7 +13831,7 @@ def _conditional_string_date_dttm_conversion(
|
|
|
13797
13831
|
elif not allow_regular_strings:
|
|
13798
13832
|
raise ValueError(
|
|
13799
13833
|
"If `value=` is provided as a string it must be a date or datetime string."
|
|
13800
|
-
)
|
|
13834
|
+
) # pragma: no cover
|
|
13801
13835
|
# If allow_regular_strings is True, regular strings pass through unchanged
|
|
13802
13836
|
|
|
13803
13837
|
return value
|
|
@@ -13851,12 +13885,33 @@ def _process_brief(
|
|
|
13851
13885
|
|
|
13852
13886
|
if segment is not None:
|
|
13853
13887
|
# The segment is always a tuple of the form ("{column}", "{value}")
|
|
13888
|
+
# Handle both regular lists and Segment objects (from seg_group())
|
|
13889
|
+
|
|
13890
|
+
segment_column = segment[0]
|
|
13891
|
+
segment_value = segment[1]
|
|
13892
|
+
|
|
13893
|
+
# If segment_value is a Segment object (from seg_group()), format it appropriately
|
|
13894
|
+
if isinstance(segment_value, Segment):
|
|
13895
|
+
# For Segment objects, format the segments as a readable string
|
|
13896
|
+
segments = segment_value.segments
|
|
13897
|
+
if len(segments) == 1:
|
|
13898
|
+
# Single segment: join the values with commas
|
|
13899
|
+
segment_value_str = ", ".join(str(v) for v in segments[0])
|
|
13900
|
+
else:
|
|
13901
|
+
# Multiple segments: join each segment with commas, separate segments with " | "
|
|
13902
|
+
segment_value_str = " | ".join([", ".join(str(v) for v in seg) for seg in segments])
|
|
13903
|
+
else:
|
|
13904
|
+
# For regular lists or other types, convert to string
|
|
13905
|
+
if isinstance(segment_value, list):
|
|
13906
|
+
segment_value_str = ", ".join(str(v) for v in segment_value)
|
|
13907
|
+
else:
|
|
13908
|
+
segment_value_str = str(segment_value)
|
|
13854
13909
|
|
|
13855
|
-
segment_fmt = f"{
|
|
13910
|
+
segment_fmt = f"{segment_column} / {segment_value_str}"
|
|
13856
13911
|
|
|
13857
13912
|
brief = brief.replace("{segment}", segment_fmt)
|
|
13858
|
-
brief = brief.replace("{segment_column}",
|
|
13859
|
-
brief = brief.replace("{segment_value}",
|
|
13913
|
+
brief = brief.replace("{segment_column}", segment_column)
|
|
13914
|
+
brief = brief.replace("{segment_value}", segment_value_str)
|
|
13860
13915
|
|
|
13861
13916
|
return brief
|
|
13862
13917
|
|
|
@@ -13890,7 +13945,7 @@ def _process_action_str(
|
|
|
13890
13945
|
if col is not None:
|
|
13891
13946
|
# If a list of columns is provided, then join the columns into a comma-separated string
|
|
13892
13947
|
if isinstance(col, list):
|
|
13893
|
-
col = ", ".join(col)
|
|
13948
|
+
col = ", ".join(col) # pragma: no cover
|
|
13894
13949
|
|
|
13895
13950
|
action_str = action_str.replace("{col}", col)
|
|
13896
13951
|
action_str = action_str.replace("{column}", col)
|
|
@@ -14287,7 +14342,7 @@ def _prep_values_text(
|
|
|
14287
14342
|
length_values = len(values)
|
|
14288
14343
|
|
|
14289
14344
|
if length_values == 0:
|
|
14290
|
-
return ""
|
|
14345
|
+
return "" # pragma: no cover
|
|
14291
14346
|
|
|
14292
14347
|
if length_values > limit:
|
|
14293
14348
|
num_omitted = length_values - limit
|
|
@@ -14296,7 +14351,7 @@ def _prep_values_text(
|
|
|
14296
14351
|
formatted_values = []
|
|
14297
14352
|
for value in values[:limit]:
|
|
14298
14353
|
if isinstance(value, (datetime.datetime, datetime.date)):
|
|
14299
|
-
formatted_values.append(f"`{value.isoformat()}`")
|
|
14354
|
+
formatted_values.append(f"`{value.isoformat()}`") # pragma: no cover
|
|
14300
14355
|
else:
|
|
14301
14356
|
formatted_values.append(f"`{value}`")
|
|
14302
14357
|
|
|
@@ -14486,8 +14541,8 @@ def _apply_segments(data_tbl: any, segments_expr: tuple[str, Any]) -> any:
|
|
|
14486
14541
|
if len(segment_str) == 10 and segment_str.count("-") == 2:
|
|
14487
14542
|
try:
|
|
14488
14543
|
parsed_value = date.fromisoformat(segment_str)
|
|
14489
|
-
except ValueError:
|
|
14490
|
-
pass
|
|
14544
|
+
except ValueError: # pragma: no cover
|
|
14545
|
+
pass # pragma: no cover
|
|
14491
14546
|
|
|
14492
14547
|
# Format 2: Datetime strings with UTC timezone like
|
|
14493
14548
|
# "2016-01-04 00:00:01 UTC.strict_cast(...)"
|
|
@@ -14499,27 +14554,28 @@ def _apply_segments(data_tbl: any, segments_expr: tuple[str, Any]) -> any:
|
|
|
14499
14554
|
parsed_dt = datetime.fromisoformat(datetime_part)
|
|
14500
14555
|
# Convert midnight datetimes to dates for consistency
|
|
14501
14556
|
if parsed_dt.time() == datetime.min.time():
|
|
14502
|
-
parsed_value = parsed_dt.date()
|
|
14557
|
+
parsed_value = parsed_dt.date() # pragma: no cover
|
|
14503
14558
|
else:
|
|
14504
14559
|
parsed_value = parsed_dt
|
|
14505
|
-
except (ValueError, IndexError):
|
|
14506
|
-
pass
|
|
14560
|
+
except (ValueError, IndexError): # pragma: no cover
|
|
14561
|
+
pass # pragma: no cover
|
|
14507
14562
|
|
|
14508
14563
|
# Format 3: Bracketed expressions like ['2016-01-04']
|
|
14509
14564
|
elif segment_str.startswith("[") and segment_str.endswith("]"):
|
|
14510
|
-
try:
|
|
14511
|
-
|
|
14565
|
+
try: # pragma: no cover
|
|
14566
|
+
# Remove [' and ']
|
|
14567
|
+
content = segment_str[2:-2] # pragma: no cover
|
|
14512
14568
|
|
|
14513
14569
|
# Try parsing as date first
|
|
14514
|
-
if len(content) == 10 and content.count("-") == 2:
|
|
14515
|
-
try:
|
|
14516
|
-
parsed_value = date.fromisoformat(content)
|
|
14517
|
-
except ValueError:
|
|
14518
|
-
pass
|
|
14570
|
+
if len(content) == 10 and content.count("-") == 2: # pragma: no cover
|
|
14571
|
+
try: # pragma: no cover
|
|
14572
|
+
parsed_value = date.fromisoformat(content) # pragma: no cover
|
|
14573
|
+
except ValueError: # pragma: no cover
|
|
14574
|
+
pass # pragma: no cover
|
|
14519
14575
|
|
|
14520
14576
|
# Try parsing as datetime
|
|
14521
|
-
if parsed_value is None:
|
|
14522
|
-
try:
|
|
14577
|
+
if parsed_value is None: # pragma: no cover
|
|
14578
|
+
try: # pragma: no cover
|
|
14523
14579
|
parsed_dt = datetime.fromisoformat(content.replace(" UTC", ""))
|
|
14524
14580
|
if parsed_dt.time() == datetime.min.time():
|
|
14525
14581
|
parsed_value = parsed_dt.date()
|
|
@@ -14528,8 +14584,8 @@ def _apply_segments(data_tbl: any, segments_expr: tuple[str, Any]) -> any:
|
|
|
14528
14584
|
except ValueError:
|
|
14529
14585
|
pass
|
|
14530
14586
|
|
|
14531
|
-
except (ValueError, IndexError):
|
|
14532
|
-
pass
|
|
14587
|
+
except (ValueError, IndexError): # pragma: no cover
|
|
14588
|
+
pass # pragma: no cover
|
|
14533
14589
|
|
|
14534
14590
|
# Handle `pl.datetime()` expressions with .alias("datetime")
|
|
14535
14591
|
elif "datetime" in segment_str and '.alias("datetime")' in segment_str:
|
|
@@ -14540,10 +14596,10 @@ def _apply_segments(data_tbl: any, segments_expr: tuple[str, Any]) -> any:
|
|
|
14540
14596
|
if parsed_dt.time() == datetime.min.time():
|
|
14541
14597
|
parsed_value = parsed_dt.date()
|
|
14542
14598
|
else:
|
|
14543
|
-
parsed_value = parsed_dt
|
|
14599
|
+
parsed_value = parsed_dt # pragma: no cover
|
|
14544
14600
|
|
|
14545
|
-
except (ValueError, AttributeError):
|
|
14546
|
-
pass
|
|
14601
|
+
except (ValueError, AttributeError): # pragma: no cover
|
|
14602
|
+
pass # pragma: no cover
|
|
14547
14603
|
|
|
14548
14604
|
# If we successfully parsed a value, use it; otherwise leave segment as is
|
|
14549
14605
|
if parsed_value is not None:
|
|
@@ -14567,9 +14623,9 @@ def _apply_segments(data_tbl: any, segments_expr: tuple[str, Any]) -> any:
|
|
|
14567
14623
|
# Filter the data table based on the column name and segment
|
|
14568
14624
|
# Use the new Ibis API methods to avoid deprecation warnings
|
|
14569
14625
|
if segment is None:
|
|
14570
|
-
data_tbl = data_tbl.filter(data_tbl[column].isnull())
|
|
14626
|
+
data_tbl = data_tbl.filter(data_tbl[column].isnull()) # pragma: no cover
|
|
14571
14627
|
elif isinstance(segment, list):
|
|
14572
|
-
data_tbl = data_tbl.filter(data_tbl[column].isin(segment))
|
|
14628
|
+
data_tbl = data_tbl.filter(data_tbl[column].isin(segment)) # pragma: no cover
|
|
14573
14629
|
else:
|
|
14574
14630
|
data_tbl = data_tbl.filter(data_tbl[column] == segment)
|
|
14575
14631
|
|
|
@@ -14690,7 +14746,7 @@ def _get_title_text(
|
|
|
14690
14746
|
"</span>"
|
|
14691
14747
|
f'<span style="float: right;">{title}</span>'
|
|
14692
14748
|
"</div>"
|
|
14693
|
-
)
|
|
14749
|
+
) # pragma: no cover
|
|
14694
14750
|
|
|
14695
14751
|
return html_str
|
|
14696
14752
|
|
|
@@ -14768,24 +14824,6 @@ def _transform_eval(
|
|
|
14768
14824
|
return symbol_list
|
|
14769
14825
|
|
|
14770
14826
|
|
|
14771
|
-
def _format_numbers_with_gt(
|
|
14772
|
-
values: list[int], n_sigfig: int = 3, compact: bool = True, locale: str = "en"
|
|
14773
|
-
) -> list[str]:
|
|
14774
|
-
"""Format numbers using Great Tables GT object to avoid pandas dependency."""
|
|
14775
|
-
import polars as pl
|
|
14776
|
-
|
|
14777
|
-
# Create a single-column DataFrame with all values
|
|
14778
|
-
df = pl.DataFrame({"values": values})
|
|
14779
|
-
|
|
14780
|
-
# Create GT object and format the column
|
|
14781
|
-
gt_obj = GT(df).fmt_number(columns="values", n_sigfig=n_sigfig, compact=compact, locale=locale)
|
|
14782
|
-
|
|
14783
|
-
# Extract the formatted values using _get_column_of_values
|
|
14784
|
-
formatted_values = _get_column_of_values(gt_obj, column_name="values", context="html")
|
|
14785
|
-
|
|
14786
|
-
return formatted_values
|
|
14787
|
-
|
|
14788
|
-
|
|
14789
14827
|
def _format_single_number_with_gt(
|
|
14790
14828
|
value: int, n_sigfig: int = 3, compact: bool = True, locale: str = "en", df_lib=None
|
|
14791
14829
|
) -> str:
|
|
@@ -14796,12 +14834,14 @@ def _format_single_number_with_gt(
|
|
|
14796
14834
|
import polars as pl
|
|
14797
14835
|
|
|
14798
14836
|
df_lib = pl
|
|
14799
|
-
elif _is_lib_present("pandas"):
|
|
14800
|
-
import pandas as pd
|
|
14837
|
+
elif _is_lib_present("pandas"): # pragma: no cover
|
|
14838
|
+
import pandas as pd # pragma: no cover
|
|
14801
14839
|
|
|
14802
|
-
df_lib = pd
|
|
14803
|
-
else:
|
|
14804
|
-
raise ImportError(
|
|
14840
|
+
df_lib = pd # pragma: no cover
|
|
14841
|
+
else: # pragma: no cover
|
|
14842
|
+
raise ImportError(
|
|
14843
|
+
"Neither Polars nor Pandas is available for formatting"
|
|
14844
|
+
) # pragma: no cover
|
|
14805
14845
|
|
|
14806
14846
|
# Create a single-row, single-column DataFrame using the specified library
|
|
14807
14847
|
df = df_lib.DataFrame({"value": [value]})
|
|
@@ -14867,12 +14907,14 @@ def _format_single_float_with_gt(
|
|
|
14867
14907
|
import polars as pl
|
|
14868
14908
|
|
|
14869
14909
|
df_lib = pl
|
|
14870
|
-
elif _is_lib_present("pandas"):
|
|
14871
|
-
import pandas as pd
|
|
14910
|
+
elif _is_lib_present("pandas"): # pragma: no cover
|
|
14911
|
+
import pandas as pd # pragma: no cover
|
|
14872
14912
|
|
|
14873
|
-
df_lib = pd
|
|
14874
|
-
else:
|
|
14875
|
-
raise ImportError(
|
|
14913
|
+
df_lib = pd # pragma: no cover
|
|
14914
|
+
else: # pragma: no cover
|
|
14915
|
+
raise ImportError(
|
|
14916
|
+
"Neither Polars nor Pandas is available for formatting"
|
|
14917
|
+
) # pragma: no cover
|
|
14876
14918
|
|
|
14877
14919
|
# Create a single-row, single-column DataFrame using the specified library
|
|
14878
14920
|
df = df_lib.DataFrame({"value": [value]})
|
|
@@ -14904,7 +14946,7 @@ def _transform_passed_failed(
|
|
|
14904
14946
|
return _format_single_float_with_gt(value, decimals=2, locale=locale, df_lib=df_lib)
|
|
14905
14947
|
else:
|
|
14906
14948
|
# Fallback to the original behavior
|
|
14907
|
-
return vals.fmt_number(value, decimals=2, locale=locale)[0]
|
|
14949
|
+
return vals.fmt_number(value, decimals=2, locale=locale)[0] # pragma: no cover
|
|
14908
14950
|
|
|
14909
14951
|
passed_failed = [
|
|
14910
14952
|
(
|
|
@@ -15044,7 +15086,7 @@ def _get_callable_source(fn: Callable) -> str:
|
|
|
15044
15086
|
return pre_arg
|
|
15045
15087
|
except (OSError, TypeError): # pragma: no cover
|
|
15046
15088
|
return fn.__name__
|
|
15047
|
-
return fn
|
|
15089
|
+
return fn # pragma: no cover
|
|
15048
15090
|
|
|
15049
15091
|
|
|
15050
15092
|
def _extract_pre_argument(source: str) -> str:
|
|
@@ -15128,12 +15170,14 @@ def _format_single_integer_with_gt(value: int, locale: str = "en", df_lib=None)
|
|
|
15128
15170
|
import polars as pl
|
|
15129
15171
|
|
|
15130
15172
|
df_lib = pl
|
|
15131
|
-
elif _is_lib_present("pandas"):
|
|
15132
|
-
import pandas as pd
|
|
15173
|
+
elif _is_lib_present("pandas"): # pragma: no cover
|
|
15174
|
+
import pandas as pd # pragma: no cover
|
|
15133
15175
|
|
|
15134
|
-
df_lib = pd
|
|
15135
|
-
else:
|
|
15136
|
-
raise ImportError(
|
|
15176
|
+
df_lib = pd # pragma: no cover
|
|
15177
|
+
else: # pragma: no cover
|
|
15178
|
+
raise ImportError(
|
|
15179
|
+
"Neither Polars nor Pandas is available for formatting"
|
|
15180
|
+
) # pragma: no cover
|
|
15137
15181
|
|
|
15138
15182
|
# Create a single-row, single-column DataFrame using the specified library
|
|
15139
15183
|
df = df_lib.DataFrame({"value": [value]})
|
|
@@ -15161,12 +15205,14 @@ def _format_single_float_with_gt_custom(
|
|
|
15161
15205
|
import polars as pl
|
|
15162
15206
|
|
|
15163
15207
|
df_lib = pl
|
|
15164
|
-
elif _is_lib_present("pandas"):
|
|
15165
|
-
import pandas as pd
|
|
15208
|
+
elif _is_lib_present("pandas"): # pragma: no cover
|
|
15209
|
+
import pandas as pd # pragma: no cover
|
|
15166
15210
|
|
|
15167
|
-
df_lib = pd
|
|
15168
|
-
else:
|
|
15169
|
-
raise ImportError(
|
|
15211
|
+
df_lib = pd # pragma: no cover
|
|
15212
|
+
else: # pragma: no cover
|
|
15213
|
+
raise ImportError(
|
|
15214
|
+
"Neither Polars nor Pandas is available for formatting"
|
|
15215
|
+
) # pragma: no cover
|
|
15170
15216
|
|
|
15171
15217
|
# Create a single-row, single-column DataFrame using the specified library
|
|
15172
15218
|
df = df_lib.DataFrame({"value": [value]})
|
|
@@ -15201,7 +15247,7 @@ def _create_thresholds_html(thresholds: Thresholds, locale: str, df_lib=None) ->
|
|
|
15201
15247
|
# Fallback to the original behavior
|
|
15202
15248
|
return fmt_number(
|
|
15203
15249
|
value, decimals=decimals, drop_trailing_zeros=drop_trailing_zeros, locale=locale
|
|
15204
|
-
)[0]
|
|
15250
|
+
)[0] # pragma: no cover
|
|
15205
15251
|
|
|
15206
15252
|
def _format_integer_safe(value: int) -> str:
|
|
15207
15253
|
if df_lib is not None and value is not None:
|
|
@@ -15386,9 +15432,12 @@ def _step_report_row_based(
|
|
|
15386
15432
|
title = STEP_REPORT_TEXT["report_for_step_i"][lang].format(i=i) + " " + CHECK_MARK_SPAN
|
|
15387
15433
|
assertion_header_text = STEP_REPORT_TEXT["assertion_header_text"][lang]
|
|
15388
15434
|
|
|
15389
|
-
# Use success_statement_no_column for col_vals_expr since it doesn't target
|
|
15435
|
+
# Use 'success_statement_no_column' for col_vals_expr() since it doesn't target
|
|
15436
|
+
# a specific column
|
|
15390
15437
|
if assertion_type == "col_vals_expr":
|
|
15391
|
-
success_stmt = STEP_REPORT_TEXT["success_statement_no_column"][lang].format(
|
|
15438
|
+
success_stmt = STEP_REPORT_TEXT["success_statement_no_column"][lang].format(
|
|
15439
|
+
n=n
|
|
15440
|
+
) # pragma: no cover
|
|
15392
15441
|
else:
|
|
15393
15442
|
success_stmt = STEP_REPORT_TEXT["success_statement"][lang].format(
|
|
15394
15443
|
n=n,
|
|
@@ -16101,14 +16150,14 @@ def _step_report_schema_any_order(
|
|
|
16101
16150
|
if exp_columns_dict[column_name_exp_i]["colname_matched"]:
|
|
16102
16151
|
col_exp_correct.append(CHECK_MARK_SPAN)
|
|
16103
16152
|
else:
|
|
16104
|
-
col_exp_correct.append(CROSS_MARK_SPAN)
|
|
16153
|
+
col_exp_correct.append(CROSS_MARK_SPAN) # pragma: no cover
|
|
16105
16154
|
|
|
16106
16155
|
#
|
|
16107
16156
|
# `dtype_exp` values
|
|
16108
16157
|
#
|
|
16109
16158
|
|
|
16110
16159
|
if not exp_columns_dict[column_name_exp_i]["dtype_present"]:
|
|
16111
|
-
dtype_exp.append("")
|
|
16160
|
+
dtype_exp.append("") # pragma: no cover
|
|
16112
16161
|
|
|
16113
16162
|
elif len(exp_columns_dict[column_name_exp_i]["dtype_input"]) > 1:
|
|
16114
16163
|
dtype = exp_columns_dict[column_name_exp_i]["dtype_input"]
|
|
@@ -16143,9 +16192,9 @@ def _step_report_schema_any_order(
|
|
|
16143
16192
|
#
|
|
16144
16193
|
|
|
16145
16194
|
if not exp_columns_dict[column_name_exp_i]["colname_matched"]:
|
|
16146
|
-
dtype_exp_correct.append("—")
|
|
16195
|
+
dtype_exp_correct.append("—") # pragma: no cover
|
|
16147
16196
|
elif not exp_columns_dict[column_name_exp_i]["dtype_present"]:
|
|
16148
|
-
dtype_exp_correct.append("")
|
|
16197
|
+
dtype_exp_correct.append("") # pragma: no cover
|
|
16149
16198
|
elif exp_columns_dict[column_name_exp_i]["dtype_matched"]:
|
|
16150
16199
|
dtype_exp_correct.append(CHECK_MARK_SPAN)
|
|
16151
16200
|
else:
|
|
@@ -16191,13 +16240,17 @@ def _step_report_schema_any_order(
|
|
|
16191
16240
|
#
|
|
16192
16241
|
|
|
16193
16242
|
if not exp_columns_dict[column_name_exp_i]["dtype_present"]:
|
|
16194
|
-
dtype_exp.append("")
|
|
16243
|
+
dtype_exp.append("") # pragma: no cover
|
|
16195
16244
|
|
|
16196
16245
|
elif len(exp_columns_dict[column_name_exp_i]["dtype_input"]) > 1:
|
|
16197
|
-
dtype = exp_columns_dict[column_name_exp_i]["dtype_input"]
|
|
16246
|
+
dtype = exp_columns_dict[column_name_exp_i]["dtype_input"] # pragma: no cover
|
|
16198
16247
|
|
|
16199
|
-
if
|
|
16200
|
-
|
|
16248
|
+
if (
|
|
16249
|
+
exp_columns_dict[column_name_exp_i]["dtype_matched_pos"] is not None
|
|
16250
|
+
): # pragma: no cover
|
|
16251
|
+
pos = exp_columns_dict[column_name_exp_i][
|
|
16252
|
+
"dtype_matched_pos"
|
|
16253
|
+
] # pragma: no cover
|
|
16201
16254
|
|
|
16202
16255
|
# Combine the dtypes together with pipes but underline the matched dtype in
|
|
16203
16256
|
# green with an HTML span tag and style attribute
|
|
@@ -16209,13 +16262,13 @@ def _step_report_schema_any_order(
|
|
|
16209
16262
|
else dtype[i]
|
|
16210
16263
|
)
|
|
16211
16264
|
for i in range(len(dtype))
|
|
16212
|
-
]
|
|
16213
|
-
dtype = " | ".join(dtype)
|
|
16214
|
-
dtype_exp.append(dtype)
|
|
16265
|
+
] # pragma: no cover
|
|
16266
|
+
dtype = " | ".join(dtype) # pragma: no cover
|
|
16267
|
+
dtype_exp.append(dtype) # pragma: no cover
|
|
16215
16268
|
|
|
16216
16269
|
else:
|
|
16217
|
-
dtype = " | ".join(dtype)
|
|
16218
|
-
dtype_exp.append(dtype)
|
|
16270
|
+
dtype = " | ".join(dtype) # pragma: no cover
|
|
16271
|
+
dtype_exp.append(dtype) # pragma: no cover
|
|
16219
16272
|
|
|
16220
16273
|
else:
|
|
16221
16274
|
dtype = exp_columns_dict[column_name_exp_i]["dtype_input"][0]
|
|
@@ -16227,12 +16280,12 @@ def _step_report_schema_any_order(
|
|
|
16227
16280
|
|
|
16228
16281
|
if not exp_columns_dict[column_name_exp_i]["colname_matched"]:
|
|
16229
16282
|
dtype_exp_correct.append("—")
|
|
16230
|
-
elif not exp_columns_dict[column_name_exp_i]["dtype_present"]:
|
|
16231
|
-
dtype_exp_correct.append("")
|
|
16232
|
-
elif exp_columns_dict[column_name_exp_i]["dtype_matched"]:
|
|
16233
|
-
dtype_exp_correct.append(CHECK_MARK_SPAN)
|
|
16234
|
-
else:
|
|
16235
|
-
dtype_exp_correct.append(CROSS_MARK_SPAN)
|
|
16283
|
+
elif not exp_columns_dict[column_name_exp_i]["dtype_present"]: # pragma: no cover
|
|
16284
|
+
dtype_exp_correct.append("") # pragma: no cover
|
|
16285
|
+
elif exp_columns_dict[column_name_exp_i]["dtype_matched"]: # pragma: no cover
|
|
16286
|
+
dtype_exp_correct.append(CHECK_MARK_SPAN) # pragma: no cover
|
|
16287
|
+
else: # pragma: no cover
|
|
16288
|
+
dtype_exp_correct.append(CROSS_MARK_SPAN) # pragma: no cover
|
|
16236
16289
|
|
|
16237
16290
|
if len(columns_found) > 0:
|
|
16238
16291
|
# Get the last index of the columns found
|
|
@@ -16248,7 +16301,9 @@ def _step_report_schema_any_order(
|
|
|
16248
16301
|
]
|
|
16249
16302
|
|
|
16250
16303
|
else:
|
|
16251
|
-
index_exp = [
|
|
16304
|
+
index_exp = [
|
|
16305
|
+
str(i) for i in range(1, len(colnames_exp_unmatched) + 1)
|
|
16306
|
+
] # pragma: no cover
|
|
16252
16307
|
|
|
16253
16308
|
schema_exp_unmatched = pl.DataFrame(
|
|
16254
16309
|
{
|
pointblank/yaml.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
from importlib import import_module
|
|
3
4
|
from pathlib import Path
|
|
4
|
-
from typing import Any, Union
|
|
5
|
+
from typing import Any, Iterable, Mapping, Optional, Union
|
|
5
6
|
|
|
6
7
|
import yaml
|
|
7
8
|
from narwhals.typing import FrameT
|
|
@@ -17,7 +18,9 @@ class YAMLValidationError(Exception):
|
|
|
17
18
|
pass
|
|
18
19
|
|
|
19
20
|
|
|
20
|
-
def _safe_eval_python_code(
|
|
21
|
+
def _safe_eval_python_code(
|
|
22
|
+
code: str, namespaces: Optional[Union[Iterable[str], Mapping[str, str]]] = None
|
|
23
|
+
) -> Any:
|
|
21
24
|
"""Safely evaluate Python code with restricted namespace.
|
|
22
25
|
|
|
23
26
|
This function provides a controlled environment for executing Python code embedded in YAML
|
|
@@ -68,6 +71,7 @@ def _safe_eval_python_code(code: str) -> Any:
|
|
|
68
71
|
"abs": abs,
|
|
69
72
|
"round": round,
|
|
70
73
|
"print": print,
|
|
74
|
+
"__import__": __import__,
|
|
71
75
|
},
|
|
72
76
|
}
|
|
73
77
|
|
|
@@ -88,12 +92,25 @@ def _safe_eval_python_code(code: str) -> Any:
|
|
|
88
92
|
|
|
89
93
|
safe_namespace["pd"] = pd
|
|
90
94
|
|
|
91
|
-
|
|
95
|
+
if namespaces:
|
|
96
|
+
for alias, module_name in (
|
|
97
|
+
namespaces.items() if isinstance(namespaces, dict) else ((m, m) for m in namespaces)
|
|
98
|
+
):
|
|
99
|
+
try:
|
|
100
|
+
safe_namespace[alias] = import_module(module_name)
|
|
101
|
+
except ImportError as e:
|
|
102
|
+
raise ImportError(
|
|
103
|
+
f"Could not import requested namespace '{module_name}': {e}"
|
|
104
|
+
) from e
|
|
105
|
+
|
|
106
|
+
# Check for dangerous patterns and be more specific about __import__ to allow legitimate use
|
|
92
107
|
dangerous_patterns = [
|
|
93
|
-
r"import\s+os",
|
|
94
|
-
r"import\s+sys",
|
|
95
|
-
r"import\s+subprocess",
|
|
96
|
-
r"__import__",
|
|
108
|
+
r"import\s+os\b",
|
|
109
|
+
r"import\s+sys\b",
|
|
110
|
+
r"import\s+subprocess\b",
|
|
111
|
+
r"__import__\s*\(\s*['\"]os['\"]",
|
|
112
|
+
r"__import__\s*\(\s*['\"]sys['\"]",
|
|
113
|
+
r"__import__\s*\(\s*['\"]subprocess['\"]",
|
|
97
114
|
r"exec\s*\(",
|
|
98
115
|
r"eval\s*\(",
|
|
99
116
|
r"open\s*\(",
|
|
@@ -142,7 +159,9 @@ def _safe_eval_python_code(code: str) -> Any:
|
|
|
142
159
|
raise YAMLValidationError(f"Error executing Python code '{code}': {e}")
|
|
143
160
|
|
|
144
161
|
|
|
145
|
-
def _process_python_expressions(
|
|
162
|
+
def _process_python_expressions(
|
|
163
|
+
value: Any, namespaces: Optional[Union[Iterable[str], Mapping[str, str]]] = None
|
|
164
|
+
) -> Any:
|
|
146
165
|
"""Process Python code snippets embedded in YAML values.
|
|
147
166
|
|
|
148
167
|
This function supports the python: block syntax for embedding Python code:
|
|
@@ -152,7 +171,7 @@ def _process_python_expressions(value: Any) -> Any:
|
|
|
152
171
|
pl.scan_csv("data.csv").head(10)
|
|
153
172
|
|
|
154
173
|
Note: col_vals_expr() also supports a shortcut syntax where the expr parameter
|
|
155
|
-
can be written directly without the python: wrapper:
|
|
174
|
+
can be written directly without the python: wrapper: +
|
|
156
175
|
|
|
157
176
|
col_vals_expr:
|
|
158
177
|
expr: |
|
|
@@ -180,14 +199,14 @@ def _process_python_expressions(value: Any) -> Any:
|
|
|
180
199
|
# Handle python: block syntax
|
|
181
200
|
if "python" in value and len(value) == 1:
|
|
182
201
|
code = value["python"]
|
|
183
|
-
return _safe_eval_python_code(code)
|
|
202
|
+
return _safe_eval_python_code(code, namespaces=namespaces)
|
|
184
203
|
|
|
185
204
|
# Recursively process dictionary values
|
|
186
|
-
return {k: _process_python_expressions(v) for k, v in value.items()}
|
|
205
|
+
return {k: _process_python_expressions(v, namespaces=namespaces) for k, v in value.items()}
|
|
187
206
|
|
|
188
207
|
elif isinstance(value, list):
|
|
189
208
|
# Recursively process list items
|
|
190
|
-
return [_process_python_expressions(item) for item in value]
|
|
209
|
+
return [_process_python_expressions(item, namespaces=namespaces) for item in value]
|
|
191
210
|
|
|
192
211
|
else:
|
|
193
212
|
# Return primitive types unchanged
|
|
@@ -302,7 +321,7 @@ class YAMLValidator:
|
|
|
302
321
|
raise YAMLValidationError("YAML must contain 'steps' field")
|
|
303
322
|
|
|
304
323
|
if not isinstance(config["steps"], list):
|
|
305
|
-
raise YAMLValidationError("'steps' must be a list")
|
|
324
|
+
raise YAMLValidationError("'steps' must be a list") # pragma: no cover
|
|
306
325
|
|
|
307
326
|
if len(config["steps"]) == 0:
|
|
308
327
|
raise YAMLValidationError("'steps' cannot be empty")
|
|
@@ -393,9 +412,9 @@ class YAMLValidator:
|
|
|
393
412
|
if processed_data is processed_tbl_spec and isinstance(processed_tbl_spec, str):
|
|
394
413
|
return load_dataset(processed_tbl_spec, tbl_type=df_library)
|
|
395
414
|
else:
|
|
396
|
-
return processed_data
|
|
415
|
+
return processed_data # pragma: no cover
|
|
397
416
|
|
|
398
|
-
except Exception as e:
|
|
417
|
+
except Exception as e: # pragma: no cover
|
|
399
418
|
raise YAMLValidationError(f"Failed to load data source '{tbl_spec}': {e}")
|
|
400
419
|
|
|
401
420
|
def _load_csv_file(self, file_path: str, df_library: str) -> Any:
|
|
@@ -439,16 +458,16 @@ class YAMLValidator:
|
|
|
439
458
|
|
|
440
459
|
elif df_library == "duckdb":
|
|
441
460
|
# For DuckDB, we'll use the existing _process_data since it handles DuckDB
|
|
442
|
-
from pointblank.validate import _process_data
|
|
461
|
+
from pointblank.validate import _process_data # pragma: no cover
|
|
443
462
|
|
|
444
|
-
return _process_data(file_path)
|
|
463
|
+
return _process_data(file_path) # pragma: no cover
|
|
445
464
|
|
|
446
465
|
else:
|
|
447
466
|
raise YAMLValidationError(
|
|
448
467
|
f"Unsupported df_library: {df_library}. Use 'polars', 'pandas', or 'duckdb'"
|
|
449
468
|
)
|
|
450
469
|
|
|
451
|
-
except Exception as e:
|
|
470
|
+
except Exception as e: # pragma: no cover
|
|
452
471
|
raise YAMLValidationError(
|
|
453
472
|
f"Failed to load CSV file '{file_path}' with {df_library}: {e}"
|
|
454
473
|
)
|
|
@@ -547,7 +566,11 @@ class YAMLValidator:
|
|
|
547
566
|
f"Schema specification must be a dictionary, got: {type(schema_spec)}"
|
|
548
567
|
)
|
|
549
568
|
|
|
550
|
-
def _parse_validation_step(
|
|
569
|
+
def _parse_validation_step(
|
|
570
|
+
self,
|
|
571
|
+
step_config: Union[str, dict],
|
|
572
|
+
namespaces: Optional[Union[Iterable[str], Mapping[str, str]]] = None,
|
|
573
|
+
) -> tuple[str, dict]:
|
|
551
574
|
"""Parse a single validation step from YAML configuration.
|
|
552
575
|
|
|
553
576
|
Parameters
|
|
@@ -598,14 +621,16 @@ class YAMLValidator:
|
|
|
598
621
|
# Special case: `col_vals_expr()`'s `expr=` parameter can use shortcut syntax
|
|
599
622
|
if method_name == "col_vals_expr" and key == "expr" and isinstance(value, str):
|
|
600
623
|
# Treat string directly as Python code (shortcut syntax)
|
|
601
|
-
processed_parameters[key] = _safe_eval_python_code(value)
|
|
624
|
+
processed_parameters[key] = _safe_eval_python_code(value, namespaces=namespaces)
|
|
602
625
|
# Special case: `pre=` parameter can use shortcut syntax (like `expr=`)
|
|
603
626
|
elif key == "pre" and isinstance(value, str):
|
|
604
627
|
# Treat string directly as Python code (shortcut syntax)
|
|
605
|
-
processed_parameters[key] = _safe_eval_python_code(value)
|
|
628
|
+
processed_parameters[key] = _safe_eval_python_code(value, namespaces=namespaces)
|
|
606
629
|
else:
|
|
607
630
|
# Normal processing (requires python: block syntax)
|
|
608
|
-
processed_parameters[key] = _process_python_expressions(
|
|
631
|
+
processed_parameters[key] = _process_python_expressions(
|
|
632
|
+
value, namespaces=namespaces
|
|
633
|
+
)
|
|
609
634
|
parameters = processed_parameters
|
|
610
635
|
|
|
611
636
|
# Convert `columns=` specification
|
|
@@ -634,7 +659,7 @@ class YAMLValidator:
|
|
|
634
659
|
if isinstance(expr, str):
|
|
635
660
|
lambda_expressions.append(_safe_eval_python_code(expr))
|
|
636
661
|
else:
|
|
637
|
-
lambda_expressions.append(expr)
|
|
662
|
+
lambda_expressions.append(expr) # pragma: no cover
|
|
638
663
|
# Pass expressions as positional arguments (stored as special key)
|
|
639
664
|
parameters["_conjointly_expressions"] = lambda_expressions
|
|
640
665
|
else:
|
|
@@ -658,7 +683,9 @@ class YAMLValidator:
|
|
|
658
683
|
|
|
659
684
|
return self.validation_method_map[method_name], parameters
|
|
660
685
|
|
|
661
|
-
def build_validation(
|
|
686
|
+
def build_validation(
|
|
687
|
+
self, config: dict, namespaces: Optional[Union[Iterable[str], Mapping[str, str]]] = None
|
|
688
|
+
) -> Validate:
|
|
662
689
|
"""Convert YAML config to Validate object.
|
|
663
690
|
|
|
664
691
|
Parameters
|
|
@@ -693,7 +720,9 @@ class YAMLValidator:
|
|
|
693
720
|
# Set actions if provided
|
|
694
721
|
if "actions" in config:
|
|
695
722
|
# Process actions: handle `python:` block syntax for callables
|
|
696
|
-
processed_actions = _process_python_expressions(
|
|
723
|
+
processed_actions = _process_python_expressions(
|
|
724
|
+
config["actions"], namespaces=namespaces
|
|
725
|
+
)
|
|
697
726
|
# Convert to Actions object
|
|
698
727
|
validate_kwargs["actions"] = Actions(**processed_actions)
|
|
699
728
|
|
|
@@ -713,7 +742,9 @@ class YAMLValidator:
|
|
|
713
742
|
|
|
714
743
|
# Add validation steps
|
|
715
744
|
for step_config in config["steps"]:
|
|
716
|
-
method_name, parameters = self._parse_validation_step(
|
|
745
|
+
method_name, parameters = self._parse_validation_step(
|
|
746
|
+
step_config, namespaces=namespaces
|
|
747
|
+
)
|
|
717
748
|
|
|
718
749
|
# Get the method from the validation object
|
|
719
750
|
method = getattr(validation, method_name)
|
|
@@ -728,7 +759,9 @@ class YAMLValidator:
|
|
|
728
759
|
|
|
729
760
|
return validation
|
|
730
761
|
|
|
731
|
-
def execute_workflow(
|
|
762
|
+
def execute_workflow(
|
|
763
|
+
self, config: dict, namespaces: Optional[Union[Iterable[str], Mapping[str, str]]] = None
|
|
764
|
+
) -> Validate:
|
|
732
765
|
"""Execute a complete YAML validation workflow.
|
|
733
766
|
|
|
734
767
|
Parameters
|
|
@@ -742,7 +775,7 @@ class YAMLValidator:
|
|
|
742
775
|
Interrogated Validate object with results.
|
|
743
776
|
"""
|
|
744
777
|
# Build the validation plan
|
|
745
|
-
validation = self.build_validation(config)
|
|
778
|
+
validation = self.build_validation(config, namespaces=namespaces)
|
|
746
779
|
|
|
747
780
|
# Execute interrogation to get results
|
|
748
781
|
validation = validation.interrogate()
|
|
@@ -750,7 +783,11 @@ class YAMLValidator:
|
|
|
750
783
|
return validation
|
|
751
784
|
|
|
752
785
|
|
|
753
|
-
def yaml_interrogate(
|
|
786
|
+
def yaml_interrogate(
|
|
787
|
+
yaml: Union[str, Path],
|
|
788
|
+
set_tbl: Union[FrameT, Any, None] = None,
|
|
789
|
+
namespaces: Optional[Union[Iterable[str], Mapping[str, str]]] = None,
|
|
790
|
+
) -> Validate:
|
|
754
791
|
"""Execute a YAML-based validation workflow.
|
|
755
792
|
|
|
756
793
|
This is the main entry point for YAML-based validation workflows. It takes YAML configuration
|
|
@@ -772,6 +809,10 @@ def yaml_interrogate(yaml: Union[str, Path], set_tbl: Union[FrameT, Any, None] =
|
|
|
772
809
|
`tbl` field before executing the validation workflow. This can be any supported table type
|
|
773
810
|
including DataFrame objects, Ibis table objects, CSV file paths, Parquet file paths, GitHub
|
|
774
811
|
URLs, or database connection strings.
|
|
812
|
+
namespaces
|
|
813
|
+
Optional module namespaces to make available for Python code execution in YAML
|
|
814
|
+
configurations. Can be a dictionary mapping aliases to module names or a list of module
|
|
815
|
+
names. See the "Using Namespaces" section below for detailed examples.
|
|
775
816
|
|
|
776
817
|
Returns
|
|
777
818
|
-------
|
|
@@ -786,6 +827,71 @@ def yaml_interrogate(yaml: Union[str, Path], set_tbl: Union[FrameT, Any, None] =
|
|
|
786
827
|
If the YAML is invalid, malformed, or execution fails. This includes syntax errors, missing
|
|
787
828
|
required fields, unknown validation methods, or data loading failures.
|
|
788
829
|
|
|
830
|
+
Using Namespaces
|
|
831
|
+
----------------
|
|
832
|
+
The `namespaces=` parameter enables custom Python modules and functions in YAML configurations.
|
|
833
|
+
This is particularly useful for custom action functions and advanced Python expressions.
|
|
834
|
+
|
|
835
|
+
**Namespace formats:**
|
|
836
|
+
|
|
837
|
+
- Dictionary format: `{"alias": "module.name"}` maps aliases to module names
|
|
838
|
+
- List format: `["module.name", "another.module"]` imports modules directly
|
|
839
|
+
|
|
840
|
+
**Option 1: Inline expressions (no namespaces needed)**
|
|
841
|
+
|
|
842
|
+
```{python}
|
|
843
|
+
import pointblank as pb
|
|
844
|
+
|
|
845
|
+
# Simple inline custom action
|
|
846
|
+
yaml_config = '''
|
|
847
|
+
tbl: small_table
|
|
848
|
+
thresholds:
|
|
849
|
+
warning: 0.01
|
|
850
|
+
actions:
|
|
851
|
+
warning:
|
|
852
|
+
python: "lambda: print('Custom warning triggered')"
|
|
853
|
+
steps:
|
|
854
|
+
- col_vals_gt:
|
|
855
|
+
columns: [a]
|
|
856
|
+
value: 1000
|
|
857
|
+
'''
|
|
858
|
+
|
|
859
|
+
result = pb.yaml_interrogate(yaml_config)
|
|
860
|
+
result
|
|
861
|
+
```
|
|
862
|
+
|
|
863
|
+
**Option 2: External functions with namespaces**
|
|
864
|
+
|
|
865
|
+
```{python}
|
|
866
|
+
# Define a custom action function
|
|
867
|
+
def my_custom_action():
|
|
868
|
+
print("Data validation failed: please check your data.")
|
|
869
|
+
|
|
870
|
+
# Add to current module for demo
|
|
871
|
+
import sys
|
|
872
|
+
sys.modules[__name__].my_custom_action = my_custom_action
|
|
873
|
+
|
|
874
|
+
# YAML that references the external function
|
|
875
|
+
yaml_config = '''
|
|
876
|
+
tbl: small_table
|
|
877
|
+
thresholds:
|
|
878
|
+
warning: 0.01
|
|
879
|
+
actions:
|
|
880
|
+
warning:
|
|
881
|
+
python: actions.my_custom_action
|
|
882
|
+
steps:
|
|
883
|
+
- col_vals_gt:
|
|
884
|
+
columns: [a]
|
|
885
|
+
value: 1000 # This will fail
|
|
886
|
+
'''
|
|
887
|
+
|
|
888
|
+
# Use namespaces to make the function available
|
|
889
|
+
result = pb.yaml_interrogate(yaml_config, namespaces={'actions': '__main__'})
|
|
890
|
+
result
|
|
891
|
+
```
|
|
892
|
+
|
|
893
|
+
This approach enables modular, reusable validation workflows with custom business logic.
|
|
894
|
+
|
|
789
895
|
Examples
|
|
790
896
|
--------
|
|
791
897
|
```{python}
|
|
@@ -928,14 +1034,14 @@ def yaml_interrogate(yaml: Union[str, Path], set_tbl: Union[FrameT, Any, None] =
|
|
|
928
1034
|
# If `set_tbl=` is provided, we need to build the validation workflow and then use `set_tbl()`
|
|
929
1035
|
if set_tbl is not None:
|
|
930
1036
|
# First build the validation object without interrogation
|
|
931
|
-
validation = validator.build_validation(config)
|
|
1037
|
+
validation = validator.build_validation(config, namespaces=namespaces)
|
|
932
1038
|
# Then replace the table using set_tbl method
|
|
933
1039
|
validation = validation.set_tbl(tbl=set_tbl)
|
|
934
1040
|
# Finally interrogate with the new table
|
|
935
1041
|
return validation.interrogate()
|
|
936
1042
|
else:
|
|
937
1043
|
# Standard execution without table override (includes interrogation)
|
|
938
|
-
return validator.execute_workflow(config)
|
|
1044
|
+
return validator.execute_workflow(config, namespaces=namespaces)
|
|
939
1045
|
|
|
940
1046
|
|
|
941
1047
|
def load_yaml_config(file_path: Union[str, Path]) -> dict:
|
|
@@ -1223,7 +1329,7 @@ def yaml_to_python(yaml: Union[str, Path]) -> str:
|
|
|
1223
1329
|
"""
|
|
1224
1330
|
# First, parse the raw YAML to detect Polars/Pandas expressions in the source code
|
|
1225
1331
|
if isinstance(yaml, Path):
|
|
1226
|
-
yaml_content = yaml.read_text()
|
|
1332
|
+
yaml_content = yaml.read_text() # pragma: no cover
|
|
1227
1333
|
elif isinstance(yaml, str):
|
|
1228
1334
|
# Check if it's a file path (single line, reasonable length, no newlines)
|
|
1229
1335
|
if len(yaml) < 260 and "\n" not in yaml and Path(yaml).exists():
|
|
@@ -1231,7 +1337,7 @@ def yaml_to_python(yaml: Union[str, Path]) -> str:
|
|
|
1231
1337
|
else:
|
|
1232
1338
|
yaml_content = yaml
|
|
1233
1339
|
else:
|
|
1234
|
-
yaml_content = str(yaml)
|
|
1340
|
+
yaml_content = str(yaml) # pragma: no cover
|
|
1235
1341
|
|
|
1236
1342
|
# Track whether we need to import Polars and Pandas by analyzing the raw YAML content
|
|
1237
1343
|
needs_polars_import = False
|
|
@@ -1326,7 +1432,7 @@ def yaml_to_python(yaml: Union[str, Path]) -> str:
|
|
|
1326
1432
|
validate_args.append(f'data=pb.load_dataset("{tbl_spec}", tbl_type="{df_library}")')
|
|
1327
1433
|
else:
|
|
1328
1434
|
# Fallback to placeholder if we couldn't extract the original expression
|
|
1329
|
-
validate_args.append("data=<python_expression_result>")
|
|
1435
|
+
validate_args.append("data=<python_expression_result>") # pragma: no cover
|
|
1330
1436
|
|
|
1331
1437
|
# Add table name if present
|
|
1332
1438
|
if "tbl_name" in config:
|
|
@@ -1359,7 +1465,7 @@ def yaml_to_python(yaml: Union[str, Path]) -> str:
|
|
|
1359
1465
|
action_params.append(f'{key}="{value}"')
|
|
1360
1466
|
else:
|
|
1361
1467
|
# For callables or complex expressions, use placeholder
|
|
1362
|
-
action_params.append(f"{key}={value}")
|
|
1468
|
+
action_params.append(f"{key}={value}") # pragma: no cover
|
|
1363
1469
|
actions_str = "pb.Actions(" + ", ".join(action_params) + ")"
|
|
1364
1470
|
validate_args.append(f"actions={actions_str}")
|
|
1365
1471
|
|
|
@@ -1414,7 +1520,7 @@ def yaml_to_python(yaml: Union[str, Path]) -> str:
|
|
|
1414
1520
|
elif isinstance(step_params["expr"], str):
|
|
1415
1521
|
original_expressions["expr"] = step_params["expr"]
|
|
1416
1522
|
|
|
1417
|
-
method_name, parameters = validator._parse_validation_step(step_config)
|
|
1523
|
+
method_name, parameters = validator._parse_validation_step(step_config, namespaces=None)
|
|
1418
1524
|
|
|
1419
1525
|
# Apply the original expressions to override the converted lambda functions
|
|
1420
1526
|
if method_name == "conjointly" and "expressions" in original_expressions:
|
|
@@ -1446,13 +1552,13 @@ def yaml_to_python(yaml: Union[str, Path]) -> str:
|
|
|
1446
1552
|
expressions_str = "[" + ", ".join([f'"{expr}"' for expr in value]) + "]"
|
|
1447
1553
|
param_parts.append(f"expressions={expressions_str}")
|
|
1448
1554
|
else:
|
|
1449
|
-
param_parts.append(f"expressions={value}")
|
|
1555
|
+
param_parts.append(f"expressions={value}") # pragma: no cover
|
|
1450
1556
|
elif key == "expr" and method_name == "specially":
|
|
1451
1557
|
# Handle specially expr parameter: should be unquoted lambda expression
|
|
1452
1558
|
if isinstance(value, str):
|
|
1453
1559
|
param_parts.append(f"expr={value}")
|
|
1454
1560
|
else:
|
|
1455
|
-
param_parts.append(f"expr={value}")
|
|
1561
|
+
param_parts.append(f"expr={value}") # pragma: no cover
|
|
1456
1562
|
elif key in ["columns", "columns_subset"]:
|
|
1457
1563
|
if isinstance(value, list):
|
|
1458
1564
|
if len(value) == 1:
|
|
@@ -1463,7 +1569,7 @@ def yaml_to_python(yaml: Union[str, Path]) -> str:
|
|
|
1463
1569
|
columns_str = "[" + ", ".join([f'"{col}"' for col in value]) + "]"
|
|
1464
1570
|
param_parts.append(f"{key}={columns_str}")
|
|
1465
1571
|
else:
|
|
1466
|
-
param_parts.append(f'{key}="{value}"')
|
|
1572
|
+
param_parts.append(f'{key}="{value}"') # pragma: no cover
|
|
1467
1573
|
elif key == "brief":
|
|
1468
1574
|
# Handle `brief=` parameter: can be a boolean or a string
|
|
1469
1575
|
if isinstance(value, bool):
|
|
@@ -1486,25 +1592,29 @@ def yaml_to_python(yaml: Union[str, Path]) -> str:
|
|
|
1486
1592
|
elif isinstance(value.warning, list) and len(value.warning) == 1:
|
|
1487
1593
|
action_params.append(f'warning="{value.warning[0]}"')
|
|
1488
1594
|
else:
|
|
1489
|
-
action_params.append(f"warning={value.warning}")
|
|
1595
|
+
action_params.append(f"warning={value.warning}") # pragma: no cover
|
|
1490
1596
|
|
|
1491
1597
|
if value.error is not None:
|
|
1492
1598
|
error_expr_path = f"{step_action_base}.error"
|
|
1493
1599
|
if error_expr_path in step_expressions:
|
|
1494
|
-
action_params.append(
|
|
1600
|
+
action_params.append(
|
|
1601
|
+
f"error={step_expressions[error_expr_path]}"
|
|
1602
|
+
) # pragma: no cover
|
|
1495
1603
|
elif isinstance(value.error, list) and len(value.error) == 1:
|
|
1496
1604
|
action_params.append(f'error="{value.error[0]}"')
|
|
1497
1605
|
else:
|
|
1498
|
-
action_params.append(f"error={value.error}")
|
|
1606
|
+
action_params.append(f"error={value.error}") # pragma: no cover
|
|
1499
1607
|
|
|
1500
1608
|
if value.critical is not None:
|
|
1501
1609
|
critical_expr_path = f"{step_action_base}.critical"
|
|
1502
1610
|
if critical_expr_path in step_expressions:
|
|
1503
|
-
action_params.append(
|
|
1611
|
+
action_params.append(
|
|
1612
|
+
f"critical={step_expressions[critical_expr_path]}"
|
|
1613
|
+
) # pragma: no cover
|
|
1504
1614
|
elif isinstance(value.critical, list) and len(value.critical) == 1:
|
|
1505
1615
|
action_params.append(f'critical="{value.critical[0]}"')
|
|
1506
1616
|
else:
|
|
1507
|
-
action_params.append(f"critical={value.critical}")
|
|
1617
|
+
action_params.append(f"critical={value.critical}") # pragma: no cover
|
|
1508
1618
|
|
|
1509
1619
|
if hasattr(value, "highest_only") and value.highest_only is not True:
|
|
1510
1620
|
action_params.append(f"highest_only={value.highest_only}")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pointblank
|
|
3
|
-
Version: 0.13.
|
|
3
|
+
Version: 0.13.3
|
|
4
4
|
Summary: Find out if your data is what you think it is.
|
|
5
5
|
Author-email: Richard Iannone <riannone@me.com>
|
|
6
6
|
License: MIT License
|
|
@@ -49,7 +49,6 @@ Requires-Dist: requests>=2.31.0
|
|
|
49
49
|
Requires-Dist: click>=8.0.0
|
|
50
50
|
Requires-Dist: rich>=13.0.0
|
|
51
51
|
Requires-Dist: pyyaml>=6.0.0
|
|
52
|
-
Requires-Dist: polars>=1.33.0
|
|
53
52
|
Provides-Extra: pd
|
|
54
53
|
Requires-Dist: pandas>=2.2.3; extra == "pd"
|
|
55
54
|
Provides-Extra: pl
|
|
@@ -92,6 +91,7 @@ Requires-Dist: pandas>=2.2.3; extra == "docs"
|
|
|
92
91
|
Requires-Dist: polars>=1.17.1; extra == "docs"
|
|
93
92
|
Requires-Dist: pyspark==3.5.6; extra == "docs"
|
|
94
93
|
Requires-Dist: openpyxl>=3.0.0; extra == "docs"
|
|
94
|
+
Requires-Dist: duckdb<1.3.3,>=1.2.0; extra == "docs"
|
|
95
95
|
Dynamic: license-file
|
|
96
96
|
|
|
97
97
|
<div align="center">
|
|
@@ -20,8 +20,8 @@ pointblank/scan_profile_stats.py,sha256=qdzoGXB-zi2hmpA4mTz6LLTqMnb-NRG9ndxU9cxS
|
|
|
20
20
|
pointblank/schema.py,sha256=hjALMuYppNfELC_nAqfM9fLjPdN1w2M3rDMusrPqFYA,50757
|
|
21
21
|
pointblank/segments.py,sha256=RXp3lPr3FboVseadNqLgIeoMBh_mykrQSFp1WtV41Yg,5570
|
|
22
22
|
pointblank/thresholds.py,sha256=mybeLzTVdmN04NLKoV-jiSBXsWknwHO0Gox0ttVN_MU,25766
|
|
23
|
-
pointblank/validate.py,sha256=
|
|
24
|
-
pointblank/yaml.py,sha256=
|
|
23
|
+
pointblank/validate.py,sha256=v4jzFOYufrck_3CPIz4Jo53Y_5VYYTTFcqMq6B4LttY,713196
|
|
24
|
+
pointblank/yaml.py,sha256=cHwDvybhp_oLOGR1rA83trEDQWYuRGhT4iEa6FMXi6w,63074
|
|
25
25
|
pointblank/data/api-docs.txt,sha256=w2nIkIL_fJpXlPR9clogqcgdiv-uHvdSDI8gjkP_mCQ,531711
|
|
26
26
|
pointblank/data/game_revenue-duckdb.zip,sha256=tKIVx48OGLYGsQPS3h5AjA2Nyq_rfEpLCjBiFUWhagU,35880
|
|
27
27
|
pointblank/data/game_revenue.zip,sha256=7c9EvHLyi93CHUd4p3dM4CZ-GucFCtXKSPxgLojL32U,33749
|
|
@@ -32,9 +32,9 @@ pointblank/data/nycflights.zip,sha256=yVjbUaKUz2LydSdF9cABuir0VReHBBgV7shiNWSd0m
|
|
|
32
32
|
pointblank/data/polars-api-docs.txt,sha256=KGcS-BOtUs9zgpkWfXD-GFdFh4O_zjdkpX7msHjztLg,198045
|
|
33
33
|
pointblank/data/small_table-duckdb.zip,sha256=BhTaZ2CRS4-9Z1uVhOU6HggvW3XCar7etMznfENIcOc,2028
|
|
34
34
|
pointblank/data/small_table.zip,sha256=lmFb90Nb-v5X559Ikjg31YLAXuRyMkD9yLRElkXPMzQ,472
|
|
35
|
-
pointblank-0.13.
|
|
36
|
-
pointblank-0.13.
|
|
37
|
-
pointblank-0.13.
|
|
38
|
-
pointblank-0.13.
|
|
39
|
-
pointblank-0.13.
|
|
40
|
-
pointblank-0.13.
|
|
35
|
+
pointblank-0.13.3.dist-info/licenses/LICENSE,sha256=apLF-HWPNU7pT5bmf5KmZpD5Cklpy2u-BN_0xBoRMLY,1081
|
|
36
|
+
pointblank-0.13.3.dist-info/METADATA,sha256=jXGDWi-DW5kAdRyUTjgVfRTB-6tMgyYd-uqeeyCvvKk,19582
|
|
37
|
+
pointblank-0.13.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
38
|
+
pointblank-0.13.3.dist-info/entry_points.txt,sha256=GqqqOTOH8uZe22wLcvYjzpizqk_j4MNcUo2YM14ryCw,42
|
|
39
|
+
pointblank-0.13.3.dist-info/top_level.txt,sha256=-wHrS1SvV8-nhvc3w-PPYs1C1WtEc1pK-eGjubbCCKc,11
|
|
40
|
+
pointblank-0.13.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|