pointblank 0.11.5__py3-none-any.whl → 0.11.6__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
pointblank/validate.py CHANGED
@@ -11684,7 +11684,9 @@ class Validate:
11684
11684
  # Determine the rows that passed all validation steps by checking if all `pb_is_good_`
11685
11685
  # columns are `True`
11686
11686
  labeled_tbl_nw = (
11687
- labeled_tbl_nw.with_columns(pb_is_good_all=nw.all_horizontal(pb_is_good_cols))
11687
+ labeled_tbl_nw.with_columns(
11688
+ pb_is_good_all=nw.all_horizontal(pb_is_good_cols, ignore_nulls=True)
11689
+ )
11688
11690
  .join(data_nw, on=index_name, how="left")
11689
11691
  .drop(index_name)
11690
11692
  )
pointblank/yaml.py CHANGED
@@ -5,6 +5,7 @@ from typing import Any, Union
5
5
 
6
6
  import yaml
7
7
 
8
+ from pointblank._utils import _is_lib_present
8
9
  from pointblank.thresholds import Actions
9
10
  from pointblank.validate import Validate, load_dataset
10
11
 
@@ -217,6 +218,8 @@ class YAMLValidator:
217
218
  "col_count_match": "col_count_match",
218
219
  "row_count_match": "row_count_match",
219
220
  "col_schema_match": "col_schema_match",
221
+ "conjointly": "conjointly",
222
+ "specially": "specially",
220
223
  }
221
224
 
222
225
  def __init__(self):
@@ -345,7 +348,7 @@ class YAMLValidator:
345
348
  f"or list of strings/dictionaries"
346
349
  )
347
350
 
348
- def _load_data_source(self, tbl_spec: str) -> Any:
351
+ def _load_data_source(self, tbl_spec: str, df_library: str = "polars") -> Any:
349
352
  """Load data source based on table specification.
350
353
 
351
354
  Parameters
@@ -354,6 +357,8 @@ class YAMLValidator:
354
357
  Data source specification. Can be (1) a dataset name for `load_dataset()`, (2) a CSV file
355
358
  path (relative or absolute), (3) a Parquet file path (relative or absolute), or (4) a
356
359
  Python code snippet to be executed for dynamic data loading.
360
+ df_library
361
+ DataFrame library to use for loading datasets and CSV files. Options: "polars", "pandas", "duckdb".
357
362
 
358
363
  Returns
359
364
  -------
@@ -374,20 +379,79 @@ class YAMLValidator:
374
379
  if processed_tbl_spec is not tbl_spec or not isinstance(processed_tbl_spec, str):
375
380
  return processed_tbl_spec
376
381
 
382
+ # Check if it's a CSV file and handle with specified library
383
+ if isinstance(processed_tbl_spec, str) and processed_tbl_spec.endswith(".csv"):
384
+ return self._load_csv_file(processed_tbl_spec, df_library)
385
+
377
386
  # Use the centralized data processing pipeline from validate.py
378
- # This handles CSV files, Parquet files, and other data sources
387
+ # This handles Parquet files and other data sources
379
388
  processed_data = _process_data(processed_tbl_spec)
380
389
 
381
390
  # If _process_data returns the original string unchanged,
382
- # then it's not a file path, so try load_dataset
391
+ # then it's not a file path, so try load_dataset with specified library
383
392
  if processed_data is processed_tbl_spec and isinstance(processed_tbl_spec, str):
384
- return load_dataset(processed_tbl_spec)
393
+ return load_dataset(processed_tbl_spec, tbl_type=df_library)
385
394
  else:
386
395
  return processed_data
387
396
 
388
397
  except Exception as e:
389
398
  raise YAMLValidationError(f"Failed to load data source '{tbl_spec}': {e}")
390
399
 
400
+ def _load_csv_file(self, file_path: str, df_library: str) -> Any:
401
+ """Load CSV file using the specified DataFrame library.
402
+
403
+ Parameters
404
+ ----------
405
+ file_path
406
+ Path to the CSV file.
407
+ df_library
408
+ DataFrame library to use: "polars", "pandas", or "duckdb".
409
+
410
+ Returns
411
+ -------
412
+ Loaded DataFrame object.
413
+
414
+ Raises
415
+ ------
416
+ YAMLValidationError
417
+ If CSV file cannot be loaded or library is not available.
418
+ """
419
+ import os
420
+
421
+ if not os.path.exists(file_path):
422
+ raise YAMLValidationError(f"CSV file not found: {file_path}")
423
+
424
+ try:
425
+ if df_library == "polars":
426
+ if not _is_lib_present("polars"):
427
+ raise YAMLValidationError("Polars library is not available")
428
+ import polars as pl
429
+
430
+ return pl.read_csv(file_path)
431
+
432
+ elif df_library == "pandas":
433
+ if not _is_lib_present("pandas"):
434
+ raise YAMLValidationError("Pandas library is not available")
435
+ import pandas as pd
436
+
437
+ return pd.read_csv(file_path)
438
+
439
+ elif df_library == "duckdb":
440
+ # For DuckDB, we'll use the existing _process_data since it handles DuckDB
441
+ from pointblank.validate import _process_data
442
+
443
+ return _process_data(file_path)
444
+
445
+ else:
446
+ raise YAMLValidationError(
447
+ f"Unsupported df_library: {df_library}. Use 'polars', 'pandas', or 'duckdb'"
448
+ )
449
+
450
+ except Exception as e:
451
+ raise YAMLValidationError(
452
+ f"Failed to load CSV file '{file_path}' with {df_library}: {e}"
453
+ )
454
+
391
455
  def _parse_column_spec(self, columns_expr: Any) -> list[str]:
392
456
  """Parse column specification from YAML.
393
457
 
@@ -559,6 +623,29 @@ class YAMLValidator:
559
623
  if "schema" in parameters and method_name == "col_schema_match":
560
624
  parameters["schema"] = self._parse_schema_spec(parameters["schema"])
561
625
 
626
+ # Handle `conjointly()` expressions: convert list to separate positional arguments
627
+ if method_name == "conjointly" and "expressions" in parameters:
628
+ expressions = parameters.pop("expressions") # Remove from parameters
629
+ if isinstance(expressions, list):
630
+ # Convert string expressions to lambda functions
631
+ lambda_expressions = []
632
+ for expr in expressions:
633
+ if isinstance(expr, str):
634
+ lambda_expressions.append(_safe_eval_python_code(expr))
635
+ else:
636
+ lambda_expressions.append(expr)
637
+ # Pass expressions as positional arguments (stored as special key)
638
+ parameters["_conjointly_expressions"] = lambda_expressions
639
+ else:
640
+ raise YAMLValidationError("conjointly 'expressions' must be a list")
641
+
642
+ # Handle `specially()` expr parameter: support shortcut syntax
643
+ if method_name == "specially" and "expr" in parameters:
644
+ expr_value = parameters["expr"]
645
+ if isinstance(expr_value, str):
646
+ # Treat string directly as Python code (shortcut syntax)
647
+ parameters["expr"] = _safe_eval_python_code(expr_value)
648
+
562
649
  # Convert `actions=` if present (ensure it's an Actions object)
563
650
  if "actions" in parameters:
564
651
  if isinstance(parameters["actions"], dict):
@@ -583,8 +670,9 @@ class YAMLValidator:
583
670
  Validate
584
671
  Validate object with configured validation steps.
585
672
  """
586
- # Load data source
587
- data = self._load_data_source(config["tbl"])
673
+ # Load data source with specified library
674
+ df_library = config.get("df_library", "polars")
675
+ data = self._load_data_source(config["tbl"], df_library)
588
676
 
589
677
  # Create Validate object
590
678
  validate_kwargs = {}
@@ -603,7 +691,7 @@ class YAMLValidator:
603
691
 
604
692
  # Set actions if provided
605
693
  if "actions" in config:
606
- # Process actions - handle python: block syntax for callables
694
+ # Process actions: handle `python:` block syntax for callables
607
695
  processed_actions = _process_python_expressions(config["actions"])
608
696
  # Convert to Actions object
609
697
  validate_kwargs["actions"] = Actions(**processed_actions)
@@ -629,8 +717,13 @@ class YAMLValidator:
629
717
  # Get the method from the validation object
630
718
  method = getattr(validation, method_name)
631
719
 
632
- # Call the method with parameters
633
- validation = method(**parameters)
720
+ # Special handling for conjointly: pass expressions as positional arguments
721
+ if method_name == "conjointly" and "_conjointly_expressions" in parameters:
722
+ expressions = parameters.pop("_conjointly_expressions")
723
+ validation = method(*expressions, **parameters)
724
+ else:
725
+ # Call the method with parameters
726
+ validation = method(**parameters)
634
727
 
635
728
  return validation
636
729
 
@@ -1162,20 +1255,21 @@ def yaml_to_python(yaml: Union[str, Path]) -> str:
1162
1255
 
1163
1256
  # Add data loading as first argument
1164
1257
  tbl_spec = config["tbl"]
1165
- if isinstance(tbl_spec, str):
1258
+ df_library = config.get("df_library", "polars")
1259
+
1260
+ # Use the original Python expression if we extracted it (df_library is ignored in this case)
1261
+ if original_tbl_expression:
1262
+ validate_args.append(f"data={original_tbl_expression}")
1263
+ elif isinstance(tbl_spec, str):
1166
1264
  if tbl_spec.endswith((".csv", ".parquet")):
1167
1265
  # File loading
1168
- validate_args.append(f'data=pb.load_dataset("{tbl_spec}")')
1266
+ validate_args.append(f'data=pb.load_dataset("{tbl_spec}", tbl_type="{df_library}")')
1169
1267
  else:
1170
1268
  # Dataset loading
1171
- validate_args.append(f'data=pb.load_dataset("{tbl_spec}")')
1269
+ validate_args.append(f'data=pb.load_dataset("{tbl_spec}", tbl_type="{df_library}")')
1172
1270
  else:
1173
- # Use the original Python expression if we extracted it
1174
- if original_tbl_expression:
1175
- validate_args.append(f"data={original_tbl_expression}")
1176
- else:
1177
- # Fallback to placeholder if we couldn't extract the original expression
1178
- validate_args.append("data=<python_expression_result>")
1271
+ # Fallback to placeholder if we couldn't extract the original expression
1272
+ validate_args.append("data=<python_expression_result>")
1179
1273
 
1180
1274
  # Add table name if present
1181
1275
  if "tbl_name" in config:
@@ -1243,16 +1337,65 @@ def yaml_to_python(yaml: Union[str, Path]) -> str:
1243
1337
 
1244
1338
  # Add validation steps as chained method calls
1245
1339
  for step_index, step_config in enumerate(config["steps"]):
1340
+ # Get original expressions before parsing
1341
+ original_expressions = {}
1342
+ step_method = list(step_config.keys())[
1343
+ 0
1344
+ ] # Get the method name (conjointly, specially, etc.)
1345
+ step_params = step_config[step_method]
1346
+
1347
+ if (
1348
+ step_method == "conjointly"
1349
+ and isinstance(step_params, dict)
1350
+ and "expressions" in step_params
1351
+ ):
1352
+ original_expressions["expressions"] = step_params["expressions"]
1353
+
1354
+ if step_method == "specially" and isinstance(step_params, dict) and "expr" in step_params:
1355
+ if isinstance(step_params["expr"], dict) and "python" in step_params["expr"]:
1356
+ original_expressions["expr"] = step_params["expr"]["python"].strip()
1357
+ elif isinstance(step_params["expr"], str):
1358
+ original_expressions["expr"] = step_params["expr"]
1359
+
1246
1360
  method_name, parameters = validator._parse_validation_step(step_config)
1247
1361
 
1362
+ # Apply the original expressions to override the converted lambda functions
1363
+ if method_name == "conjointly" and "expressions" in original_expressions:
1364
+ # Remove the internal parameter and add expressions as a proper parameter
1365
+ if "_conjointly_expressions" in parameters:
1366
+ parameters.pop("_conjointly_expressions")
1367
+ parameters["expressions"] = original_expressions["expressions"]
1368
+
1369
+ if method_name == "specially" and "expr" in original_expressions:
1370
+ parameters["expr"] = original_expressions["expr"]
1371
+
1248
1372
  # Format parameters
1249
1373
  param_parts = []
1250
1374
  for key, value in parameters.items():
1251
1375
  # Check if we have an original expression for this parameter
1252
1376
  expression_path = f"steps[{step_index}].{list(step_config.keys())[0]}.{key}"
1253
- if expression_path in step_expressions:
1377
+
1378
+ # Skip using step_expressions for specially/conjointly parameters that we handle specially
1379
+ if (
1380
+ expression_path in step_expressions
1381
+ and not (method_name == "specially" and key == "expr")
1382
+ and not (method_name == "conjointly" and key == "expressions")
1383
+ ):
1254
1384
  # Use the original Python expression
1255
1385
  param_parts.append(f"{key}={step_expressions[expression_path]}")
1386
+ elif key == "expressions" and method_name == "conjointly":
1387
+ # Handle conjointly expressions list
1388
+ if isinstance(value, list):
1389
+ expressions_str = "[" + ", ".join([f'"{expr}"' for expr in value]) + "]"
1390
+ param_parts.append(f"expressions={expressions_str}")
1391
+ else:
1392
+ param_parts.append(f"expressions={value}")
1393
+ elif key == "expr" and method_name == "specially":
1394
+ # Handle specially expr parameter: should be unquoted lambda expression
1395
+ if isinstance(value, str):
1396
+ param_parts.append(f"expr={value}")
1397
+ else:
1398
+ param_parts.append(f"expr={value}")
1256
1399
  elif key in ["columns", "columns_subset"]:
1257
1400
  if isinstance(value, list):
1258
1401
  if len(value) == 1:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pointblank
3
- Version: 0.11.5
3
+ Version: 0.11.6
4
4
  Summary: Find out if your data is what you think it is.
5
5
  Author-email: Richard Iannone <riannone@me.com>
6
6
  License: MIT License
@@ -43,7 +43,7 @@ License-File: LICENSE
43
43
  Requires-Dist: commonmark>=0.9.1
44
44
  Requires-Dist: importlib-metadata
45
45
  Requires-Dist: great_tables>=0.17.0
46
- Requires-Dist: narwhals>=1.41.0
46
+ Requires-Dist: narwhals>=1.45.0
47
47
  Requires-Dist: typing_extensions>=3.10.0.0
48
48
  Requires-Dist: requests>=2.31.0
49
49
  Requires-Dist: click>=8.0.0
@@ -20,8 +20,8 @@ pointblank/scan_profile_stats.py,sha256=qdzoGXB-zi2hmpA4mTz6LLTqMnb-NRG9ndxU9cxS
20
20
  pointblank/schema.py,sha256=d93omncsV2lVbatM_QUFeCfCFA42WPZcgO_kE-ktjfU,45107
21
21
  pointblank/tf.py,sha256=8o_8m4i01teulEe3-YYMotSNf3tImjBMInsvdjSAO5Q,8844
22
22
  pointblank/thresholds.py,sha256=mybeLzTVdmN04NLKoV-jiSBXsWknwHO0Gox0ttVN_MU,25766
23
- pointblank/validate.py,sha256=AHy0WfNYyHV8fM3D8XHnuNPP1A1VGwrt6R9fWpwwY5Q,680283
24
- pointblank/yaml.py,sha256=R7pQ3p6kdi1OZ0zGINYTZ_D4IyLigtVW9utsu3T4OjU,51268
23
+ pointblank/validate.py,sha256=CelY6wwB1JRyii-KWEyKfiazd7mToxAegHG1GMtKIm4,680332
24
+ pointblank/yaml.py,sha256=4DrkOJwCQ3CaXQ7ESNIW72pp-dL1ctlX6ONU30Vh1Fs,57901
25
25
  pointblank/data/api-docs.txt,sha256=_mKEb3zuI6TR0bPNkpr5Y-GUtbB3Qv5WESR7MFuL06I,506515
26
26
  pointblank/data/game_revenue-duckdb.zip,sha256=tKIVx48OGLYGsQPS3h5AjA2Nyq_rfEpLCjBiFUWhagU,35880
27
27
  pointblank/data/game_revenue.zip,sha256=7c9EvHLyi93CHUd4p3dM4CZ-GucFCtXKSPxgLojL32U,33749
@@ -32,9 +32,9 @@ pointblank/data/nycflights.zip,sha256=yVjbUaKUz2LydSdF9cABuir0VReHBBgV7shiNWSd0m
32
32
  pointblank/data/polars-api-docs.txt,sha256=KGcS-BOtUs9zgpkWfXD-GFdFh4O_zjdkpX7msHjztLg,198045
33
33
  pointblank/data/small_table-duckdb.zip,sha256=BhTaZ2CRS4-9Z1uVhOU6HggvW3XCar7etMznfENIcOc,2028
34
34
  pointblank/data/small_table.zip,sha256=lmFb90Nb-v5X559Ikjg31YLAXuRyMkD9yLRElkXPMzQ,472
35
- pointblank-0.11.5.dist-info/licenses/LICENSE,sha256=apLF-HWPNU7pT5bmf5KmZpD5Cklpy2u-BN_0xBoRMLY,1081
36
- pointblank-0.11.5.dist-info/METADATA,sha256=pe5a95JXrsC276UPM-ulEEIWzBAFSoJDwARzNYF5qak,17777
37
- pointblank-0.11.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
38
- pointblank-0.11.5.dist-info/entry_points.txt,sha256=GqqqOTOH8uZe22wLcvYjzpizqk_j4MNcUo2YM14ryCw,42
39
- pointblank-0.11.5.dist-info/top_level.txt,sha256=-wHrS1SvV8-nhvc3w-PPYs1C1WtEc1pK-eGjubbCCKc,11
40
- pointblank-0.11.5.dist-info/RECORD,,
35
+ pointblank-0.11.6.dist-info/licenses/LICENSE,sha256=apLF-HWPNU7pT5bmf5KmZpD5Cklpy2u-BN_0xBoRMLY,1081
36
+ pointblank-0.11.6.dist-info/METADATA,sha256=ni-fTo_hxUkYWEsW2qmncCPnpeyfh6me5aqexPcxXSA,17777
37
+ pointblank-0.11.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
38
+ pointblank-0.11.6.dist-info/entry_points.txt,sha256=GqqqOTOH8uZe22wLcvYjzpizqk_j4MNcUo2YM14ryCw,42
39
+ pointblank-0.11.6.dist-info/top_level.txt,sha256=-wHrS1SvV8-nhvc3w-PPYs1C1WtEc1pK-eGjubbCCKc,11
40
+ pointblank-0.11.6.dist-info/RECORD,,