pointblank 0.11.3__py3-none-any.whl → 0.11.4__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, each released to one of the supported registries. It is provided for informational purposes only and reflects the package versions exactly as they appear in their respective public registries.
pointblank/__init__.py CHANGED
@@ -40,6 +40,11 @@ from pointblank.validate import (
40
40
  missing_vals_tbl,
41
41
  preview,
42
42
  )
43
+ from pointblank.yaml import (
44
+ validate_yaml,
45
+ yaml_interrogate,
46
+ yaml_to_python,
47
+ )
43
48
 
44
49
  __all__ = [
45
50
  "assistant",
@@ -72,4 +77,8 @@ __all__ = [
72
77
  "get_column_count",
73
78
  "get_row_count",
74
79
  "send_slack_notification",
80
+ # YAML functionality
81
+ "yaml_interrogate",
82
+ "validate_yaml",
83
+ "yaml_to_python",
75
84
  ]
pointblank/cli.py CHANGED
@@ -3869,26 +3869,121 @@ def _show_extract_and_summary(
3869
3869
  @click.argument("output_file", type=click.Path(), required=False)
3870
3870
  def make_template(output_file: str | None):
3871
3871
  """
3872
- Create a validation script template.
3872
+ Create a validation script or YAML configuration template.
3873
3873
 
3874
- Creates a sample Python script with examples showing how to use Pointblank
3875
- for data validation. Edit the template to add your own data loading and
3876
- validation rules, then run it with 'pb run'.
3874
+ Creates a sample Python script or YAML configuration with examples showing how to use Pointblank
3875
+ for data validation. The template type is determined by the file extension:
3876
+ - .py files create Python script templates
3877
+ - .yaml/.yml files create YAML configuration templates
3877
3878
 
3878
- OUTPUT_FILE is the path where the template script will be created.
3879
+ Edit the template to add your own data loading and validation rules, then run it with 'pb run'.
3880
+
3881
+ OUTPUT_FILE is the path where the template will be created.
3879
3882
 
3880
3883
  Examples:
3881
3884
 
3882
3885
  \b
3883
- pb make-template my_validation.py
3884
- pb make-template validation_template.py
3886
+ pb make-template my_validation.py # Creates Python script template
3887
+ pb make-template my_validation.yaml # Creates YAML config template
3888
+ pb make-template validation_template.yml # Creates YAML config template
3885
3889
  """
3886
3890
  # Handle missing output_file with concise help
3887
3891
  if output_file is None:
3888
3892
  _show_concise_help("make-template", None)
3889
3893
  return
3890
3894
 
3891
- example_script = '''"""
3895
+ # Detect file type based on extension
3896
+ file_path = Path(output_file)
3897
+ file_extension = file_path.suffix.lower()
3898
+
3899
+ is_yaml_file = file_extension in [".yaml", ".yml"]
3900
+ is_python_file = file_extension == ".py"
3901
+
3902
+ if not is_yaml_file and not is_python_file:
3903
+ console.print(
3904
+ f"[yellow]Warning:[/yellow] Unknown file extension '{file_extension}'. "
3905
+ "Creating Python template by default. Use .py, .yaml, or .yml extensions for specific template types."
3906
+ )
3907
+ is_python_file = True
3908
+
3909
+ if is_yaml_file:
3910
+ # Create YAML template
3911
+ example_yaml = """# Example Pointblank YAML validation configuration
3912
+ #
3913
+ # This YAML file demonstrates how to create validation rules for your data.
3914
+ # Modify the data source and validation steps below to match your requirements.
3915
+ #
3916
+ # When using 'pb run' with --data option, the CLI will automatically replace
3917
+ # the 'tbl' field with the provided data source.
3918
+
3919
+ # Data source configuration
3920
+ tbl: small_table # Replace with your data source
3921
+ # Can be: dataset name, CSV file, Parquet file, database connection, etc.
3922
+
3923
+ # Optional: Table name for reporting (defaults to filename if not specified)
3924
+ tbl_name: "Example Validation"
3925
+
3926
+ # Optional: Label for this validation run
3927
+ label: "Validation Template"
3928
+
3929
+ # Optional: Validation thresholds (defaults shown below)
3930
+ # thresholds:
3931
+ # warning: 0.05 # 5% failure rate triggers warning
3932
+ # error: 0.10 # 10% failure rate triggers error
3933
+ # critical: 0.15 # 15% failure rate triggers critical
3934
+
3935
+ # Validation steps to perform
3936
+ steps:
3937
+ # Check for duplicate rows across all columns
3938
+ - rows_distinct
3939
+
3940
+ # Check that required columns exist
3941
+ - col_exists:
3942
+ columns: [column1, column2] # Replace with your actual column names
3943
+
3944
+ # Check for null values in important columns
3945
+ - col_vals_not_null:
3946
+ columns: important_column # Replace with your actual column name
3947
+
3948
+ # Check value ranges (uncomment and modify as needed)
3949
+ # - col_vals_gt:
3950
+ # columns: amount
3951
+ # value: 0
3952
+
3953
+ # - col_vals_between:
3954
+ # columns: score
3955
+ # left: 0
3956
+ # right: 100
3957
+
3958
+ # Check string patterns (uncomment and modify as needed)
3959
+ # - col_vals_regex:
3960
+ # columns: email
3961
+ # pattern: "^[\\w\\.-]+@[\\w\\.-]+\\.[a-zA-Z]{2,}$"
3962
+
3963
+ # Check for unique values (uncomment and modify as needed)
3964
+ # - col_vals_unique:
3965
+ # columns: id
3966
+
3967
+ # Check values are in allowed set (uncomment and modify as needed)
3968
+ # - col_vals_in_set:
3969
+ # columns: status
3970
+ # set: [active, inactive, pending]
3971
+
3972
+ # Add more validation steps as needed
3973
+ # See the Pointblank documentation for the full list of available validation functions
3974
+ """
3975
+
3976
+ Path(output_file).write_text(example_yaml)
3977
+ console.print(f"[green]✓[/green] YAML validation template created: {output_file}")
3978
+ console.print("\nEdit the template to add your data source and validation rules, then run:")
3979
+ console.print(f"[cyan]pb run {output_file}[/cyan]")
3980
+ console.print(
3981
+ f"[cyan]pb run {output_file} --data your_data.csv[/cyan] [dim]# Override data source[/dim]"
3982
+ )
3983
+
3984
+ else:
3985
+ # Create Python template
3986
+ example_script = '''"""
3892
3987
  Example Pointblank validation script.
3893
3988
 
3894
3989
  This script demonstrates how to create validation rules for your data.
@@ -3941,21 +4036,23 @@ validation = (
3941
4036
  )
3942
4037
  '''
3943
4038
 
3944
- Path(output_file).write_text(example_script)
3945
- console.print(f"[green]✓[/green] Validation script template created: {output_file}")
3946
- console.print("\nEdit the template to add your data loading and validation rules, then run:")
3947
- console.print(f"[cyan]pb run {output_file}[/cyan]")
3948
- console.print(
3949
- f"[cyan]pb run {output_file} --data your_data.csv[/cyan] [dim]# Replace data source automatically[/dim]"
3950
- )
4039
+ Path(output_file).write_text(example_script)
4040
+ console.print(f"[green]✓[/green] Python validation template created: {output_file}")
4041
+ console.print(
4042
+ "\nEdit the template to add your data loading and validation rules, then run:"
4043
+ )
4044
+ console.print(f"[cyan]pb run {output_file}[/cyan]")
4045
+ console.print(
4046
+ f"[cyan]pb run {output_file} --data your_data.csv[/cyan] [dim]# Replace data source automatically[/dim]"
4047
+ )
3951
4048
 
3952
4049
 
3953
4050
  @cli.command()
3954
- @click.argument("validation_script", type=click.Path(exists=True), required=False)
4051
+ @click.argument("validation_file", type=click.Path(exists=True), required=False)
3955
4052
  @click.option(
3956
4053
  "--data",
3957
4054
  type=str,
3958
- help="Data source to replace in validation objects (single validation scripts only)",
4055
+ help="Data source to replace in validation objects (Python scripts and YAML configs)",
3959
4056
  )
3960
4057
  @click.option("--output-html", type=click.Path(), help="Save HTML validation report to file")
3961
4058
  @click.option("--output-json", type=click.Path(), help="Save JSON validation summary to file")
@@ -3976,7 +4073,7 @@ validation = (
3976
4073
  help="Exit with non-zero code when validation reaches this threshold level",
3977
4074
  )
3978
4075
  def run(
3979
- validation_script: str | None,
4076
+ validation_file: str | None,
3980
4077
  data: str | None,
3981
4078
  output_html: str | None,
3982
4079
  output_json: str | None,
@@ -3986,16 +4083,19 @@ def run(
3986
4083
  fail_on: str | None,
3987
4084
  ):
3988
4085
  """
3989
- Run a Pointblank validation script.
4086
+ Run a Pointblank validation script or YAML configuration.
3990
4087
 
3991
- VALIDATION_SCRIPT should be a Python file that defines validation logic.
3992
- The script should load its own data and create validation objects.
4088
+ VALIDATION_FILE can be:
4089
+ - A Python file (.py) that defines validation logic
4090
+ - A YAML configuration file (.yaml, .yml) that defines validation steps
4091
+
4092
+ Python scripts should load their own data and create validation objects.
4093
+ YAML configurations define data sources and validation steps declaratively.
3993
4094
 
3994
4095
  If --data is provided, it will automatically replace the data source in your
3995
- validation objects. This works with scripts containing a single validation.
3996
- For scripts with multiple validations, use separate script files or remove --data.
4096
+ validation objects (Python scripts) or override the 'tbl' field (YAML configs).
3997
4097
 
3998
- To get started quickly, use 'pb make-template' to create a validation script template.
4098
+ To get started quickly, use 'pb make-template' to create templates.
3999
4099
 
4000
4100
  DATA can be:
4001
4101
 
@@ -4009,19 +4109,34 @@ def run(
4009
4109
  Examples:
4010
4110
 
4011
4111
  \b
4012
- pb make-template my_validation.py # Create a template first
4112
+ pb make-template my_validation.py # Create a Python template
4013
4113
  pb run validation_script.py
4114
+ pb run validation_config.yaml
4014
4115
  pb run validation_script.py --data data.csv
4015
- pb run validation_script.py --data small_table --output-html report.html
4116
+ pb run validation_config.yaml --data small_table --output-html report.html
4016
4117
  pb run validation_script.py --show-extract --fail-on error
4017
- pb run validation_script.py --write-extract extracts_folder --fail-on critical
4118
+ pb run validation_config.yaml --write-extract extracts_folder --fail-on critical
4018
4119
  """
4019
4120
  try:
4020
- # Handle missing validation_script with concise help
4021
- if validation_script is None:
4121
+ # Handle missing validation_file with concise help
4122
+ if validation_file is None:
4022
4123
  _show_concise_help("run", None)
4023
4124
  return
4024
4125
 
4126
+ # Detect file type based on extension
4127
+ file_path = Path(validation_file)
4128
+ file_extension = file_path.suffix.lower()
4129
+
4130
+ is_yaml_file = file_extension in [".yaml", ".yml"]
4131
+ is_python_file = file_extension == ".py"
4132
+
4133
+ if not is_yaml_file and not is_python_file:
4134
+ console.print(
4135
+ f"[red]Error:[/red] Unsupported file type '{file_extension}'. "
4136
+ "Only .py (Python scripts) and .yaml/.yml (YAML configs) are supported."
4137
+ )
4138
+ sys.exit(1)
4139
+
4025
4140
  # Load optional data override if provided
4026
4141
  cli_data = None
4027
4142
  if data:
@@ -4029,60 +4144,94 @@ def run(
4029
4144
  cli_data = _load_data_source(data)
4030
4145
  console.print(f"[green]✓[/green] Loaded data override: {data}")
4031
4146
 
4032
- # Execute the validation script
4033
- with console.status("[bold green]Running validation script..."):
4034
- # Read and execute the validation script
4035
- script_content = Path(validation_script).read_text()
4147
+ # Process based on file type
4148
+ validations = []
4036
4149
 
4037
- # Create a namespace with pointblank and optional CLI data
4038
- namespace = {
4039
- "pb": pb,
4040
- "pointblank": pb,
4041
- "cli_data": cli_data, # Available if --data was provided
4042
- "__name__": "__main__",
4043
- "__file__": str(Path(validation_script).resolve()),
4044
- }
4150
+ if is_yaml_file:
4151
+ # Handle YAML configuration file
4152
+ from pointblank.yaml import YAMLValidationError, YAMLValidator, yaml_interrogate
4045
4153
 
4046
- # Execute the script
4047
- try:
4048
- exec(script_content, namespace)
4049
- except Exception as e:
4050
- console.print(f"[red]Error executing validation script:[/red] {e}")
4051
- sys.exit(1)
4154
+ with console.status("[bold green]Running YAML validation..."):
4155
+ try:
4156
+ if cli_data is not None:
4157
+ # Load and modify YAML config to use CLI data
4158
+ console.print(
4159
+ "[yellow]Replacing data source in YAML config with CLI data[/yellow]"
4160
+ )
4052
4161
 
4053
- # Look for validation objects in the namespace
4054
- validations = []
4162
+ validator = YAMLValidator()
4163
+ config = validator.load_config(validation_file)
4055
4164
 
4056
- # Look for the 'validation' variable specifically first
4057
- if "validation" in namespace:
4058
- validations.append(namespace["validation"])
4165
+ # Replace the 'tbl' field with our CLI data
4166
+ # Note: We pass the CLI data object directly instead of a string
4167
+ config["tbl"] = cli_data
4059
4168
 
4060
- # Also look for any other validation objects
4061
- for key, value in namespace.items():
4062
- if (
4063
- key != "validation"
4064
- and hasattr(value, "interrogate")
4065
- and hasattr(value, "validation_info")
4066
- ):
4067
- validations.append(value)
4068
- # Also check if it's a Validate object that has been interrogated
4069
- elif key != "validation" and str(type(value)).find("Validate") != -1:
4070
- validations.append(value)
4071
-
4072
- if not validations:
4073
- raise ValueError(
4074
- "No validation objects found in script. "
4075
- "Script should create Validate objects and call .interrogate() on them."
4076
- )
4169
+ # Build and execute validation with modified config
4170
+ validation = validator.execute_workflow(config)
4171
+
4172
+ else:
4173
+ # Use YAML config as-is
4174
+ validation = yaml_interrogate(validation_file)
4175
+
4176
+ validations.append(validation)
4177
+
4178
+ except YAMLValidationError as e:
4179
+ console.print(f"[red]YAML validation error:[/red] {e}")
4180
+ sys.exit(1)
4181
+
4182
+ else:
4183
+ # Handle Python script file
4184
+ with console.status("[bold green]Running Python validation script..."):
4185
+ # Read and execute the validation script
4186
+ script_content = Path(validation_file).read_text()
4187
+
4188
+ # Create a namespace with pointblank and optional CLI data
4189
+ namespace = {
4190
+ "pb": pb,
4191
+ "pointblank": pb,
4192
+ "cli_data": cli_data, # Available if --data was provided
4193
+ "__name__": "__main__",
4194
+ "__file__": str(Path(validation_file).resolve()),
4195
+ }
4196
+
4197
+ # Execute the script
4198
+ try:
4199
+ exec(script_content, namespace)
4200
+ except Exception as e:
4201
+ console.print(f"[red]Error executing validation script:[/red] {e}")
4202
+ sys.exit(1)
4203
+
4204
+ # Look for validation objects in the namespace
4205
+ # Look for the 'validation' variable specifically first
4206
+ if "validation" in namespace:
4207
+ validations.append(namespace["validation"])
4208
+
4209
+ # Also look for any other validation objects
4210
+ for key, value in namespace.items():
4211
+ if (
4212
+ key != "validation"
4213
+ and hasattr(value, "interrogate")
4214
+ and hasattr(value, "validation_info")
4215
+ ):
4216
+ validations.append(value)
4217
+ # Also check if it's a Validate object that has been interrogated
4218
+ elif key != "validation" and str(type(value)).find("Validate") != -1:
4219
+ validations.append(value)
4220
+
4221
+ if not validations:
4222
+ raise ValueError(
4223
+ "No validation objects found in script. "
4224
+ "Script should create Validate objects and call .interrogate() on them."
4225
+ )
4077
4226
 
4078
4227
  console.print(f"[green]✓[/green] Found {len(validations)} validation object(s)")
4079
4228
 
4080
- # Implement automatic data replacement for Validate objects if --data was provided
4081
- if cli_data is not None:
4082
- # Check if we have multiple validations (this is not supported)
4229
+ # Implement automatic data replacement for Python scripts only (YAML configs handle this differently)
4230
+ if cli_data is not None and is_python_file:
4231
+ # Check if we have multiple validations (this is not supported for Python scripts)
4083
4232
  if len(validations) > 1:
4084
4233
  console.print(
4085
- f"[red]Error: Found {len(validations)} validation objects in the script.[/red]"
4234
+ f"[red]Error: Found {len(validations)} validation objects in the Python script.[/red]"
4086
4235
  )
4087
4236
  console.print(
4088
4237
  "[yellow]The --data option replaces data in ALL validation objects,[/yellow]"
@@ -5150,14 +5299,14 @@ def _show_concise_help(command_name: str, ctx: click.Context) -> None:
5150
5299
 
5151
5300
  elif command_name == "make-template":
5152
5301
  console.print(
5153
- "[bold cyan]pb make-template[/bold cyan] - Create a validation script template"
5302
+ "[bold cyan]pb make-template[/bold cyan] - Create a validation script or YAML template"
5154
5303
  )
5155
5304
  console.print()
5156
5305
  console.print("[bold yellow]Usage:[/bold yellow]")
5157
- console.print(" pb make-template my_validation.py")
5158
- console.print(" pb make-template validation_template.py")
5306
+ console.print(" pb make-template my_validation.py # Python script template")
5307
+ console.print(" pb make-template my_validation.yaml # YAML config template")
5159
5308
  console.print()
5160
- console.print("[dim]Creates a sample Python script with validation examples[/dim]")
5309
+ console.print("[dim]Creates sample templates with validation examples[/dim]")
5161
5310
  console.print("[dim]Edit the template and run with [bold]pb run[/bold][/dim]")
5162
5311
  console.print()
5163
5312
  console.print(