pointblank 0.11.3__py3-none-any.whl → 0.11.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pointblank/__init__.py +9 -0
- pointblank/cli.py +226 -77
- pointblank/yaml.py +1386 -0
- {pointblank-0.11.3.dist-info → pointblank-0.11.4.dist-info}/METADATA +1 -1
- {pointblank-0.11.3.dist-info → pointblank-0.11.4.dist-info}/RECORD +9 -8
- {pointblank-0.11.3.dist-info → pointblank-0.11.4.dist-info}/WHEEL +0 -0
- {pointblank-0.11.3.dist-info → pointblank-0.11.4.dist-info}/entry_points.txt +0 -0
- {pointblank-0.11.3.dist-info → pointblank-0.11.4.dist-info}/licenses/LICENSE +0 -0
- {pointblank-0.11.3.dist-info → pointblank-0.11.4.dist-info}/top_level.txt +0 -0
pointblank/__init__.py
CHANGED
|
@@ -40,6 +40,11 @@ from pointblank.validate import (
|
|
|
40
40
|
missing_vals_tbl,
|
|
41
41
|
preview,
|
|
42
42
|
)
|
|
43
|
+
from pointblank.yaml import (
|
|
44
|
+
validate_yaml,
|
|
45
|
+
yaml_interrogate,
|
|
46
|
+
yaml_to_python,
|
|
47
|
+
)
|
|
43
48
|
|
|
44
49
|
__all__ = [
|
|
45
50
|
"assistant",
|
|
@@ -72,4 +77,8 @@ __all__ = [
|
|
|
72
77
|
"get_column_count",
|
|
73
78
|
"get_row_count",
|
|
74
79
|
"send_slack_notification",
|
|
80
|
+
# YAML functionality
|
|
81
|
+
"yaml_interrogate",
|
|
82
|
+
"validate_yaml",
|
|
83
|
+
"yaml_to_python",
|
|
75
84
|
]
|
pointblank/cli.py
CHANGED
|
@@ -3869,26 +3869,121 @@ def _show_extract_and_summary(
|
|
|
3869
3869
|
@click.argument("output_file", type=click.Path(), required=False)
|
|
3870
3870
|
def make_template(output_file: str | None):
|
|
3871
3871
|
"""
|
|
3872
|
-
Create a validation script template.
|
|
3872
|
+
Create a validation script or YAML configuration template.
|
|
3873
3873
|
|
|
3874
|
-
Creates a sample Python script with examples showing how to use Pointblank
|
|
3875
|
-
for data validation.
|
|
3876
|
-
|
|
3874
|
+
Creates a sample Python script or YAML configuration with examples showing how to use Pointblank
|
|
3875
|
+
for data validation. The template type is determined by the file extension:
|
|
3876
|
+
- .py files create Python script templates
|
|
3877
|
+
- .yaml/.yml files create YAML configuration templates
|
|
3877
3878
|
|
|
3878
|
-
|
|
3879
|
+
Edit the template to add your own data loading and validation rules, then run it with 'pb run'.
|
|
3880
|
+
|
|
3881
|
+
OUTPUT_FILE is the path where the template will be created.
|
|
3879
3882
|
|
|
3880
3883
|
Examples:
|
|
3881
3884
|
|
|
3882
3885
|
\b
|
|
3883
|
-
pb make-template my_validation.py
|
|
3884
|
-
pb make-template
|
|
3886
|
+
pb make-template my_validation.py # Creates Python script template
|
|
3887
|
+
pb make-template my_validation.yaml # Creates YAML config template
|
|
3888
|
+
pb make-template validation_template.yml # Creates YAML config template
|
|
3885
3889
|
"""
|
|
3886
3890
|
# Handle missing output_file with concise help
|
|
3887
3891
|
if output_file is None:
|
|
3888
3892
|
_show_concise_help("make-template", None)
|
|
3889
3893
|
return
|
|
3890
3894
|
|
|
3891
|
-
|
|
3895
|
+
# Detect file type based on extension
|
|
3896
|
+
file_path = Path(output_file)
|
|
3897
|
+
file_extension = file_path.suffix.lower()
|
|
3898
|
+
|
|
3899
|
+
is_yaml_file = file_extension in [".yaml", ".yml"]
|
|
3900
|
+
is_python_file = file_extension == ".py"
|
|
3901
|
+
|
|
3902
|
+
if not is_yaml_file and not is_python_file:
|
|
3903
|
+
console.print(
|
|
3904
|
+
f"[yellow]Warning:[/yellow] Unknown file extension '{file_extension}'. "
|
|
3905
|
+
"Creating Python template by default. Use .py, .yaml, or .yml extensions for specific template types."
|
|
3906
|
+
)
|
|
3907
|
+
is_python_file = True
|
|
3908
|
+
|
|
3909
|
+
if is_yaml_file:
|
|
3910
|
+
# Create YAML template
|
|
3911
|
+
example_yaml = """# Example Pointblank YAML validation configuration
|
|
3912
|
+
#
|
|
3913
|
+
# This YAML file demonstrates how to create validation rules for your data.
|
|
3914
|
+
# Modify the data source and validation steps below to match your requirements.
|
|
3915
|
+
#
|
|
3916
|
+
# When using 'pb run' with --data option, the CLI will automatically replace
|
|
3917
|
+
# the 'tbl' field with the provided data source.
|
|
3918
|
+
|
|
3919
|
+
# Data source configuration
|
|
3920
|
+
tbl: small_table # Replace with your data source
|
|
3921
|
+
# Can be: dataset name, CSV file, Parquet file, database connection, etc.
|
|
3922
|
+
|
|
3923
|
+
# Optional: Table name for reporting (defaults to filename if not specified)
|
|
3924
|
+
tbl_name: "Example Validation"
|
|
3925
|
+
|
|
3926
|
+
# Optional: Label for this validation run
|
|
3927
|
+
label: "Validation Template"
|
|
3928
|
+
|
|
3929
|
+
# Optional: Validation thresholds (defaults shown below)
|
|
3930
|
+
# thresholds:
|
|
3931
|
+
# warning: 0.05 # 5% failure rate triggers warning
|
|
3932
|
+
# error: 0.10 # 10% failure rate triggers error
|
|
3933
|
+
# critical: 0.15 # 15% failure rate triggers critical
|
|
3934
|
+
|
|
3935
|
+
# Validation steps to perform
|
|
3936
|
+
steps:
|
|
3937
|
+
# Check for duplicate rows across all columns
|
|
3938
|
+
- rows_distinct
|
|
3939
|
+
|
|
3940
|
+
# Check that required columns exist
|
|
3941
|
+
- col_exists:
|
|
3942
|
+
columns: [column1, column2] # Replace with your actual column names
|
|
3943
|
+
|
|
3944
|
+
# Check for null values in important columns
|
|
3945
|
+
- col_vals_not_null:
|
|
3946
|
+
columns: important_column # Replace with your actual column name
|
|
3947
|
+
|
|
3948
|
+
# Check value ranges (uncomment and modify as needed)
|
|
3949
|
+
# - col_vals_gt:
|
|
3950
|
+
# columns: amount
|
|
3951
|
+
# value: 0
|
|
3952
|
+
|
|
3953
|
+
# - col_vals_between:
|
|
3954
|
+
# columns: score
|
|
3955
|
+
# left: 0
|
|
3956
|
+
# right: 100
|
|
3957
|
+
|
|
3958
|
+
# Check string patterns (uncomment and modify as needed)
|
|
3959
|
+
# - col_vals_regex:
|
|
3960
|
+
# columns: email
|
|
3961
|
+
# pattern: "^[\\w\\.-]+@[\\w\\.-]+\\.[a-zA-Z]{2,}$"
|
|
3962
|
+
|
|
3963
|
+
# Check for unique values (uncomment and modify as needed)
|
|
3964
|
+
# - col_vals_unique:
|
|
3965
|
+
# columns: id
|
|
3966
|
+
|
|
3967
|
+
# Check values are in allowed set (uncomment and modify as needed)
|
|
3968
|
+
# - col_vals_in_set:
|
|
3969
|
+
# columns: status
|
|
3970
|
+
# set: [active, inactive, pending]
|
|
3971
|
+
|
|
3972
|
+
# Add more validation steps as needed
|
|
3973
|
+
# See the Pointblank documentation for the full list of available validation functions
|
|
3974
|
+
"""
|
|
3975
|
+
|
|
3976
|
+
Path(output_file).write_text(example_yaml)
|
|
3977
|
+
console.print(f"[green]✓[/green] YAML validation template created: {output_file}")
|
|
3978
|
+
console.print("\nEdit the template to add your data source and validation rules, then run:")
|
|
3979
|
+
console.print(f"[cyan]pb run {output_file}[/cyan]")
|
|
3980
|
+
console.print(
|
|
3981
|
+
f"[cyan]pb run {output_file} --data your_data.csv[/cyan] [dim]# Override data source[/dim]"
|
|
3982
|
+
)
|
|
3983
|
+
|
|
3984
|
+
else:
|
|
3985
|
+
# Create Python template
|
|
3986
|
+
example_script = '''"""
|
|
3892
3987
|
Example Pointblank validation script.
|
|
3893
3988
|
|
|
3894
3989
|
This script demonstrates how to create validation rules for your data.
|
|
@@ -3941,21 +4036,23 @@ validation = (
|
|
|
3941
4036
|
)
|
|
3942
4037
|
'''
|
|
3943
4038
|
|
|
3944
|
-
|
|
3945
|
-
|
|
3946
|
-
|
|
3947
|
-
|
|
3948
|
-
|
|
3949
|
-
f"[cyan]pb run {output_file}
|
|
3950
|
-
|
|
4039
|
+
Path(output_file).write_text(example_script)
|
|
4040
|
+
console.print(f"[green]✓[/green] Python validation template created: {output_file}")
|
|
4041
|
+
console.print(
|
|
4042
|
+
"\nEdit the template to add your data loading and validation rules, then run:"
|
|
4043
|
+
)
|
|
4044
|
+
console.print(f"[cyan]pb run {output_file}[/cyan]")
|
|
4045
|
+
console.print(
|
|
4046
|
+
f"[cyan]pb run {output_file} --data your_data.csv[/cyan] [dim]# Replace data source automatically[/dim]"
|
|
4047
|
+
)
|
|
3951
4048
|
|
|
3952
4049
|
|
|
3953
4050
|
@cli.command()
|
|
3954
|
-
@click.argument("
|
|
4051
|
+
@click.argument("validation_file", type=click.Path(exists=True), required=False)
|
|
3955
4052
|
@click.option(
|
|
3956
4053
|
"--data",
|
|
3957
4054
|
type=str,
|
|
3958
|
-
help="Data source to replace in validation objects (
|
|
4055
|
+
help="Data source to replace in validation objects (Python scripts and YAML configs)",
|
|
3959
4056
|
)
|
|
3960
4057
|
@click.option("--output-html", type=click.Path(), help="Save HTML validation report to file")
|
|
3961
4058
|
@click.option("--output-json", type=click.Path(), help="Save JSON validation summary to file")
|
|
@@ -3976,7 +4073,7 @@ validation = (
|
|
|
3976
4073
|
help="Exit with non-zero code when validation reaches this threshold level",
|
|
3977
4074
|
)
|
|
3978
4075
|
def run(
|
|
3979
|
-
|
|
4076
|
+
validation_file: str | None,
|
|
3980
4077
|
data: str | None,
|
|
3981
4078
|
output_html: str | None,
|
|
3982
4079
|
output_json: str | None,
|
|
@@ -3986,16 +4083,19 @@ def run(
|
|
|
3986
4083
|
fail_on: str | None,
|
|
3987
4084
|
):
|
|
3988
4085
|
"""
|
|
3989
|
-
Run a Pointblank validation script.
|
|
4086
|
+
Run a Pointblank validation script or YAML configuration.
|
|
3990
4087
|
|
|
3991
|
-
|
|
3992
|
-
|
|
4088
|
+
VALIDATION_FILE can be:
|
|
4089
|
+
- A Python file (.py) that defines validation logic
|
|
4090
|
+
- A YAML configuration file (.yaml, .yml) that defines validation steps
|
|
4091
|
+
|
|
4092
|
+
Python scripts should load their own data and create validation objects.
|
|
4093
|
+
YAML configurations define data sources and validation steps declaratively.
|
|
3993
4094
|
|
|
3994
4095
|
If --data is provided, it will automatically replace the data source in your
|
|
3995
|
-
validation objects
|
|
3996
|
-
For scripts with multiple validations, use separate script files or remove --data.
|
|
4096
|
+
validation objects (Python scripts) or override the 'tbl' field (YAML configs).
|
|
3997
4097
|
|
|
3998
|
-
To get started quickly, use 'pb make-template' to create
|
|
4098
|
+
To get started quickly, use 'pb make-template' to create templates.
|
|
3999
4099
|
|
|
4000
4100
|
DATA can be:
|
|
4001
4101
|
|
|
@@ -4009,19 +4109,34 @@ def run(
|
|
|
4009
4109
|
Examples:
|
|
4010
4110
|
|
|
4011
4111
|
\b
|
|
4012
|
-
pb make-template my_validation.py # Create a template
|
|
4112
|
+
pb make-template my_validation.py # Create a Python template
|
|
4013
4113
|
pb run validation_script.py
|
|
4114
|
+
pb run validation_config.yaml
|
|
4014
4115
|
pb run validation_script.py --data data.csv
|
|
4015
|
-
pb run
|
|
4116
|
+
pb run validation_config.yaml --data small_table --output-html report.html
|
|
4016
4117
|
pb run validation_script.py --show-extract --fail-on error
|
|
4017
|
-
pb run
|
|
4118
|
+
pb run validation_config.yaml --write-extract extracts_folder --fail-on critical
|
|
4018
4119
|
"""
|
|
4019
4120
|
try:
|
|
4020
|
-
# Handle missing
|
|
4021
|
-
if
|
|
4121
|
+
# Handle missing validation_file with concise help
|
|
4122
|
+
if validation_file is None:
|
|
4022
4123
|
_show_concise_help("run", None)
|
|
4023
4124
|
return
|
|
4024
4125
|
|
|
4126
|
+
# Detect file type based on extension
|
|
4127
|
+
file_path = Path(validation_file)
|
|
4128
|
+
file_extension = file_path.suffix.lower()
|
|
4129
|
+
|
|
4130
|
+
is_yaml_file = file_extension in [".yaml", ".yml"]
|
|
4131
|
+
is_python_file = file_extension == ".py"
|
|
4132
|
+
|
|
4133
|
+
if not is_yaml_file and not is_python_file:
|
|
4134
|
+
console.print(
|
|
4135
|
+
f"[red]Error:[/red] Unsupported file type '{file_extension}'. "
|
|
4136
|
+
"Only .py (Python scripts) and .yaml/.yml (YAML configs) are supported."
|
|
4137
|
+
)
|
|
4138
|
+
sys.exit(1)
|
|
4139
|
+
|
|
4025
4140
|
# Load optional data override if provided
|
|
4026
4141
|
cli_data = None
|
|
4027
4142
|
if data:
|
|
@@ -4029,60 +4144,94 @@ def run(
|
|
|
4029
4144
|
cli_data = _load_data_source(data)
|
|
4030
4145
|
console.print(f"[green]✓[/green] Loaded data override: {data}")
|
|
4031
4146
|
|
|
4032
|
-
#
|
|
4033
|
-
|
|
4034
|
-
# Read and execute the validation script
|
|
4035
|
-
script_content = Path(validation_script).read_text()
|
|
4147
|
+
# Process based on file type
|
|
4148
|
+
validations = []
|
|
4036
4149
|
|
|
4037
|
-
|
|
4038
|
-
|
|
4039
|
-
|
|
4040
|
-
"pointblank": pb,
|
|
4041
|
-
"cli_data": cli_data, # Available if --data was provided
|
|
4042
|
-
"__name__": "__main__",
|
|
4043
|
-
"__file__": str(Path(validation_script).resolve()),
|
|
4044
|
-
}
|
|
4150
|
+
if is_yaml_file:
|
|
4151
|
+
# Handle YAML configuration file
|
|
4152
|
+
from pointblank.yaml import YAMLValidationError, YAMLValidator, yaml_interrogate
|
|
4045
4153
|
|
|
4046
|
-
|
|
4047
|
-
|
|
4048
|
-
|
|
4049
|
-
|
|
4050
|
-
|
|
4051
|
-
|
|
4154
|
+
with console.status("[bold green]Running YAML validation..."):
|
|
4155
|
+
try:
|
|
4156
|
+
if cli_data is not None:
|
|
4157
|
+
# Load and modify YAML config to use CLI data
|
|
4158
|
+
console.print(
|
|
4159
|
+
"[yellow]Replacing data source in YAML config with CLI data[/yellow]"
|
|
4160
|
+
)
|
|
4052
4161
|
|
|
4053
|
-
|
|
4054
|
-
|
|
4162
|
+
validator = YAMLValidator()
|
|
4163
|
+
config = validator.load_config(validation_file)
|
|
4055
4164
|
|
|
4056
|
-
|
|
4057
|
-
|
|
4058
|
-
|
|
4165
|
+
# Replace the 'tbl' field with our CLI data
|
|
4166
|
+
# Note: We pass the CLI data object directly instead of a string
|
|
4167
|
+
config["tbl"] = cli_data
|
|
4059
4168
|
|
|
4060
|
-
|
|
4061
|
-
|
|
4062
|
-
|
|
4063
|
-
|
|
4064
|
-
|
|
4065
|
-
|
|
4066
|
-
|
|
4067
|
-
validations.append(
|
|
4068
|
-
|
|
4069
|
-
|
|
4070
|
-
|
|
4071
|
-
|
|
4072
|
-
|
|
4073
|
-
|
|
4074
|
-
|
|
4075
|
-
|
|
4076
|
-
|
|
4169
|
+
# Build and execute validation with modified config
|
|
4170
|
+
validation = validator.execute_workflow(config)
|
|
4171
|
+
|
|
4172
|
+
else:
|
|
4173
|
+
# Use YAML config as-is
|
|
4174
|
+
validation = yaml_interrogate(validation_file)
|
|
4175
|
+
|
|
4176
|
+
validations.append(validation)
|
|
4177
|
+
|
|
4178
|
+
except YAMLValidationError as e:
|
|
4179
|
+
console.print(f"[red]YAML validation error:[/red] {e}")
|
|
4180
|
+
sys.exit(1)
|
|
4181
|
+
|
|
4182
|
+
else:
|
|
4183
|
+
# Handle Python script file
|
|
4184
|
+
with console.status("[bold green]Running Python validation script..."):
|
|
4185
|
+
# Read and execute the validation script
|
|
4186
|
+
script_content = Path(validation_file).read_text()
|
|
4187
|
+
|
|
4188
|
+
# Create a namespace with pointblank and optional CLI data
|
|
4189
|
+
namespace = {
|
|
4190
|
+
"pb": pb,
|
|
4191
|
+
"pointblank": pb,
|
|
4192
|
+
"cli_data": cli_data, # Available if --data was provided
|
|
4193
|
+
"__name__": "__main__",
|
|
4194
|
+
"__file__": str(Path(validation_file).resolve()),
|
|
4195
|
+
}
|
|
4196
|
+
|
|
4197
|
+
# Execute the script
|
|
4198
|
+
try:
|
|
4199
|
+
exec(script_content, namespace)
|
|
4200
|
+
except Exception as e:
|
|
4201
|
+
console.print(f"[red]Error executing validation script:[/red] {e}")
|
|
4202
|
+
sys.exit(1)
|
|
4203
|
+
|
|
4204
|
+
# Look for validation objects in the namespace
|
|
4205
|
+
# Look for the 'validation' variable specifically first
|
|
4206
|
+
if "validation" in namespace:
|
|
4207
|
+
validations.append(namespace["validation"])
|
|
4208
|
+
|
|
4209
|
+
# Also look for any other validation objects
|
|
4210
|
+
for key, value in namespace.items():
|
|
4211
|
+
if (
|
|
4212
|
+
key != "validation"
|
|
4213
|
+
and hasattr(value, "interrogate")
|
|
4214
|
+
and hasattr(value, "validation_info")
|
|
4215
|
+
):
|
|
4216
|
+
validations.append(value)
|
|
4217
|
+
# Also check if it's a Validate object that has been interrogated
|
|
4218
|
+
elif key != "validation" and str(type(value)).find("Validate") != -1:
|
|
4219
|
+
validations.append(value)
|
|
4220
|
+
|
|
4221
|
+
if not validations:
|
|
4222
|
+
raise ValueError(
|
|
4223
|
+
"No validation objects found in script. "
|
|
4224
|
+
"Script should create Validate objects and call .interrogate() on them."
|
|
4225
|
+
)
|
|
4077
4226
|
|
|
4078
4227
|
console.print(f"[green]✓[/green] Found {len(validations)} validation object(s)")
|
|
4079
4228
|
|
|
4080
|
-
# Implement automatic data replacement for
|
|
4081
|
-
if cli_data is not None:
|
|
4082
|
-
# Check if we have multiple validations (this is not supported)
|
|
4229
|
+
# Implement automatic data replacement for Python scripts only (YAML configs handle this differently)
|
|
4230
|
+
if cli_data is not None and is_python_file:
|
|
4231
|
+
# Check if we have multiple validations (this is not supported for Python scripts)
|
|
4083
4232
|
if len(validations) > 1:
|
|
4084
4233
|
console.print(
|
|
4085
|
-
f"[red]Error: Found {len(validations)} validation objects in the script.[/red]"
|
|
4234
|
+
f"[red]Error: Found {len(validations)} validation objects in the Python script.[/red]"
|
|
4086
4235
|
)
|
|
4087
4236
|
console.print(
|
|
4088
4237
|
"[yellow]The --data option replaces data in ALL validation objects,[/yellow]"
|
|
@@ -5150,14 +5299,14 @@ def _show_concise_help(command_name: str, ctx: click.Context) -> None:
|
|
|
5150
5299
|
|
|
5151
5300
|
elif command_name == "make-template":
|
|
5152
5301
|
console.print(
|
|
5153
|
-
"[bold cyan]pb make-template[/bold cyan] - Create a validation script template"
|
|
5302
|
+
"[bold cyan]pb make-template[/bold cyan] - Create a validation script or YAML template"
|
|
5154
5303
|
)
|
|
5155
5304
|
console.print()
|
|
5156
5305
|
console.print("[bold yellow]Usage:[/bold yellow]")
|
|
5157
|
-
console.print(" pb make-template my_validation.py")
|
|
5158
|
-
console.print(" pb make-template
|
|
5306
|
+
console.print(" pb make-template my_validation.py # Python script template")
|
|
5307
|
+
console.print(" pb make-template my_validation.yaml # YAML config template")
|
|
5159
5308
|
console.print()
|
|
5160
|
-
console.print("[dim]Creates
|
|
5309
|
+
console.print("[dim]Creates sample templates with validation examples[/dim]")
|
|
5161
5310
|
console.print("[dim]Edit the template and run with [bold]pb run[/bold][/dim]")
|
|
5162
5311
|
console.print()
|
|
5163
5312
|
console.print(
|