pointblank 0.8.0__py3-none-any.whl → 0.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pointblank/__init__.py +4 -1
- pointblank/_constants_translations.py +2 -2
- pointblank/thresholds.py +145 -1
- pointblank/validate.py +315 -3
- {pointblank-0.8.0.dist-info → pointblank-0.8.1.dist-info}/METADATA +1 -1
- {pointblank-0.8.0.dist-info → pointblank-0.8.1.dist-info}/RECORD +9 -9
- {pointblank-0.8.0.dist-info → pointblank-0.8.1.dist-info}/WHEEL +0 -0
- {pointblank-0.8.0.dist-info → pointblank-0.8.1.dist-info}/licenses/LICENSE +0 -0
- {pointblank-0.8.0.dist-info → pointblank-0.8.1.dist-info}/top_level.txt +0 -0
pointblank/__init__.py
CHANGED
|
@@ -24,13 +24,14 @@ from pointblank.datascan import DataScan, col_summary_tbl
|
|
|
24
24
|
from pointblank.draft import DraftValidation
|
|
25
25
|
from pointblank.schema import Schema
|
|
26
26
|
from pointblank.tf import TF
|
|
27
|
-
from pointblank.thresholds import Actions, Thresholds
|
|
27
|
+
from pointblank.thresholds import Actions, FinalActions, Thresholds
|
|
28
28
|
from pointblank.validate import (
|
|
29
29
|
Validate,
|
|
30
30
|
config,
|
|
31
31
|
get_action_metadata,
|
|
32
32
|
get_column_count,
|
|
33
33
|
get_row_count,
|
|
34
|
+
get_validation_summary,
|
|
34
35
|
load_dataset,
|
|
35
36
|
missing_vals_tbl,
|
|
36
37
|
preview,
|
|
@@ -42,6 +43,7 @@ __all__ = [
|
|
|
42
43
|
"Validate",
|
|
43
44
|
"Thresholds",
|
|
44
45
|
"Actions",
|
|
46
|
+
"FinalActions",
|
|
45
47
|
"Schema",
|
|
46
48
|
"DataScan",
|
|
47
49
|
"DraftValidation",
|
|
@@ -59,6 +61,7 @@ __all__ = [
|
|
|
59
61
|
"preview",
|
|
60
62
|
"missing_vals_tbl",
|
|
61
63
|
"get_action_metadata",
|
|
64
|
+
"get_validation_summary",
|
|
62
65
|
"get_column_count",
|
|
63
66
|
"get_row_count",
|
|
64
67
|
]
|
|
@@ -78,7 +78,7 @@ EXPECT_FAIL_TEXT = {
|
|
|
78
78
|
"ro": "Se așteaptă ca valorile din {column_text} să fie {operator} {values_text}.",
|
|
79
79
|
"tr": "Beklenti, {column_text} 'deki değerlerin {operator} {values_text} olması gerektiğidir.",
|
|
80
80
|
"zh-Hans": "预期在{column_text} 的值应当{operator} {values_text}。",
|
|
81
|
-
"zh-Hant": "{column_text}
|
|
81
|
+
"zh-Hant": "{column_text}之值應{operator} {values_text}。",
|
|
82
82
|
"ja": "{column_text}の値が{operator} {values_text}であることを期待します。",
|
|
83
83
|
"ko": "{column_text}의 값이 {operator} {values_text}이어야 합니다.",
|
|
84
84
|
"ru": "Ожидайте, что значения в {column_text} должны быть {operator} {values_text}.",
|
|
@@ -101,7 +101,7 @@ EXPECT_FAIL_TEXT = {
|
|
|
101
101
|
"ro": "Depășirea unităților de test eșuate unde valorile din {column_text} ar fi trebuit să fie {operator} {values_text}.",
|
|
102
102
|
"tr": "{column_text} 'deki değerlerin {operator} {values_text} olması gereken başarısız test birimlerinin aşılması.",
|
|
103
103
|
"zh-Hans": "错误过多,其中{column_text}中的值应当被 {operator} {values_text}。",
|
|
104
|
-
"zh-Hant": "錯誤過多,{column_text}
|
|
104
|
+
"zh-Hant": "錯誤過多,{column_text}之值應{operator} {values_text}。",
|
|
105
105
|
"ja": "{column_text}の値が{operator} {values_text}であるべきテスト単位の失敗の超過。",
|
|
106
106
|
"ko": "{column_text}의 값이 {operator} {values_text}이어야 했던 실패한 테스트 단위 초과.",
|
|
107
107
|
"ru": "Превышение неудачных тестовых единиц, где значения в {column_text} должны были быть {operator} {values_text}.",
|
pointblank/thresholds.py
CHANGED
|
@@ -3,7 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
from dataclasses import dataclass, field
|
|
4
4
|
from typing import Callable
|
|
5
5
|
|
|
6
|
-
__all__ = ["Thresholds", "Actions"]
|
|
6
|
+
__all__ = ["Thresholds", "Actions", "FinalActions"]
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
@dataclass
|
|
@@ -373,6 +373,15 @@ class Actions:
|
|
|
373
373
|
displayed as `"WARNING: 'col_vals_gt' threshold exceeded for column a."` when the 'warning'
|
|
374
374
|
threshold is exceeded in a 'col_vals_gt' validation step involving column `a`.
|
|
375
375
|
|
|
376
|
+
Crafting Callables with `get_action_metadata()`
|
|
377
|
+
-----------------------------------------------
|
|
378
|
+
When creating a callable function to be used as an action, you can use the
|
|
379
|
+
[`get_action_metadata()`](`pointblank.get_action_metadata`) function to retrieve metadata about
|
|
380
|
+
the step where the action is executed. This metadata contains information about the validation
|
|
381
|
+
step, including the step type, level, step number, column name, and associated value. You can
|
|
382
|
+
use this information to craft your action message or to take specific actions based on the
|
|
383
|
+
metadata provided.
|
|
384
|
+
|
|
376
385
|
Examples
|
|
377
386
|
--------
|
|
378
387
|
```{python}
|
|
@@ -484,3 +493,138 @@ class Actions:
|
|
|
484
493
|
|
|
485
494
|
def _get_action(self, level: str) -> list[str | Callable]:
|
|
486
495
|
return getattr(self, level)
|
|
496
|
+
|
|
497
|
+
|
|
498
|
+
@dataclass
|
|
499
|
+
class FinalActions:
|
|
500
|
+
"""
|
|
501
|
+
Define actions to be taken after validation is complete.
|
|
502
|
+
|
|
503
|
+
Final actions are executed after all validation steps have been completed. They provide a
|
|
504
|
+
mechanism to respond to the overall validation results, such as sending alerts when critical
|
|
505
|
+
failures are detected or generating summary reports.
|
|
506
|
+
|
|
507
|
+
Parameters
|
|
508
|
+
----------
|
|
509
|
+
*actions
|
|
510
|
+
One or more actions to execute after validation. An action can be (1) a callable function
|
|
511
|
+
that will be executed with no arguments, or (2) a string message that will be printed to the
|
|
512
|
+
console.
|
|
513
|
+
|
|
514
|
+
Returns
|
|
515
|
+
-------
|
|
516
|
+
FinalActions
|
|
517
|
+
A `FinalActions` object. This can be used when using the
|
|
518
|
+
[`Validate`](`pointblank.Validate`) class (to set final actions for the validation
|
|
519
|
+
workflow).
|
|
520
|
+
|
|
521
|
+
Types of Actions
|
|
522
|
+
----------------
|
|
523
|
+
Final actions can be defined in two different ways:
|
|
524
|
+
|
|
525
|
+
1. **String**: A message to be displayed when the validation is complete.
|
|
526
|
+
2. **Callable**: A function that is called when the validation is complete.
|
|
527
|
+
|
|
528
|
+
The actions are executed at the end of the validation workflow. When providing a string, it will
|
|
529
|
+
simply be printed to the console. A callable will also be executed at the time of validation
|
|
530
|
+
completion. Several strings and callables can be provided to the `FinalActions` class, and
|
|
531
|
+
they will be executed in the order they are provided.
|
|
532
|
+
|
|
533
|
+
Crafting Callables with `get_validation_summary()`
|
|
534
|
+
-------------------------------------------------
|
|
535
|
+
When creating a callable function to be used as a final action, you can use the
|
|
536
|
+
[`get_validation_summary()`](`pointblank.get_validation_summary`) function to retrieve the
|
|
537
|
+
summary of the validation results. This summary contains information about the validation
|
|
538
|
+
workflow, including the number of test units, the number of failing test units, and the
|
|
539
|
+
threshold levels that were exceeded. You can use this information to craft your final action
|
|
540
|
+
message or to take specific actions based on the validation results.
|
|
541
|
+
|
|
542
|
+
Examples
|
|
543
|
+
--------
|
|
544
|
+
Final actions provide a powerful way to respond to the overall results of a validation workflow.
|
|
545
|
+
They're especially useful for sending notifications, generating reports, or taking corrective
|
|
546
|
+
actions based on the complete validation outcome.
|
|
547
|
+
|
|
548
|
+
The following example shows how to create a final action that checks for critical failures
|
|
549
|
+
and sends an alert:
|
|
550
|
+
|
|
551
|
+
```python
|
|
552
|
+
import pointblank as pb
|
|
553
|
+
|
|
554
|
+
def send_alert():
|
|
555
|
+
summary = pb.get_validation_summary()
|
|
556
|
+
if summary["highest_severity"] == "critical":
|
|
557
|
+
print(f"ALERT: Critical validation failures found in {summary['tbl_name']}")
|
|
558
|
+
|
|
559
|
+
validation = (
|
|
560
|
+
pb.Validate(
|
|
561
|
+
data=my_data,
|
|
562
|
+
final_actions=pb.FinalActions(send_alert)
|
|
563
|
+
)
|
|
564
|
+
.col_vals_gt(columns="revenue", value=0)
|
|
565
|
+
.interrogate()
|
|
566
|
+
)
|
|
567
|
+
```
|
|
568
|
+
|
|
569
|
+
In this example, the `send_alert()` function is defined to check the validation summary for
|
|
570
|
+
critical failures. If any are found, an alert message is printed to the console. The function is
|
|
571
|
+
passed to the `FinalActions` class, which ensures it will be executed after all validation steps
|
|
572
|
+
are complete. Note that we used the `get_validation_summary()` function to retrieve the summary
|
|
573
|
+
of the validation results to help craft the alert message.
|
|
574
|
+
|
|
575
|
+
Multiple final actions can be provided in a sequence. They will be executed in the order they
|
|
576
|
+
are specified after all validation steps have completed:
|
|
577
|
+
|
|
578
|
+
```python
|
|
579
|
+
validation = (
|
|
580
|
+
pb.Validate(
|
|
581
|
+
data=my_data,
|
|
582
|
+
final_actions=pb.FinalActions(
|
|
583
|
+
"Validation complete.", # a string message
|
|
584
|
+
send_alert, # a callable function
|
|
585
|
+
generate_report # another callable function
|
|
586
|
+
)
|
|
587
|
+
)
|
|
588
|
+
.col_vals_gt(columns="revenue", value=0)
|
|
589
|
+
.interrogate()
|
|
590
|
+
)
|
|
591
|
+
```
|
|
592
|
+
"""
|
|
593
|
+
|
|
594
|
+
actions: list | str | Callable
|
|
595
|
+
|
|
596
|
+
def __init__(self, *args):
|
|
597
|
+
# Check that all arguments are either strings or callables
|
|
598
|
+
for arg in args:
|
|
599
|
+
if not isinstance(arg, (str, Callable)) and not (
|
|
600
|
+
isinstance(arg, list) and all(isinstance(item, (str, Callable)) for item in arg)
|
|
601
|
+
):
|
|
602
|
+
raise TypeError(
|
|
603
|
+
f"All final actions must be strings, callables, or lists of strings/callables. "
|
|
604
|
+
f"Got {type(arg).__name__} instead."
|
|
605
|
+
)
|
|
606
|
+
|
|
607
|
+
if len(args) == 0:
|
|
608
|
+
self.actions = []
|
|
609
|
+
elif len(args) == 1:
|
|
610
|
+
# If a single action is provided, store it directly (not in a list)
|
|
611
|
+
self.actions = args[0]
|
|
612
|
+
else:
|
|
613
|
+
# Multiple actions, store as a list
|
|
614
|
+
self.actions = list(args)
|
|
615
|
+
|
|
616
|
+
def __repr__(self) -> str:
|
|
617
|
+
if isinstance(self.actions, list):
|
|
618
|
+
action_reprs = ", ".join(
|
|
619
|
+
f"'{a}'" if isinstance(a, str) else a.__name__ for a in self.actions
|
|
620
|
+
)
|
|
621
|
+
return f"FinalActions([{action_reprs}])"
|
|
622
|
+
elif isinstance(self.actions, str):
|
|
623
|
+
return f"FinalActions('{self.actions}')"
|
|
624
|
+
elif callable(self.actions):
|
|
625
|
+
return f"FinalActions({self.actions.__name__})"
|
|
626
|
+
else:
|
|
627
|
+
return f"FinalActions({self.actions})" # pragma: no cover
|
|
628
|
+
|
|
629
|
+
def __str__(self) -> str:
|
|
630
|
+
return self.__repr__()
|
pointblank/validate.py
CHANGED
|
@@ -73,6 +73,7 @@ from pointblank.column import Column, ColumnLiteral, ColumnSelector, ColumnSelec
|
|
|
73
73
|
from pointblank.schema import Schema, _get_schema_validation_info
|
|
74
74
|
from pointblank.thresholds import (
|
|
75
75
|
Actions,
|
|
76
|
+
FinalActions,
|
|
76
77
|
Thresholds,
|
|
77
78
|
_convert_abs_count_to_fraction,
|
|
78
79
|
_normalize_thresholds_creation,
|
|
@@ -90,6 +91,7 @@ __all__ = [
|
|
|
90
91
|
"get_column_count",
|
|
91
92
|
"get_row_count",
|
|
92
93
|
"get_action_metadata",
|
|
94
|
+
"get_validation_summary",
|
|
93
95
|
]
|
|
94
96
|
|
|
95
97
|
# Create a thread-local storage for the metadata
|
|
@@ -177,6 +179,129 @@ def get_action_metadata():
|
|
|
177
179
|
return None # pragma: no cover
|
|
178
180
|
|
|
179
181
|
|
|
182
|
+
# Create a thread-local storage for the validation summary
|
|
183
|
+
_final_action_context = threading.local()
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
@contextlib.contextmanager
|
|
187
|
+
def _final_action_context_manager(summary):
|
|
188
|
+
"""Context manager for storing validation summary during final action execution."""
|
|
189
|
+
_final_action_context.summary = summary
|
|
190
|
+
try:
|
|
191
|
+
yield
|
|
192
|
+
finally:
|
|
193
|
+
# Clean up after execution
|
|
194
|
+
if hasattr(_final_action_context, "summary"):
|
|
195
|
+
delattr(_final_action_context, "summary")
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def get_validation_summary():
|
|
199
|
+
"""Access validation summary information when authoring final actions.
|
|
200
|
+
|
|
201
|
+
This function provides a convenient way to access summary information about the validation
|
|
202
|
+
process within a final action. It returns a dictionary with key metrics from the validation
|
|
203
|
+
process.
|
|
204
|
+
|
|
205
|
+
Returns
|
|
206
|
+
-------
|
|
207
|
+
dict | None
|
|
208
|
+
A dictionary containing validation metrics, or None if called outside a final action.
|
|
209
|
+
|
|
210
|
+
Description of the Summary Fields
|
|
211
|
+
--------------------------------
|
|
212
|
+
The summary dictionary contains the following fields:
|
|
213
|
+
|
|
214
|
+
- `n_steps` (`int`): The total number of validation steps.
|
|
215
|
+
- `n_passing_steps` (`int`): The number of validation steps where all test units passed.
|
|
216
|
+
- `n_failing_steps` (`int`): The number of validation steps that had some failing test units.
|
|
217
|
+
- `n_warning_steps` (`int`): The number of steps that exceeded a 'warning' threshold.
|
|
218
|
+
- `n_error_steps` (`int`): The number of steps that exceeded an 'error' threshold.
|
|
219
|
+
- `n_critical_steps` (`int`): The number of steps that exceeded a 'critical' threshold.
|
|
220
|
+
- `list_passing_steps` (`list[int]`): List of step numbers where all test units passed.
|
|
221
|
+
- `list_failing_steps` (`list[int]`): List of step numbers for steps having failing test units.
|
|
222
|
+
- `dict_n` (`dict`): The number of test units for each validation step.
|
|
223
|
+
- `dict_n_passed` (`dict`): The number of test units that passed for each validation step.
|
|
224
|
+
- `dict_n_failed` (`dict`): The number of test units that failed for each validation step.
|
|
225
|
+
- `dict_f_passed` (`dict`): The fraction of test units that passed for each validation step.
|
|
226
|
+
- `dict_f_failed` (`dict`): The fraction of test units that failed for each validation step.
|
|
227
|
+
- `dict_warning` (`dict`): The 'warning' level status for each validation step.
|
|
228
|
+
- `dict_error` (`dict`): The 'error' level status for each validation step.
|
|
229
|
+
- `dict_critical` (`dict`): The 'critical' level status for each validation step.
|
|
230
|
+
- `all_passed` (`bool`): Whether or not every validation step had no failing test units.
|
|
231
|
+
- `highest_severity` (`str`): The highest severity level encountered during validation. This can
|
|
232
|
+
be one of the following: `"warning"`, `"error"`, `"critical"`, `"some failing"`, or
|
|
233
|
+
`"all passed"`.
|
|
234
|
+
- `tbl_row_count` (`int`): The number of rows in the target table.
|
|
235
|
+
- `tbl_column_count` (`int`): The number of columns in the target table.
|
|
236
|
+
- `tbl_name` (`str`): The name of the target table.
|
|
237
|
+
- `validation_duration` (`float`): The duration of the validation in seconds.
|
|
238
|
+
|
|
239
|
+
Note that the summary dictionary is only available within the context of a final action. If
|
|
240
|
+
called outside of a final action (i.e., when no final action is being executed), this function
|
|
241
|
+
will return `None`.
|
|
242
|
+
|
|
243
|
+
Examples
|
|
244
|
+
--------
|
|
245
|
+
Final actions are executed after the completion of all validation steps. They provide an
|
|
246
|
+
opportunity to take appropriate actions based on the overall validation results. Here's an
|
|
247
|
+
example of a final action function (`send_report()`) that sends an alert when critical
|
|
248
|
+
validation failures are detected:
|
|
249
|
+
|
|
250
|
+
```python
|
|
251
|
+
import pointblank as pb
|
|
252
|
+
|
|
253
|
+
def send_report():
|
|
254
|
+
summary = pb.get_validation_summary()
|
|
255
|
+
if summary["highest_severity"] == "critical":
|
|
256
|
+
# Send an alert email
|
|
257
|
+
send_alert_email(
|
|
258
|
+
subject=f"CRITICAL validation failures in {summary['tbl_name']}",
|
|
259
|
+
body=f"{summary['n_critical_steps']} steps failed with critical severity."
|
|
260
|
+
)
|
|
261
|
+
|
|
262
|
+
validation = (
|
|
263
|
+
pb.Validate(
|
|
264
|
+
data=my_data,
|
|
265
|
+
final_actions=pb.FinalActions(send_report)
|
|
266
|
+
)
|
|
267
|
+
.col_vals_gt(columns="revenue", value=0)
|
|
268
|
+
.interrogate()
|
|
269
|
+
)
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
Note that `send_alert_email()` in the example above is a placeholder function that would be
|
|
273
|
+
implemented by the user to send email alerts. This function is not provided by the Pointblank
|
|
274
|
+
package.
|
|
275
|
+
|
|
276
|
+
The `get_validation_summary()` function can also be used to create custom reporting for
|
|
277
|
+
validation results:
|
|
278
|
+
|
|
279
|
+
```python
|
|
280
|
+
def log_validation_results():
|
|
281
|
+
summary = pb.get_validation_summary()
|
|
282
|
+
|
|
283
|
+
print(f"Validation completed with status: {summary['highest_severity'].upper()}")
|
|
284
|
+
print(f"Steps: {summary['n_steps']} total")
|
|
285
|
+
print(f" - {summary['n_passing_steps']} passing, {summary['n_failing_steps']} failing")
|
|
286
|
+
print(
|
|
287
|
+
f" - Severity: {summary['n_warning_steps']} warnings, "
|
|
288
|
+
f"{summary['n_error_steps']} errors, "
|
|
289
|
+
f"{summary['n_critical_steps']} critical"
|
|
290
|
+
)
|
|
291
|
+
|
|
292
|
+
if summary['highest_severity'] in ["error", "critical"]:
|
|
293
|
+
print("⚠️ Action required: Please review failing validation steps!")
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
Final actions work well with both simple logging and more complex notification systems, allowing
|
|
297
|
+
you to integrate validation results into your broader data quality workflows.
|
|
298
|
+
"""
|
|
299
|
+
if hasattr(_final_action_context, "summary"):
|
|
300
|
+
return _final_action_context.summary
|
|
301
|
+
else:
|
|
302
|
+
return None
|
|
303
|
+
|
|
304
|
+
|
|
180
305
|
@dataclass
|
|
181
306
|
class PointblankConfig:
|
|
182
307
|
"""
|
|
@@ -1789,9 +1914,21 @@ class Validate:
|
|
|
1789
1914
|
`thresholds=` parameter). The default is `None`, which means that no thresholds will be set.
|
|
1790
1915
|
Look at the *Thresholds* section for information on how to set threshold levels.
|
|
1791
1916
|
actions
|
|
1792
|
-
The actions to take when validation steps meet or exceed any set threshold levels.
|
|
1793
|
-
|
|
1794
|
-
|
|
1917
|
+
The actions to take when validation steps meet or exceed any set threshold levels. These
|
|
1918
|
+
actions are paired with the threshold levels and are executed during the interrogation
|
|
1919
|
+
process when there are exceedances. The actions are executed right after each step is
|
|
1920
|
+
evaluated. Such actions should be provided in the form of an `Actions` object. If `None`
|
|
1921
|
+
then no global actions will be set. View the *Actions* section for information on how to set
|
|
1922
|
+
actions.
|
|
1923
|
+
final_actions
|
|
1924
|
+
The actions to take when the validation process is complete and the final results are
|
|
1925
|
+
available. This is useful for sending notifications or reporting the overall status of the
|
|
1926
|
+
validation process. The final actions are executed after all validation steps have been
|
|
1927
|
+
processed and the results have been collected. The final actions are not tied to any
|
|
1928
|
+
threshold levels; they are executed regardless of the validation results. Such actions
|
|
1929
|
+
should be provided in the form of a `FinalActions` object. If `None` then no finalizing
|
|
1930
|
+
actions will be set. Please see the *Actions* section for information on how to set final
|
|
1931
|
+
actions.
|
|
1795
1932
|
brief
|
|
1796
1933
|
A global setting for briefs, which are optional brief descriptions for validation steps
|
|
1797
1934
|
(they will be displayed in the reporting table). For such a global setting, templating elements
|
|
@@ -1860,6 +1997,85 @@ class Validate:
|
|
|
1860
1997
|
Aside from reporting failure conditions, thresholds can be used to determine the actions to take
|
|
1861
1998
|
for each level of failure (using the `actions=` parameter).
|
|
1862
1999
|
|
|
2000
|
+
Actions
|
|
2001
|
+
-------
|
|
2002
|
+
The `actions=` and `final_actions=` parameters provide mechanisms to respond to validation
|
|
2003
|
+
results. These actions can be used to notify users of validation failures, log issues, or
|
|
2004
|
+
trigger other processes when problems are detected.
|
|
2005
|
+
|
|
2006
|
+
*Step Actions*
|
|
2007
|
+
|
|
2008
|
+
The `actions=` parameter allows you to define actions that are triggered when validation steps
|
|
2009
|
+
exceed specific threshold levels (warning, error, or critical). These actions are executed
|
|
2010
|
+
during the interrogation process, right after each step is evaluated.
|
|
2011
|
+
|
|
2012
|
+
Step actions should be provided using the [`Actions`](`pointblank.Actions`) class, which lets
|
|
2013
|
+
you specify different actions for different severity levels:
|
|
2014
|
+
|
|
2015
|
+
```python
|
|
2016
|
+
# Define an action that logs a message when warning threshold is exceeded
|
|
2017
|
+
def log_warning():
|
|
2018
|
+
metadata = pb.get_action_metadata()
|
|
2019
|
+
print(f"WARNING: Step {metadata['step']} failed with type {metadata['type']}")
|
|
2020
|
+
|
|
2021
|
+
# Define actions for different threshold levels
|
|
2022
|
+
actions = pb.Actions(
|
|
2023
|
+
warning = log_warning,
|
|
2024
|
+
error = lambda: send_email("Error in validation"),
|
|
2025
|
+
critical = "CRITICAL FAILURE DETECTED"
|
|
2026
|
+
)
|
|
2027
|
+
|
|
2028
|
+
# Use in Validate
|
|
2029
|
+
validation = pb.Validate(
|
|
2030
|
+
data=my_data,
|
|
2031
|
+
actions=actions # Global actions for all steps
|
|
2032
|
+
)
|
|
2033
|
+
```
|
|
2034
|
+
|
|
2035
|
+
You can also provide step-specific actions in individual validation methods:
|
|
2036
|
+
|
|
2037
|
+
```python
|
|
2038
|
+
validation.col_vals_gt(
|
|
2039
|
+
columns="revenue",
|
|
2040
|
+
value=0,
|
|
2041
|
+
actions=pb.Actions(warning=log_warning) # Only applies to this step
|
|
2042
|
+
)
|
|
2043
|
+
```
|
|
2044
|
+
|
|
2045
|
+
Step actions have access to step-specific context through the
|
|
2046
|
+
[`get_action_metadata()`](`pointblank.get_action_metadata`) function, which provides details
|
|
2047
|
+
about the current validation step that triggered the action.
|
|
2048
|
+
|
|
2049
|
+
*Final Actions*
|
|
2050
|
+
|
|
2051
|
+
The `final_actions=` parameter lets you define actions that execute after all validation steps
|
|
2052
|
+
have completed. These are useful for providing summaries, sending notifications based on
|
|
2053
|
+
overall validation status, or performing cleanup operations.
|
|
2054
|
+
|
|
2055
|
+
Final actions should be provided using the [`FinalActions`](`pointblank.FinalActions`) class:
|
|
2056
|
+
|
|
2057
|
+
```python
|
|
2058
|
+
def send_report():
|
|
2059
|
+
summary = pb.get_validation_summary()
|
|
2060
|
+
if summary["highest_severity"] == "critical":
|
|
2061
|
+
send_alert_email(
|
|
2062
|
+
subject=f"CRITICAL validation failures in {summary['tbl_name']}",
|
|
2063
|
+
body=f"{summary['n_critical_steps']} steps failed with critical severity."
|
|
2064
|
+
)
|
|
2065
|
+
|
|
2066
|
+
validation = pb.Validate(
|
|
2067
|
+
data=my_data,
|
|
2068
|
+
final_actions=pb.FinalActions(send_report)
|
|
2069
|
+
)
|
|
2070
|
+
```
|
|
2071
|
+
|
|
2072
|
+
Final actions have access to validation-wide summary information through the
|
|
2073
|
+
[`get_validation_summary()`](`pointblank.get_validation_summary`) function, which provides a
|
|
2074
|
+
comprehensive overview of the entire validation process.
|
|
2075
|
+
|
|
2076
|
+
The combination of step actions and final actions provides a flexible system for responding to
|
|
2077
|
+
data quality issues at both the individual step level and the overall validation level.
|
|
2078
|
+
|
|
1863
2079
|
Reporting Languages
|
|
1864
2080
|
-------------------
|
|
1865
2081
|
Various pieces of reporting in Pointblank can be localized to a specific language. This is done
|
|
@@ -2035,6 +2251,7 @@ class Validate:
|
|
|
2035
2251
|
label: str | None = None
|
|
2036
2252
|
thresholds: int | float | bool | tuple | dict | Thresholds | None = None
|
|
2037
2253
|
actions: Actions | None = None
|
|
2254
|
+
final_actions: FinalActions | None = None
|
|
2038
2255
|
brief: str | bool | None = None
|
|
2039
2256
|
lang: str | None = None
|
|
2040
2257
|
locale: str | None = None
|
|
@@ -2046,6 +2263,24 @@ class Validate:
|
|
|
2046
2263
|
# Normalize the thresholds value (if any) to a Thresholds object
|
|
2047
2264
|
self.thresholds = _normalize_thresholds_creation(self.thresholds)
|
|
2048
2265
|
|
|
2266
|
+
# Check that `actions` is an Actions object if provided
|
|
2267
|
+
# TODO: allow string, callable, or list of either and upgrade to Actions object
|
|
2268
|
+
if self.actions is not None and not isinstance(self.actions, Actions): # pragma: no cover
|
|
2269
|
+
raise TypeError(
|
|
2270
|
+
"The `actions=` parameter must be an `Actions` object. "
|
|
2271
|
+
"Please use `Actions()` to wrap your actions."
|
|
2272
|
+
)
|
|
2273
|
+
|
|
2274
|
+
# Check that `final_actions` is a FinalActions object if provided
|
|
2275
|
+
# TODO: allow string, callable, or list of either and upgrade to FinalActions object
|
|
2276
|
+
if self.final_actions is not None and not isinstance(
|
|
2277
|
+
self.final_actions, FinalActions
|
|
2278
|
+
): # pragma: no cover
|
|
2279
|
+
raise TypeError(
|
|
2280
|
+
"The `final_actions=` parameter must be a `FinalActions` object. "
|
|
2281
|
+
"Please use `FinalActions()` to wrap your finalizing actions."
|
|
2282
|
+
)
|
|
2283
|
+
|
|
2049
2284
|
# Normalize the reporting language identifier and error if invalid
|
|
2050
2285
|
if self.lang not in ["zh-Hans", "zh-Hant"]:
|
|
2051
2286
|
self.lang = _normalize_reporting_language(lang=self.lang)
|
|
@@ -6806,6 +7041,9 @@ class Validate:
|
|
|
6806
7041
|
|
|
6807
7042
|
self.time_end = datetime.datetime.now(datetime.timezone.utc)
|
|
6808
7043
|
|
|
7044
|
+
# Perform any final actions
|
|
7045
|
+
self._execute_final_actions()
|
|
7046
|
+
|
|
6809
7047
|
return self
|
|
6810
7048
|
|
|
6811
7049
|
def all_passed(self) -> bool:
|
|
@@ -9217,6 +9455,80 @@ class Validate:
|
|
|
9217
9455
|
if validation.i in i
|
|
9218
9456
|
}
|
|
9219
9457
|
|
|
9458
|
+
def _execute_final_actions(self):
|
|
9459
|
+
"""Execute any final actions after interrogation is complete."""
|
|
9460
|
+
if self.final_actions is None:
|
|
9461
|
+
return
|
|
9462
|
+
|
|
9463
|
+
# Get the highest severity level based on the validation results
|
|
9464
|
+
highest_severity = self._get_highest_severity_level()
|
|
9465
|
+
|
|
9466
|
+
# Get row count using the dedicated function that handles all table types correctly
|
|
9467
|
+
row_count = get_row_count(self.data)
|
|
9468
|
+
|
|
9469
|
+
# Get column count using the dedicated function that handles all table types correctly
|
|
9470
|
+
column_count = get_column_count(self.data)
|
|
9471
|
+
|
|
9472
|
+
# Get the validation duration
|
|
9473
|
+
validation_duration = self.validation_duration = (
|
|
9474
|
+
self.time_end - self.time_start
|
|
9475
|
+
).total_seconds()
|
|
9476
|
+
|
|
9477
|
+
# Create a summary of validation results as a dictionary
|
|
9478
|
+
summary = {
|
|
9479
|
+
"n_steps": len(self.validation_info),
|
|
9480
|
+
"n_passing_steps": sum(1 for step in self.validation_info if step.all_passed),
|
|
9481
|
+
"n_failing_steps": sum(1 for step in self.validation_info if not step.all_passed),
|
|
9482
|
+
"n_warning_steps": sum(1 for step in self.validation_info if step.warning),
|
|
9483
|
+
"n_error_steps": sum(1 for step in self.validation_info if step.error),
|
|
9484
|
+
"n_critical_steps": sum(1 for step in self.validation_info if step.critical),
|
|
9485
|
+
"list_passing_steps": [step.i for step in self.validation_info if step.all_passed],
|
|
9486
|
+
"list_failing_steps": [step.i for step in self.validation_info if not step.all_passed],
|
|
9487
|
+
"dict_n": {step.i: step.n for step in self.validation_info},
|
|
9488
|
+
"dict_n_passed": {step.i: step.n_passed for step in self.validation_info},
|
|
9489
|
+
"dict_n_failed": {step.i: step.n_failed for step in self.validation_info},
|
|
9490
|
+
"dict_f_passed": {step.i: step.f_passed for step in self.validation_info},
|
|
9491
|
+
"dict_f_failed": {step.i: step.f_failed for step in self.validation_info},
|
|
9492
|
+
"dict_warning": {step.i: step.warning for step in self.validation_info},
|
|
9493
|
+
"dict_error": {step.i: step.error for step in self.validation_info},
|
|
9494
|
+
"dict_critical": {step.i: step.critical for step in self.validation_info},
|
|
9495
|
+
"all_passed": all(step.all_passed for step in self.validation_info),
|
|
9496
|
+
"highest_severity": highest_severity,
|
|
9497
|
+
"tbl_row_count": row_count,
|
|
9498
|
+
"tbl_column_count": column_count,
|
|
9499
|
+
"tbl_name": self.tbl_name or "Unknown",
|
|
9500
|
+
"validation_duration": validation_duration,
|
|
9501
|
+
}
|
|
9502
|
+
|
|
9503
|
+
# Extract the actions from FinalActions object and execute
|
|
9504
|
+
action = self.final_actions.actions
|
|
9505
|
+
|
|
9506
|
+
# Execute the action within the context manager
|
|
9507
|
+
with _final_action_context_manager(summary):
|
|
9508
|
+
if isinstance(action, str):
|
|
9509
|
+
print(action)
|
|
9510
|
+
elif callable(action):
|
|
9511
|
+
action()
|
|
9512
|
+
elif isinstance(action, list):
|
|
9513
|
+
for single_action in action:
|
|
9514
|
+
if isinstance(single_action, str):
|
|
9515
|
+
print(single_action)
|
|
9516
|
+
elif callable(single_action):
|
|
9517
|
+
single_action()
|
|
9518
|
+
|
|
9519
|
+
def _get_highest_severity_level(self):
|
|
9520
|
+
"""Get the highest severity level reached across all validation steps."""
|
|
9521
|
+
if any(step.critical for step in self.validation_info):
|
|
9522
|
+
return "critical"
|
|
9523
|
+
elif any(step.error for step in self.validation_info):
|
|
9524
|
+
return "error"
|
|
9525
|
+
elif any(step.warning for step in self.validation_info):
|
|
9526
|
+
return "warning"
|
|
9527
|
+
elif any(not step.all_passed for step in self.validation_info):
|
|
9528
|
+
return "some failing"
|
|
9529
|
+
else:
|
|
9530
|
+
return "all passed"
|
|
9531
|
+
|
|
9220
9532
|
|
|
9221
9533
|
def _normalize_reporting_language(lang: str | None) -> str:
|
|
9222
9534
|
if lang is None:
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
pointblank/__init__.py,sha256=
|
|
1
|
+
pointblank/__init__.py,sha256=f7Ndfn3fuPSqA5ivCnkYgeUmfEsuWjW-JeQGbDYsZlU,1469
|
|
2
2
|
pointblank/_constants.py,sha256=Jsvvt2TSZvPXCEuBQl2A3HGQ73o2WPQug-coh6thIzY,72184
|
|
3
3
|
pointblank/_constants_docs.py,sha256=JBmtt16zTYQ-zaM4ElLExtKs-dKlnN553Ys2ML1Y1C8,2099
|
|
4
|
-
pointblank/_constants_translations.py,sha256=
|
|
4
|
+
pointblank/_constants_translations.py,sha256=wlsV2tvoZcevSSa7snj-ehdmcb-yb_t4aCbM1VRkDV8,90707
|
|
5
5
|
pointblank/_interrogation.py,sha256=AtygXSb5iaqUcobnfVF3HjO9mjrtPWkLJ8No9XFSvR8,73186
|
|
6
6
|
pointblank/_typing.py,sha256=YQ6Bt-j-W6Cg91qXHHDzBM-ptc-IEvhMg6T5ugWnGwM,306
|
|
7
7
|
pointblank/_utils.py,sha256=2CDpxwy9twkF8XQayTksmKcdxtoDHqVeBuNZiYFzAqc,23232
|
|
@@ -13,8 +13,8 @@ pointblank/datascan.py,sha256=p0b7j4sxbJxNqIvYqq5r-9-8f-i9niswK19PrmWOfFE,47727
|
|
|
13
13
|
pointblank/draft.py,sha256=lIbSlY9Avi1GbRvJhqR-69sGWCfD11im3Go20XsX8L0,15783
|
|
14
14
|
pointblank/schema.py,sha256=gzUCmtccO2v15MH2bo9uHUYjkKEEne1okQucxcH39pc,44291
|
|
15
15
|
pointblank/tf.py,sha256=8o_8m4i01teulEe3-YYMotSNf3tImjBMInsvdjSAO5Q,8844
|
|
16
|
-
pointblank/thresholds.py,sha256=
|
|
17
|
-
pointblank/validate.py,sha256=
|
|
16
|
+
pointblank/thresholds.py,sha256=C8_Rn2z3MVFu4UH5eaGRd7DkW3slgkWB3Hhim2h5CfU,25340
|
|
17
|
+
pointblank/validate.py,sha256=6m6EWbKWyysRaWo9Q20QaEt7WW_fHgapAiR44mNoL6s,490549
|
|
18
18
|
pointblank/data/api-docs.txt,sha256=ka96DmGzd699dG8shiA8ufTfAoilTKRuN09DECy_OpU,364411
|
|
19
19
|
pointblank/data/game_revenue-duckdb.zip,sha256=tKIVx48OGLYGsQPS3h5AjA2Nyq_rfEpLCjBiFUWhagU,35880
|
|
20
20
|
pointblank/data/game_revenue.zip,sha256=7c9EvHLyi93CHUd4p3dM4CZ-GucFCtXKSPxgLojL32U,33749
|
|
@@ -23,8 +23,8 @@ pointblank/data/nycflights.zip,sha256=yVjbUaKUz2LydSdF9cABuir0VReHBBgV7shiNWSd0m
|
|
|
23
23
|
pointblank/data/polars-api-docs.txt,sha256=KGcS-BOtUs9zgpkWfXD-GFdFh4O_zjdkpX7msHjztLg,198045
|
|
24
24
|
pointblank/data/small_table-duckdb.zip,sha256=BhTaZ2CRS4-9Z1uVhOU6HggvW3XCar7etMznfENIcOc,2028
|
|
25
25
|
pointblank/data/small_table.zip,sha256=lmFb90Nb-v5X559Ikjg31YLAXuRyMkD9yLRElkXPMzQ,472
|
|
26
|
-
pointblank-0.8.
|
|
27
|
-
pointblank-0.8.
|
|
28
|
-
pointblank-0.8.
|
|
29
|
-
pointblank-0.8.
|
|
30
|
-
pointblank-0.8.
|
|
26
|
+
pointblank-0.8.1.dist-info/licenses/LICENSE,sha256=apLF-HWPNU7pT5bmf5KmZpD5Cklpy2u-BN_0xBoRMLY,1081
|
|
27
|
+
pointblank-0.8.1.dist-info/METADATA,sha256=89LXVaMNl8XufSfZtx6VcqNDGbYG3am8zrzwhJ8CuWk,12807
|
|
28
|
+
pointblank-0.8.1.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
|
29
|
+
pointblank-0.8.1.dist-info/top_level.txt,sha256=-wHrS1SvV8-nhvc3w-PPYs1C1WtEc1pK-eGjubbCCKc,11
|
|
30
|
+
pointblank-0.8.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|