pointblank 0.18.0__py3-none-any.whl → 0.20.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pointblank/__init__.py +44 -1
- pointblank/_constants.py +258 -166
- pointblank/_constants_translations.py +378 -0
- pointblank/_interrogation.py +204 -0
- pointblank/_utils_llms_txt.py +20 -0
- pointblank/data/api-docs.txt +793 -1
- pointblank/field.py +1507 -0
- pointblank/generate/__init__.py +17 -0
- pointblank/generate/base.py +49 -0
- pointblank/generate/generators.py +573 -0
- pointblank/generate/regex.py +217 -0
- pointblank/locales/__init__.py +1476 -0
- pointblank/locales/data/AR/address.json +73 -0
- pointblank/locales/data/AR/company.json +60 -0
- pointblank/locales/data/AR/internet.json +19 -0
- pointblank/locales/data/AR/misc.json +7 -0
- pointblank/locales/data/AR/person.json +39 -0
- pointblank/locales/data/AR/text.json +38 -0
- pointblank/locales/data/AT/address.json +84 -0
- pointblank/locales/data/AT/company.json +65 -0
- pointblank/locales/data/AT/internet.json +20 -0
- pointblank/locales/data/AT/misc.json +8 -0
- pointblank/locales/data/AT/person.json +17 -0
- pointblank/locales/data/AT/text.json +35 -0
- pointblank/locales/data/AU/address.json +83 -0
- pointblank/locales/data/AU/company.json +65 -0
- pointblank/locales/data/AU/internet.json +20 -0
- pointblank/locales/data/AU/misc.json +8 -0
- pointblank/locales/data/AU/person.json +17 -0
- pointblank/locales/data/AU/text.json +35 -0
- pointblank/locales/data/BE/address.json +225 -0
- pointblank/locales/data/BE/company.json +129 -0
- pointblank/locales/data/BE/internet.json +36 -0
- pointblank/locales/data/BE/misc.json +6 -0
- pointblank/locales/data/BE/person.json +62 -0
- pointblank/locales/data/BE/text.json +38 -0
- pointblank/locales/data/BG/address.json +75 -0
- pointblank/locales/data/BG/company.json +60 -0
- pointblank/locales/data/BG/internet.json +19 -0
- pointblank/locales/data/BG/misc.json +7 -0
- pointblank/locales/data/BG/person.json +40 -0
- pointblank/locales/data/BG/text.json +38 -0
- pointblank/locales/data/BR/address.json +98 -0
- pointblank/locales/data/BR/company.json +65 -0
- pointblank/locales/data/BR/internet.json +20 -0
- pointblank/locales/data/BR/misc.json +8 -0
- pointblank/locales/data/BR/person.json +17 -0
- pointblank/locales/data/BR/text.json +35 -0
- pointblank/locales/data/CA/address.json +747 -0
- pointblank/locales/data/CA/company.json +120 -0
- pointblank/locales/data/CA/internet.json +24 -0
- pointblank/locales/data/CA/misc.json +11 -0
- pointblank/locales/data/CA/person.json +1033 -0
- pointblank/locales/data/CA/text.json +58 -0
- pointblank/locales/data/CH/address.json +184 -0
- pointblank/locales/data/CH/company.json +112 -0
- pointblank/locales/data/CH/internet.json +20 -0
- pointblank/locales/data/CH/misc.json +10 -0
- pointblank/locales/data/CH/person.json +64 -0
- pointblank/locales/data/CH/text.json +45 -0
- pointblank/locales/data/CL/address.json +71 -0
- pointblank/locales/data/CL/company.json +60 -0
- pointblank/locales/data/CL/internet.json +19 -0
- pointblank/locales/data/CL/misc.json +7 -0
- pointblank/locales/data/CL/person.json +38 -0
- pointblank/locales/data/CL/text.json +38 -0
- pointblank/locales/data/CN/address.json +124 -0
- pointblank/locales/data/CN/company.json +76 -0
- pointblank/locales/data/CN/internet.json +20 -0
- pointblank/locales/data/CN/misc.json +8 -0
- pointblank/locales/data/CN/person.json +50 -0
- pointblank/locales/data/CN/text.json +38 -0
- pointblank/locales/data/CO/address.json +76 -0
- pointblank/locales/data/CO/company.json +60 -0
- pointblank/locales/data/CO/internet.json +19 -0
- pointblank/locales/data/CO/misc.json +7 -0
- pointblank/locales/data/CO/person.json +38 -0
- pointblank/locales/data/CO/text.json +38 -0
- pointblank/locales/data/CY/address.json +62 -0
- pointblank/locales/data/CY/company.json +60 -0
- pointblank/locales/data/CY/internet.json +19 -0
- pointblank/locales/data/CY/misc.json +7 -0
- pointblank/locales/data/CY/person.json +38 -0
- pointblank/locales/data/CY/text.json +38 -0
- pointblank/locales/data/CZ/address.json +70 -0
- pointblank/locales/data/CZ/company.json +61 -0
- pointblank/locales/data/CZ/internet.json +19 -0
- pointblank/locales/data/CZ/misc.json +7 -0
- pointblank/locales/data/CZ/person.json +40 -0
- pointblank/locales/data/CZ/text.json +38 -0
- pointblank/locales/data/DE/address.json +756 -0
- pointblank/locales/data/DE/company.json +101 -0
- pointblank/locales/data/DE/internet.json +22 -0
- pointblank/locales/data/DE/misc.json +11 -0
- pointblank/locales/data/DE/person.json +1026 -0
- pointblank/locales/data/DE/text.json +50 -0
- pointblank/locales/data/DK/address.json +231 -0
- pointblank/locales/data/DK/company.json +65 -0
- pointblank/locales/data/DK/internet.json +20 -0
- pointblank/locales/data/DK/misc.json +7 -0
- pointblank/locales/data/DK/person.json +45 -0
- pointblank/locales/data/DK/text.json +43 -0
- pointblank/locales/data/EE/address.json +69 -0
- pointblank/locales/data/EE/company.json +60 -0
- pointblank/locales/data/EE/internet.json +19 -0
- pointblank/locales/data/EE/misc.json +7 -0
- pointblank/locales/data/EE/person.json +39 -0
- pointblank/locales/data/EE/text.json +38 -0
- pointblank/locales/data/ES/address.json +3086 -0
- pointblank/locales/data/ES/company.json +644 -0
- pointblank/locales/data/ES/internet.json +25 -0
- pointblank/locales/data/ES/misc.json +11 -0
- pointblank/locales/data/ES/person.json +488 -0
- pointblank/locales/data/ES/text.json +49 -0
- pointblank/locales/data/FI/address.json +93 -0
- pointblank/locales/data/FI/company.json +65 -0
- pointblank/locales/data/FI/internet.json +20 -0
- pointblank/locales/data/FI/misc.json +8 -0
- pointblank/locales/data/FI/person.json +17 -0
- pointblank/locales/data/FI/text.json +35 -0
- pointblank/locales/data/FR/address.json +619 -0
- pointblank/locales/data/FR/company.json +111 -0
- pointblank/locales/data/FR/internet.json +22 -0
- pointblank/locales/data/FR/misc.json +11 -0
- pointblank/locales/data/FR/person.json +1066 -0
- pointblank/locales/data/FR/text.json +50 -0
- pointblank/locales/data/GB/address.json +5759 -0
- pointblank/locales/data/GB/company.json +131 -0
- pointblank/locales/data/GB/internet.json +24 -0
- pointblank/locales/data/GB/misc.json +45 -0
- pointblank/locales/data/GB/person.json +578 -0
- pointblank/locales/data/GB/text.json +61 -0
- pointblank/locales/data/GR/address.json +68 -0
- pointblank/locales/data/GR/company.json +61 -0
- pointblank/locales/data/GR/internet.json +19 -0
- pointblank/locales/data/GR/misc.json +7 -0
- pointblank/locales/data/GR/person.json +39 -0
- pointblank/locales/data/GR/text.json +38 -0
- pointblank/locales/data/HK/address.json +79 -0
- pointblank/locales/data/HK/company.json +69 -0
- pointblank/locales/data/HK/internet.json +19 -0
- pointblank/locales/data/HK/misc.json +7 -0
- pointblank/locales/data/HK/person.json +42 -0
- pointblank/locales/data/HK/text.json +38 -0
- pointblank/locales/data/HR/address.json +73 -0
- pointblank/locales/data/HR/company.json +60 -0
- pointblank/locales/data/HR/internet.json +19 -0
- pointblank/locales/data/HR/misc.json +7 -0
- pointblank/locales/data/HR/person.json +38 -0
- pointblank/locales/data/HR/text.json +38 -0
- pointblank/locales/data/HU/address.json +70 -0
- pointblank/locales/data/HU/company.json +61 -0
- pointblank/locales/data/HU/internet.json +19 -0
- pointblank/locales/data/HU/misc.json +7 -0
- pointblank/locales/data/HU/person.json +40 -0
- pointblank/locales/data/HU/text.json +38 -0
- pointblank/locales/data/ID/address.json +68 -0
- pointblank/locales/data/ID/company.json +61 -0
- pointblank/locales/data/ID/internet.json +19 -0
- pointblank/locales/data/ID/misc.json +7 -0
- pointblank/locales/data/ID/person.json +40 -0
- pointblank/locales/data/ID/text.json +38 -0
- pointblank/locales/data/IE/address.json +643 -0
- pointblank/locales/data/IE/company.json +140 -0
- pointblank/locales/data/IE/internet.json +24 -0
- pointblank/locales/data/IE/misc.json +44 -0
- pointblank/locales/data/IE/person.json +55 -0
- pointblank/locales/data/IE/text.json +60 -0
- pointblank/locales/data/IN/address.json +92 -0
- pointblank/locales/data/IN/company.json +65 -0
- pointblank/locales/data/IN/internet.json +20 -0
- pointblank/locales/data/IN/misc.json +8 -0
- pointblank/locales/data/IN/person.json +52 -0
- pointblank/locales/data/IN/text.json +39 -0
- pointblank/locales/data/IS/address.json +63 -0
- pointblank/locales/data/IS/company.json +61 -0
- pointblank/locales/data/IS/internet.json +19 -0
- pointblank/locales/data/IS/misc.json +7 -0
- pointblank/locales/data/IS/person.json +44 -0
- pointblank/locales/data/IS/text.json +38 -0
- pointblank/locales/data/IT/address.json +192 -0
- pointblank/locales/data/IT/company.json +137 -0
- pointblank/locales/data/IT/internet.json +20 -0
- pointblank/locales/data/IT/misc.json +10 -0
- pointblank/locales/data/IT/person.json +70 -0
- pointblank/locales/data/IT/text.json +44 -0
- pointblank/locales/data/JP/address.json +713 -0
- pointblank/locales/data/JP/company.json +113 -0
- pointblank/locales/data/JP/internet.json +22 -0
- pointblank/locales/data/JP/misc.json +10 -0
- pointblank/locales/data/JP/person.json +1057 -0
- pointblank/locales/data/JP/text.json +51 -0
- pointblank/locales/data/KR/address.json +77 -0
- pointblank/locales/data/KR/company.json +68 -0
- pointblank/locales/data/KR/internet.json +19 -0
- pointblank/locales/data/KR/misc.json +7 -0
- pointblank/locales/data/KR/person.json +40 -0
- pointblank/locales/data/KR/text.json +38 -0
- pointblank/locales/data/LT/address.json +66 -0
- pointblank/locales/data/LT/company.json +60 -0
- pointblank/locales/data/LT/internet.json +19 -0
- pointblank/locales/data/LT/misc.json +7 -0
- pointblank/locales/data/LT/person.json +42 -0
- pointblank/locales/data/LT/text.json +38 -0
- pointblank/locales/data/LU/address.json +66 -0
- pointblank/locales/data/LU/company.json +60 -0
- pointblank/locales/data/LU/internet.json +19 -0
- pointblank/locales/data/LU/misc.json +7 -0
- pointblank/locales/data/LU/person.json +38 -0
- pointblank/locales/data/LU/text.json +38 -0
- pointblank/locales/data/LV/address.json +62 -0
- pointblank/locales/data/LV/company.json +60 -0
- pointblank/locales/data/LV/internet.json +19 -0
- pointblank/locales/data/LV/misc.json +7 -0
- pointblank/locales/data/LV/person.json +40 -0
- pointblank/locales/data/LV/text.json +38 -0
- pointblank/locales/data/MT/address.json +61 -0
- pointblank/locales/data/MT/company.json +60 -0
- pointblank/locales/data/MT/internet.json +19 -0
- pointblank/locales/data/MT/misc.json +7 -0
- pointblank/locales/data/MT/person.json +38 -0
- pointblank/locales/data/MT/text.json +38 -0
- pointblank/locales/data/MX/address.json +100 -0
- pointblank/locales/data/MX/company.json +65 -0
- pointblank/locales/data/MX/internet.json +20 -0
- pointblank/locales/data/MX/misc.json +8 -0
- pointblank/locales/data/MX/person.json +18 -0
- pointblank/locales/data/MX/text.json +39 -0
- pointblank/locales/data/NL/address.json +1517 -0
- pointblank/locales/data/NL/company.json +133 -0
- pointblank/locales/data/NL/internet.json +44 -0
- pointblank/locales/data/NL/misc.json +55 -0
- pointblank/locales/data/NL/person.json +365 -0
- pointblank/locales/data/NL/text.json +210 -0
- pointblank/locales/data/NO/address.json +86 -0
- pointblank/locales/data/NO/company.json +66 -0
- pointblank/locales/data/NO/internet.json +20 -0
- pointblank/locales/data/NO/misc.json +8 -0
- pointblank/locales/data/NO/person.json +17 -0
- pointblank/locales/data/NO/text.json +35 -0
- pointblank/locales/data/NZ/address.json +90 -0
- pointblank/locales/data/NZ/company.json +65 -0
- pointblank/locales/data/NZ/internet.json +20 -0
- pointblank/locales/data/NZ/misc.json +8 -0
- pointblank/locales/data/NZ/person.json +17 -0
- pointblank/locales/data/NZ/text.json +39 -0
- pointblank/locales/data/PH/address.json +67 -0
- pointblank/locales/data/PH/company.json +61 -0
- pointblank/locales/data/PH/internet.json +19 -0
- pointblank/locales/data/PH/misc.json +7 -0
- pointblank/locales/data/PH/person.json +40 -0
- pointblank/locales/data/PH/text.json +38 -0
- pointblank/locales/data/PL/address.json +91 -0
- pointblank/locales/data/PL/company.json +65 -0
- pointblank/locales/data/PL/internet.json +20 -0
- pointblank/locales/data/PL/misc.json +8 -0
- pointblank/locales/data/PL/person.json +17 -0
- pointblank/locales/data/PL/text.json +35 -0
- pointblank/locales/data/PT/address.json +90 -0
- pointblank/locales/data/PT/company.json +65 -0
- pointblank/locales/data/PT/internet.json +20 -0
- pointblank/locales/data/PT/misc.json +8 -0
- pointblank/locales/data/PT/person.json +17 -0
- pointblank/locales/data/PT/text.json +35 -0
- pointblank/locales/data/RO/address.json +73 -0
- pointblank/locales/data/RO/company.json +61 -0
- pointblank/locales/data/RO/internet.json +19 -0
- pointblank/locales/data/RO/misc.json +7 -0
- pointblank/locales/data/RO/person.json +40 -0
- pointblank/locales/data/RO/text.json +38 -0
- pointblank/locales/data/RU/address.json +74 -0
- pointblank/locales/data/RU/company.json +60 -0
- pointblank/locales/data/RU/internet.json +19 -0
- pointblank/locales/data/RU/misc.json +7 -0
- pointblank/locales/data/RU/person.json +38 -0
- pointblank/locales/data/RU/text.json +38 -0
- pointblank/locales/data/SE/address.json +247 -0
- pointblank/locales/data/SE/company.json +65 -0
- pointblank/locales/data/SE/internet.json +20 -0
- pointblank/locales/data/SE/misc.json +7 -0
- pointblank/locales/data/SE/person.json +45 -0
- pointblank/locales/data/SE/text.json +43 -0
- pointblank/locales/data/SI/address.json +67 -0
- pointblank/locales/data/SI/company.json +60 -0
- pointblank/locales/data/SI/internet.json +19 -0
- pointblank/locales/data/SI/misc.json +7 -0
- pointblank/locales/data/SI/person.json +38 -0
- pointblank/locales/data/SI/text.json +38 -0
- pointblank/locales/data/SK/address.json +64 -0
- pointblank/locales/data/SK/company.json +60 -0
- pointblank/locales/data/SK/internet.json +19 -0
- pointblank/locales/data/SK/misc.json +7 -0
- pointblank/locales/data/SK/person.json +38 -0
- pointblank/locales/data/SK/text.json +38 -0
- pointblank/locales/data/TR/address.json +105 -0
- pointblank/locales/data/TR/company.json +65 -0
- pointblank/locales/data/TR/internet.json +20 -0
- pointblank/locales/data/TR/misc.json +8 -0
- pointblank/locales/data/TR/person.json +17 -0
- pointblank/locales/data/TR/text.json +35 -0
- pointblank/locales/data/TW/address.json +86 -0
- pointblank/locales/data/TW/company.json +69 -0
- pointblank/locales/data/TW/internet.json +19 -0
- pointblank/locales/data/TW/misc.json +7 -0
- pointblank/locales/data/TW/person.json +42 -0
- pointblank/locales/data/TW/text.json +38 -0
- pointblank/locales/data/US/address.json +996 -0
- pointblank/locales/data/US/company.json +131 -0
- pointblank/locales/data/US/internet.json +22 -0
- pointblank/locales/data/US/misc.json +11 -0
- pointblank/locales/data/US/person.json +1092 -0
- pointblank/locales/data/US/text.json +56 -0
- pointblank/locales/data/_shared/misc.json +42 -0
- pointblank/schema.py +339 -2
- pointblank/validate.py +1263 -11
- {pointblank-0.18.0.dist-info → pointblank-0.20.0.dist-info}/METADATA +45 -1
- pointblank-0.20.0.dist-info/RECORD +366 -0
- {pointblank-0.18.0.dist-info → pointblank-0.20.0.dist-info}/WHEEL +1 -1
- pointblank-0.18.0.dist-info/RECORD +0 -59
- {pointblank-0.18.0.dist-info → pointblank-0.20.0.dist-info}/entry_points.txt +0 -0
- {pointblank-0.18.0.dist-info → pointblank-0.20.0.dist-info}/licenses/LICENSE +0 -0
- {pointblank-0.18.0.dist-info → pointblank-0.20.0.dist-info}/top_level.txt +0 -0
pointblank/_interrogation.py
CHANGED
|
@@ -4,6 +4,7 @@ import functools
|
|
|
4
4
|
from collections.abc import Callable
|
|
5
5
|
from dataclasses import dataclass
|
|
6
6
|
from typing import TYPE_CHECKING, Any
|
|
7
|
+
from zoneinfo import ZoneInfo
|
|
7
8
|
|
|
8
9
|
import narwhals as nw
|
|
9
10
|
from narwhals.dependencies import (
|
|
@@ -2992,3 +2993,206 @@ def interrogate_prompt(
|
|
|
2992
2993
|
result_tbl["pb_is_good_"] = validation_results
|
|
2993
2994
|
|
|
2994
2995
|
return result_tbl
|
|
2996
|
+
|
|
2997
|
+
|
|
2998
|
+
def data_freshness(
    data_tbl: IntoFrame,
    column: str,
    max_age: Any,  # datetime.timedelta
    reference_time: Any | None,  # datetime.datetime | None
    timezone: str | None,
    allow_tz_mismatch: bool,
) -> dict:
    """
    Check if the most recent datetime value in a column is within the allowed max_age.

    Parameters
    ----------
    data_tbl
        The data table to check. A lazy frame is collected before the check.
    column
        The datetime column to check.
    max_age
        The maximum allowed age as a timedelta.
    reference_time
        The reference time to compare against (None = use current time).
    timezone
        The timezone to use for interpretation. Either an IANA name (e.g., `"Europe/Paris"`) or a
        UTC offset such as `"-7"`, `"+5"`, `"-07:00"`, or `"+05:30"`. The same parsing is applied
        both when building the current time and when reconciling an aware/naive mismatch.
    allow_tz_mismatch
        Whether to suppress timezone mismatch warnings. When `True`, no `'tz_warning_key'` is
        recorded if the data and the reference time differ in timezone awareness.

    Returns
    -------
    dict
        A dictionary containing:
        - 'passed': bool, whether the validation passed
        - 'max_datetime': the maximum datetime found in the column
        - 'reference_time': the reference time used
        - 'age': the calculated age (timedelta)
        - 'max_age': the maximum allowed age
        - 'tz_warning': always `None` as returned by this function
        - 'column_empty': bool, `True` when the column's maximum is `None`
        - 'tz_warning_key': translation key for a timezone mismatch warning; only present when
          the data and reference time differ in awareness and `allow_tz_mismatch` is `False`
        - 'error': the exception message; only present when reading the column or computing the
          age raised an exception
    """
    import datetime
    import re

    nw_frame = nw.from_native(data_tbl)

    # Handle LazyFrames by collecting them first
    if is_narwhals_lazyframe(nw_frame):
        nw_frame = nw_frame.collect()

    assert is_narwhals_dataframe(nw_frame)

    result = {
        "passed": False,
        "max_datetime": None,
        "reference_time": None,
        "age": None,
        "max_age": max_age,
        "tz_warning": None,
        "column_empty": False,
    }

    # Get the maximum datetime value from the column
    try:
        max_datetime_raw = nw_frame.select(nw.col(column).max()).item()

        if max_datetime_raw is None:
            result["column_empty"] = True
            result["passed"] = False
            return result

        # Convert to Python datetime if needed
        if hasattr(max_datetime_raw, "to_pydatetime"):
            # Pandas Timestamp
            max_datetime = max_datetime_raw.to_pydatetime()
        elif hasattr(max_datetime_raw, "isoformat"):
            # Already a datetime-like object
            max_datetime = max_datetime_raw
        else:
            # Try to parse as string or handle other types
            max_datetime = datetime.datetime.fromisoformat(str(max_datetime_raw))

        result["max_datetime"] = max_datetime

    except Exception as e:
        result["error"] = str(e)
        result["passed"] = False
        return result

    max_dt_aware = _is_datetime_aware(max_datetime)

    def _parse_tz(tz_str: str) -> datetime.tzinfo | None:
        """Parse an IANA name or UTC offset string; return None if it is not usable."""
        # Check for offset formats: "-7", "+5", "-07:00", "+05:30", etc.
        match = re.match(r"^([+-]?)(\d{1,2})(?::(\d{2}))?$", tz_str.strip())

        if match:
            sign_str, hours_str, minutes_str = match.groups()
            total_minutes = int(hours_str) * 60 + (int(minutes_str) if minutes_str else 0)
            if sign_str == "-":
                total_minutes = -total_minutes

            try:
                return datetime.timezone(datetime.timedelta(minutes=total_minutes))
            except ValueError:
                # Offset magnitude of 24 hours or more is rejected by `datetime.timezone`
                return None

        # Try IANA timezone names (zoneinfo is standard in Python 3.9+)
        try:
            return ZoneInfo(tz_str)
        except (KeyError, ValueError):
            # KeyError: unknown zone name; ValueError: malformed key (e.g., an absolute path)
            return None

    def _get_tz_from_string(tz_str: str) -> datetime.tzinfo:
        """Parse a timezone string, falling back to UTC when it cannot be interpreted."""
        parsed = _parse_tz(tz_str)
        return datetime.timezone.utc if parsed is None else parsed

    # Determine the reference time; when none was supplied, pick "now" in a form that matches
    # the timezone awareness of the data
    ref_time = reference_time
    if ref_time is None:
        if timezone:
            # A user-specified timezone always yields an aware "now" in that zone
            ref_time = datetime.datetime.now(_get_tz_from_string(timezone))
        elif max_dt_aware:
            # Default to UTC when data is aware but no timezone specified
            ref_time = datetime.datetime.now(datetime.timezone.utc)
        else:
            # No timezone specified and data is naive -> use naive local time
            ref_time = datetime.datetime.now()

    result["reference_time"] = ref_time
    ref_dt_aware = _is_datetime_aware(ref_time)

    # Track timezone warnings - use keys for translation lookup
    if max_dt_aware != ref_dt_aware and not allow_tz_mismatch:
        if max_dt_aware:
            result["tz_warning_key"] = "data_freshness_tz_warning_aware_naive"
        else:
            result["tz_warning_key"] = "data_freshness_tz_warning_naive_aware"

    # Make both comparable
    try:
        if max_dt_aware and not ref_dt_aware:
            # Attach a timezone to the naive reference time: the requested zone, or UTC when
            # none was given or it could not be parsed
            ref_tz = _get_tz_from_string(timezone) if timezone else datetime.timezone.utc
            ref_time = ref_time.replace(tzinfo=ref_tz)

        elif ref_dt_aware and not max_dt_aware:
            # Localize the max_datetime if we have a usable timezone; otherwise drop the
            # timezone from the reference so both values are naive
            data_tz = _parse_tz(timezone) if timezone else None
            if data_tz is not None:
                max_datetime = max_datetime.replace(tzinfo=data_tz)
            else:
                ref_time = ref_time.replace(tzinfo=None)

        # Calculate the age
        age = ref_time - max_datetime
        result["age"] = age
        result["reference_time"] = ref_time

        # Check if within max_age
        result["passed"] = age <= max_age

    except Exception as e:
        result["error"] = str(e)
        result["passed"] = False

    return result
|
|
3190
|
+
|
|
3191
|
+
|
|
3192
|
+
def _is_datetime_aware(dt: Any) -> bool:
|
|
3193
|
+
"""Check if a datetime object is timezone-aware."""
|
|
3194
|
+
if dt is None:
|
|
3195
|
+
return False
|
|
3196
|
+
if hasattr(dt, "tzinfo"):
|
|
3197
|
+
return dt.tzinfo is not None and dt.tzinfo.utcoffset(dt) is not None
|
|
3198
|
+
return False
|
pointblank/_utils_llms_txt.py
CHANGED
|
@@ -139,6 +139,7 @@ def _get_api_text() -> str:
|
|
|
139
139
|
"Validate.rows_complete",
|
|
140
140
|
"Validate.col_exists",
|
|
141
141
|
"Validate.col_pct_null",
|
|
142
|
+
"Validate.data_freshness",
|
|
142
143
|
"Validate.col_schema_match",
|
|
143
144
|
"Validate.row_count_match",
|
|
144
145
|
"Validate.col_count_match",
|
|
@@ -214,6 +215,18 @@ def _get_api_text() -> str:
|
|
|
214
215
|
"config",
|
|
215
216
|
]
|
|
216
217
|
|
|
218
|
+
test_data_generation_exported = [
|
|
219
|
+
"generate_dataset",
|
|
220
|
+
"int_field",
|
|
221
|
+
"float_field",
|
|
222
|
+
"string_field",
|
|
223
|
+
"bool_field",
|
|
224
|
+
"date_field",
|
|
225
|
+
"datetime_field",
|
|
226
|
+
"time_field",
|
|
227
|
+
"duration_field",
|
|
228
|
+
]
|
|
229
|
+
|
|
217
230
|
prebuilt_actions_exported = [
|
|
218
231
|
"send_slack_notification",
|
|
219
232
|
]
|
|
@@ -266,6 +279,10 @@ columns or rows in a table. The `get_action_metadata()` function is useful when
|
|
|
266
279
|
actions since it returns metadata about the validation step that's triggering the action. Lastly,
|
|
267
280
|
the `config()` utility lets us set global configuration parameters."""
|
|
268
281
|
|
|
282
|
+
test_data_generation_desc = """Generate synthetic test data based on schema definitions. Use
|
|
283
|
+
`generate_dataset()` to create data from a `Schema` object. The helper functions define typed fields
|
|
284
|
+
with constraints for realistic test data generation."""
|
|
285
|
+
|
|
269
286
|
prebuilt_actions_desc = """The Prebuilt Actions group contains a function that can be used to
|
|
270
287
|
send a Slack notification when validation steps exceed failure threshold levels or just to provide a
|
|
271
288
|
summary of the validation results, including the status, number of steps, passing and failing steps,
|
|
@@ -299,6 +316,9 @@ table information, and timing details."""
|
|
|
299
316
|
api_text += f"""\n## The Utility Functions family\n\n{utility_desc}\n\n"""
|
|
300
317
|
api_text += get_api_details(module=pointblank, exported_list=utility_exported)
|
|
301
318
|
|
|
319
|
+
api_text += f"""\n## The Test Data Generation family\n\n{test_data_generation_desc}\n\n"""
|
|
320
|
+
api_text += get_api_details(module=pointblank, exported_list=test_data_generation_exported)
|
|
321
|
+
|
|
302
322
|
api_text += f"""\n## The Prebuilt Actions family\n\n{prebuilt_actions_desc}\n\n"""
|
|
303
323
|
api_text += get_api_details(module=pointblank, exported_list=prebuilt_actions_exported)
|
|
304
324
|
|