pointblank 0.18.0__py3-none-any.whl → 0.19.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,6 +4,7 @@ import functools
4
4
  from collections.abc import Callable
5
5
  from dataclasses import dataclass
6
6
  from typing import TYPE_CHECKING, Any
7
+ from zoneinfo import ZoneInfo
7
8
 
8
9
  import narwhals as nw
9
10
  from narwhals.dependencies import (
@@ -2992,3 +2993,206 @@ def interrogate_prompt(
2992
2993
  result_tbl["pb_is_good_"] = validation_results
2993
2994
 
2994
2995
  return result_tbl
2996
+
2997
+
2998
def data_freshness(
    data_tbl: IntoFrame,
    column: str,
    max_age: Any,  # datetime.timedelta
    reference_time: Any | None,  # datetime.datetime | None
    timezone: str | None,
    allow_tz_mismatch: bool,
) -> dict:
    """
    Check if the most recent datetime value in a column is within the allowed max_age.

    Parameters
    ----------
    data_tbl
        The data table to check. Lazy frames are collected before the check.
    column
        The datetime column to check.
    max_age
        The maximum allowed age as a timedelta.
    reference_time
        The reference time to compare against (None = use current time).
    timezone
        The timezone to use for interpretation. Either an IANA name (e.g.,
        `"America/Denver"`) or a UTC offset such as `"-7"`, `"+5"`, `"-07:00"` or `"+05:30"`.
        A value that cannot be resolved is treated as UTC when a timezone has to be attached
        to the reference time; it is ignored when it would have been used to localize naive
        column data (the reference time is made naive instead).
    allow_tz_mismatch
        Whether to suppress timezone mismatch warnings.

    Returns
    -------
    dict
        A dictionary containing:
        - 'passed': bool, whether the validation passed
        - 'max_datetime': the maximum datetime found in the column
        - 'reference_time': the reference time used
        - 'age': the calculated age (timedelta)
        - 'max_age': the maximum allowed age
        - 'tz_warning': initialized to None (not populated by this function)
        - 'column_empty': bool, True when the column maximum is None
        - 'tz_warning_key': translation key for a timezone mismatch warning; only present
          when the data and the reference time differ in timezone awareness and
          `allow_tz_mismatch` is False
        - 'error': the exception message; only present when reading the column or computing
          the age failed
    """
    import datetime
    import re

    nw_frame = nw.from_native(data_tbl)

    # Handle LazyFrames by collecting them first
    if is_narwhals_lazyframe(nw_frame):
        nw_frame = nw_frame.collect()

    assert is_narwhals_dataframe(nw_frame)

    result = {
        "passed": False,
        "max_datetime": None,
        "reference_time": None,
        "age": None,
        "max_age": max_age,
        "tz_warning": None,
        "column_empty": False,
    }

    # Get the maximum datetime value from the column
    try:
        # Use narwhals to get max value
        max_val_result = nw_frame.select(nw.col(column).max())
        max_datetime_raw = max_val_result.item()

        if max_datetime_raw is None:
            result["column_empty"] = True
            result["passed"] = False
            return result

        # Convert to Python datetime if needed
        if hasattr(max_datetime_raw, "to_pydatetime"):
            # Pandas Timestamp
            max_datetime = max_datetime_raw.to_pydatetime()
        elif hasattr(max_datetime_raw, "isoformat"):
            # Already a datetime-like object
            max_datetime = max_datetime_raw
        else:
            # Try to parse as string or handle other types
            max_datetime = datetime.datetime.fromisoformat(str(max_datetime_raw))

        result["max_datetime"] = max_datetime

    except Exception as e:
        # Best-effort: report the failure in the result instead of raising
        result["error"] = str(e)
        result["passed"] = False
        return result

    utc = datetime.timezone.utc

    # Helper to parse timezone string (supports IANA names and offsets like "-7", "-07:00").
    # Returns None when the string cannot be resolved so that callers choose the fallback.
    def _parse_tz_string(tz_str: str) -> datetime.tzinfo | None:
        # Check for offset formats: "-7", "+5", "-07:00", "+05:30", etc.
        offset_pattern = r"^([+-]?)(\d{1,2})(?::(\d{2}))?$"
        match = re.match(offset_pattern, tz_str.strip())

        if match:
            sign_str, hours_str, minutes_str = match.groups()
            total_minutes = int(hours_str) * 60 + (int(minutes_str) if minutes_str else 0)
            if sign_str == "-":
                total_minutes = -total_minutes

            try:
                return datetime.timezone(datetime.timedelta(minutes=total_minutes))
            except ValueError:
                # Offsets of 24 hours or more are rejected by `datetime.timezone`
                return None

        # Try IANA timezone names (zoneinfo is standard in Python 3.9+)
        try:
            return ZoneInfo(tz_str)
        except (KeyError, ValueError, OSError):
            # Unknown name (KeyError), malformed key (ValueError), or a key that resolves
            # to something unreadable on disk (OSError)
            return None

    # Resolve the user's timezone once so that every step below interprets it the same way
    # (offset strings included); None means "not given" or "not resolvable"
    user_tz = _parse_tz_string(timezone) if timezone else None

    # Handle timezone awareness/naivete
    max_dt_aware = _is_datetime_aware(max_datetime)

    # Determine the reference time
    if reference_time is not None:
        ref_time = reference_time
    elif timezone:
        # A timezone was specified: use an aware "now" in it (UTC if it is not resolvable)
        ref_time = datetime.datetime.now(user_tz if user_tz is not None else utc)
    elif max_dt_aware:
        # Default to UTC when data is aware but no timezone specified
        ref_time = datetime.datetime.now(utc)
    else:
        # No timezone specified and data is naive -> use naive local time
        ref_time = datetime.datetime.now()

    result["reference_time"] = ref_time
    ref_dt_aware = _is_datetime_aware(ref_time)

    # Track timezone warnings - use keys for translation lookup
    if max_dt_aware != ref_dt_aware and not allow_tz_mismatch:
        if max_dt_aware:
            result["tz_warning_key"] = "data_freshness_tz_warning_aware_naive"
        else:
            result["tz_warning_key"] = "data_freshness_tz_warning_naive_aware"

    # Make both comparable
    try:
        if max_dt_aware and not ref_dt_aware:
            # Add timezone to reference time (assume UTC when none is usable)
            ref_time = ref_time.replace(tzinfo=user_tz if user_tz is not None else utc)

        elif not max_dt_aware and ref_dt_aware:
            if user_tz is not None:
                # Localize the max_datetime since we have a timezone
                max_datetime = max_datetime.replace(tzinfo=user_tz)
            else:
                # Remove timezone from reference for comparison
                ref_time = ref_time.replace(tzinfo=None)

        # Calculate the age
        age = ref_time - max_datetime
        result["age"] = age
        result["reference_time"] = ref_time

        # Check if within max_age
        result["passed"] = age <= max_age

    except Exception as e:
        # Best-effort: incompatible types (e.g., a date-only maximum) are reported, not raised
        result["error"] = str(e)
        result["passed"] = False

    return result
3190
+
3191
+
3192
def _is_datetime_aware(dt: Any) -> bool:
    """
    Report whether `dt` is a timezone-aware datetime.

    A value counts as aware only when it exposes a non-None `tzinfo` and that `tzinfo`
    yields a non-None UTC offset for it. `None`, and objects without a `tzinfo`
    attribute, are reported as not aware.
    """
    tz = getattr(dt, "tzinfo", None)

    # Covers None input, objects lacking `tzinfo`, and naive datetimes alike
    if tz is None:
        return False

    return tz.utcoffset(dt) is not None