nrcd 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nrcd/__init__.py +28 -0
- nrcd/data/__init__.py +16 -0
- nrcd/data/schema.py +94 -0
- nrcd/enrich/__init__.py +53 -0
- nrcd/enrich/altitude.py +138 -0
- nrcd/enrich/api_usage.py +67 -0
- nrcd/enrich/batch.py +86 -0
- nrcd/enrich/cache.py +97 -0
- nrcd/enrich/config.py +37 -0
- nrcd/enrich/context.py +163 -0
- nrcd/enrich/geocode.py +63 -0
- nrcd/enrich/guide.py +81 -0
- nrcd/enrich/http.py +28 -0
- nrcd/enrich/throttle.py +32 -0
- nrcd/enrich/timezone_lookup.py +59 -0
- nrcd/enrich/weather.py +296 -0
- nrcd/py.typed +0 -0
- nrcd/standardize/__init__.py +156 -0
- nrcd/standardize/altitude.py +283 -0
- nrcd/standardize/config.py +58 -0
- nrcd/standardize/context.py +187 -0
- nrcd/standardize/events.py +92 -0
- nrcd/standardize/factors.py +94 -0
- nrcd/standardize/grade.py +80 -0
- nrcd/standardize/pipeline.py +502 -0
- nrcd/standardize/reference.py +340 -0
- nrcd/standardize/sport.py +50 -0
- nrcd/standardize/time.py +65 -0
- nrcd/standardize/track.py +179 -0
- nrcd/standardize/units.py +164 -0
- nrcd/standardize/validation.py +53 -0
- nrcd/standardize/wind.py +132 -0
- nrcd-0.1.0.dist-info/METADATA +342 -0
- nrcd-0.1.0.dist-info/RECORD +36 -0
- nrcd-0.1.0.dist-info/WHEEL +4 -0
- nrcd-0.1.0.dist-info/licenses/LICENSE +21 -0
nrcd/__init__.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""National Running Club Database — performance standardization library."""
|
|
2
|
+
|
|
3
|
+
from nrcd.standardize import (
|
|
4
|
+
PARAMETERS_DOC,
|
|
5
|
+
RaceContext,
|
|
6
|
+
XCRaceContext,
|
|
7
|
+
standardize_indoor_track,
|
|
8
|
+
standardize_outdoor_track,
|
|
9
|
+
standardize_result,
|
|
10
|
+
standardize_road,
|
|
11
|
+
standardize_seconds,
|
|
12
|
+
standardize_xc,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
__version__ = "0.1.0"
|
|
16
|
+
|
|
17
|
+
__all__ = [
|
|
18
|
+
"PARAMETERS_DOC",
|
|
19
|
+
"RaceContext",
|
|
20
|
+
"XCRaceContext",
|
|
21
|
+
"standardize_indoor_track",
|
|
22
|
+
"standardize_outdoor_track",
|
|
23
|
+
"standardize_result",
|
|
24
|
+
"standardize_road",
|
|
25
|
+
"standardize_seconds",
|
|
26
|
+
"standardize_xc",
|
|
27
|
+
"__version__",
|
|
28
|
+
]
|
nrcd/data/__init__.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""NRCD CSV column names and derived field helpers.
|
|
2
|
+
|
|
3
|
+
Requires ``pip install "nrcd[data]"`` (pandas).
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from nrcd.data.schema import (
|
|
7
|
+
derive_course_details_fields,
|
|
8
|
+
meet_altitude_column,
|
|
9
|
+
meet_altitude_ft_from_record,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"derive_course_details_fields",
|
|
14
|
+
"meet_altitude_column",
|
|
15
|
+
"meet_altitude_ft_from_record",
|
|
16
|
+
]
|
nrcd/data/schema.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
"""NRCD export column resolution and derived ``course_details`` fields."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import math
|
|
6
|
+
from typing import Any, Mapping
|
|
7
|
+
|
|
8
|
+
from nrcd.standardize.factors import heat_index
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _is_na(value: Any) -> bool:
|
|
12
|
+
if value is None:
|
|
13
|
+
return True
|
|
14
|
+
if isinstance(value, float) and math.isnan(value):
|
|
15
|
+
return True
|
|
16
|
+
try:
|
|
17
|
+
import pandas as pd
|
|
18
|
+
|
|
19
|
+
return bool(pd.isna(value))
|
|
20
|
+
except ImportError:
|
|
21
|
+
return False
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def meet_altitude_column(df: Any) -> str:
    """Pick the altitude column of a meet table.

    Returns ``"altitude"`` when that column exists, otherwise the legacy
    ``"elevation"`` name.

    Raises:
        TypeError: *df* is not a pandas DataFrame.
        KeyError: neither column is present.
    """
    import pandas as pd

    if not isinstance(df, pd.DataFrame):
        raise TypeError("meet_altitude_column expects a pandas DataFrame")
    # Preferred name first, legacy name second.
    for candidate in ("altitude", "elevation"):
        if candidate in df.columns:
            return candidate
    raise KeyError("meet table missing altitude/elevation column")
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _finite_altitude_ft(value: Any) -> float | None:
|
|
38
|
+
if value is None:
|
|
39
|
+
return None
|
|
40
|
+
try:
|
|
41
|
+
z = float(value)
|
|
42
|
+
except (TypeError, ValueError):
|
|
43
|
+
return None
|
|
44
|
+
if not math.isfinite(z) or z < 0:
|
|
45
|
+
return None
|
|
46
|
+
return z
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def meet_altitude_ft_from_record(
    row: Mapping[str, Any] | Any,
    course_details: Mapping[str, Any] | None = None,
) -> float | None:
    """Meet venue altitude (ft) from merged result row or ``course_details``.

    Lookup order:

    1. ``row["altitude"]``, then ``row["elevation"]`` (only when *row* has a
       ``get`` method).
    2. If the row gave nothing, ``course_details`` keys ``altitude``,
       ``meet_elevation``, ``elevation`` — the first one that is present.

    A value counts as missing when ``_is_na`` says so; in ``course_details``
    blank strings are skipped as well. An altitude of ``0`` is a real value
    and is returned as ``0.0`` rather than being skipped as falsy.

    Returns:
        The altitude as validated by ``_finite_altitude_ft`` (``None`` when no
        usable value was found).
    """
    elev = None
    if hasattr(row, "get"):
        elev = row.get("altitude")
        if _is_na(elev):
            elev = row.get("elevation")

    if _is_na(elev) and course_details:
        for key in ("altitude", "meet_elevation", "elevation"):
            candidate = course_details.get(key)
            # Blank strings keep falling through to the next key; 0 must not.
            blank = isinstance(candidate, str) and not candidate.strip()
            if not (_is_na(candidate) or blank):
                elev = candidate
                break

    return _finite_altitude_ft(elev)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def derive_course_details_fields(record: Mapping[str, Any]) -> dict[str, Any]:
    """Compute analysis fields not stored on ``course_details`` export rows.

    Args:
        record: Mapping read with ``.get`` for the keys ``temperature``,
            ``dew_point``, ``openweather_dt_unix``, ``sunrise_unix`` and
            ``sunset_unix``.

    Returns:
        A new dict containing, when derivable:

        * ``heat_index_f`` — result of ``heat_index(temperature, dew_point)``
          when that is not ``None``.
        * ``is_daylight`` — whether the race timestamp lies within
          ``[sunrise, sunset]`` (inclusive).
        * ``minutes_after_sunrise`` / ``minutes_before_sunset`` — offsets in
          minutes, clamped at ``0.0``.

        The three daylight fields are emitted together, and only when all
        three timestamps are present and convertible to ``int``.
    """
    out: dict[str, Any] = {}

    h = heat_index(record.get("temperature"), record.get("dew_point"))
    if h is not None:
        out["heat_index_f"] = h

    raw_times = (
        record.get("openweather_dt_unix"),
        record.get("sunrise_unix"),
        record.get("sunset_unix"),
    )
    if any(raw is None for raw in raw_times):
        return out
    try:
        race_u, rise_u, set_u = (int(raw) for raw in raw_times)
    except (TypeError, ValueError, OverflowError):
        # int(nan) raises ValueError and int(inf) raises OverflowError; in
        # either case the daylight fields are not derivable.
        return out

    out["is_daylight"] = rise_u <= race_u <= set_u
    out["minutes_after_sunrise"] = max(0.0, (race_u - rise_u) / 60.0)
    out["minutes_before_sunset"] = max(0.0, (set_u - race_u) / 60.0)
    return out
|
nrcd/enrich/__init__.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
"""Fetch meet altitude and course weather from external APIs (optional).
|
|
2
|
+
|
|
3
|
+
Requires ``pip install nrcd[apis]`` (installs ``requests``).
|
|
4
|
+
|
|
5
|
+
**Meet altitude** (city/state): OpenWeather geocodes; **USGS EPQS** returns feet.
|
|
6
|
+
OpenWeather does **not** supply altitude — only weather, AQI, and coordinates.
|
|
7
|
+
|
|
8
|
+
API signup: :data:`nrcd.enrich.API_GUIDE`.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from nrcd.enrich.altitude import (
|
|
12
|
+
AltitudeResult,
|
|
13
|
+
lookup_altitude_detail,
|
|
14
|
+
lookup_altitude_ft,
|
|
15
|
+
lookup_elevation_ft,
|
|
16
|
+
)
|
|
17
|
+
from nrcd.enrich.api_usage import (
|
|
18
|
+
AQI_HISTORY_AVAILABLE_FROM,
|
|
19
|
+
AQI_HISTORY_AVAILABLE_UNIX,
|
|
20
|
+
ApiUsage,
|
|
21
|
+
EnrichResult,
|
|
22
|
+
)
|
|
23
|
+
from nrcd.enrich.batch import EnrichJob, JobResult, run_enrich_jobs
|
|
24
|
+
from nrcd.enrich.cache import cache_stats, clear_enrich_cache
|
|
25
|
+
from nrcd.enrich.config import EnrichConfig, api_keys_from_env
|
|
26
|
+
from nrcd.enrich.context import enrich_race_context, enrich_race_context_result
|
|
27
|
+
from nrcd.enrich.guide import API_GUIDE
|
|
28
|
+
from nrcd.enrich.throttle import reset_throttle_state
|
|
29
|
+
from nrcd.enrich.weather import WeatherData, fetch_weather
|
|
30
|
+
|
|
31
|
+
__all__ = [
|
|
32
|
+
"API_GUIDE",
|
|
33
|
+
"AQI_HISTORY_AVAILABLE_FROM",
|
|
34
|
+
"AQI_HISTORY_AVAILABLE_UNIX",
|
|
35
|
+
"AltitudeResult",
|
|
36
|
+
"ApiUsage",
|
|
37
|
+
"EnrichConfig",
|
|
38
|
+
"EnrichResult",
|
|
39
|
+
"WeatherData",
|
|
40
|
+
"api_keys_from_env",
|
|
41
|
+
"EnrichJob",
|
|
42
|
+
"JobResult",
|
|
43
|
+
"cache_stats",
|
|
44
|
+
"clear_enrich_cache",
|
|
45
|
+
"enrich_race_context",
|
|
46
|
+
"enrich_race_context_result",
|
|
47
|
+
"run_enrich_jobs",
|
|
48
|
+
"fetch_weather",
|
|
49
|
+
"lookup_altitude_ft",
|
|
50
|
+
"lookup_altitude_detail",
|
|
51
|
+
"lookup_elevation_ft",
|
|
52
|
+
"reset_throttle_state",
|
|
53
|
+
]
|
nrcd/enrich/altitude.py
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
"""Meet **altitude** (venue elevation) from US city/state.
|
|
2
|
+
|
|
3
|
+
OpenWeather is used **only to geocode** city/state → lat/lon. Terrain **altitude in feet**
|
|
4
|
+
comes from the free USGS EPQS service (not OpenWeather).
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
|
|
11
|
+
from nrcd.enrich.api_usage import ApiUsage
|
|
12
|
+
from nrcd.enrich.cache import altitude_cache_key, get_or_fetch
|
|
13
|
+
from nrcd.enrich.config import EnrichConfig
|
|
14
|
+
from nrcd.enrich.geocode import geocode_us_city_state
|
|
15
|
+
from nrcd.enrich.http import get_with_retries
|
|
16
|
+
from nrcd.enrich.throttle import wait_for_provider
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass(frozen=True)
class AltitudeResult:
    """Immutable record of one venue altitude lookup.

    Attributes:
        altitude_ft: Altitude in feet, rounded to a whole number.
        lat: Latitude the altitude was looked up for.
        lon: Longitude the altitude was looked up for.
        city: City name passed to the lookup.
        state: State passed to the lookup.
    """

    altitude_ft: int
    lat: float
    lon: float
    city: str
    state: str
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _altitude_from_coords(
    lat: float,
    lon: float,
    city: str,
    state: str,
    *,
    cfg: EnrichConfig,
    usage: ApiUsage | None = None,
) -> AltitudeResult | None:
    """Ask USGS EPQS for the altitude (feet) at *lat*/*lon*.

    Args:
        lat: Latitude, sent as the ``y`` query parameter.
        lon: Longitude, sent as the ``x`` query parameter.
        city: Copied onto the result unchanged.
        state: Copied onto the result unchanged.
        cfg: Supplies the USGS throttle interval and the HTTP retry count.
        usage: Optional call counter; ``usgs_epqs`` is bumped once per call.

    Returns:
        An ``AltitudeResult`` with the altitude rounded to whole feet, or
        ``None`` when the response carries no usable numeric ``value``.

    Raises:
        Whatever ``get_with_retries``, ``raise_for_status`` or ``json`` raise
        for transport, HTTP-status or decoding failures.
    """
    import math  # local import: this module has no top-level ``math``

    wait_for_provider("usgs", cfg.usgs_min_interval_sec)
    if usage is not None:
        usage.record("usgs_epqs")
    url = (
        "https://epqs.nationalmap.gov/v1/json"
        f"?x={lon}&y={lat}&units=Feet&includeDate=false"
    )
    response = get_with_retries(url, timeout=10.0, retries=cfg.http_retries)
    response.raise_for_status()
    data = response.json()
    value = data.get("value") if isinstance(data, dict) else None
    if value is None:
        return None
    try:
        feet = float(value)
    except (TypeError, ValueError):
        # A non-numeric payload means "no altitude", not a crash.
        return None
    # NOTE(review): EPQS is understood to answer -1000000 for points without
    # elevation data; confirm against the current service documentation.
    if not math.isfinite(feet) or feet <= -1_000_000.0:
        return None
    return AltitudeResult(
        altitude_ft=int(round(feet)),
        lat=lat,
        lon=lon,
        city=city,
        state=state,
    )
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def lookup_altitude_ft(
    city: str,
    state: str,
    *,
    config: EnrichConfig | None = None,
    openweather_api_key: str | None = None,
    use_cache: bool | None = None,
    lat: float | None = None,
    lon: float | None = None,
    usage: ApiUsage | None = None,
) -> int | None:
    """Meet altitude in feet for a US city/state (NRCD ``meet.altitude`` column).

    Thin wrapper over ``lookup_altitude_detail``: every argument is forwarded
    unchanged and only the ``altitude_ft`` of the result is returned, or
    ``None`` when the detailed lookup found nothing.
    """
    detail = lookup_altitude_detail(
        city,
        state,
        config=config,
        openweather_api_key=openweather_api_key,
        use_cache=use_cache,
        lat=lat,
        lon=lon,
        usage=usage,
    )
    if detail is None:
        return None
    return detail.altitude_ft
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def lookup_altitude_detail(
    city: str,
    state: str,
    *,
    config: EnrichConfig | None = None,
    openweather_api_key: str | None = None,
    use_cache: bool | None = None,
    lat: float | None = None,
    lon: float | None = None,
    usage: ApiUsage | None = None,
) -> AltitudeResult | None:
    """Look up venue altitude with coordinates and place names attached.

    Args:
        city: City name; surrounding whitespace is stripped.
        state: State; surrounding whitespace is stripped.
        config: Settings to use; a default ``EnrichConfig`` when omitted.
        openweather_api_key: When non-empty, overrides the key in *config*
            (the caller's config object is not modified).
        use_cache: Overrides ``config.cache_enabled`` when not ``None``.
        lat: Known latitude. When both *lat* and *lon* are given, geocoding
            and the altitude cache are skipped.
        lon: Known longitude; see *lat*.
        usage: Optional call counter passed on to the underlying lookups.

    Returns:
        The ``AltitudeResult``, or ``None`` when city/state is blank (and no
        coordinates were given), geocoding finds nothing, or no altitude is
        available.
    """
    # Local import: the module-level import only brings in ``dataclass``.
    from dataclasses import replace

    cfg = config or EnrichConfig()
    if openweather_api_key:
        # ``replace`` copies every field, including ones added to
        # EnrichConfig later, and swaps only the API key.
        cfg = replace(cfg, openweather_api_key=openweather_api_key)

    city = (city or "").strip()
    state = (state or "").strip()
    if lat is not None and lon is not None:
        return _altitude_from_coords(lat, lon, city, state, cfg=cfg, usage=usage)
    if not city or not state:
        return None

    cache_on = cfg.cache_enabled if use_cache is None else use_cache
    cache_key = altitude_cache_key(city, state, cfg.geocode_country_suffix)

    def fetch():
        # Cache miss path: geocode the place, then query altitude there.
        coords = geocode_us_city_state(
            city, state, config=cfg, use_cache=cache_on, usage=usage
        )
        if coords is None:
            return None
        lat_v, lon_v = coords
        return _altitude_from_coords(lat_v, lon_v, city, state, cfg=cfg, usage=usage)

    return get_or_fetch(cache_key, fetch, ttl_sec=cfg.altitude_ttl_sec, enabled=cache_on)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
# "elevation"-named aliases bound to the same objects as the "altitude" API.
# NOTE(review): presumably kept for backward compatibility with the older
# "elevation" naming — confirm before removing.
lookup_elevation_ft = lookup_altitude_ft
lookup_elevation_detail = lookup_altitude_detail
ElevationResult = AltitudeResult
|
nrcd/enrich/api_usage.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"""Count outbound HTTP calls during enrich lookups (cache misses only)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import datetime as dt
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
|
|
8
|
+
# Earliest date covered by OpenWeather Air Pollution history (per provider docs).
AQI_HISTORY_AVAILABLE_FROM = dt.date(2020, 11, 27)
# The same cutoff as a Unix timestamp: midnight UTC at the start of that date.
AQI_HISTORY_AVAILABLE_UNIX = int(
    dt.datetime.combine(
        AQI_HISTORY_AVAILABLE_FROM, dt.time.min, tzinfo=dt.timezone.utc
    ).timestamp()
)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class ApiUsage:
    """HTTP calls made during one enrich operation (not cache hits).

    One integer counter per provider endpoint. The counter names are listed
    in ``_USAGE_FIELDS``, which drives ``record``, ``add``, ``total``,
    ``to_dict`` and ``from_dict``.
    """

    openweather_geocode: int = 0
    """City/state → lat/lon (OpenWeather Geocoding API)."""
    openweather_timemachine: int = 0
    """Historical weather for the race hour (One Call 3.0 timemachine)."""
    openweather_aqi: int = 0
    """Historical air pollution for the race hour (may retry up to 3 times)."""
    timezonedb: int = 0
    """Lat/lon → IANA timezone (local race time → Unix)."""
    usgs_epqs: int = 0
    """Terrain altitude in feet (USGS EPQS; free, no API key)."""

    def record(self, name: str, count: int = 1) -> None:
        """Add *count* calls to the counter called *name*.

        Raises:
            ValueError: *name* is not a counter field, or *count* is not
                positive.
        """
        # Check against the counter list, not hasattr(): methods and the
        # ``total`` property are attributes too but are not counters.
        if name not in _USAGE_FIELDS:
            raise ValueError(f"unknown api usage field: {name}")
        if count <= 0:
            raise ValueError(f"api usage count must be positive, got {count}")
        setattr(self, name, getattr(self, name) + count)

    def add(self, other: ApiUsage) -> None:
        """Add every counter of *other* into this instance, in place."""
        for fname in _USAGE_FIELDS:
            setattr(self, fname, getattr(self, fname) + getattr(other, fname))

    @property
    def total(self) -> int:
        """Sum of all counters."""
        return sum(getattr(self, f) for f in _USAGE_FIELDS)

    def to_dict(self) -> dict[str, int]:
        """Return the counters as a dict, plus a ``"total"`` entry."""
        out = {f: getattr(self, f) for f in _USAGE_FIELDS}
        out["total"] = self.total
        return out

    @classmethod
    def from_dict(cls, data: dict[str, int]) -> ApiUsage:
        """Build an instance from *data*; missing counters default to 0.

        Keys that are not counters (such as ``"total"``) are ignored.
        """
        return cls(**{f: int(data.get(f, 0)) for f in _USAGE_FIELDS})


# Names of the counter fields on ApiUsage. Looked up when the methods run,
# so defining it after the class is fine.
_USAGE_FIELDS = (
    "openweather_geocode",
    "openweather_timemachine",
    "openweather_aqi",
    "timezonedb",
    "usgs_epqs",
)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@dataclass
class EnrichResult:
    """Enriched race context bundled with the API calls it cost.

    Attributes:
        context: The race context produced by enrichment (typed ``object``).
        api_usage: Call counters; a fresh, all-zero ``ApiUsage`` by default.
    """

    context: object
    api_usage: ApiUsage = field(default_factory=ApiUsage)
|
nrcd/enrich/batch.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""Run many enrich API jobs; optional parallelism with per-item results."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import traceback
|
|
6
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from typing import Callable, Generic, Iterable, TypeVar
|
|
9
|
+
|
|
10
|
+
T = TypeVar("T")
|
|
11
|
+
R = TypeVar("R")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass(frozen=True)
class JobResult(Generic[R]):
    """Immutable outcome of a single job.

    Attributes:
        job_id: Identifier of the job that produced this result.
        ok: True on success, False when the job raised.
        value: The job's return value; ``None`` unless set.
        error: Error description; ``None`` unless set.
    """

    job_id: str
    ok: bool
    value: R | None = None
    error: str | None = None
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass(frozen=True)
class EnrichJob(Generic[R]):
    """A single unit of enrich work (e.g. one meet altitude or one weather row).

    Attributes:
        job_id: Identifier echoed back on the resulting ``JobResult``.
        run: Zero-argument callable that performs the work.
    """

    job_id: str
    run: Callable[[], R]

    def execute(self) -> JobResult[R]:
        """Call ``run`` and wrap the outcome; exceptions become failed results.

        Any ``Exception`` is captured as ``"<ExceptionType>: <message>"`` in
        ``JobResult.error`` instead of propagating.
        """
        try:
            outcome = self.run()
        except Exception as exc:
            message = f"{type(exc).__name__}: {exc}"
            return JobResult(job_id=self.job_id, ok=False, error=message)
        return JobResult(job_id=self.job_id, ok=True, value=outcome)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def run_enrich_jobs(
    jobs: Iterable[EnrichJob[R]],
    *,
    parallel: int = 1,
    on_result: Callable[[JobResult[R]], None] | None = None,
) -> list[JobResult[R]]:
    """Run every job and collect the results in completion order.

    Parameters
    ----------
    jobs
        Jobs to run; the iterable is consumed once up front.
    parallel
        ``1`` (or less) runs the jobs one after another in input order.
        Larger values run them on a thread pool with that many workers, so
        the result order follows completion, not input.
    on_result
        Optional callback invoked from the calling thread with each result
        right after it is collected.

    Returns
    -------
    list
        One result per job; empty when there are no jobs.
    """
    pending = list(jobs)
    collected: list[JobResult[R]] = []
    if not pending:
        return collected

    def _deliver(outcome: JobResult[R]) -> None:
        collected.append(outcome)
        if on_result is not None:
            on_result(outcome)

    if parallel > 1:
        with ThreadPoolExecutor(max_workers=parallel) as executor:
            ids_by_future = {}
            for job in pending:
                submitted = executor.submit(job.execute)
                ids_by_future[submitted] = job.job_id
            for finished in as_completed(ids_by_future):
                try:
                    outcome = finished.result()
                except Exception as exc:
                    # Fallback for a future that raised instead of returning
                    # a result object.
                    outcome = JobResult(
                        job_id=ids_by_future[finished],
                        ok=False,
                        error=f"{type(exc).__name__}: {exc}\n{traceback.format_exc()}",
                    )
                _deliver(outcome)
        return collected

    for job in pending:
        _deliver(job.execute())
    return collected
|
nrcd/enrich/cache.py
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
"""In-memory TTL cache for enrich API responses (backfill-style deduplication)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import datetime as dt
|
|
6
|
+
import threading
|
|
7
|
+
import time
|
|
8
|
+
from typing import Any, Callable, TypeVar
|
|
9
|
+
|
|
10
|
+
T = TypeVar("T")
|
|
11
|
+
|
|
12
|
+
# Lock taken by the cache functions below while they touch _store / _stats.
_lock = threading.Lock()
# key -> (value, expires_at); expires_at is None for entries that never expire.
_store: dict[str, tuple[Any, float | None]] = {}
# Hit/miss counters reported by cache_stats() and reset by clear_enrich_cache().
_stats = {"hits": 0, "misses": 0}
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _normalize_city_state(city: str, state: str) -> tuple[str, str]:
|
|
18
|
+
return (city or "").strip().lower(), (state or "").strip().lower()
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def geocode_cache_key(city: str, state: str, country: str = "US") -> str:
    """Build the cache key ``geocode:<city>:<state>:<COUNTRY>``.

    City and state are normalised (stripped, lower-cased); the country is
    upper-cased.
    """
    norm_city, norm_state = _normalize_city_state(city, state)
    return ":".join(("geocode", norm_city, norm_state, country.upper()))
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def altitude_cache_key(city: str, state: str, country: str = "US") -> str:
    """Build the cache key ``altitude:<city>:<state>:<COUNTRY>``.

    City and state are normalised (stripped, lower-cased); the country is
    upper-cased.
    """
    norm_city, norm_state = _normalize_city_state(city, state)
    country_code = country.upper()
    return "altitude:" + norm_city + ":" + norm_state + ":" + country_code
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def timezone_cache_key(lat: float, lon: float) -> str:
    """Build the cache key ``tz:<lat>:<lon>`` with both values rounded to 4 decimals."""
    lat_r = round(lat, 4)
    lon_r = round(lon, 4)
    return "tz:{}:{}".format(lat_r, lon_r)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def weather_cache_key(
    city: str,
    state: str,
    event_date: dt.date,
    event_time: dt.time,
    country: str = "US",
) -> str:
    """Build the cache key ``weather:<city>:<state>:<COUNTRY>:<date>:<time>``.

    City and state are normalised (stripped, lower-cased), the country is
    upper-cased, and the date and time use their ISO-format strings.
    """
    norm_city, norm_state = _normalize_city_state(city, state)
    pieces = (
        "weather",
        norm_city,
        norm_state,
        country.upper(),
        event_date.isoformat(),
        event_time.isoformat(),
    )
    return ":".join(pieces)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def get_cached(key: str) -> Any | None:
    """Look up *key*; return its value, or None when absent or expired.

    An expired entry is deleted on the way out. A successful lookup
    increments the ``hits`` counter. The clock is read before the lock is
    taken.
    """
    checked_at = time.time()
    with _lock:
        try:
            payload, deadline = _store[key]
        except KeyError:
            return None
        if deadline is not None and checked_at >= deadline:
            del _store[key]
            return None
        _stats["hits"] += 1
        return payload
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def set_cached(key: str, value: Any, ttl_sec: float | None) -> None:
    """Store *value* under *key*.

    With ``ttl_sec=None`` the entry never expires; otherwise it expires
    *ttl_sec* seconds after the time of this call.
    """
    if ttl_sec is None:
        deadline = None
    else:
        deadline = time.time() + ttl_sec
    with _lock:
        _store[key] = (value, deadline)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def get_or_fetch(
    key: str,
    fetch: Callable[[], T],
    *,
    ttl_sec: float | None,
    enabled: bool = True,
) -> T:
    """Return the cached value for *key*, or call *fetch* and cache its result.

    With ``enabled=False`` the cache is neither read nor written. A ``None``
    result from *fetch* is returned but never stored, so it is fetched again
    on the next call.
    """
    if enabled:
        hit = get_cached(key)
        if hit is not None:
            return hit
    # NOTE(review): the miss counter is bumped here whether or not caching is
    # enabled; the source's indentation was lost, so confirm this placement.
    with _lock:
        _stats["misses"] = _stats["misses"] + 1
    fresh = fetch()
    if fresh is None or not enabled:
        return fresh
    set_cached(key, fresh, ttl_sec)
    return fresh
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def clear_enrich_cache() -> None:
    """Empty the cache and zero the hit/miss counters (tests / manual refresh)."""
    with _lock:
        _store.clear()
        _stats.update(hits=0, misses=0)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def cache_stats() -> dict[str, int]:
    """Return a snapshot with ``hits``, ``misses`` and the current ``entries`` count."""
    with _lock:
        hits = _stats["hits"]
        misses = _stats["misses"]
        size = len(_store)
    return {"hits": hits, "misses": misses, "entries": size}
|
nrcd/enrich/config.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""API keys for enrichment (pass explicitly or via environment)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class EnrichConfig:
    """Settings for :mod:`nrcd.enrich`: credentials, HTTP, caching, throttling.

    All fields have defaults, so ``EnrichConfig()`` is a valid configuration
    with no API keys. TTLs and intervals are in seconds.
    """

    # --- Credentials -----------------------------------------------------
    openweather_api_key: str | None = None
    timezone_api_key: str | None = None

    # --- Geocoding / HTTP ------------------------------------------------
    geocode_country_suffix: str = "US"
    http_timeout_sec: float = 20.0
    http_retries: int = 3

    # --- In-memory TTL cache (NRCD backfill dedupes by city/state per batch)
    cache_enabled: bool = True
    geocode_ttl_sec: float = 7 * 86400  # 7 days
    altitude_ttl_sec: float = 30 * 86400  # 30 days
    timezone_ttl_sec: float = 365 * 86400  # 365 days
    weather_ttl_sec: float = 86400  # 1 day

    # --- Provider spacing (seconds between HTTP calls, per process) ------
    timezone_min_interval_sec: float = 1.5  # TimeZoneDB free tier (~1 req/s)
    openweather_min_interval_sec: float = 0.0
    usgs_min_interval_sec: float = 0.0
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def api_keys_from_env() -> EnrichConfig:
    """Build an ``EnrichConfig`` from environment variables.

    Reads ``NRCD_OPENWEATHER_API_KEY`` and ``NRCD_TIMEZONE_API_KEY``; an
    unset or empty variable yields ``None`` for that key. All other settings
    keep their defaults.
    """

    def _env_or_none(name: str) -> str | None:
        raw = os.environ.get(name)
        return raw if raw else None

    return EnrichConfig(
        openweather_api_key=_env_or_none("NRCD_OPENWEATHER_API_KEY"),
        timezone_api_key=_env_or_none("NRCD_TIMEZONE_API_KEY"),
    )
|