nrcd 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nrcd/__init__.py ADDED
@@ -0,0 +1,28 @@
1
+ """National Running Club Database — performance standardization library."""
2
+
3
+ from nrcd.standardize import (
4
+ PARAMETERS_DOC,
5
+ RaceContext,
6
+ XCRaceContext,
7
+ standardize_indoor_track,
8
+ standardize_outdoor_track,
9
+ standardize_result,
10
+ standardize_road,
11
+ standardize_seconds,
12
+ standardize_xc,
13
+ )
14
+
15
+ __version__ = "0.1.0"
16
+
17
+ __all__ = [
18
+ "PARAMETERS_DOC",
19
+ "RaceContext",
20
+ "XCRaceContext",
21
+ "standardize_indoor_track",
22
+ "standardize_outdoor_track",
23
+ "standardize_result",
24
+ "standardize_road",
25
+ "standardize_seconds",
26
+ "standardize_xc",
27
+ "__version__",
28
+ ]
nrcd/data/__init__.py ADDED
@@ -0,0 +1,16 @@
1
+ """NRCD CSV column names and derived field helpers.
2
+
3
+ Requires ``pip install "nrcd[data]"`` (pandas).
4
+ """
5
+
6
+ from nrcd.data.schema import (
7
+ derive_course_details_fields,
8
+ meet_altitude_column,
9
+ meet_altitude_ft_from_record,
10
+ )
11
+
12
+ __all__ = [
13
+ "derive_course_details_fields",
14
+ "meet_altitude_column",
15
+ "meet_altitude_ft_from_record",
16
+ ]
nrcd/data/schema.py ADDED
@@ -0,0 +1,94 @@
1
+ """NRCD export column resolution and derived ``course_details`` fields."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import math
6
+ from typing import Any, Mapping
7
+
8
+ from nrcd.standardize.factors import heat_index
9
+
10
+
11
+ def _is_na(value: Any) -> bool:
12
+ if value is None:
13
+ return True
14
+ if isinstance(value, float) and math.isnan(value):
15
+ return True
16
+ try:
17
+ import pandas as pd
18
+
19
+ return bool(pd.isna(value))
20
+ except ImportError:
21
+ return False
22
+
23
+
24
def meet_altitude_column(df: Any) -> str:
    """Pick the altitude column name present in a meet table.

    ``altitude`` wins when both names exist; the legacy ``elevation`` name is
    the fallback.

    Raises
    ------
    TypeError
        If *df* is not a ``pandas.DataFrame``.
    KeyError
        If neither column is present.
    """
    import pandas as pd

    if not isinstance(df, pd.DataFrame):
        raise TypeError("meet_altitude_column expects a pandas DataFrame")

    available = df.columns
    for candidate in ("altitude", "elevation"):
        if candidate in available:
            return candidate
    raise KeyError("meet table missing altitude/elevation column")
35
+
36
+
37
+ def _finite_altitude_ft(value: Any) -> float | None:
38
+ if value is None:
39
+ return None
40
+ try:
41
+ z = float(value)
42
+ except (TypeError, ValueError):
43
+ return None
44
+ if not math.isfinite(z) or z < 0:
45
+ return None
46
+ return z
47
+
48
+
49
def meet_altitude_ft_from_record(
    row: Mapping[str, Any] | Any,
    course_details: Mapping[str, Any] | None = None,
) -> float | None:
    """Meet venue altitude (ft) from merged result row or ``course_details.altitude``.

    Lookup order:

    1. ``row["altitude"]``, then ``row["elevation"]`` (only when *row* has a
       ``get`` method).
    2. If that is missing, ``course_details`` keys ``altitude``,
       ``meet_elevation``, ``elevation`` — the first one holding a usable
       value wins.

    Parameters
    ----------
    row
        Result row; anything without ``.get`` is ignored.
    course_details
        Optional mapping consulted only when *row* supplies no altitude.

    Returns
    -------
    float or None
        The altitude as a finite, non-negative float, or ``None`` when no
        candidate qualifies.
    """
    elev = None
    if hasattr(row, "get"):
        elev = row.get("altitude")
        if _is_na(elev):
            elev = row.get("elevation")

    if _is_na(elev) and course_details:
        # Test each candidate for "missing" explicitly. A truthiness chain
        # (``a or b``) would discard a genuine altitude of 0 (sea level) and
        # would let a NaN in an earlier key hide a valid later key.
        for key in ("altitude", "meet_elevation", "elevation"):
            candidate = course_details.get(key)
            if _is_na(candidate):
                continue
            if isinstance(candidate, str) and not candidate.strip():
                # Blank text cells carry no value; keep looking.
                continue
            elev = candidate
            break

    return _finite_altitude_ft(elev)
65
+
66
+
67
def derive_course_details_fields(record: Mapping[str, Any]) -> dict[str, Any]:
    """Compute analysis fields not stored on ``course_details`` export rows.

    Parameters
    ----------
    record
        Mapping read via ``.get`` for ``temperature``, ``dew_point``,
        ``openweather_dt_unix``, ``sunrise_unix`` and ``sunset_unix``.

    Returns
    -------
    dict
        ``heat_index_f`` when ``heat_index`` returns a value, plus
        ``is_daylight``, ``minutes_after_sunrise`` and
        ``minutes_before_sunset`` when all three timestamps are present and
        convert to ``int``. Keys whose inputs are missing or unusable are
        simply omitted.
    """
    out: dict[str, Any] = {}

    h = heat_index(record.get("temperature"), record.get("dew_point"))
    if h is not None:
        out["heat_index_f"] = h

    stamps = [
        record.get(key)
        for key in ("openweather_dt_unix", "sunrise_unix", "sunset_unix")
    ]
    if any(stamp is None for stamp in stamps):
        return out

    try:
        race_u, rise_u, set_u = (int(stamp) for stamp in stamps)
    except (TypeError, ValueError, OverflowError):
        # int() raises ValueError for NaN and OverflowError for +/-inf; either
        # way the timestamps are unusable, so skip the daylight fields.
        return out

    out["is_daylight"] = rise_u <= race_u <= set_u
    # Clamp at zero: a race before sunrise / after sunset reports 0 minutes.
    out["minutes_after_sunrise"] = max(0.0, (race_u - rise_u) / 60.0)
    out["minutes_before_sunset"] = max(0.0, (set_u - race_u) / 60.0)
    return out
@@ -0,0 +1,53 @@
1
+ """Fetch meet altitude and course weather from external APIs (optional).
2
+
3
+ Requires ``pip install nrcd[apis]`` (installs ``requests``).
4
+
5
+ **Meet altitude** (city/state): OpenWeather geocodes; **USGS EPQS** returns feet.
6
+ OpenWeather does **not** supply altitude — only weather, AQI, and coordinates.
7
+
8
+ API signup: :data:`nrcd.enrich.API_GUIDE`.
9
+ """
10
+
11
+ from nrcd.enrich.altitude import (
12
+ AltitudeResult,
13
+ lookup_altitude_detail,
14
+ lookup_altitude_ft,
15
+ lookup_elevation_ft,
16
+ )
17
+ from nrcd.enrich.api_usage import (
18
+ AQI_HISTORY_AVAILABLE_FROM,
19
+ AQI_HISTORY_AVAILABLE_UNIX,
20
+ ApiUsage,
21
+ EnrichResult,
22
+ )
23
+ from nrcd.enrich.batch import EnrichJob, JobResult, run_enrich_jobs
24
+ from nrcd.enrich.cache import cache_stats, clear_enrich_cache
25
+ from nrcd.enrich.config import EnrichConfig, api_keys_from_env
26
+ from nrcd.enrich.context import enrich_race_context, enrich_race_context_result
27
+ from nrcd.enrich.guide import API_GUIDE
28
+ from nrcd.enrich.throttle import reset_throttle_state
29
+ from nrcd.enrich.weather import WeatherData, fetch_weather
30
+
31
+ __all__ = [
32
+ "API_GUIDE",
33
+ "AQI_HISTORY_AVAILABLE_FROM",
34
+ "AQI_HISTORY_AVAILABLE_UNIX",
35
+ "AltitudeResult",
36
+ "ApiUsage",
37
+ "EnrichConfig",
38
+ "EnrichResult",
39
+ "WeatherData",
40
+ "api_keys_from_env",
41
+ "EnrichJob",
42
+ "JobResult",
43
+ "cache_stats",
44
+ "clear_enrich_cache",
45
+ "enrich_race_context",
46
+ "enrich_race_context_result",
47
+ "run_enrich_jobs",
48
+ "fetch_weather",
49
+ "lookup_altitude_ft",
50
+ "lookup_altitude_detail",
51
+ "lookup_elevation_ft",
52
+ "reset_throttle_state",
53
+ ]
@@ -0,0 +1,138 @@
1
+ """Meet **altitude** (venue elevation) from US city/state.
2
+
3
+ OpenWeather is used **only to geocode** city/state → lat/lon. Terrain **altitude in feet**
4
+ comes from the free USGS EPQS service (not OpenWeather).
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from dataclasses import dataclass
10
+
11
+ from nrcd.enrich.api_usage import ApiUsage
12
+ from nrcd.enrich.cache import altitude_cache_key, get_or_fetch
13
+ from nrcd.enrich.config import EnrichConfig
14
+ from nrcd.enrich.geocode import geocode_us_city_state
15
+ from nrcd.enrich.http import get_with_retries
16
+ from nrcd.enrich.throttle import wait_for_provider
17
+
18
+
19
@dataclass(frozen=True)
class AltitudeResult:
    """Immutable record describing one venue altitude lookup."""

    # Altitude in feet, stored as a whole number.
    altitude_ft: int
    # Coordinates the altitude was looked up for.
    lat: float
    lon: float
    # City/state labels carried along with the lookup.
    city: str
    state: str
28
+
29
+
30
def _altitude_from_coords(
    lat: float,
    lon: float,
    city: str,
    state: str,
    *,
    cfg: EnrichConfig,
    usage: ApiUsage | None = None,
) -> AltitudeResult | None:
    """Ask USGS EPQS for the altitude (feet) at *lat*/*lon*.

    Parameters
    ----------
    lat, lon
        Coordinates sent to the service (``y`` and ``x`` respectively).
    city, state
        Copied unchanged onto the returned :class:`AltitudeResult`.
    cfg
        Supplies ``usgs_min_interval_sec`` and ``http_retries``.
    usage
        When given, one ``usgs_epqs`` call is recorded before the request.

    Returns
    -------
    AltitudeResult or None
        ``None`` when the response carries no usable ``value``: missing,
        non-numeric, non-finite, or the service's no-data sentinel.

    Raises
    ------
    Exception
        Whatever ``response.raise_for_status()`` raises for an HTTP error.
    """
    # EPQS reports "no elevation at this point" as -1000000 rather than
    # omitting the field; it must not be stored as a real altitude.
    # NOTE(review): sentinel taken from the USGS EPQS docs — confirm for v1.
    no_data_ft = -1_000_000

    wait_for_provider("usgs", cfg.usgs_min_interval_sec)
    if usage is not None:
        usage.record("usgs_epqs")
    url = (
        "https://epqs.nationalmap.gov/v1/json"
        f"?x={lon}&y={lat}&units=Feet&includeDate=false"
    )
    # NOTE(review): timeout is fixed at 10 s here; cfg.http_timeout_sec is not
    # consulted — confirm whether that is intentional.
    response = get_with_retries(url, timeout=10.0, retries=cfg.http_retries)
    response.raise_for_status()
    data = response.json()
    value = data.get("value")
    if value is None:
        return None
    try:
        altitude_ft = int(round(float(value)))
    except (TypeError, ValueError, OverflowError):
        # Non-numeric text, NaN or infinity: nothing usable to report.
        return None
    if altitude_ft <= no_data_ft:
        return None
    return AltitudeResult(
        altitude_ft=altitude_ft,
        lat=lat,
        lon=lon,
        city=city,
        state=state,
    )
59
+
60
+
61
def lookup_altitude_ft(
    city: str,
    state: str,
    *,
    config: EnrichConfig | None = None,
    openweather_api_key: str | None = None,
    use_cache: bool | None = None,
    lat: float | None = None,
    lon: float | None = None,
    usage: ApiUsage | None = None,
) -> int | None:
    """Meet altitude in feet for a US city/state (NRCD ``meet.altitude`` column).

    Forwards every argument to :func:`lookup_altitude_detail` and returns only
    the ``altitude_ft`` of its result, or ``None`` when that lookup returns
    ``None``.
    """
    detail = lookup_altitude_detail(
        city,
        state,
        config=config,
        openweather_api_key=openweather_api_key,
        use_cache=use_cache,
        lat=lat,
        lon=lon,
        usage=usage,
    )
    if detail is None:
        return None
    return detail.altitude_ft
84
+
85
+
86
def lookup_altitude_detail(
    city: str,
    state: str,
    *,
    config: EnrichConfig | None = None,
    openweather_api_key: str | None = None,
    use_cache: bool | None = None,
    lat: float | None = None,
    lon: float | None = None,
    usage: ApiUsage | None = None,
) -> AltitudeResult | None:
    """Look up venue altitude and return the full :class:`AltitudeResult`.

    Parameters
    ----------
    city, state
        Venue location; surrounding whitespace is stripped and ``None`` is
        treated as empty.
    config
        Settings to use; a default ``EnrichConfig()`` when omitted.
    openweather_api_key
        When non-empty, overrides ``config.openweather_api_key`` on a copy of
        the config (the caller's object is not modified).
    use_cache
        ``None`` defers to ``config.cache_enabled``.
    lat, lon
        When both are given the altitude is fetched for these coordinates
        directly, skipping geocoding and the altitude cache.
    usage
        Optional call counter passed through to the lookups.

    Returns
    -------
    AltitudeResult or None
        ``None`` when city or state is blank (and no coordinates were given),
        when geocoding finds nothing, or when no altitude is available.
    """
    from dataclasses import replace

    cfg = config or EnrichConfig()
    if openweather_api_key:
        # replace() copies every field, so fields added to EnrichConfig later
        # cannot be dropped here by accident.
        cfg = replace(cfg, openweather_api_key=openweather_api_key)

    city = (city or "").strip()
    state = (state or "").strip()
    if lat is not None and lon is not None:
        return _altitude_from_coords(lat, lon, city, state, cfg=cfg, usage=usage)
    if not city or not state:
        return None

    cache_on = cfg.cache_enabled if use_cache is None else use_cache
    cache_key = altitude_cache_key(city, state, cfg.geocode_country_suffix)

    def fetch() -> AltitudeResult | None:
        # Cache-miss path: geocode first, then ask for the altitude there.
        coords = geocode_us_city_state(
            city, state, config=cfg, use_cache=cache_on, usage=usage
        )
        if coords is None:
            return None
        lat_v, lon_v = coords
        return _altitude_from_coords(lat_v, lon_v, city, state, cfg=cfg, usage=usage)

    return get_or_fetch(cache_key, fetch, ttl_sec=cfg.altitude_ttl_sec, enabled=cache_on)
134
+
135
+
136
# Elevation-named aliases: each is the same object as its altitude-named
# counterpart, just reachable under the alternate name.
lookup_elevation_ft = lookup_altitude_ft
lookup_elevation_detail = lookup_altitude_detail
ElevationResult = AltitudeResult
@@ -0,0 +1,67 @@
1
+ """Count outbound HTTP calls during enrich lookups (cache misses only)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import datetime as dt
6
+ from dataclasses import dataclass, field
7
+
8
# OpenWeather Air Pollution history: earliest date per provider docs.
AQI_HISTORY_AVAILABLE_FROM = dt.date(2020, 11, 27)
# Midnight UTC at the start of that date, as Unix seconds.
AQI_HISTORY_AVAILABLE_UNIX = int(
    dt.datetime.combine(
        AQI_HISTORY_AVAILABLE_FROM, dt.time.min, tzinfo=dt.timezone.utc
    ).timestamp()
)
13
+
14
+
15
# Names of the counter fields on ``ApiUsage``, in reporting order. This tuple
# is the single source of truth for what counts as a counter.
_USAGE_FIELDS = (
    "openweather_geocode",
    "openweather_timemachine",
    "openweather_aqi",
    "timezonedb",
    "usgs_epqs",
)


@dataclass
class ApiUsage:
    """HTTP calls made during one enrich operation (not cache hits)."""

    openweather_geocode: int = 0
    """City/state → lat/lon (OpenWeather Geocoding API)."""
    openweather_timemachine: int = 0
    """Historical weather for the race hour (One Call 3.0 timemachine)."""
    openweather_aqi: int = 0
    """Historical air pollution for the race hour (may retry up to 3 times)."""
    timezonedb: int = 0
    """Lat/lon → IANA timezone (local race time → Unix)."""
    usgs_epqs: int = 0
    """Terrain altitude in feet (USGS EPQS; free, no API key)."""

    def record(self, name: str, count: int = 1) -> None:
        """Add *count* calls to the counter called *name*.

        Raises
        ------
        ValueError
            If *name* is not one of the counter fields, or *count* is not
            positive.
        """
        # Check against the counter list rather than hasattr(): methods and
        # the ``total`` property are attributes too, but are not counters.
        if name not in _USAGE_FIELDS:
            raise ValueError(f"unknown api usage field: {name}")
        if count <= 0:
            raise ValueError(f"api usage count must be positive, got {count}")
        setattr(self, name, getattr(self, name) + count)

    def add(self, other: ApiUsage) -> None:
        """Add every counter of *other* into this instance, in place."""
        for fname in _USAGE_FIELDS:
            setattr(self, fname, getattr(self, fname) + getattr(other, fname))

    @property
    def total(self) -> int:
        """Sum of all counters."""
        return sum(getattr(self, f) for f in _USAGE_FIELDS)

    def to_dict(self) -> dict[str, int]:
        """Return the counters as a dict, plus a ``total`` entry."""
        out = {f: getattr(self, f) for f in _USAGE_FIELDS}
        out["total"] = self.total
        return out

    @classmethod
    def from_dict(cls, data: dict[str, int]) -> ApiUsage:
        """Build an instance from *data*.

        Missing counters default to 0, and keys that are not counters (such
        as ``total``) are ignored.
        """
        return cls(**{f: int(data.get(f, 0)) for f in _USAGE_FIELDS})
60
+
61
+
62
@dataclass
class EnrichResult:
    """Race context after API enrichment plus call accounting."""

    # The enriched context; typed loosely as ``object`` here.
    context: object
    # Call counters for this result; a fresh ``ApiUsage`` when not supplied.
    api_usage: ApiUsage = field(default_factory=ApiUsage)
nrcd/enrich/batch.py ADDED
@@ -0,0 +1,86 @@
1
+ """Run many enrich API jobs; optional parallelism with per-item results."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import traceback
6
+ from concurrent.futures import ThreadPoolExecutor, as_completed
7
+ from dataclasses import dataclass
8
+ from typing import Callable, Generic, Iterable, TypeVar
9
+
10
T = TypeVar("T")
R = TypeVar("R")


@dataclass(frozen=True)
class JobResult(Generic[R]):
    """Outcome of one job: the value on success, an error string on failure."""

    job_id: str
    ok: bool
    value: R | None = None
    error: str | None = None


@dataclass(frozen=True)
class EnrichJob(Generic[R]):
    """One unit of work (e.g. one meet altitude or one course weather row)."""

    job_id: str
    run: Callable[[], R]

    def execute(self) -> JobResult[R]:
        """Call ``run`` and wrap the outcome; never raises ``Exception``.

        A raised ``Exception`` becomes a failed result whose ``error`` reads
        ``"<ExceptionType>: <message>"``.
        """
        try:
            produced = self.run()
        except Exception as exc:
            failure = f"{type(exc).__name__}: {exc}"
            return JobResult(job_id=self.job_id, ok=False, error=failure)
        return JobResult(job_id=self.job_id, ok=True, value=produced)
38
+
39
+
40
def run_enrich_jobs(
    jobs: Iterable[EnrichJob[R]],
    *,
    parallel: int = 1,
    on_result: Callable[[JobResult[R]], None] | None = None,
) -> list[JobResult[R]]:
    """Execute jobs; process each result as it finishes (parallel or sequential).

    Parameters
    ----------
    jobs
        Jobs to run; the iterable is consumed once up front.
    parallel
        ``1`` (or less) runs one job at a time, in input order. ``>1`` runs
        the jobs on a thread pool of that size; results then arrive in
        completion order.
    on_result
        Called with each result as soon as it is available, from the calling
        thread.

    Returns
    -------
    list
        One result per job, in the order they were produced.
    """
    pending = list(jobs)
    if not pending:
        return []

    collected: list[JobResult[R]] = []

    def _deliver(outcome: JobResult[R]) -> None:
        collected.append(outcome)
        if on_result is not None:
            on_result(outcome)

    if parallel <= 1:
        for job in pending:
            _deliver(job.execute())
        return collected

    with ThreadPoolExecutor(max_workers=parallel) as pool:
        job_id_by_future = {}
        for job in pending:
            job_id_by_future[pool.submit(job.execute)] = job.job_id

        for finished in as_completed(job_id_by_future):
            try:
                outcome = finished.result()
            except Exception as exc:
                # Safety net for anything that escapes the job's own
                # error handling; include the traceback for diagnosis.
                outcome = JobResult(
                    job_id=job_id_by_future[finished],
                    ok=False,
                    error=f"{type(exc).__name__}: {exc}\n{traceback.format_exc()}",
                )
            _deliver(outcome)

    return collected
nrcd/enrich/cache.py ADDED
@@ -0,0 +1,97 @@
1
+ """In-memory TTL cache for enrich API responses (backfill-style deduplication)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import datetime as dt
6
+ import threading
7
+ import time
8
+ from typing import Any, Callable, TypeVar
9
+
10
+ T = TypeVar("T")
11
+
12
+ _lock = threading.Lock()
13
+ _store: dict[str, tuple[Any, float | None]] = {}
14
+ _stats = {"hits": 0, "misses": 0}
15
+
16
+
17
+ def _normalize_city_state(city: str, state: str) -> tuple[str, str]:
18
+ return (city or "").strip().lower(), (state or "").strip().lower()
19
+
20
+
21
def geocode_cache_key(city: str, state: str, country: str = "US") -> str:
    """Cache key for a geocode lookup: ``geocode:<city>:<state>:<COUNTRY>``."""
    city_norm, state_norm = _normalize_city_state(city, state)
    return ":".join(("geocode", city_norm, state_norm, country.upper()))
24
+
25
+
26
def altitude_cache_key(city: str, state: str, country: str = "US") -> str:
    """Cache key for an altitude lookup: ``altitude:<city>:<state>:<COUNTRY>``."""
    city_norm, state_norm = _normalize_city_state(city, state)
    return ":".join(("altitude", city_norm, state_norm, country.upper()))
29
+
30
+
31
def timezone_cache_key(lat: float, lon: float) -> str:
    """Cache key for a timezone lookup, with coordinates rounded to 4 decimals."""
    rounded = (round(lat, 4), round(lon, 4))
    return "tz:" + ":".join(str(part) for part in rounded)
33
+
34
+
35
def weather_cache_key(
    city: str,
    state: str,
    event_date: dt.date,
    event_time: dt.time,
    country: str = "US",
) -> str:
    """Cache key for a weather lookup.

    Format: ``weather:<city>:<state>:<COUNTRY>:<date ISO>:<time ISO>``.
    """
    city_norm, state_norm = _normalize_city_state(city, state)
    pieces = (
        "weather",
        city_norm,
        state_norm,
        country.upper(),
        event_date.isoformat(),
        event_time.isoformat(),
    )
    return ":".join(pieces)
44
+
45
+
46
def get_cached(key: str) -> Any | None:
    """Return cached value if present and not expired.

    An expired entry is removed and reported as ``None``. Each successful
    lookup increments the ``hits`` counter.
    """
    checked_at = time.time()
    with _lock:
        try:
            value, expires_at = _store[key]
        except KeyError:
            return None
        expired = expires_at is not None and checked_at >= expires_at
        if expired:
            del _store[key]
            return None
        _stats["hits"] += 1
        return value
59
+
60
+
61
def set_cached(key: str, value: Any, ttl_sec: float | None) -> None:
    """Store *value* under *key*; ``ttl_sec=None`` means the entry never expires."""
    if ttl_sec is None:
        deadline = None
    else:
        deadline = time.time() + ttl_sec
    with _lock:
        _store[key] = (value, deadline)
65
+
66
+
67
def get_or_fetch(
    key: str,
    fetch: Callable[[], T],
    *,
    ttl_sec: float | None,
    enabled: bool = True,
) -> T:
    """Return cached value or call ``fetch``, store, and return.

    With ``enabled=False`` the cache is neither read nor written and
    ``fetch`` is always called. A ``None`` result from ``fetch`` is returned
    but not stored.
    """
    if not enabled:
        return fetch()

    hit = get_cached(key)
    if hit is not None:
        return hit

    with _lock:
        _stats["misses"] += 1
    fresh = fetch()
    if fresh is not None:
        set_cached(key, fresh, ttl_sec)
    return fresh
85
+
86
+
87
def clear_enrich_cache() -> None:
    """Drop all cached enrich responses (tests / manual refresh).

    Also resets the hit and miss counters to zero.
    """
    with _lock:
        _store.clear()
        _stats.update(hits=0, misses=0)
93
+
94
+
95
def cache_stats() -> dict[str, int]:
    """Return a snapshot with ``hits``, ``misses`` and the current ``entries`` count."""
    with _lock:
        snapshot = {name: _stats[name] for name in ("hits", "misses")}
        snapshot["entries"] = len(_store)
    return snapshot
nrcd/enrich/config.py ADDED
@@ -0,0 +1,37 @@
1
+ """API keys for enrichment (pass explicitly or via environment)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ from dataclasses import dataclass
7
+
8
+
9
@dataclass
class EnrichConfig:
    """Credentials and tuning knobs for :mod:`nrcd.enrich`."""

    # API credentials; ``None`` means "not configured".
    openweather_api_key: str | None = None
    timezone_api_key: str | None = None

    # Geocoding and HTTP behaviour.
    geocode_country_suffix: str = "US"
    http_timeout_sec: float = 20.0
    http_retries: int = 3

    # In-memory TTL cache (NRCD backfill dedupes by city/state per batch).
    # TTLs are in seconds: 7 days, 30 days, 365 days and 1 day respectively.
    cache_enabled: bool = True
    geocode_ttl_sec: float = 7 * 86400
    altitude_ttl_sec: float = 30 * 86400
    timezone_ttl_sec: float = 365 * 86400
    weather_ttl_sec: float = 86400

    # Provider spacing (seconds between HTTP calls, per process).
    timezone_min_interval_sec: float = 1.5  # TimeZoneDB free tier (~1 req/s)
    openweather_min_interval_sec: float = 0.0
    usgs_min_interval_sec: float = 0.0
30
+
31
+
32
def api_keys_from_env() -> EnrichConfig:
    """Read ``NRCD_OPENWEATHER_API_KEY`` and ``NRCD_TIMEZONE_API_KEY``.

    An unset or empty variable yields ``None`` for that key; every other
    setting keeps its default.
    """
    env = os.environ
    openweather_key = env.get("NRCD_OPENWEATHER_API_KEY") or None
    timezone_key = env.get("NRCD_TIMEZONE_API_KEY") or None
    return EnrichConfig(
        openweather_api_key=openweather_key,
        timezone_api_key=timezone_key,
    )