mostlyrightmd-weather 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mostlyright/weather/__init__.py +36 -0
- mostlyright/weather/_awc.py +347 -0
- mostlyright/weather/_climate.py +186 -0
- mostlyright/weather/_fetchers/__init__.py +20 -0
- mostlyright/weather/_fetchers/_hafs_storms.py +149 -0
- mostlyright/weather/_fetchers/_iem_chunks.py +75 -0
- mostlyright/weather/_fetchers/_iem_mos.py +302 -0
- mostlyright/weather/_fetchers/_msc_archive.py +202 -0
- mostlyright/weather/_fetchers/_nwp_archive.py +818 -0
- mostlyright/weather/_fetchers/_nwp_cycle_chunks.py +238 -0
- mostlyright/weather/_fetchers/_nwp_extract.py +224 -0
- mostlyright/weather/_fetchers/_nwp_grids/__init__.py +126 -0
- mostlyright/weather/_fetchers/_nwp_grids/cfs.py +25 -0
- mostlyright/weather/_fetchers/_nwp_grids/ecmwf_aifs.py +25 -0
- mostlyright/weather/_fetchers/_nwp_grids/ecmwf_ifs.py +30 -0
- mostlyright/weather/_fetchers/_nwp_grids/gdas.py +17 -0
- mostlyright/weather/_fetchers/_nwp_grids/gdps.py +21 -0
- mostlyright/weather/_fetchers/_nwp_grids/gefs.py +23 -0
- mostlyright/weather/_fetchers/_nwp_grids/geps.py +26 -0
- mostlyright/weather/_fetchers/_nwp_grids/gfs.py +32 -0
- mostlyright/weather/_fetchers/_nwp_grids/hafs.py +25 -0
- mostlyright/weather/_fetchers/_nwp_grids/hiresw.py +33 -0
- mostlyright/weather/_fetchers/_nwp_grids/hrdps.py +19 -0
- mostlyright/weather/_fetchers/_nwp_grids/href.py +22 -0
- mostlyright/weather/_fetchers/_nwp_grids/hrrr.py +39 -0
- mostlyright/weather/_fetchers/_nwp_grids/hrrrak.py +19 -0
- mostlyright/weather/_fetchers/_nwp_grids/nam.py +33 -0
- mostlyright/weather/_fetchers/_nwp_grids/nbm.py +31 -0
- mostlyright/weather/_fetchers/_nwp_grids/rap.py +19 -0
- mostlyright/weather/_fetchers/_nwp_grids/rdps.py +17 -0
- mostlyright/weather/_fetchers/_nwp_grids/reps.py +20 -0
- mostlyright/weather/_fetchers/_nwp_grids/rrfs.py +22 -0
- mostlyright/weather/_fetchers/_nwp_grids/rtma.py +17 -0
- mostlyright/weather/_fetchers/_nwp_grids/urma.py +17 -0
- mostlyright/weather/_fetchers/_nwp_idx.py +302 -0
- mostlyright/weather/_fetchers/_url_transitions.py +48 -0
- mostlyright/weather/_fetchers/awc.py +142 -0
- mostlyright/weather/_fetchers/ghcnh.py +169 -0
- mostlyright/weather/_fetchers/iem_asos.py +255 -0
- mostlyright/weather/_fetchers/iem_cli.py +194 -0
- mostlyright/weather/_ghcnh.py +348 -0
- mostlyright/weather/_iem.py +278 -0
- mostlyright/weather/cache.py +468 -0
- mostlyright/weather/catalog/__init__.py +96 -0
- mostlyright/weather/catalog/_obs_projection.py +192 -0
- mostlyright/weather/catalog/awc.py +82 -0
- mostlyright/weather/catalog/cli.py +239 -0
- mostlyright/weather/catalog/ghcnh.py +84 -0
- mostlyright/weather/catalog/iem.py +138 -0
- mostlyright/weather/forecast_nwp.py +1009 -0
- mostlyright/weather/obs.py +369 -0
- mostlyright/weather/qc/__init__.py +7 -0
- mostlyright/weather/qc/rules_nwp.py +398 -0
- mostlyright/weather/qc_sidecar.py +98 -0
- mostlyrightmd_weather-0.1.2.dist-info/METADATA +40 -0
- mostlyrightmd_weather-0.1.2.dist-info/RECORD +57 -0
- mostlyrightmd_weather-0.1.2.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""mostlyright.weather — direct public-API access for AWC, IEM, GHCNh, NWS CLI.
|
|
2
|
+
|
|
3
|
+
Local-first; no hosted backend; no API keys. Parsers are byte-faithful lifts
|
|
4
|
+
from ``monorepo-v0.14.1``; HTTP fetchers and the parquet cache are net-new
|
|
5
|
+
Sprint 0 code so the SDK can run without the v0.14.1 ingest service.
|
|
6
|
+
|
|
7
|
+
Lift inventory (provenance for parity-critical code). Source SHA refers to
|
|
8
|
+
the v0.14.1 release tag of ``Tarabcak/monorepo`` (commit
|
|
9
|
+
``514fcdab227e845145ca32b989355647466231d9``).
|
|
10
|
+
|
|
11
|
+
| Module | Source path | Source SHA | Lift date | Modifications |
|
|
12
|
+
|-------------------------|--------------------------------------------------------|------------|------------|---------------------------------------------------------------------|
|
|
13
|
+
| _awc.py | monorepo-v0.14.1/src/mostlyright/weather/_awc.py | 514fcda | 2026-05-21 | namespace rename only (imports point at ``mostlyright._internal``) |
|
|
14
|
+
| _iem.py | monorepo-v0.14.1/src/mostlyright/weather/_iem.py | 514fcda | 2026-05-21 | namespace rename only |
|
|
15
|
+
| _climate.py | monorepo-v0.14.1/src/mostlyright/weather/_climate.py | 514fcda | 2026-05-21 | namespace rename only |
|
|
16
|
+
| _ghcnh.py | monorepo-v0.14.1/src/mostlyright/weather/_ghcnh.py | 514fcda | 2026-05-21 | namespace rename only |
|
|
17
|
+
| _fetchers/__init__.py | n/a (NEW) | n/a | 2026-05-21 | NEW (Sprint 0 Wave 1 Lane F) — fetcher package marker |
|
|
18
|
+
| _fetchers/awc.py | n/a (NEW) | n/a | 2026-05-21 | NEW (Sprint 0 Wave 1 Lane F) — historical AWC range fetcher |
|
|
19
|
+
| _fetchers/iem_asos.py | n/a (NEW) | n/a | 2026-05-21 | NEW (Sprint 0 Wave 1 Lane F) — monthly-chunked IEM ASOS METAR fetcher |
|
|
20
|
+
| _fetchers/iem_cli.py | n/a (NEW) | n/a | 2026-05-21 | NEW (Sprint 0 Wave 1 Lane F) — IEM CLI settlement-grade fetcher |
|
|
21
|
+
| _fetchers/ghcnh.py | n/a (NEW) | n/a | 2026-05-21 | NEW (Sprint 0 Wave 1 Lane F) — per-year NCEI GHCNh PSV fetcher |
|
|
22
|
+
| cache.py | n/a (NEW) | n/a | 2026-05-21 | NEW (Sprint 0 Wave 1 Lane F) — local parquet cache, filelock-guarded |
|
|
23
|
+
|
|
24
|
+
``_bounds`` is imported from ``mostlyright._internal`` (lifted there from
|
|
25
|
+
``monorepo-v0.14.1/src/mostlyright/_bounds.py``) — see the parallel lift
|
|
26
|
+
inventory in ``mostlyright._internal.__init__``.
|
|
27
|
+
|
|
28
|
+
Public surface kept stable for Vojtech's existing ``mostlyright==0.14.1``
|
|
29
|
+
workflow: ``raw_metar`` is preserved on observation rows so MetPy re-parse
|
|
30
|
+
keeps working without preprocessing in v0.1.0.
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
from mostlyright.weather.obs import obs as obs # re-export Phase 7 public surface
|
|
34
|
+
|
|
35
|
+
__version__ = "0.1.0rc1"
|
|
36
|
+
__all__ = ["__version__", "obs"]
|
|
@@ -0,0 +1,347 @@
|
|
|
1
|
+
"""AWC METAR transform — maps AWC JSON response to observation schema dict.
|
|
2
|
+
|
|
3
|
+
This is THE shared transform. Both the SDK and ingest worker import it.
|
|
4
|
+
Output dicts validate against specs/observation.json (additionalProperties: false).
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import math
|
|
10
|
+
import re
|
|
11
|
+
from datetime import UTC, datetime
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
from mostlyright._internal._bounds import (
|
|
15
|
+
MAX_RAW_METAR_LEN,
|
|
16
|
+
MAX_VISIBILITY_MILES,
|
|
17
|
+
MAX_WX_CODES_LEN,
|
|
18
|
+
SKY_BASE_MAX_FT,
|
|
19
|
+
SLP_MAX_MB,
|
|
20
|
+
SLP_MIN_MB,
|
|
21
|
+
STATION_CODE_RE,
|
|
22
|
+
TEMP_MAX_C,
|
|
23
|
+
TEMP_MIN_C,
|
|
24
|
+
WIND_DIR_BOUNDS,
|
|
25
|
+
WIND_GUST_MAX,
|
|
26
|
+
WIND_SPEED_MAX,
|
|
27
|
+
bounded_float,
|
|
28
|
+
bounded_float_min,
|
|
29
|
+
bounded_int,
|
|
30
|
+
)
|
|
31
|
+
from mostlyright._internal._convert import celsius_to_fahrenheit, hpa_to_inhg
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def icao_to_station_code(icao: str) -> str:
    """Normalize an ICAO identifier into the station code used by this module.

    The identifier is trimmed and upper-cased. When the result is exactly four
    characters long and begins with ``K``, that leading ``K`` is removed;
    every other identifier is returned as normalized.
    """
    code = icao.strip().upper()
    is_k_prefixed_four = len(code) == 4 and code[0] == "K"
    return code[1:] if is_k_prefixed_four else code
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def parse_awc_visibility(vis: Any) -> float | None:
    """Convert an AWC visibility value to miles.

    Recognized spellings of ``str(vis)``:

    - trailing plus, e.g. ``"10+"`` (the plus sign is dropped);
    - mixed number, e.g. ``"2 1/4"``;
    - simple fraction, e.g. ``"1/2"``, optionally prefixed with ``M``/``m``
      (``"M1/4"``); the prefix is discarded and the fraction itself is used;
    - plain number.

    Returns the value capped at ``MAX_VISIBILITY_MILES``, or ``None`` when the
    input is ``None``, empty, ``"null"``, non-finite, has a zero denominator,
    or cannot be parsed.
    """

    def _finite(*tokens: str) -> list[float] | None:
        # Convert all tokens together: one unparseable or non-finite token
        # rejects the whole group.
        try:
            numbers = [float(token) for token in tokens]
        except (ValueError, OverflowError):
            return None
        if all(math.isfinite(x) for x in numbers):
            return numbers
        return None

    if vis is None:
        return None
    text = str(vis)
    if text in ("", "null"):
        return None

    # "10+" -> 10
    if text.endswith("+"):
        parsed = _finite(text[:-1])
        return None if parsed is None else min(parsed[0], MAX_VISIBILITY_MILES)

    has_slash = "/" in text

    # Mixed number: whole part, one space, then numerator/denominator.
    if has_slash and " " in text:
        whole_text, fraction_text = text.split(" ", 1)
        pieces = fraction_text.split("/")
        if len(pieces) != 2:
            return None
        parsed = _finite(whole_text, pieces[0], pieces[1])
        if parsed is None or parsed[2] == 0:
            return None
        whole, numer, denom = parsed
        return min(whole + numer / denom, MAX_VISIBILITY_MILES)

    # Simple fraction; a leading M ("less than") is dropped so the value
    # reported is the fraction itself.
    if has_slash:
        if text.startswith(("M", "m")):
            text = text[1:]
        pieces = text.split("/")
        if len(pieces) != 2:
            return None
        parsed = _finite(pieces[0], pieces[1])
        if parsed is None or parsed[1] == 0:
            return None
        numer, denom = parsed
        return min(numer / denom, MAX_VISIBILITY_MILES)

    # Plain number
    parsed = _finite(text)
    return None if parsed is None else min(parsed[0], MAX_VISIBILITY_MILES)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def map_cloud_cover(cover: str | None) -> str | None:
    """Map an AWC cloud cover code to its standard abbreviation.

    The comparison is case-insensitive. ``CLR``, ``SKC``, ``FEW``, ``SCT``,
    ``BKN``, ``OVC`` and ``VV`` are returned upper-cased; ``CAVOK`` is mapped
    to ``CLR``. Anything else yields ``None``.

    Non-string values (including ``None``) also yield ``None``: the value
    arrives straight from a decoded JSON payload, so a number or nested
    object must not raise ``AttributeError`` here.
    """
    if not isinstance(cover, str):
        return None
    upper = cover.upper()
    if upper in ("CLR", "SKC", "FEW", "SCT", "BKN", "OVC", "VV"):
        return upper
    if upper == "CAVOK":
        return "CLR"
    return None
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _cloud_layer(layer: Any) -> tuple[str | None, int | None]:
|
|
124
|
+
"""Extract cover and base from a cloud layer dict. Safe against non-dict entries."""
|
|
125
|
+
if not isinstance(layer, dict):
|
|
126
|
+
return None, None
|
|
127
|
+
base = bounded_int(_safe_int(layer.get("base")), 0, SKY_BASE_MAX_FT)
|
|
128
|
+
return map_cloud_cover(layer.get("cover")), base
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _safe_int(v: Any) -> int | None:
|
|
132
|
+
"""Convert to int. Returns None on bad input."""
|
|
133
|
+
if v is None:
|
|
134
|
+
return None
|
|
135
|
+
try:
|
|
136
|
+
f = float(v)
|
|
137
|
+
if not math.isfinite(f):
|
|
138
|
+
return None
|
|
139
|
+
return round(f)
|
|
140
|
+
except (ValueError, TypeError, OverflowError):
|
|
141
|
+
return None
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _safe_float(v: Any) -> float | None:
|
|
145
|
+
"""Convert to float. Returns None on bad input."""
|
|
146
|
+
if v is None:
|
|
147
|
+
return None
|
|
148
|
+
try:
|
|
149
|
+
f = float(v)
|
|
150
|
+
return f if math.isfinite(f) else None
|
|
151
|
+
except (ValueError, TypeError, OverflowError):
|
|
152
|
+
return None
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def _safe_precip(v: Any) -> float | None:
|
|
156
|
+
"""Parse precipitation. Trace 'T' → 0.0, numeric passthrough, else None."""
|
|
157
|
+
if v is None:
|
|
158
|
+
return None
|
|
159
|
+
if isinstance(v, str) and v.strip().upper() == "T":
|
|
160
|
+
return 0.0
|
|
161
|
+
return _safe_float(v)
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
_PK_WND_RE = re.compile(r"PK WND (\d{3})(\d{2,3})/(\d{4})")
|
|
165
|
+
|
|
166
|
+
# T-group in METAR remarks: T{s}{SSS}{s}{DDD}
|
|
167
|
+
# s=0 positive, s=1 negative. SSS/DDD = tenths of °C.
|
|
168
|
+
# Example: T02560167 → 25.6°C / 16.7°C. T10390061 → -3.9°C / 6.1°C.
|
|
169
|
+
_TGROUP_RE = re.compile(r"\bT([01])(\d{3})([01])(\d{3})\b")
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def _parse_peak_wind(
|
|
173
|
+
raw_metar: str | None,
|
|
174
|
+
) -> tuple[int | None, int | None, str | None]:
|
|
175
|
+
"""Parse PK WND from METAR remarks. Returns (dir, speed_kt, time_hhmm)."""
|
|
176
|
+
if not raw_metar:
|
|
177
|
+
return None, None, None
|
|
178
|
+
match = _PK_WND_RE.search(raw_metar)
|
|
179
|
+
if not match:
|
|
180
|
+
return None, None, None
|
|
181
|
+
direction = int(match.group(1))
|
|
182
|
+
speed = int(match.group(2))
|
|
183
|
+
time_hhmm = match.group(3)
|
|
184
|
+
if not (0 <= direction <= 360) or speed < 0:
|
|
185
|
+
return None, None, None
|
|
186
|
+
return direction, speed, time_hhmm
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def _parse_tgroup(raw_metar: str | None) -> tuple[float | None, float | None]:
|
|
190
|
+
"""Parse T-group from METAR remarks for tenths-precision temperature.
|
|
191
|
+
|
|
192
|
+
ASOS stations always include T-group in remarks. Format: T{s}{SSS}{s}{DDD}
|
|
193
|
+
where s=0 positive, s=1 negative, SSS=temp tenths °C, DDD=dewpoint tenths °C.
|
|
194
|
+
Searches only the remarks section (after RMK) to avoid false positives.
|
|
195
|
+
Returns (temp_c, dewpoint_c) or (None, None) if not found.
|
|
196
|
+
"""
|
|
197
|
+
if not raw_metar:
|
|
198
|
+
return None, None
|
|
199
|
+
# T-group is a remarks-only element — search only after RMK.
|
|
200
|
+
# No RMK section = no T-group. Do NOT fallback to full string
|
|
201
|
+
# to avoid false positives on body group patterns.
|
|
202
|
+
rmk_idx = raw_metar.find("RMK")
|
|
203
|
+
if rmk_idx < 0:
|
|
204
|
+
return None, None
|
|
205
|
+
match = _TGROUP_RE.search(raw_metar[rmk_idx:])
|
|
206
|
+
if not match:
|
|
207
|
+
return None, None
|
|
208
|
+
t_sign = -1 if match.group(1) == "1" else 1
|
|
209
|
+
t_val = int(match.group(2)) / 10.0 * t_sign
|
|
210
|
+
d_sign = -1 if match.group(3) == "1" else 1
|
|
211
|
+
d_val = int(match.group(4)) / 10.0 * d_sign
|
|
212
|
+
return t_val, d_val
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def awc_to_observation(m: dict[str, Any]) -> dict[str, Any] | None:
|
|
216
|
+
"""Convert a parsed AWC METAR dict to an observation schema dict.
|
|
217
|
+
|
|
218
|
+
Returns None if icaoId or obsTime is invalid.
|
|
219
|
+
Output matches specs/observation.json (no extra fields).
|
|
220
|
+
"""
|
|
221
|
+
icao_id = m.get("icaoId")
|
|
222
|
+
if not isinstance(icao_id, str) or not icao_id:
|
|
223
|
+
return None
|
|
224
|
+
|
|
225
|
+
obs_time = m.get("obsTime")
|
|
226
|
+
if not isinstance(obs_time, int | float):
|
|
227
|
+
return None
|
|
228
|
+
|
|
229
|
+
station_code = icao_to_station_code(icao_id)
|
|
230
|
+
if not STATION_CODE_RE.match(station_code):
|
|
231
|
+
return None
|
|
232
|
+
|
|
233
|
+
try:
|
|
234
|
+
dt = datetime.fromtimestamp(obs_time, tz=UTC)
|
|
235
|
+
except (OSError, OverflowError, ValueError):
|
|
236
|
+
return None
|
|
237
|
+
if not (1970 <= dt.year <= 2100):
|
|
238
|
+
return None
|
|
239
|
+
observed_at = dt.strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
240
|
+
|
|
241
|
+
metar_type = (m.get("metarType") or "METAR").upper()
|
|
242
|
+
observation_type = "SPECI" if metar_type == "SPECI" else "METAR"
|
|
243
|
+
|
|
244
|
+
# Wind direction: handle "VRB" -> None, bounded [0, 360]
|
|
245
|
+
wdir: int | None = None
|
|
246
|
+
raw_wdir = m.get("wdir")
|
|
247
|
+
if raw_wdir is not None:
|
|
248
|
+
if isinstance(raw_wdir, int | float):
|
|
249
|
+
wdir = bounded_int(int(raw_wdir), *WIND_DIR_BOUNDS)
|
|
250
|
+
elif raw_wdir != "VRB":
|
|
251
|
+
try:
|
|
252
|
+
parsed = float(raw_wdir)
|
|
253
|
+
if math.isfinite(parsed):
|
|
254
|
+
wdir = bounded_int(int(parsed), *WIND_DIR_BOUNDS)
|
|
255
|
+
except (ValueError, TypeError):
|
|
256
|
+
pass
|
|
257
|
+
|
|
258
|
+
wspd = bounded_int(_safe_int(m.get("wspd")), 0, WIND_SPEED_MAX)
|
|
259
|
+
wgst = bounded_int(_safe_int(m.get("wgst")), 0, WIND_GUST_MAX)
|
|
260
|
+
|
|
261
|
+
# Altimeter: AWC altim is in hPa, convert to inHg (no rounding)
|
|
262
|
+
altim = hpa_to_inhg(_safe_float(m.get("altim")))
|
|
263
|
+
|
|
264
|
+
# Sea-level pressure (already in mb/hPa)
|
|
265
|
+
slp = _safe_float(m.get("slp"))
|
|
266
|
+
if slp is not None and not (SLP_MIN_MB <= slp <= SLP_MAX_MB):
|
|
267
|
+
slp = None
|
|
268
|
+
|
|
269
|
+
# Cloud layers (safe against non-dict entries)
|
|
270
|
+
clouds = m.get("clouds") or []
|
|
271
|
+
cov1, base1 = _cloud_layer(clouds[0]) if len(clouds) > 0 else (None, None)
|
|
272
|
+
cov2, base2 = _cloud_layer(clouds[1]) if len(clouds) > 1 else (None, None)
|
|
273
|
+
cov3, base3 = _cloud_layer(clouds[2]) if len(clouds) > 2 else (None, None)
|
|
274
|
+
cov4, base4 = _cloud_layer(clouds[3]) if len(clouds) > 3 else (None, None)
|
|
275
|
+
|
|
276
|
+
# Raw METAR (truncate to 2048)
|
|
277
|
+
raw_ob = m.get("rawOb")
|
|
278
|
+
raw_metar: str | None = None
|
|
279
|
+
if isinstance(raw_ob, str):
|
|
280
|
+
raw_metar = raw_ob[:MAX_RAW_METAR_LEN]
|
|
281
|
+
|
|
282
|
+
# Weather codes
|
|
283
|
+
raw_wx = m.get("wxString")
|
|
284
|
+
weather_codes: str | None = None
|
|
285
|
+
if isinstance(raw_wx, str):
|
|
286
|
+
weather_codes = raw_wx[:MAX_WX_CODES_LEN]
|
|
287
|
+
|
|
288
|
+
# Temperature: T-group (tenths precision) overrides body group (whole degree).
|
|
289
|
+
# ASOS always includes T-group in remarks. If present, use it.
|
|
290
|
+
# Note: KNYC (Central Park) is NOT an ASOS station — may lack T-group,
|
|
291
|
+
# falling back to whole-degree body group temps from AWC.
|
|
292
|
+
temp_c = _safe_float(m.get("temp"))
|
|
293
|
+
dewp_c = _safe_float(m.get("dewp"))
|
|
294
|
+
tgroup_temp, tgroup_dewp = _parse_tgroup(raw_metar)
|
|
295
|
+
if tgroup_temp is not None:
|
|
296
|
+
temp_c = tgroup_temp
|
|
297
|
+
if tgroup_dewp is not None:
|
|
298
|
+
dewp_c = tgroup_dewp
|
|
299
|
+
temp_c = bounded_float(temp_c, TEMP_MIN_C, TEMP_MAX_C, field="temp_c")
|
|
300
|
+
dewp_c = bounded_float(dewp_c, TEMP_MIN_C, TEMP_MAX_C, field="dewpoint_c")
|
|
301
|
+
temp_f = celsius_to_fahrenheit(temp_c)
|
|
302
|
+
dewpoint_f = celsius_to_fahrenheit(dewp_c)
|
|
303
|
+
|
|
304
|
+
# Peak wind from METAR remarks (PK WND dddss/hhmm), bounded
|
|
305
|
+
pk_dir, pk_spd, pk_time = _parse_peak_wind(raw_metar)
|
|
306
|
+
pk_dir = bounded_int(pk_dir, *WIND_DIR_BOUNDS)
|
|
307
|
+
pk_spd = bounded_int(pk_spd, 0, WIND_GUST_MAX)
|
|
308
|
+
|
|
309
|
+
# Precipitation (AWC provides 'precip' field; 'T' = trace → 0.0, non-negative)
|
|
310
|
+
precip = bounded_float_min(_safe_precip(m.get("precip")), 0.0)
|
|
311
|
+
|
|
312
|
+
# QC field bitmask
|
|
313
|
+
qc_raw = m.get("qcField")
|
|
314
|
+
qc_field = _safe_int(qc_raw)
|
|
315
|
+
|
|
316
|
+
return {
|
|
317
|
+
"station_code": station_code,
|
|
318
|
+
"observed_at": observed_at,
|
|
319
|
+
"observation_type": observation_type,
|
|
320
|
+
"source": "awc",
|
|
321
|
+
"temp_c": temp_c,
|
|
322
|
+
"dewpoint_c": dewp_c,
|
|
323
|
+
"temp_f": temp_f,
|
|
324
|
+
"dewpoint_f": dewpoint_f,
|
|
325
|
+
"wind_dir_degrees": wdir,
|
|
326
|
+
"wind_speed_kt": wspd,
|
|
327
|
+
"wind_gust_kt": wgst,
|
|
328
|
+
"altimeter_inhg": altim,
|
|
329
|
+
"sea_level_pressure_mb": slp,
|
|
330
|
+
"sky_cover_1": cov1,
|
|
331
|
+
"sky_base_1_ft": base1,
|
|
332
|
+
"sky_cover_2": cov2,
|
|
333
|
+
"sky_base_2_ft": base2,
|
|
334
|
+
"sky_cover_3": cov3,
|
|
335
|
+
"sky_base_3_ft": base3,
|
|
336
|
+
"sky_cover_4": cov4,
|
|
337
|
+
"sky_base_4_ft": base4,
|
|
338
|
+
"visibility_miles": parse_awc_visibility(m.get("visib")),
|
|
339
|
+
"weather_codes": weather_codes,
|
|
340
|
+
"precip_1hr_inches": precip,
|
|
341
|
+
"peak_wind_gust_kt": pk_spd,
|
|
342
|
+
"peak_wind_dir": pk_dir,
|
|
343
|
+
"peak_wind_time": pk_time,
|
|
344
|
+
"snow_depth_inches": None, # not available from AWC
|
|
345
|
+
"qc_field": qc_field,
|
|
346
|
+
"raw_metar": raw_metar,
|
|
347
|
+
}
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
"""IEM CLI daily climate report parser.
|
|
2
|
+
|
|
3
|
+
Parses IEM's cli.py JSON endpoint into climate records matching
|
|
4
|
+
specs/climate.json. THE Kalshi settlement source.
|
|
5
|
+
|
|
6
|
+
Report type priority determines dedup: final (3.0) overwrites
|
|
7
|
+
preliminary (1.0), but a second final never overwrites the first
|
|
8
|
+
(strict >, first-seen wins at equal priority). The overnight final
|
|
9
|
+
IS the Kalshi settlement value.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import logging
|
|
15
|
+
import re
|
|
16
|
+
from datetime import date, datetime, timezone
|
|
17
|
+
from typing import Any
|
|
18
|
+
|
|
19
|
+
log = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
REPORT_TYPE_PRIORITY: dict[str, float] = {
|
|
22
|
+
"final": 3.0,
|
|
23
|
+
"ncei_final": 2.5,
|
|
24
|
+
"correction": 2.0,
|
|
25
|
+
"preliminary": 1.0,
|
|
26
|
+
"estimated": 0.0,
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
# Climate temp bounds from specs/climate.json
|
|
30
|
+
HIGH_TEMP_MIN_F = -60
|
|
31
|
+
HIGH_TEMP_MAX_F = 150
|
|
32
|
+
LOW_TEMP_MIN_F = -80
|
|
33
|
+
LOW_TEMP_MAX_F = 130
|
|
34
|
+
|
|
35
|
+
_PRODUCT_TS_RE = re.compile(r"^(\d{12})")
|
|
36
|
+
_DATE_RE = re.compile(r"^\d{4}-\d{2}-\d{2}$")
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _parse_product_timestamp(product: str) -> datetime | None:
|
|
40
|
+
"""Extract issued timestamp from product field.
|
|
41
|
+
|
|
42
|
+
Format: "202501160620-KFFC-CDUS42-CLIATL" -> first 12 chars = YYYYMMDDHHmm.
|
|
43
|
+
"""
|
|
44
|
+
if not product:
|
|
45
|
+
return None
|
|
46
|
+
m = _PRODUCT_TS_RE.match(product)
|
|
47
|
+
if not m:
|
|
48
|
+
return None
|
|
49
|
+
try:
|
|
50
|
+
return datetime.strptime(m.group(1), "%Y%m%d%H%M").replace(tzinfo=timezone.utc) # noqa: UP017
|
|
51
|
+
except ValueError:
|
|
52
|
+
return None
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def infer_report_type(product: str | None, observation_date: str) -> str:
    """Classify a CLI report by comparing its issue time to the observation date.

    The issue time comes from ``_parse_product_timestamp(product)``; its UTC
    calendar date is compared with ``observation_date`` (ISO ``YYYY-MM-DD``).

    - issued on or before the observation date -> ``"preliminary"``
    - issued exactly one day later with the issue hour between 04 and 10 UTC
      inclusive (04:00-10:59)                  -> ``"final"``
    - issued one day later outside that window, or more than one day later
                                                -> ``"correction"``
    - missing/unparseable product or date      -> ``"preliminary"``

    The hour window is an empirical choice meant to cover overnight CLI issue
    times across CONUS time zones.
    """
    fallback = "preliminary"

    issued = _parse_product_timestamp(product) if product else None
    if issued is None:
        return fallback

    try:
        obs_date = date.fromisoformat(observation_date)
    except (ValueError, TypeError):
        return fallback

    days_after = (issued.date() - obs_date).days
    if days_after < 1:
        return fallback

    overnight = days_after == 1 and issued.hour in range(4, 11)
    return "final" if overnight else "correction"
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _parse_temp(val: Any) -> int | None:
|
|
94
|
+
"""Parse temperature value. 'M', null, empty -> None. Otherwise int."""
|
|
95
|
+
if val is None or val == "M" or val == "":
|
|
96
|
+
return None
|
|
97
|
+
try:
|
|
98
|
+
return round(float(val))
|
|
99
|
+
except (ValueError, TypeError):
|
|
100
|
+
return None
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def parse_cli_record(record: dict[str, Any], station_code: str) -> dict[str, Any] | None:
    """Parse a single IEM CLI record into a climate schema dict.

    Args:
        record: One decoded CLI record. Keys read: ``valid``, ``high``,
            ``low``, ``product``.
        station_code: Station code copied into the result and log messages.

    Returns:
        A dict with 9 fields (8 schema + ``report_type_priority``), or
        ``None`` when ``valid`` is missing / not a real ``YYYY-MM-DD`` date,
        or when both high and low are missing or out of bounds.

    ``observation_date`` is IEM's ``valid`` field used as-is. IEM CLI data
    reports by local calendar day (NWS convention): "March 31 high" means the
    station's local March 31.
    """
    observation_date = record.get("valid")
    if not observation_date or not isinstance(observation_date, str):
        return None
    if not _DATE_RE.match(observation_date):
        return None
    # The regex accepts "2025-02-31", which is not a real date. Validate with
    # date.fromisoformat() so such records are dropped here rather than being
    # silently classified as "preliminary" by infer_report_type().
    try:
        date.fromisoformat(observation_date)
    except ValueError:
        return None

    high = _parse_temp(record.get("high"))
    low = _parse_temp(record.get("low"))

    # Bounds check: reject physically impossible settlement values
    if high is not None and not (HIGH_TEMP_MIN_F <= high <= HIGH_TEMP_MAX_F):
        log.warning(
            "%s %s: high_temp_f=%d out of bounds [%d, %d], setting None",
            station_code,
            observation_date,
            high,
            HIGH_TEMP_MIN_F,
            HIGH_TEMP_MAX_F,
        )
        high = None
    if low is not None and not (LOW_TEMP_MIN_F <= low <= LOW_TEMP_MAX_F):
        log.warning(
            "%s %s: low_temp_f=%d out of bounds [%d, %d], setting None",
            station_code,
            observation_date,
            low,
            LOW_TEMP_MIN_F,
            LOW_TEMP_MAX_F,
        )
        low = None

    if high is None and low is None:
        return None

    # Normalize once: only a non-empty string is a usable product id. A
    # truthy non-string (number, list, ...) would otherwise reach the regex
    # match in the timestamp parser and raise TypeError.
    raw_product = record.get("product")
    product = raw_product if isinstance(raw_product, str) and raw_product else None

    report_type = infer_report_type(product, observation_date)
    priority = REPORT_TYPE_PRIORITY[report_type]

    # Extract issued_at from product timestamp
    issued_at: str | None = None
    if product:
        issued_dt = _parse_product_timestamp(product)
        if issued_dt:
            issued_at = issued_dt.strftime("%Y-%m-%dT%H:%M:%SZ")

    return {
        "station_code": station_code,
        "observation_date": observation_date,
        "high_temp_f": high,
        "low_temp_f": low,
        "report_type": report_type,
        "report_type_priority": priority,
        "source": "iem",
        "product_id": product,
        "issued_at": issued_at,
    }
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def parse_cli_response(data: list[dict[str, Any]], station_code: str) -> list[dict[str, Any]]:
    """Parse every CLI record in ``data``, keeping input order.

    Each record goes through ``parse_cli_record``; records for which it
    returns ``None`` are left out of the result.
    """
    candidates = (parse_cli_record(entry, station_code) for entry in data)
    return [row for row in candidates if row is not None]
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""HTTP fetcher modules for mostlyright.weather data sources.
|
|
2
|
+
|
|
3
|
+
Sprint 0 Wave 3B (Lane F, net-new code — NOT lifted from mostlyright==0.14.1):
|
|
4
|
+
|
|
5
|
+
- ``awc`` — AWC live METAR fetcher (last ~168 hours, no historical depth).
|
|
6
|
+
- ``iem_asos`` — IEM ASOS historical METAR (CSV) with monthly chunking + cache.
|
|
7
|
+
- ``iem_cli`` — NWS CLI settlement fetcher via IEM ``json/cli.py``. Year granularity.
|
|
8
|
+
- ``ghcnh`` — NOAA GHCNh historical PSV fetcher with annual cache.
|
|
9
|
+
|
|
10
|
+
Each fetcher returns either RAW response dicts (AWC) or local file paths
|
|
11
|
+
(IEM ASOS / IEM CLI / GHCNh after ``download_with_retry`` atomic write).
|
|
12
|
+
Callers compose with parsers in :mod:`mostlyright.weather` (``_awc``, ``_iem``,
|
|
13
|
+
``_climate``, ``_ghcnh``) to produce schema-valid observation / climate dicts.
|
|
14
|
+
|
|
15
|
+
Fetchers do NOT call the parsers directly — parser composition lives in
|
|
16
|
+
``mostlyright.weather.observations.fetch()`` (Wave 4) so the cache layer can
|
|
17
|
+
sit between raw bytes and parsed observations.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|