mostlyrightmd-weather 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. mostlyright/weather/__init__.py +36 -0
  2. mostlyright/weather/_awc.py +347 -0
  3. mostlyright/weather/_climate.py +186 -0
  4. mostlyright/weather/_fetchers/__init__.py +20 -0
  5. mostlyright/weather/_fetchers/_hafs_storms.py +149 -0
  6. mostlyright/weather/_fetchers/_iem_chunks.py +75 -0
  7. mostlyright/weather/_fetchers/_iem_mos.py +302 -0
  8. mostlyright/weather/_fetchers/_msc_archive.py +202 -0
  9. mostlyright/weather/_fetchers/_nwp_archive.py +818 -0
  10. mostlyright/weather/_fetchers/_nwp_cycle_chunks.py +238 -0
  11. mostlyright/weather/_fetchers/_nwp_extract.py +224 -0
  12. mostlyright/weather/_fetchers/_nwp_grids/__init__.py +126 -0
  13. mostlyright/weather/_fetchers/_nwp_grids/cfs.py +25 -0
  14. mostlyright/weather/_fetchers/_nwp_grids/ecmwf_aifs.py +25 -0
  15. mostlyright/weather/_fetchers/_nwp_grids/ecmwf_ifs.py +30 -0
  16. mostlyright/weather/_fetchers/_nwp_grids/gdas.py +17 -0
  17. mostlyright/weather/_fetchers/_nwp_grids/gdps.py +21 -0
  18. mostlyright/weather/_fetchers/_nwp_grids/gefs.py +23 -0
  19. mostlyright/weather/_fetchers/_nwp_grids/geps.py +26 -0
  20. mostlyright/weather/_fetchers/_nwp_grids/gfs.py +32 -0
  21. mostlyright/weather/_fetchers/_nwp_grids/hafs.py +25 -0
  22. mostlyright/weather/_fetchers/_nwp_grids/hiresw.py +33 -0
  23. mostlyright/weather/_fetchers/_nwp_grids/hrdps.py +19 -0
  24. mostlyright/weather/_fetchers/_nwp_grids/href.py +22 -0
  25. mostlyright/weather/_fetchers/_nwp_grids/hrrr.py +39 -0
  26. mostlyright/weather/_fetchers/_nwp_grids/hrrrak.py +19 -0
  27. mostlyright/weather/_fetchers/_nwp_grids/nam.py +33 -0
  28. mostlyright/weather/_fetchers/_nwp_grids/nbm.py +31 -0
  29. mostlyright/weather/_fetchers/_nwp_grids/rap.py +19 -0
  30. mostlyright/weather/_fetchers/_nwp_grids/rdps.py +17 -0
  31. mostlyright/weather/_fetchers/_nwp_grids/reps.py +20 -0
  32. mostlyright/weather/_fetchers/_nwp_grids/rrfs.py +22 -0
  33. mostlyright/weather/_fetchers/_nwp_grids/rtma.py +17 -0
  34. mostlyright/weather/_fetchers/_nwp_grids/urma.py +17 -0
  35. mostlyright/weather/_fetchers/_nwp_idx.py +302 -0
  36. mostlyright/weather/_fetchers/_url_transitions.py +48 -0
  37. mostlyright/weather/_fetchers/awc.py +142 -0
  38. mostlyright/weather/_fetchers/ghcnh.py +169 -0
  39. mostlyright/weather/_fetchers/iem_asos.py +255 -0
  40. mostlyright/weather/_fetchers/iem_cli.py +194 -0
  41. mostlyright/weather/_ghcnh.py +348 -0
  42. mostlyright/weather/_iem.py +278 -0
  43. mostlyright/weather/cache.py +468 -0
  44. mostlyright/weather/catalog/__init__.py +96 -0
  45. mostlyright/weather/catalog/_obs_projection.py +192 -0
  46. mostlyright/weather/catalog/awc.py +82 -0
  47. mostlyright/weather/catalog/cli.py +239 -0
  48. mostlyright/weather/catalog/ghcnh.py +84 -0
  49. mostlyright/weather/catalog/iem.py +138 -0
  50. mostlyright/weather/forecast_nwp.py +1009 -0
  51. mostlyright/weather/obs.py +369 -0
  52. mostlyright/weather/qc/__init__.py +7 -0
  53. mostlyright/weather/qc/rules_nwp.py +398 -0
  54. mostlyright/weather/qc_sidecar.py +98 -0
  55. mostlyrightmd_weather-0.1.2.dist-info/METADATA +40 -0
  56. mostlyrightmd_weather-0.1.2.dist-info/RECORD +57 -0
  57. mostlyrightmd_weather-0.1.2.dist-info/WHEEL +4 -0
@@ -0,0 +1,36 @@
1
+ """mostlyright.weather — direct public-API access for AWC, IEM, GHCNh, NWS CLI.
2
+
3
+ Local-first; no hosted backend; no API keys. Parsers are byte-faithful lifts
4
+ from ``monorepo-v0.14.1``; HTTP fetchers and the parquet cache are net-new
5
+ Sprint 0 code so the SDK can run without the v0.14.1 ingest service.
6
+
7
+ Lift inventory (provenance for parity-critical code). Source SHA refers to
8
+ the v0.14.1 release tag of ``Tarabcak/monorepo`` (commit
9
+ ``514fcdab227e845145ca32b989355647466231d9``).
10
+
11
+ | Module | Source path | Source SHA | Lift date | Modifications |
12
+ |-------------------------|--------------------------------------------------------|------------|------------|---------------------------------------------------------------------|
13
+ | _awc.py | monorepo-v0.14.1/src/mostlyright/weather/_awc.py | 514fcda | 2026-05-21 | namespace rename only (imports point at ``mostlyright._internal``) |
14
+ | _iem.py | monorepo-v0.14.1/src/mostlyright/weather/_iem.py | 514fcda | 2026-05-21 | namespace rename only |
15
+ | _climate.py | monorepo-v0.14.1/src/mostlyright/weather/_climate.py | 514fcda | 2026-05-21 | namespace rename only |
16
+ | _ghcnh.py | monorepo-v0.14.1/src/mostlyright/weather/_ghcnh.py | 514fcda | 2026-05-21 | namespace rename only |
17
+ | _fetchers/__init__.py | n/a (NEW) | n/a | 2026-05-21 | NEW (Sprint 0 Wave 1 Lane F) — fetcher package marker |
18
+ | _fetchers/awc.py | n/a (NEW) | n/a | 2026-05-21 | NEW (Sprint 0 Wave 1 Lane F) — historical AWC range fetcher |
19
+ | _fetchers/iem_asos.py | n/a (NEW) | n/a | 2026-05-21 | NEW (Sprint 0 Wave 1 Lane F) — monthly-chunked IEM ASOS METAR fetcher |
20
+ | _fetchers/iem_cli.py | n/a (NEW) | n/a | 2026-05-21 | NEW (Sprint 0 Wave 1 Lane F) — IEM CLI settlement-grade fetcher |
21
+ | _fetchers/ghcnh.py | n/a (NEW) | n/a | 2026-05-21 | NEW (Sprint 0 Wave 1 Lane F) — per-year NCEI GHCNh PSV fetcher |
22
+ | cache.py | n/a (NEW) | n/a | 2026-05-21 | NEW (Sprint 0 Wave 1 Lane F) — local parquet cache, filelock-guarded |
23
+
24
+ ``_bounds`` is imported from ``mostlyright._internal`` (lifted there from
25
+ ``monorepo-v0.14.1/src/mostlyright/_bounds.py``) — see the parallel lift
26
+ inventory in ``mostlyright._internal.__init__``.
27
+
28
+ Public surface kept stable for Vojtech's existing ``mostlyright==0.14.1``
29
+ workflow: ``raw_metar`` is preserved on observation rows so MetPy re-parse
30
+ keeps working without preprocessing in v0.1.0.
31
+ """
32
+
33
+ from mostlyright.weather.obs import obs as obs # re-export Phase 7 public surface
34
+
35
+ __version__ = "0.1.0rc1"
36
+ __all__ = ["__version__", "obs"]
@@ -0,0 +1,347 @@
1
+ """AWC METAR transform — maps AWC JSON response to observation schema dict.
2
+
3
+ This is THE shared transform. Both the SDK and ingest worker import it.
4
+ Output dicts validate against specs/observation.json (additionalProperties: false).
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import math
10
+ import re
11
+ from datetime import UTC, datetime
12
+ from typing import Any
13
+
14
+ from mostlyright._internal._bounds import (
15
+ MAX_RAW_METAR_LEN,
16
+ MAX_VISIBILITY_MILES,
17
+ MAX_WX_CODES_LEN,
18
+ SKY_BASE_MAX_FT,
19
+ SLP_MAX_MB,
20
+ SLP_MIN_MB,
21
+ STATION_CODE_RE,
22
+ TEMP_MAX_C,
23
+ TEMP_MIN_C,
24
+ WIND_DIR_BOUNDS,
25
+ WIND_GUST_MAX,
26
+ WIND_SPEED_MAX,
27
+ bounded_float,
28
+ bounded_float_min,
29
+ bounded_int,
30
+ )
31
+ from mostlyright._internal._convert import celsius_to_fahrenheit, hpa_to_inhg
32
+
33
+
34
def icao_to_station_code(icao: str) -> str:
    """Normalize an ICAO identifier to the station code used in observations.

    The input is stripped and upper-cased. A four-character identifier that
    begins with ``K`` loses that leading letter; every other identifier is
    returned unchanged (apart from the normalization).
    """
    code = icao.strip().upper()
    # Length is tested first so the index access is always in range.
    has_k_prefix = len(code) == 4 and code[0] == "K"
    return code[1:] if has_k_prefix else code
40
+
41
+
42
def parse_awc_visibility(vis: Any) -> float | None:
    """Convert an AWC visibility value to a number of miles.

    Accepted spellings (after ``str()`` conversion of the input):

    * ``"10+"``     -> the number before the plus sign
    * ``"2 1/4"``   -> whole part plus fraction
    * ``"1/2"`` / ``"M1/4"`` -> the fraction (a leading ``M``/``m`` is dropped)
    * ``"7"`` / ``7`` / ``7.5`` -> the plain number

    Returns None for ``None``, ``""``, ``"null"``, unparseable text,
    non-finite numbers, and zero denominators. Every successful result is
    capped at ``MAX_VISIBILITY_MILES``.
    """
    if vis is None:
        return None

    text = str(vis)
    if text in ("", "null"):
        return None

    def _finite_floats(*tokens: str) -> list[float] | None:
        """Parse every token as a float; None if any fails or is non-finite."""
        try:
            values = [float(token) for token in tokens]
        except (ValueError, OverflowError):
            return None
        return values if all(math.isfinite(value) for value in values) else None

    # "10+" -> 10. Checked first, so "1/2+" is rejected rather than parsed.
    if text.endswith("+"):
        parsed = _finite_floats(text[:-1])
        return None if parsed is None else min(parsed[0], MAX_VISIBILITY_MILES)

    if "/" in text:
        if " " in text:
            # Mixed number: "1 1/2", "2 1/4".
            whole_text, fraction_text = text.split(" ", 1)
            pieces = fraction_text.split("/")
            if len(pieces) != 2:
                return None
            parsed = _finite_floats(whole_text, *pieces)
            if parsed is None or parsed[2] == 0:
                return None
            whole, numerator, denominator = parsed
            return min(whole + numerator / denominator, MAX_VISIBILITY_MILES)

        # Simple fraction: "1/2", "3/4", or "M1/4". The leading 'M' means
        # "less than"; it is represented as the same fractional value.
        if text.startswith(("M", "m")):
            text = text[1:]
        pieces = text.split("/")
        if len(pieces) != 2:
            return None
        parsed = _finite_floats(*pieces)
        if parsed is None or parsed[1] == 0:
            return None
        numerator, denominator = parsed
        return min(numerator / denominator, MAX_VISIBILITY_MILES)

    # Plain number.
    parsed = _finite_floats(text)
    return None if parsed is None else min(parsed[0], MAX_VISIBILITY_MILES)
109
+
110
+
111
def map_cloud_cover(cover: str | None) -> str | None:
    """Map an AWC cloud cover code to its standard abbreviation.

    Matching is case-insensitive. ``CLR``, ``SKC``, ``FEW``, ``SCT``, ``BKN``,
    ``OVC`` and ``VV`` are returned upper-cased; ``CAVOK`` is mapped to
    ``CLR``. Anything else yields None.

    Non-string input (including ``None``) also yields None: the value comes
    straight from decoded JSON, so a number or nested object in the ``cover``
    slot must not raise.
    """
    if not isinstance(cover, str):
        return None
    code = cover.upper()
    if code == "CAVOK":
        return "CLR"
    if code in ("CLR", "SKC", "FEW", "SCT", "BKN", "OVC", "VV"):
        return code
    return None
121
+
122
+
123
def _cloud_layer(layer: Any) -> tuple[str | None, int | None]:
    """Return ``(cover, base_ft)`` for one entry of the AWC ``clouds`` list.

    Entries that are not dicts produce ``(None, None)``. For dict entries the
    ``base`` value is converted with ``_safe_int`` and passed through
    ``bounded_int`` with limits ``0``..``SKY_BASE_MAX_FT``; the ``cover``
    value is translated by ``map_cloud_cover``.
    """
    if isinstance(layer, dict):
        base_ft = bounded_int(_safe_int(layer.get("base")), 0, SKY_BASE_MAX_FT)
        cover_code = map_cloud_cover(layer.get("cover"))
        return cover_code, base_ft
    return None, None
129
+
130
+
131
+ def _safe_int(v: Any) -> int | None:
132
+ """Convert to int. Returns None on bad input."""
133
+ if v is None:
134
+ return None
135
+ try:
136
+ f = float(v)
137
+ if not math.isfinite(f):
138
+ return None
139
+ return round(f)
140
+ except (ValueError, TypeError, OverflowError):
141
+ return None
142
+
143
+
144
+ def _safe_float(v: Any) -> float | None:
145
+ """Convert to float. Returns None on bad input."""
146
+ if v is None:
147
+ return None
148
+ try:
149
+ f = float(v)
150
+ return f if math.isfinite(f) else None
151
+ except (ValueError, TypeError, OverflowError):
152
+ return None
153
+
154
+
155
def _safe_precip(v: Any) -> float | None:
    """Parse a precipitation value.

    The trace marker ``"T"`` (any case, surrounding whitespace ignored) maps
    to ``0.0``. Every other non-None value is handed to ``_safe_float``;
    None stays None.
    """
    if v is None:
        return None
    is_trace = isinstance(v, str) and v.strip().upper() == "T"
    return 0.0 if is_trace else _safe_float(v)
162
+
163
+
164
+ _PK_WND_RE = re.compile(r"PK WND (\d{3})(\d{2,3})/(\d{4})")
165
+
166
+ # T-group in METAR remarks: T{s}{SSS}{s}{DDD}
167
+ # s=0 positive, s=1 negative. SSS/DDD = tenths of °C.
168
+ # Example: T02560167 → 25.6°C / 16.7°C. T10390061 → -3.9°C / 6.1°C.
169
+ _TGROUP_RE = re.compile(r"\bT([01])(\d{3})([01])(\d{3})\b")
170
+
171
+
172
+ def _parse_peak_wind(
173
+ raw_metar: str | None,
174
+ ) -> tuple[int | None, int | None, str | None]:
175
+ """Parse PK WND from METAR remarks. Returns (dir, speed_kt, time_hhmm)."""
176
+ if not raw_metar:
177
+ return None, None, None
178
+ match = _PK_WND_RE.search(raw_metar)
179
+ if not match:
180
+ return None, None, None
181
+ direction = int(match.group(1))
182
+ speed = int(match.group(2))
183
+ time_hhmm = match.group(3)
184
+ if not (0 <= direction <= 360) or speed < 0:
185
+ return None, None, None
186
+ return direction, speed, time_hhmm
187
+
188
+
189
+ def _parse_tgroup(raw_metar: str | None) -> tuple[float | None, float | None]:
190
+ """Parse T-group from METAR remarks for tenths-precision temperature.
191
+
192
+ ASOS stations always include T-group in remarks. Format: T{s}{SSS}{s}{DDD}
193
+ where s=0 positive, s=1 negative, SSS=temp tenths °C, DDD=dewpoint tenths °C.
194
+ Searches only the remarks section (after RMK) to avoid false positives.
195
+ Returns (temp_c, dewpoint_c) or (None, None) if not found.
196
+ """
197
+ if not raw_metar:
198
+ return None, None
199
+ # T-group is a remarks-only element — search only after RMK.
200
+ # No RMK section = no T-group. Do NOT fallback to full string
201
+ # to avoid false positives on body group patterns.
202
+ rmk_idx = raw_metar.find("RMK")
203
+ if rmk_idx < 0:
204
+ return None, None
205
+ match = _TGROUP_RE.search(raw_metar[rmk_idx:])
206
+ if not match:
207
+ return None, None
208
+ t_sign = -1 if match.group(1) == "1" else 1
209
+ t_val = int(match.group(2)) / 10.0 * t_sign
210
+ d_sign = -1 if match.group(3) == "1" else 1
211
+ d_val = int(match.group(4)) / 10.0 * d_sign
212
+ return t_val, d_val
213
+
214
+
215
def awc_to_observation(m: dict[str, Any]) -> dict[str, Any] | None:
    """Convert a parsed AWC METAR dict to an observation schema dict.

    Args:
        m: One decoded AWC METAR JSON object. Keys read: ``icaoId``,
            ``obsTime`` (epoch seconds), ``metarType``, ``wdir``, ``wspd``,
            ``wgst``, ``altim``, ``slp``, ``clouds``, ``rawOb``, ``wxString``,
            ``temp``, ``dewp``, ``precip``, ``qcField``, ``visib``.

    Returns:
        The observation dict, or None when ``icaoId`` is missing / not a
        string / fails ``STATION_CODE_RE``, or when ``obsTime`` is not a
        number or does not map to a year in 1970..2100.

    Individual malformed fields never raise; they are emitted as None.
    Output matches specs/observation.json (no extra fields).
    """
    icao_id = m.get("icaoId")
    if not isinstance(icao_id, str) or not icao_id:
        return None

    obs_time = m.get("obsTime")
    if not isinstance(obs_time, int | float):
        return None

    station_code = icao_to_station_code(icao_id)
    if not STATION_CODE_RE.match(station_code):
        return None

    try:
        dt = datetime.fromtimestamp(obs_time, tz=UTC)
    except (OSError, OverflowError, ValueError):
        # Covers NaN / infinity / out-of-range epoch values.
        return None
    if not (1970 <= dt.year <= 2100):
        return None
    observed_at = dt.strftime("%Y-%m-%dT%H:%M:%SZ")

    # Anything other than an explicit SPECI (including a missing or
    # non-string metarType) is treated as a routine METAR.
    raw_type = m.get("metarType")
    is_speci = isinstance(raw_type, str) and raw_type.upper() == "SPECI"
    observation_type = "SPECI" if is_speci else "METAR"

    # Wind direction: "VRB" -> None, otherwise truncate to int and bound.
    # Numbers and numeric strings share one path so that NaN / Infinity
    # (which json.loads accepts) are dropped instead of crashing int().
    wdir: int | None = None
    raw_wdir = m.get("wdir")
    if raw_wdir is not None and raw_wdir != "VRB":
        try:
            parsed_wdir = float(raw_wdir)
        except (ValueError, TypeError, OverflowError):
            parsed_wdir = None
        if parsed_wdir is not None and math.isfinite(parsed_wdir):
            wdir = bounded_int(int(parsed_wdir), *WIND_DIR_BOUNDS)

    wspd = bounded_int(_safe_int(m.get("wspd")), 0, WIND_SPEED_MAX)
    wgst = bounded_int(_safe_int(m.get("wgst")), 0, WIND_GUST_MAX)

    # Altimeter: AWC altim is in hPa, convert to inHg (no rounding)
    altim = hpa_to_inhg(_safe_float(m.get("altim")))

    # Sea-level pressure (already in mb/hPa)
    slp = _safe_float(m.get("slp"))
    if slp is not None and not (SLP_MIN_MB <= slp <= SLP_MAX_MB):
        slp = None

    # Cloud layers: only the first four entries are used. A "clouds" value
    # that is not a sequence of layers is treated as "no layers".
    raw_clouds = m.get("clouds")
    clouds = raw_clouds if isinstance(raw_clouds, (list, tuple)) else []
    layers = [_cloud_layer(layer) for layer in clouds[:4]]
    layers.extend([(None, None)] * (4 - len(layers)))
    (cov1, base1), (cov2, base2), (cov3, base3), (cov4, base4) = layers

    # Raw METAR, truncated to MAX_RAW_METAR_LEN characters
    raw_ob = m.get("rawOb")
    raw_metar: str | None = None
    if isinstance(raw_ob, str):
        raw_metar = raw_ob[:MAX_RAW_METAR_LEN]

    # Weather codes, truncated to MAX_WX_CODES_LEN characters
    raw_wx = m.get("wxString")
    weather_codes: str | None = None
    if isinstance(raw_wx, str):
        weather_codes = raw_wx[:MAX_WX_CODES_LEN]

    # Temperature: T-group (tenths precision) overrides body group (whole degree).
    # ASOS always includes T-group in remarks. If present, use it.
    # Note: KNYC (Central Park) is NOT an ASOS station — may lack T-group,
    # falling back to whole-degree body group temps from AWC.
    temp_c = _safe_float(m.get("temp"))
    dewp_c = _safe_float(m.get("dewp"))
    tgroup_temp, tgroup_dewp = _parse_tgroup(raw_metar)
    if tgroup_temp is not None:
        temp_c = tgroup_temp
    if tgroup_dewp is not None:
        dewp_c = tgroup_dewp
    temp_c = bounded_float(temp_c, TEMP_MIN_C, TEMP_MAX_C, field="temp_c")
    dewp_c = bounded_float(dewp_c, TEMP_MIN_C, TEMP_MAX_C, field="dewpoint_c")
    # Fahrenheit values are derived from the already-bounded Celsius values.
    temp_f = celsius_to_fahrenheit(temp_c)
    dewpoint_f = celsius_to_fahrenheit(dewp_c)

    # Peak wind from METAR remarks (PK WND dddss/hhmm), bounded
    pk_dir, pk_spd, pk_time = _parse_peak_wind(raw_metar)
    pk_dir = bounded_int(pk_dir, *WIND_DIR_BOUNDS)
    pk_spd = bounded_int(pk_spd, 0, WIND_GUST_MAX)

    # Precipitation (AWC provides 'precip' field; 'T' = trace → 0.0, non-negative)
    precip = bounded_float_min(_safe_precip(m.get("precip")), 0.0)

    # QC field bitmask
    qc_field = _safe_int(m.get("qcField"))

    return {
        "station_code": station_code,
        "observed_at": observed_at,
        "observation_type": observation_type,
        "source": "awc",
        "temp_c": temp_c,
        "dewpoint_c": dewp_c,
        "temp_f": temp_f,
        "dewpoint_f": dewpoint_f,
        "wind_dir_degrees": wdir,
        "wind_speed_kt": wspd,
        "wind_gust_kt": wgst,
        "altimeter_inhg": altim,
        "sea_level_pressure_mb": slp,
        "sky_cover_1": cov1,
        "sky_base_1_ft": base1,
        "sky_cover_2": cov2,
        "sky_base_2_ft": base2,
        "sky_cover_3": cov3,
        "sky_base_3_ft": base3,
        "sky_cover_4": cov4,
        "sky_base_4_ft": base4,
        "visibility_miles": parse_awc_visibility(m.get("visib")),
        "weather_codes": weather_codes,
        "precip_1hr_inches": precip,
        "peak_wind_gust_kt": pk_spd,
        "peak_wind_dir": pk_dir,
        "peak_wind_time": pk_time,
        "snow_depth_inches": None,  # not available from AWC
        "qc_field": qc_field,
        "raw_metar": raw_metar,
    }
@@ -0,0 +1,186 @@
1
+ """IEM CLI daily climate report parser.
2
+
3
+ Parses IEM's cli.py JSON endpoint into climate records matching
4
+ specs/climate.json. THE Kalshi settlement source.
5
+
6
+ Report type priority determines dedup: final (3.0) overwrites
7
+ preliminary (1.0), but a second final never overwrites the first
8
+ (strict >, first-seen wins at equal priority). The overnight final
9
+ IS the Kalshi settlement value.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import logging
15
+ import re
16
+ from datetime import date, datetime, timezone
17
+ from typing import Any
18
+
19
+ log = logging.getLogger(__name__)
20
+
21
+ REPORT_TYPE_PRIORITY: dict[str, float] = {
22
+ "final": 3.0,
23
+ "ncei_final": 2.5,
24
+ "correction": 2.0,
25
+ "preliminary": 1.0,
26
+ "estimated": 0.0,
27
+ }
28
+
29
+ # Climate temp bounds from specs/climate.json
30
+ HIGH_TEMP_MIN_F = -60
31
+ HIGH_TEMP_MAX_F = 150
32
+ LOW_TEMP_MIN_F = -80
33
+ LOW_TEMP_MAX_F = 130
34
+
35
+ _PRODUCT_TS_RE = re.compile(r"^(\d{12})")
36
+ _DATE_RE = re.compile(r"^\d{4}-\d{2}-\d{2}$")
37
+
38
+
39
+ def _parse_product_timestamp(product: str) -> datetime | None:
40
+ """Extract issued timestamp from product field.
41
+
42
+ Format: "202501160620-KFFC-CDUS42-CLIATL" -> first 12 chars = YYYYMMDDHHmm.
43
+ """
44
+ if not product:
45
+ return None
46
+ m = _PRODUCT_TS_RE.match(product)
47
+ if not m:
48
+ return None
49
+ try:
50
+ return datetime.strptime(m.group(1), "%Y%m%d%H%M").replace(tzinfo=timezone.utc) # noqa: UP017
51
+ except ValueError:
52
+ return None
53
+
54
+
55
def infer_report_type(product: str | None, observation_date: str) -> str:
    """Classify a CLI report by comparing its issue time to the observation date.

    - Issued on or before the observation date -> "preliminary"
    - Issued the next day, 04:00-10:59 UTC     -> "final" (overnight report)
    - Issued the next day, any other hour      -> "correction"
    - Issued more than one day later           -> "correction"
    - Missing/unparseable product or date      -> "preliminary" (safe default)
    """
    issued = _parse_product_timestamp(product) if product else None
    if issued is None:
        return "preliminary"

    try:
        obs_day = date.fromisoformat(observation_date)
    except (ValueError, TypeError):
        return "preliminary"

    lag_days = (issued.date() - obs_day).days
    if lag_days <= 0:
        return "preliminary"

    # 04:00-10:00 UTC window: empirically derived for CONUS ASOS stations.
    # Eastern WFOs issue overnight CLI ~midnight local (04-05 UTC).
    # Western WFOs (PHX UTC-7) issue ~midnight local (07 UTC).
    # The window covers all CONUS timezones.
    is_overnight_report = lag_days == 1 and 4 <= issued.hour <= 10
    return "final" if is_overnight_report else "correction"
91
+
92
+
93
+ def _parse_temp(val: Any) -> int | None:
94
+ """Parse temperature value. 'M', null, empty -> None. Otherwise int."""
95
+ if val is None or val == "M" or val == "":
96
+ return None
97
+ try:
98
+ return round(float(val))
99
+ except (ValueError, TypeError):
100
+ return None
101
+
102
+
103
def parse_cli_record(record: dict[str, Any], station_code: str) -> dict[str, Any] | None:
    """Parse a single IEM CLI record into a climate schema dict.

    Args:
        record: One decoded IEM CLI JSON object. Keys read: ``valid``,
            ``high``, ``low``, ``product``.
        station_code: Station code copied into the result and used in log
            messages.

    Returns:
        Dict with 9 fields (8 schema + report_type_priority), or None if
        both high and low are missing (or out of bounds) or observation_date
        is invalid.

    observation_date: Uses IEM's `valid` field directly. IEM CLI data
    reports by local calendar day (NWS convention). This is THE correct
    behavior: "March 31 high" means the station's local March 31.
    """
    observation_date = record.get("valid")
    if not observation_date or not isinstance(observation_date, str):
        return None
    if not _DATE_RE.match(observation_date):
        return None
    # The regex matches "2025-02-31" but it's not a real date. Validate via
    # date.fromisoformat() so invalid dates drop here (the alternative is
    # infer_report_type() catching the ValueError later and silently
    # classifying as "preliminary" — would corrupt settlement data).
    try:
        date.fromisoformat(observation_date)
    except ValueError:
        return None

    high = _parse_temp(record.get("high"))
    low = _parse_temp(record.get("low"))

    # Bounds check: reject physically impossible settlement values
    if high is not None and not (HIGH_TEMP_MIN_F <= high <= HIGH_TEMP_MAX_F):
        log.warning(
            "%s %s: high_temp_f=%d out of bounds [%d, %d], setting None",
            station_code,
            observation_date,
            high,
            HIGH_TEMP_MIN_F,
            HIGH_TEMP_MAX_F,
        )
        high = None
    if low is not None and not (LOW_TEMP_MIN_F <= low <= LOW_TEMP_MAX_F):
        log.warning(
            "%s %s: low_temp_f=%d out of bounds [%d, %d], setting None",
            station_code,
            observation_date,
            low,
            LOW_TEMP_MIN_F,
            LOW_TEMP_MAX_F,
        )
        low = None

    if high is None and low is None:
        return None

    # Normalize the product id up front: only a non-empty string is usable.
    # A non-string value must not reach the regex-based helpers below.
    raw_product = record.get("product")
    product: str | None = raw_product if isinstance(raw_product, str) and raw_product else None

    report_type = infer_report_type(product, observation_date)
    priority = REPORT_TYPE_PRIORITY[report_type]

    # Extract issued_at from product timestamp
    issued_at: str | None = None
    if product:
        issued_dt = _parse_product_timestamp(product)
        if issued_dt:
            issued_at = issued_dt.strftime("%Y-%m-%dT%H:%M:%SZ")

    return {
        "station_code": station_code,
        "observation_date": observation_date,
        "high_temp_f": high,
        "low_temp_f": low,
        "report_type": report_type,
        "report_type_priority": priority,
        "source": "iem",
        "product_id": product,
        "issued_at": issued_at,
    }
177
+
178
+
179
def parse_cli_response(data: list[dict[str, Any]], station_code: str) -> list[dict[str, Any]]:
    """Parse every CLI record in *data*, keeping input order.

    Records for which ``parse_cli_record`` returns None are left out.
    """
    parsed_records = (parse_cli_record(entry, station_code) for entry in data)
    return [row for row in parsed_records if row is not None]
@@ -0,0 +1,20 @@
1
+ """HTTP fetcher modules for mostlyright.weather data sources.
2
+
3
+ Sprint 0 Wave 3B (Lane F, net-new code — NOT lifted from mostlyright==0.14.1):
4
+
5
+ - ``awc`` — AWC live METAR fetcher (last ~168 hours, no historical depth).
6
+ - ``iem_asos`` — IEM ASOS historical METAR (CSV) with monthly chunking + cache.
7
+ - ``iem_cli`` — NWS CLI settlement fetcher via IEM ``json/cli.py``. Year granularity.
8
+ - ``ghcnh`` — NOAA GHCNh historical PSV fetcher with annual cache.
9
+
10
+ Each fetcher returns either RAW response dicts (AWC) or local file paths
11
+ (IEM ASOS / IEM CLI / GHCNh after ``download_with_retry`` atomic write).
12
+ Callers compose with parsers in :mod:`mostlyright.weather` (``_awc``, ``_iem``,
13
+ ``_climate``, ``_ghcnh``) to produce schema-valid observation / climate dicts.
14
+
15
+ Fetchers do NOT call the parsers directly — parser composition lives in
16
+ ``mostlyright.weather.observations.fetch()`` (Wave 4) so the cache layer can
17
+ sit between raw bytes and parsed observations.
18
+ """
19
+
20
+ from __future__ import annotations