mostlyrightmd 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. mostlyright/__init__.py +46 -0
  2. mostlyright/_compose.py +338 -0
  3. mostlyright/_exact_fetch.py +162 -0
  4. mostlyright/_internal/__init__.py +31 -0
  5. mostlyright/_internal/_bounds.py +167 -0
  6. mostlyright/_internal/_cache_dir.py +90 -0
  7. mostlyright/_internal/_capabilities.py +274 -0
  8. mostlyright/_internal/_convert.py +241 -0
  9. mostlyright/_internal/_http.py +71 -0
  10. mostlyright/_internal/_pairs.py +522 -0
  11. mostlyright/_internal/_pandas_compat.py +75 -0
  12. mostlyright/_internal/_stations.py +692 -0
  13. mostlyright/_internal/_toon.py +350 -0
  14. mostlyright/_internal/exceptions.py +69 -0
  15. mostlyright/_internal/merge/__init__.py +29 -0
  16. mostlyright/_internal/merge/_schemas.py +120 -0
  17. mostlyright/_internal/merge/climate.py +68 -0
  18. mostlyright/_internal/merge/observations.py +43 -0
  19. mostlyright/_internal/models/__init__.py +7 -0
  20. mostlyright/_internal/models/_base.py +49 -0
  21. mostlyright/_internal/models/availability.py +69 -0
  22. mostlyright/_internal/models/observation.py +129 -0
  23. mostlyright/_internal/models/station.py +115 -0
  24. mostlyright/_internal/specs/book_snapshot.json +81 -0
  25. mostlyright/_internal/specs/brackets.json +63 -0
  26. mostlyright/_internal/specs/candle.json +66 -0
  27. mostlyright/_internal/specs/climate.json +60 -0
  28. mostlyright/_internal/specs/daily_extreme.json +56 -0
  29. mostlyright/_internal/specs/data_version.json +38 -0
  30. mostlyright/_internal/specs/event.json +62 -0
  31. mostlyright/_internal/specs/forecast.json +177 -0
  32. mostlyright/_internal/specs/forecast_series.json +128 -0
  33. mostlyright/_internal/specs/market.json +63 -0
  34. mostlyright/_internal/specs/market_unified.json +92 -0
  35. mostlyright/_internal/specs/observation.json +144 -0
  36. mostlyright/_internal/specs/observation_ledger.json +50 -0
  37. mostlyright/_internal/specs/observation_qc.json +24 -0
  38. mostlyright/_internal/specs/omo.json +78 -0
  39. mostlyright/_internal/specs/series.json +58 -0
  40. mostlyright/_internal/specs/settlement-join.json +76 -0
  41. mostlyright/_internal/specs/settlement_record.json +56 -0
  42. mostlyright/_internal/specs/snapshot.json +72 -0
  43. mostlyright/_internal/specs/synoptic_extremes.json +75 -0
  44. mostlyright/_internal/versioning.py +132 -0
  45. mostlyright/core/__init__.py +62 -0
  46. mostlyright/core/_backend_dispatch.py +174 -0
  47. mostlyright/core/_json_safe.py +177 -0
  48. mostlyright/core/_narwhals_compat.py +132 -0
  49. mostlyright/core/_polars_compat.py +55 -0
  50. mostlyright/core/exceptions.py +704 -0
  51. mostlyright/core/formats/__init__.py +42 -0
  52. mostlyright/core/formats/_toon.py +344 -0
  53. mostlyright/core/formats/_toon_list_codec.py +213 -0
  54. mostlyright/core/formats/csv.py +57 -0
  55. mostlyright/core/formats/dataframe.py +34 -0
  56. mostlyright/core/formats/json.py +83 -0
  57. mostlyright/core/formats/parquet.py +56 -0
  58. mostlyright/core/formats/toon.py +434 -0
  59. mostlyright/core/merge.py +129 -0
  60. mostlyright/core/result.py +192 -0
  61. mostlyright/core/schema.py +334 -0
  62. mostlyright/core/schemas/__init__.py +39 -0
  63. mostlyright/core/schemas/forecast.py +122 -0
  64. mostlyright/core/schemas/forecast_nwp.py +192 -0
  65. mostlyright/core/schemas/observation.py +201 -0
  66. mostlyright/core/schemas/observation_ledger.py +117 -0
  67. mostlyright/core/schemas/observation_qc.py +75 -0
  68. mostlyright/core/schemas/settlement.py +164 -0
  69. mostlyright/core/temporal/__init__.py +18 -0
  70. mostlyright/core/temporal/knowledge_view.py +109 -0
  71. mostlyright/core/temporal/leakage.py +147 -0
  72. mostlyright/core/temporal/timepoint.py +253 -0
  73. mostlyright/core/validator.py +465 -0
  74. mostlyright/discover.py +100 -0
  75. mostlyright/discovery.py +273 -0
  76. mostlyright/forecasts.py +267 -0
  77. mostlyright/international.py +423 -0
  78. mostlyright/live/__init__.py +39 -0
  79. mostlyright/live/_latest.py +194 -0
  80. mostlyright/live/_sources.py +106 -0
  81. mostlyright/live/_stream.py +108 -0
  82. mostlyright/mode2.py +235 -0
  83. mostlyright/preprocessing.py +173 -0
  84. mostlyright/qc.py +240 -0
  85. mostlyright/research.py +1669 -0
  86. mostlyright/snapshot.py +504 -0
  87. mostlyright/transforms.py +201 -0
  88. mostlyrightmd-0.1.0.dist-info/METADATA +48 -0
  89. mostlyrightmd-0.1.0.dist-info/RECORD +90 -0
  90. mostlyrightmd-0.1.0.dist-info/WHEEL +4 -0
@@ -0,0 +1,46 @@
1
+ """mostlyright — local-first SDK for prediction-market weather settlement research.
2
+
3
+ Sprint 0 v0.1.0 ships:
4
+ - ``mostlyright.research(station, from_date, to_date, ...)`` — the v0.14.1 ``pairs()`` join,
5
+ lifted from monorepo-v0.14.1, calling AWC + IEM + GHCNh + NWS CLI directly.
6
+ - ``mostlyright.snapshot`` — settlement-window math (LST, market_close_utc).
7
+
8
+ Adjacent surfaces:
9
+ - ``mostlyright.weather`` — observations + climate + forecasts (sibling package ``mostlyrightmd-weather``).
10
+ - ``mostlyright.markets`` — Kalshi + Polymarket metadata (sibling package ``mostlyrightmd-markets``,
11
+ ships v0.1.0 in Sprint 0.5).
12
+
13
+ Namespace note: ``mostlyright`` is a split-distribution namespace package. Core owns this
14
+ ``__init__.py``; sibling distributions ``mostlyrightmd-weather`` and ``mostlyrightmd-markets`` ship
15
+ subdirectories (``mostlyright/weather/``, ``mostlyright/markets/``) WITHOUT their own
16
+ namespace-root ``__init__.py``. The pkgutil declaration below extends ``__path__`` so Python's
17
+ import machinery finds those subpackages from whichever site-packages location installed them.
18
+ """
19
+
20
+ # Split-distribution namespace: extend __path__ to discover sibling packages' contributions.
21
+ __path__ = __import__("pkgutil").extend_path(__path__, __name__)
22
+
23
# Must match the version in the wheel's dist-info METADATA (0.1.0). A
# pre-release suffix such as ``rc1`` sorts *below* the published release under
# PEP 440, so version checks against ``mostlyright.__version__`` would
# misreport the installed package.
__version__ = "0.1.0"
24
+
25
+ from mostlyright.discover import discover
26
+ from mostlyright.research import research
27
+
28
+ __all__ = ["__version__", "discover", "live", "research"]
29
+
30
+
31
+ # Lazy `mostlyright.live` access (Phase 11). `discover` and `research`
32
+ # above already eagerly import `mostlyright.core` (which pulls in pandas via
33
+ # `core.validator`), so the eager-pandas cost is pre-existing and NOT a
34
+ # Phase 11 regression. Even so, `live` is exposed through a module-level
35
+ # `__getattr__` hook so that a plain `import mostlyright` does not import
36
+ # the `mostlyright.live` package; the live module in turn defers its
37
+ # `mostlyright.weather` fetcher imports until first use. The first access,
38
+ # e.g. `mostlyright.live.stream(...)`, resolves and caches the submodule.
39
def __getattr__(name: str):
    """Module-level attribute hook that lazily exposes ``mostlyright.live``.

    Any name other than ``"live"`` raises ``AttributeError``. For ``"live"``
    the submodule is imported on demand and stored in this module's globals,
    so later lookups find it directly and never reach this hook again.
    """
    if name != "live":
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    import mostlyright.live as live_module

    # Cache on the module so subsequent accesses skip __getattr__.
    globals()["live"] = live_module
    return live_module
@@ -0,0 +1,338 @@
1
+ """Phase 10 — composable ``research()`` dispatcher.
2
+
3
+ Translates the new selectors (``city=``, ``contract=``, ``contracts=``)
4
+ into resolution metadata + station tuples that the existing
5
+ station-based ``research()`` machinery consumes. Cross-issuer annotation
6
+ (``settles_for``) is computed here so the dispatch layer is the single
7
+ source of truth for "which markets settle against which stations."
8
+
9
+ The dispatcher is intentionally pure (no I/O, no DataFrame
10
+ construction) so unit tests run instantly and the same logic can be
11
+ reused by ``discover()`` and the TS counterpart.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import warnings
17
+ from typing import Any
18
+
19
+ #: The valid selector kwarg names. Exactly one must be provided on each
20
+ #: ``research()`` invocation; passing zero or >1 raises ``ValueError``.
21
+ _SELECTOR_NAMES: tuple[str, ...] = ("station", "city", "contract", "contracts")
22
+
23
+
24
+ #: Kalshi short-ticker → canonical city slug. Real Kalshi tickers use
25
+ #: variable-length city suffixes: ``KXHIGHNY-...`` (NY → NYC),
26
+ #: ``KXHIGHCHI-...`` (CHI → CHI), ``KXHIGHLAX-...`` (LAX → LAX). The
27
+ #: ``KALSHI_SETTLEMENT_STATIONS`` catalog is keyed by the canonical
28
+ #: 3-letter city slug; this alias table normalizes the variable-length
29
+ #: Kalshi suffix to the catalog key before lookup. Phase 10 iter-1 codex
30
+ #: HIGH: without this, ``kalshi:KXHIGHNY-25MAY26-T79`` (the actual
31
+ #: ROADMAP example) would fail to resolve.
32
+ _KALSHI_TICKER_ALIASES: dict[str, str] = {
33
+ "NY": "NYC",
34
+ # All other Kalshi cities use the canonical 3-letter slug as their
35
+ # ticker suffix verbatim (identity mapping is implicit).
36
+ }
37
+
38
+
39
+ #: Kalshi-short ↔ Polymarket-long city slug alias. Architect iter-1 HIGH:
40
+ #: ``resolve_city`` and ``annotate_settles_for`` need to recognize BOTH
41
+ #: forms so a single call with EITHER input surfaces the cross-issuer
42
+ #: settlement neighborhood. Without this, ``resolve_city("LAX")`` would
43
+ #: miss Polymarket's KLAX entry (Polymarket keys it as ``los_angeles``);
44
+ #: ``resolve_city("chicago")`` would miss Kalshi's KMDW (Kalshi keys it
45
+ #: as ``CHI``). Bi-directional table — looked up either way.
46
+ _CITY_SLUG_ALIASES: dict[str, tuple[str, str]] = {
47
+ # short_kalshi: (long_polymarket, canonical_kalshi_upper)
48
+ "nyc": ("nyc", "NYC"),
49
+ "chi": ("chicago", "CHI"),
50
+ "lax": ("los_angeles", "LAX"),
51
+ "mia": ("miami", "MIA"),
52
+ "den": ("denver", "DEN"),
53
+ "bos": ("boston", "BOS"),
54
+ "aus": ("austin", "AUS"),
55
+ "dca": ("washington_dc", "DCA"),
56
+ "phl": ("philadelphia", "PHL"),
57
+ "sfo": ("san_francisco", "SFO"),
58
+ "sea": ("seattle", "SEA"),
59
+ "atl": ("atlanta", "ATL"),
60
+ "hou": ("houston", "HOU"),
61
+ "dal": ("dallas", "DAL"),
62
+ "phx": ("phoenix", "PHX"),
63
+ "msp": ("minneapolis", "MSP"),
64
+ "dtw": ("detroit", "DTW"),
65
+ }
66
+
67
+ # Build reverse lookup so passing the Polymarket long form also surfaces
68
+ # the Kalshi short form.
69
+ _CITY_SLUG_ALIASES_REVERSE: dict[str, tuple[str, str]] = {
70
+ long_poly: (short_kalshi, kalshi_upper)
71
+ for short_kalshi, (long_poly, kalshi_upper) in _CITY_SLUG_ALIASES.items()
72
+ }
73
+
74
+
75
+ def _normalize_city_slugs(city: str) -> tuple[str, str]:
76
+ """Return ``(polymarket_slug_lower, kalshi_slug_upper)`` for ``city``.
77
+
78
+ Accepts either form (``"nyc"`` or ``"NYC"``, ``"chicago"`` or ``"CHI"``)
79
+ and returns both canonical forms so callers can probe either catalog.
80
+
81
+ Falls back to ``(city.lower(), city.upper())`` for cities not in the
82
+ alias table (international cities the user might pass).
83
+ """
84
+ lower = city.lower()
85
+ upper = city.upper()
86
+ if lower in _CITY_SLUG_ALIASES:
87
+ long_poly, kalshi_upper = _CITY_SLUG_ALIASES[lower]
88
+ return long_poly, kalshi_upper
89
+ if lower in _CITY_SLUG_ALIASES_REVERSE:
90
+ short_kalshi, kalshi_upper = _CITY_SLUG_ALIASES_REVERSE[lower]
91
+ return lower, kalshi_upper
92
+ return lower, upper
93
+
94
+
95
class StationOverrideWarning(UserWarning):
    """Warn that ``station_override=`` differs from the canonical station.

    Issued when the caller deliberately overrides a contract's canonical
    settlement station with a different one. The affected output row carries
    ``settlement_mismatch=True`` so downstream backtest code can filter or
    flag rows that would otherwise diverge silently.
    """
102
+
103
+
104
def validate_selectors(
    *,
    station: str | None = None,
    city: str | None = None,
    contract: str | None = None,
    contracts: list[str] | tuple[str, ...] | None = None,
) -> str:
    """Check that exactly one selector was supplied and return its name.

    A scalar selector counts as supplied when it is neither ``None`` nor the
    empty string; ``contracts`` counts when it is not ``None`` and has at
    least one element.

    Args:
        station, city, contract, contracts: the four mutually-exclusive
            selectors accepted by ``research()``.

    Returns:
        The name of the single active selector: ``"station"``, ``"city"``,
        ``"contract"`` or ``"contracts"``.

    Raises:
        ValueError: no selector, or more than one selector, was supplied.
    """
    scalar_selectors = {"station": station, "city": city, "contract": contract}
    active = [
        label
        for label, value in scalar_selectors.items()
        if value is not None and value != ""
    ]
    if contracts is not None and len(contracts) > 0:
        active.append("contracts")

    if len(active) == 1:
        return active[0]
    if active:
        raise ValueError(f"research(): selectors are mutually exclusive; got {active!r}")
    raise ValueError(
        "research(): exactly one of station=, city=, contract=, contracts= must be provided"
    )
140
+
141
+
142
def resolve_contract(contract_id: str) -> tuple[str, str]:
    """Resolve a ``"<issuer>:<id>"`` string to ``(station, issuer)``.

    Supported issuers:
      - ``kalshi:`` — ``KHIGH*``/``KXHIGH*``/``KLOW*``/``KXLOW*`` city tickers,
        with or without a trailing ``-<DATE>-<STRIKE>`` suffix.
      - ``polymarket:`` — event/market ids. v0.2 raises NotImplementedError
        with an actionable message (the resolver lives in
        :mod:`mostlyright.markets._per_event_station` but requires a fetched
        event payload to identify the city; Phase 10 v0.2 surfaces this as
        a clear error and defers the integration to v0.3).

    The issuer is matched case-insensitively, and whitespace around the
    issuer and the id is ignored.

    Args:
        contract_id: ``"<issuer>:<id>"`` string (e.g.
            ``"kalshi:KXHIGHNYC"`` or ``"polymarket:0x..."``).

    Returns:
        Tuple of ``(station_icao, issuer_name)``.

    Raises:
        ValueError: malformed contract id, unknown issuer, or a Kalshi
            ticker that is not in one of the supported prefix families.
        NotImplementedError: Polymarket contract resolution (deferred).
    """
    if not isinstance(contract_id, str) or ":" not in contract_id:
        raise ValueError(f"contract id must be `<issuer>:<id>`; got {contract_id!r}")
    issuer, raw = contract_id.split(":", 1)
    issuer = issuer.strip().lower()
    raw = raw.strip()

    if issuer == "polymarket":
        raise NotImplementedError(
            "polymarket contract resolution requires event_id → station lookup "
            "via polymarket_discover() or polymarket_settle(); Phase 10 v0.2 "
            "defers this integration to v0.3. Use `city='nyc'` or pass the "
            "station explicitly via `station_override=` until then."
        )
    if issuer != "kalshi":
        raise ValueError(f"unknown issuer prefix: {issuer!r}; expected kalshi or polymarket")

    # Kalshi tickers come in two prefix families:
    #   KHIGH<CITY>* / KXHIGH<CITY>*  → NHIGH (daily-high)
    #   KLOW<CITY>*  / KXLOW<CITY>*   → NLOW  (daily-low)
    # The catalog resolvers expect the legacy KHIGH<CITY> / KLOW<CITY> shape,
    # so the modern exchange prefix ``KX`` is reduced to ``K``
    # (KXHIGHNYC → KHIGHNYC).
    ticker = raw.upper()
    if ticker.startswith("KX"):
        ticker = "K" + ticker[2:]
    # Full market tickers carry a -DATE-STRIKE suffix
    # (KHIGHNYC-25MAY26-T79 → KHIGHNYC); keep only the series portion.
    series = ticker.split("-", 1)[0]

    # Validate the ticker shape before importing the markets package, so a
    # malformed ticker is reported as ValueError even when that optional
    # sibling distribution is not importable.
    family = next(
        (
            prefix
            for prefix in ("KHIGH", "KLOW")
            if series.startswith(prefix) and len(series) > len(prefix)
        ),
        None,
    )
    if family is None:
        raise ValueError(
            f"unsupported kalshi contract format: {raw!r}; "
            "expected KHIGH<CITY>* / KXHIGH<CITY>* / KLOW<CITY>* / "
            "KXLOW<CITY>* prefix"
        )

    from datetime import date as _date

    from mostlyright.markets.catalog import kalshi_nhigh, kalshi_nlow

    resolver = kalshi_nhigh if family == "KHIGH" else kalshi_nlow
    # Normalize the variable-length city suffix via the Kalshi-ticker alias
    # table so KXHIGHNY → NY → NYC (the canonical catalog key).
    short = series[len(family):]
    canonical = _KALSHI_TICKER_ALIASES.get(short, short)
    # NOTE(review): resolution uses today's date, not the date embedded in
    # the ticker suffix (which is discarded above) — confirm this is the
    # intended behavior for historical contracts.
    resolved = resolver.resolve(f"{family}{canonical}", _date.today())
    return resolved.settlement_station, "kalshi"
220
+
221
+
222
def resolve_city(city: str) -> tuple[str, ...]:
    """Resolve a city slug to all stations any issuer settles against.

    The result is deduplicated and built in this order:
      1. Kalshi's settlement station (if the city is in the Kalshi catalog).
      2. Polymarket's ``default``, ``high`` and ``low`` stations, in that
         order (if the city is in the Polymarket catalog).
      3. Polymarket per-city denylist entries, appended in sorted order.
         These are forbidden-but-known stations, surfaced so quants can SEE
         the full neighborhood for an explicit ``station_override=``.

    For ``"NYC"`` the documented neighborhood is KNYC (Kalshi's), KLGA
    (Polymarket's) plus the denylist backstops KJFK and KEWR; because the
    denylist is sorted, KEWR is emitted before KJFK.

    Args:
        city: city slug. Accepts ``"NYC"`` (Kalshi upper) or ``"nyc"``
            (Polymarket lower); both are normalized.

    Returns:
        Tuple of station ICAOs.

    Raises:
        ValueError: ``city`` is not a non-empty string, or it is in neither
            catalog.
    """
    if not isinstance(city, str) or not city:
        raise ValueError(f"city must be a non-empty str; got {city!r}")

    from mostlyright.markets._per_event_station import load_polymarket_city_stations
    from mostlyright.markets.catalog.kalshi_stations import (
        KALSHI_SETTLEMENT_STATIONS,
    )
    from mostlyright.markets.polymarket import KNOWN_WRONG_STATIONS as POLY_WRONG

    # Normalize through the cross-issuer alias table so one call with either
    # slug form ("CHI" or "chicago") probes BOTH catalogs.
    poly_slug, kalshi_slug = _normalize_city_slugs(city)

    stations: list[str] = []

    def _add_once(code: str) -> None:
        # Append while keeping first-seen order and dropping duplicates.
        if code not in stations:
            stations.append(code)

    if kalshi_slug in KALSHI_SETTLEMENT_STATIONS:
        stations.append(KALSHI_SETTLEMENT_STATIONS[kalshi_slug].station)

    poly_catalog = load_polymarket_city_stations()
    if poly_slug in poly_catalog:
        per_measure = poly_catalog[poly_slug]
        # Fixed key order keeps the output stable across calls.
        for measure in ("default", "high", "low"):
            code = per_measure.get(measure)
            if code:
                _add_once(code)

    for code in sorted(POLY_WRONG.get(poly_slug, frozenset())):
        _add_once(code)

    if not stations:
        raise ValueError(f"unknown city {city!r}; not in kalshi or polymarket catalogs")
    return tuple(stations)
276
+
277
+
278
def annotate_settles_for(station: str, city: str | None) -> list[str]:
    """List the issuer markers that settle against ``station`` for ``city``.

    An empty list means no known issuer settles against this station for
    this city (typically a denylist entry surfaced by :func:`resolve_city`
    for the caller's awareness).

    Args:
        station: 4-char K-prefix ICAO.
        city: city slug in either issuer's form; ``None`` yields an empty
            list.

    Returns:
        Sorted list of ``"kalshi:<KALSHI_SLUG>"`` /
        ``"polymarket:<polymarket_slug>"`` markers.
    """
    if city is None:
        return []

    from mostlyright.markets._per_event_station import load_polymarket_city_stations
    from mostlyright.markets.catalog.kalshi_stations import (
        KALSHI_SETTLEMENT_STATIONS,
    )

    # The alias table makes the annotation independent of which slug form
    # the caller passed.
    poly_slug, kalshi_slug = _normalize_city_slugs(city)
    markers: list[str] = []

    kalshi_match = (
        kalshi_slug in KALSHI_SETTLEMENT_STATIONS
        and KALSHI_SETTLEMENT_STATIONS[kalshi_slug].station == station
    )
    if kalshi_match:
        markers.append(f"kalshi:{kalshi_slug}")

    poly_catalog = load_polymarket_city_stations()
    if poly_slug in poly_catalog and station in poly_catalog[poly_slug].values():
        markers.append(f"polymarket:{poly_slug}")

    markers.sort()
    return markers
313
+
314
+
315
def emit_override_warning(contract_station: str, override_station: str) -> None:
    """Emit a :class:`StationOverrideWarning` for a deliberate station mismatch.

    Args:
        contract_station: the contract's canonical settlement station.
        override_station: the station supplied via ``station_override=``.
    """
    message = (
        f"station_override={override_station!r} differs from contract's "
        f"canonical settlement station {contract_station!r}; output row will "
        "carry settlement_mismatch=True"
    )
    # stacklevel=3 attributes the warning two frames above this helper
    # rather than to the helper itself.
    warnings.warn(message, category=StationOverrideWarning, stacklevel=3)
324
+
325
+
326
+ __all__ = [
327
+ "StationOverrideWarning",
328
+ "annotate_settles_for",
329
+ "emit_override_warning",
330
+ "resolve_city",
331
+ "resolve_contract",
332
+ "validate_selectors",
333
+ ]
334
+
335
+
336
+ # Silence the `Any` import warning — kept for ruff future-proofing if/when
337
+ # the dispatch layer needs to type DataFrame returns.
338
+ _ = Any
@@ -0,0 +1,162 @@
1
+ """Exact-window obs fetcher — bypasses year-aligned monthly cache.
2
+
3
+ Used by `mostlyright.weather.obs(strategy="exact_window")` to serve small,
4
+ caller-bounded windows (e.g. 1-month backtest replays) without pulling a
5
+ full calendar year of IEM CSV.
6
+
7
+ DOES NOT WRITE to the canonical `observations/{STATION}/{YYYY}/{MM}.parquet`
8
+ cache — exact_window queries are treated as transient. Callers who need
9
+ warm-cache speedups for repeated calls should use `strategy="warm_cache"`.
10
+
11
+ Source filtering is enforced at the FETCHER BOUNDARY (not post-merge):
12
+ post-merge filtering would silently drop rows where the named source lost
13
+ the priority tie to a HIGHER-priority source that this call also fetched.
14
+ By gating each fetcher behind `source in (None, "<name>")`, the merge sees
15
+ only rows from the requested source(s) and the priority resolution is
16
+ semantically correct.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ from datetime import UTC, date, datetime, timedelta
22
+ from typing import TYPE_CHECKING, Any, Literal
23
+
24
+ from mostlyright._internal.merge import merge_observations
25
+ from mostlyright.weather._awc import awc_to_observation
26
+ from mostlyright.weather._fetchers.awc import fetch_awc_metars
27
+ from mostlyright.weather._fetchers.ghcnh import download_ghcnh
28
+ from mostlyright.weather._fetchers.iem_asos import download_iem_asos
29
+ from mostlyright.weather._ghcnh import parse_ghcnh_file
30
+ from mostlyright.weather._iem import parse_iem_file
31
+
32
+ if TYPE_CHECKING:
33
+ from mostlyright._internal.models.station import StationInfo
34
+
35
+
36
+ Source = Literal["iem", "ghcnh", "awc"]
37
+
38
+
39
def _exact_fetch_observations(
    info: StationInfo,
    from_date_iso: str,
    to_date_iso: str,
    *,
    source: Source | None = None,
) -> list[dict[str, Any]]:
    """Fetch and merge observation rows for an exact date window.

    IEM is queried with ``exact_window=True`` so no year-normalization
    happens, and nothing is written to the canonical monthly parquet cache.

    Parameters
    ----------
    info : StationInfo
        Resolved station metadata; ``icao``, ``code`` and ``ghcnh_id`` are
        read here.
    from_date_iso, to_date_iso : str
        ISO ``YYYY-MM-DD`` strings, inclusive bounds of the obs window.
    source : {"iem", "ghcnh", "awc"} | None
        When set, only that fetcher runs (filtering at the fetcher
        boundary). When ``None``, all three run and their rows are merged
        by ``merge_observations``.

    Returns
    -------
    list[dict]
        The merged observation rows. AWC and GHCNh rows are not trimmed to
        the window here; they are only filtered by station code.
    """
    # Imported lazily to keep the research -> weather dependency one-way;
    # only the path-layout helper is needed from research.
    from mostlyright.research import _sources_root

    window_start = date.fromisoformat(from_date_iso)
    window_end = date.fromisoformat(to_date_iso)
    # Fetch one extra day (mirrors research.py) so the part of the last LST
    # settlement window that falls on the following UTC date is included.
    padded_end = window_end + timedelta(days=1)

    cache_root = _sources_root()
    collected: list[dict[str, Any]] = []

    def _wanted(name: str) -> bool:
        # A fetcher runs when no source was requested or it is the one named.
        return source in (None, name)

    # --- IEM ASOS ----------------------------------------------------------
    # Exact-window CSVs get their own directory (`sources/iem_asos_exact/`)
    # and are never mixed into `sources/iem_asos/`.
    if _wanted("iem"):
        exact_dir = cache_root / "iem_asos_exact"
        # Report type 3 is METAR and 4 is SPECI; both are fetched so the
        # merge sees both.
        iem_report_types = {3: "METAR", 4: "SPECI"}
        for report_type, obs_type in iem_report_types.items():
            csv_paths = download_iem_asos(
                info,
                window_start,
                padded_end,
                exact_dir,
                report_type=report_type,
                exact_window=True,
            )
            for csv_path in csv_paths:
                collected.extend(
                    parse_iem_file(csv_path, observation_type_override=obs_type)
                )

    # --- AWC METAR (live 168h only) ---------------------------------------
    # The AWC fetch takes a trailing-hours argument, not a date range, so
    # the HTTP call is skipped when the window ends more than 7 days ago.
    # Rows are NOT filtered by UTC date here; window filtering is left to
    # the downstream settlement-date bucketing.
    if _wanted("awc"):
        oldest_awc_day = datetime.now(UTC).date() - timedelta(days=7)
        if window_end >= oldest_awc_day:
            for metar in fetch_awc_metars([info.icao], hours=168):
                converted = awc_to_observation(metar)
                # Drop unparseable reports and rows for other stations.
                if converted is not None and converted.get("station_code") == info.code:
                    collected.append(converted)

    # --- GHCNh (per-station-year) -----------------------------------------
    if _wanted("ghcnh"):
        import httpx

        from mostlyright.weather.cache import _is_current_lst_year

        ghcnh_dir = cache_root / "ghcnh"
        # One file per station-year: visit each calendar year the padded
        # window touches. The current LST year is always re-downloaded
        # because its file is still being republished upstream.
        for year in range(window_start.year, padded_end.year + 1):
            force_refresh = _is_current_lst_year(info.icao, year)
            try:
                psv_path = download_ghcnh(
                    info.ghcnh_id, year, ghcnh_dir, skip_cache=force_refresh
                )
            except httpx.HTTPStatusError as exc:
                # A 404 means no data for this station-year: skip it.
                # Any other HTTP error is re-raised.
                if exc.response.status_code != 404:
                    raise
                continue
            collected.extend(
                row
                for row in parse_ghcnh_file(psv_path)
                if row.get("station_code") == info.code
            )

    # Sort by (observed_at, source) BEFORE merging: merge_observations is
    # first-seen-wins at equal priority and preserves input order, so this
    # sort fixes both the tie-break and the order of the surviving rows.
    collected.sort(
        key=lambda row: (row.get("observed_at") or "", row.get("source") or "")
    )

    # Source filtering already happened at the fetcher boundary above, so
    # the merged rows must NOT be post-filtered by source.
    return merge_observations(collected)
@@ -0,0 +1,31 @@
1
+ """mostlyright._internal — shared utilities lifted from monorepo-v0.14.1.
2
+
3
+ NOT a public API. The package name (and most module names) start with an underscore to
4
+ discourage downstream use; rely on ``mostlyright.research()`` / ``mostlyright.snapshot.*`` instead.
5
+
6
+ Lift inventory (provenance for parity-critical code). Source SHA refers to the
7
+ v0.14.1 release tag of ``Tarabcak/monorepo`` (commit
8
+ ``514fcdab227e845145ca32b989355647466231d9``); ``_pairs.py`` additionally
9
+ pins the exact source-file blob SHA from that tree.
10
+
11
+ | Module | Source path | Source SHA | Lift date | Modifications |
12
+ |-----------------------|----------------------------------------------------------|--------------|------------|------------------------------------------------------------------------|
13
+ | _http.py | monorepo-v0.14.1/src/mostlyright/_http.py | 514fcda | 2026-05-21 | namespace rename only (mostlyright -> mostlyright._internal) |
14
+ | _convert.py | monorepo-v0.14.1/src/mostlyright/_convert.py | 514fcda | 2026-05-21 | namespace rename only |
15
+ | _bounds.py | monorepo-v0.14.1/src/mostlyright/_bounds.py | 514fcda | 2026-05-21 | namespace rename only |
16
+ | _capabilities.py | monorepo-v0.14.1/src/mostlyright/_capabilities.py | 514fcda | 2026-05-21 | namespace rename only |
17
+ | _toon.py | monorepo-v0.14.1/src/mostlyright/_toon.py | 514fcda | 2026-05-22 | ruff-clean RUF002/003 (replace EN DASH in inline comments); body identical |
18
+ | exceptions.py | monorepo-v0.14.1/src/mostlyright/exceptions.py | 514fcda | 2026-05-21 | namespace rename only |
19
+ | versioning.py | monorepo-v0.14.1/src/mostlyright/versioning.py | 514fcda | 2026-05-21 | namespace rename only |
20
+ | models/ | monorepo-v0.14.1/src/mostlyright/models/ | 514fcda | 2026-05-21 | namespace rename only |
21
+ | specs/*.json | monorepo-v0.14.1/src/mostlyright/specs/ | 514fcda | 2026-05-21 | none (data-only) |
22
+ | _stations.py | monorepo-v0.14.1/src/mostlyright/_stations.py | 514fcda | 2026-05-22 | none (pure-data module; no imports to rename) |
23
+ | _pairs.py | monorepo-v0.14.1/src/mostlyright/pairs.py | e78eed5 (blob, in tree 514fcda) | 2026-05-22 | TOON imports + ``to_toon`` function excised; namespace rename |
24
+ | merge/observations.py | monorepo-v0.14.1/ingest/storage/parquet.py:47-48,246-261 | 514fcda | 2026-05-21 | rename ``_dedup_rows`` -> ``merge_observations`` (public API) |
25
+ | merge/climate.py | monorepo-v0.14.1/ingest/storage/parquet.py:477-494 | 514fcda | 2026-05-21 | rename ``_dedup_climate_rows`` -> ``merge_climate`` (public API) |
26
+ | merge/_schemas.py | monorepo-v0.14.1/ingest/storage/parquet.py:50-103 | 514fcda | 2026-05-21 | none (verbatim lift; field order + dtypes preserved) |
27
+
28
+ Any drift in ``merge/`` or ``_pairs.py`` invalidates every historical Kalshi
29
+ NHIGH/NLOW settlement — treat as load-bearing and re-run the parity gate
30
+ (``tests/test_parity.py``) before merging changes here.
31
+ """