mostlyrightmd 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mostlyright/__init__.py +46 -0
- mostlyright/_compose.py +338 -0
- mostlyright/_exact_fetch.py +162 -0
- mostlyright/_internal/__init__.py +31 -0
- mostlyright/_internal/_bounds.py +167 -0
- mostlyright/_internal/_cache_dir.py +90 -0
- mostlyright/_internal/_capabilities.py +274 -0
- mostlyright/_internal/_convert.py +241 -0
- mostlyright/_internal/_http.py +71 -0
- mostlyright/_internal/_pairs.py +522 -0
- mostlyright/_internal/_pandas_compat.py +75 -0
- mostlyright/_internal/_stations.py +692 -0
- mostlyright/_internal/_toon.py +350 -0
- mostlyright/_internal/exceptions.py +69 -0
- mostlyright/_internal/merge/__init__.py +29 -0
- mostlyright/_internal/merge/_schemas.py +120 -0
- mostlyright/_internal/merge/climate.py +68 -0
- mostlyright/_internal/merge/observations.py +43 -0
- mostlyright/_internal/models/__init__.py +7 -0
- mostlyright/_internal/models/_base.py +49 -0
- mostlyright/_internal/models/availability.py +69 -0
- mostlyright/_internal/models/observation.py +129 -0
- mostlyright/_internal/models/station.py +115 -0
- mostlyright/_internal/specs/book_snapshot.json +81 -0
- mostlyright/_internal/specs/brackets.json +63 -0
- mostlyright/_internal/specs/candle.json +66 -0
- mostlyright/_internal/specs/climate.json +60 -0
- mostlyright/_internal/specs/daily_extreme.json +56 -0
- mostlyright/_internal/specs/data_version.json +38 -0
- mostlyright/_internal/specs/event.json +62 -0
- mostlyright/_internal/specs/forecast.json +177 -0
- mostlyright/_internal/specs/forecast_series.json +128 -0
- mostlyright/_internal/specs/market.json +63 -0
- mostlyright/_internal/specs/market_unified.json +92 -0
- mostlyright/_internal/specs/observation.json +144 -0
- mostlyright/_internal/specs/observation_ledger.json +50 -0
- mostlyright/_internal/specs/observation_qc.json +24 -0
- mostlyright/_internal/specs/omo.json +78 -0
- mostlyright/_internal/specs/series.json +58 -0
- mostlyright/_internal/specs/settlement-join.json +76 -0
- mostlyright/_internal/specs/settlement_record.json +56 -0
- mostlyright/_internal/specs/snapshot.json +72 -0
- mostlyright/_internal/specs/synoptic_extremes.json +75 -0
- mostlyright/_internal/versioning.py +132 -0
- mostlyright/core/__init__.py +62 -0
- mostlyright/core/_backend_dispatch.py +174 -0
- mostlyright/core/_json_safe.py +177 -0
- mostlyright/core/_narwhals_compat.py +132 -0
- mostlyright/core/_polars_compat.py +55 -0
- mostlyright/core/exceptions.py +704 -0
- mostlyright/core/formats/__init__.py +42 -0
- mostlyright/core/formats/_toon.py +344 -0
- mostlyright/core/formats/_toon_list_codec.py +213 -0
- mostlyright/core/formats/csv.py +57 -0
- mostlyright/core/formats/dataframe.py +34 -0
- mostlyright/core/formats/json.py +83 -0
- mostlyright/core/formats/parquet.py +56 -0
- mostlyright/core/formats/toon.py +434 -0
- mostlyright/core/merge.py +129 -0
- mostlyright/core/result.py +192 -0
- mostlyright/core/schema.py +334 -0
- mostlyright/core/schemas/__init__.py +39 -0
- mostlyright/core/schemas/forecast.py +122 -0
- mostlyright/core/schemas/forecast_nwp.py +192 -0
- mostlyright/core/schemas/observation.py +201 -0
- mostlyright/core/schemas/observation_ledger.py +117 -0
- mostlyright/core/schemas/observation_qc.py +75 -0
- mostlyright/core/schemas/settlement.py +164 -0
- mostlyright/core/temporal/__init__.py +18 -0
- mostlyright/core/temporal/knowledge_view.py +109 -0
- mostlyright/core/temporal/leakage.py +147 -0
- mostlyright/core/temporal/timepoint.py +253 -0
- mostlyright/core/validator.py +465 -0
- mostlyright/discover.py +100 -0
- mostlyright/discovery.py +273 -0
- mostlyright/forecasts.py +267 -0
- mostlyright/international.py +423 -0
- mostlyright/live/__init__.py +39 -0
- mostlyright/live/_latest.py +194 -0
- mostlyright/live/_sources.py +106 -0
- mostlyright/live/_stream.py +108 -0
- mostlyright/mode2.py +235 -0
- mostlyright/preprocessing.py +173 -0
- mostlyright/qc.py +240 -0
- mostlyright/research.py +1669 -0
- mostlyright/snapshot.py +504 -0
- mostlyright/transforms.py +201 -0
- mostlyrightmd-0.1.0.dist-info/METADATA +48 -0
- mostlyrightmd-0.1.0.dist-info/RECORD +90 -0
- mostlyrightmd-0.1.0.dist-info/WHEEL +4 -0
mostlyright/__init__.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""mostlyright — local-first SDK for prediction-market weather settlement research.
|
|
2
|
+
|
|
3
|
+
Sprint 0 v0.1.0 ships:
|
|
4
|
+
- ``mostlyright.research(station, from_date, to_date, ...)`` — the v0.14.1 ``pairs()`` join,
|
|
5
|
+
lifted from monorepo-v0.14.1, calling AWC + IEM + GHCNh + NWS CLI directly.
|
|
6
|
+
- ``mostlyright.snapshot`` — settlement-window math (LST, market_close_utc).
|
|
7
|
+
|
|
8
|
+
Adjacent surfaces:
|
|
9
|
+
- ``mostlyright.weather`` — observations + climate + forecasts (sibling package ``mostlyrightmd-weather``).
|
|
10
|
+
- ``mostlyright.markets`` — Kalshi + Polymarket metadata (sibling package ``mostlyrightmd-markets``,
|
|
11
|
+
ships v0.1.0 in Sprint 0.5).
|
|
12
|
+
|
|
13
|
+
Namespace note: ``mostlyright`` is a split-distribution namespace package. Core owns this
|
|
14
|
+
``__init__.py``; sibling distributions ``mostlyrightmd-weather`` and ``mostlyrightmd-markets`` ship
|
|
15
|
+
subdirectories (``mostlyright/weather/``, ``mostlyright/markets/``) WITHOUT their own
|
|
16
|
+
namespace-root ``__init__.py``. The pkgutil declaration below extends ``__path__`` so Python's
|
|
17
|
+
import machinery finds those subpackages from whichever site-packages location installed them.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
# Split-distribution namespace: extend __path__ to discover sibling packages' contributions.
|
|
21
|
+
__path__ = __import__("pkgutil").extend_path(__path__, __name__)
|
|
22
|
+
|
|
23
|
+
# Runtime version string. This file ships inside the ``mostlyrightmd`` 0.1.0
# wheel, so it must match the distribution metadata rather than carry the
# stale ``rc1`` pre-release suffix.
__version__ = "0.1.0"
|
|
24
|
+
|
|
25
|
+
from mostlyright.discover import discover
|
|
26
|
+
from mostlyright.research import research
|
|
27
|
+
|
|
28
|
+
__all__ = ["__version__", "discover", "live", "research"]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# Lazy `mostlyright.live` access (Phase 11). Both `discover` and `research`
|
|
32
|
+
# above already eagerly import `mostlyright.core` (which pulls pandas via
|
|
33
|
+
# `core.validator`), so the eager-pandas path is pre-existing and NOT a
|
|
34
|
+
# Phase 11 regression. Even so, we expose `live` through a module-level
|
|
35
|
+
# `__getattr__` hook so `import mostlyright` doesn't pull in
|
|
36
|
+
# `mostlyright.weather` (via the live module's deferred fetcher imports
|
|
37
|
+
# that fire on first use, not first attribute access). First access via
|
|
38
|
+
# `mostlyright.live.stream(...)` resolves and caches the submodule.
|
|
39
|
+
def __getattr__(name: str):
    """Resolve the ``live`` submodule lazily on first attribute access.

    Only ``"live"`` is handled: the submodule is imported, stored in this
    module's globals, and returned. Every other name raises
    ``AttributeError`` with the usual "module ... has no attribute ..."
    message.
    """
    if name != "live":
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    import mostlyright.live as _live

    # Store the submodule in the module namespace so later lookups find it
    # directly and no longer reach this hook.
    globals()["live"] = _live
    return _live
|
mostlyright/_compose.py
ADDED
|
@@ -0,0 +1,338 @@
|
|
|
1
|
+
"""Phase 10 — composable ``research()`` dispatcher.
|
|
2
|
+
|
|
3
|
+
Translates the new selectors (``city=``, ``contract=``, ``contracts=``)
|
|
4
|
+
into resolution metadata + station tuples that the existing
|
|
5
|
+
station-based ``research()`` machinery consumes. Cross-issuer annotation
|
|
6
|
+
(``settles_for``) is computed here so the dispatch layer is the single
|
|
7
|
+
source of truth for "which markets settle against which stations."
|
|
8
|
+
|
|
9
|
+
The dispatcher is intentionally pure (no I/O, no DataFrame
|
|
10
|
+
construction) so unit tests run instantly and the same logic can be
|
|
11
|
+
reused by ``discover()`` and the TS counterpart.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import warnings
|
|
17
|
+
from typing import Any
|
|
18
|
+
|
|
19
|
+
#: Names of the selector keyword arguments accepted by ``research()``.
#: Each call must supply exactly one of them; supplying none or more than
#: one raises ``ValueError``.
_SELECTOR_NAMES: tuple[str, ...] = (
    "station",
    "city",
    "contract",
    "contracts",
)


#: Maps a Kalshi ticker city suffix to the canonical city slug used as the
#: key of the ``KALSHI_SETTLEMENT_STATIONS`` catalog. Kalshi ticker suffixes
#: vary in length (``KXHIGHNY-...`` uses ``NY``, ``KXHIGHCHI-...`` uses
#: ``CHI``, ``KXHIGHLAX-...`` uses ``LAX``) while the catalog is keyed by the
#: canonical 3-letter slug, so the suffix is normalized through this table
#: before lookup. Without the ``NY`` entry a ticker such as
#: ``kalshi:KXHIGHNY-25MAY26-T79`` would fail to resolve.
#:
#: Only suffixes that differ from the canonical slug are listed; any suffix
#: absent from the table is used verbatim (implicit identity mapping).
_KALSHI_TICKER_ALIASES: dict[str, str] = dict(NY="NYC")
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
#: Cross-issuer city slug aliases, keyed by the lower-cased Kalshi short
#: slug. ``resolve_city`` and ``annotate_settles_for`` need to recognize BOTH
#: the Kalshi short form and the Polymarket long form so that a single call
#: with EITHER input surfaces the cross-issuer settlement neighborhood (for
#: example ``"LAX"`` must also find the Polymarket ``los_angeles`` entry, and
#: ``"chicago"`` must also find the Kalshi ``CHI`` entry).
_CITY_SLUG_ALIASES: dict[str, tuple[str, str]] = {
    # short_kalshi: (long_polymarket, canonical_kalshi_upper)
    "nyc": ("nyc", "NYC"),
    "chi": ("chicago", "CHI"),
    "lax": ("los_angeles", "LAX"),
    "mia": ("miami", "MIA"),
    "den": ("denver", "DEN"),
    "bos": ("boston", "BOS"),
    "aus": ("austin", "AUS"),
    "dca": ("washington_dc", "DCA"),
    "phl": ("philadelphia", "PHL"),
    "sfo": ("san_francisco", "SFO"),
    "sea": ("seattle", "SEA"),
    "atl": ("atlanta", "ATL"),
    "hou": ("houston", "HOU"),
    "dal": ("dallas", "DAL"),
    "phx": ("phoenix", "PHX"),
    "msp": ("minneapolis", "MSP"),
    "dtw": ("detroit", "DTW"),
}

# Reverse lookup keyed by the Polymarket long slug, so that passing the long
# form also surfaces the Kalshi short form:
# long_polymarket -> (short_kalshi, canonical_kalshi_upper).
_CITY_SLUG_ALIASES_REVERSE: dict[str, tuple[str, str]] = {
    long_poly: (short_kalshi, kalshi_upper)
    for short_kalshi, (long_poly, kalshi_upper) in _CITY_SLUG_ALIASES.items()
}


def _normalize_city_slugs(city: str) -> tuple[str, str]:
    """Return ``(polymarket_slug_lower, kalshi_slug_upper)`` for ``city``.

    Accepts either slug form in any letter case (``"nyc"`` or ``"NYC"``,
    ``"chicago"`` or ``"CHI"``) and returns both canonical forms so callers
    can probe either catalog. Surrounding whitespace is ignored so that
    padded input (for example ``" NYC "``) resolves like its trimmed form.

    Cities absent from the alias tables fall back to
    ``(city.lower(), city.upper())`` computed on the trimmed input.

    Args:
        city: city slug in Kalshi-short or Polymarket-long form.

    Returns:
        Tuple of the lower-case Polymarket slug and the upper-case Kalshi
        slug.
    """
    trimmed = city.strip()
    lower = trimmed.lower()
    if lower in _CITY_SLUG_ALIASES:
        # Input was the Kalshi short form; the table value is already the
        # (polymarket_slug, kalshi_slug) pair.
        return _CITY_SLUG_ALIASES[lower]
    if lower in _CITY_SLUG_ALIASES_REVERSE:
        # Input was the Polymarket long form; only the Kalshi upper slug is
        # needed from the reverse entry.
        return lower, _CITY_SLUG_ALIASES_REVERSE[lower][1]
    return lower, trimmed.upper()
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
class StationOverrideWarning(UserWarning):
    """Warn that ``station_override=`` departs from the canonical station.

    Issued when the caller deliberately supplies a ``station_override=``
    that does not match the contract's canonical settlement station. The
    affected output row carries ``settlement_mismatch=True`` so downstream
    backtest code can filter or flag these silently-divergent rows.
    """
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def validate_selectors(
|
|
105
|
+
*,
|
|
106
|
+
station: str | None = None,
|
|
107
|
+
city: str | None = None,
|
|
108
|
+
contract: str | None = None,
|
|
109
|
+
contracts: list[str] | tuple[str, ...] | None = None,
|
|
110
|
+
) -> str:
|
|
111
|
+
"""Validate that exactly one selector is provided; return the active name.
|
|
112
|
+
|
|
113
|
+
Args:
|
|
114
|
+
station, city, contract, contracts: the four mutually-exclusive
|
|
115
|
+
selectors. Exactly one must be non-None / non-empty.
|
|
116
|
+
|
|
117
|
+
Returns:
|
|
118
|
+
The name of the active selector (``"station"`` / ``"city"`` /
|
|
119
|
+
``"contract"`` / ``"contracts"``).
|
|
120
|
+
|
|
121
|
+
Raises:
|
|
122
|
+
ValueError: zero or >1 selectors provided.
|
|
123
|
+
"""
|
|
124
|
+
provided: list[str] = []
|
|
125
|
+
if station is not None and station != "":
|
|
126
|
+
provided.append("station")
|
|
127
|
+
if city is not None and city != "":
|
|
128
|
+
provided.append("city")
|
|
129
|
+
if contract is not None and contract != "":
|
|
130
|
+
provided.append("contract")
|
|
131
|
+
if contracts is not None and len(contracts) > 0:
|
|
132
|
+
provided.append("contracts")
|
|
133
|
+
if not provided:
|
|
134
|
+
raise ValueError(
|
|
135
|
+
"research(): exactly one of station=, city=, contract=, contracts= must be provided"
|
|
136
|
+
)
|
|
137
|
+
if len(provided) > 1:
|
|
138
|
+
raise ValueError(f"research(): selectors are mutually exclusive; got {provided!r}")
|
|
139
|
+
return provided[0]
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def resolve_contract(contract_id: str) -> tuple[str, str]:
    """Resolve a ``"<issuer>:<id>"`` string to ``(station, issuer)``.

    Supported issuers:

    - ``kalshi:`` — ``KHIGH*`` / ``KXHIGH*`` / ``KLOW*`` / ``KXLOW*`` city
      tickers, optionally followed by a ``-DATE-STRIKE`` suffix.
    - ``polymarket:`` — always raises ``NotImplementedError`` with an
      actionable message; this integration is deferred.

    Args:
        contract_id: ``"<issuer>:<id>"`` string (e.g. ``"kalshi:KXHIGHNYC"``
            or ``"polymarket:0x..."``). The issuer is matched
            case-insensitively and the id is upper-cased before parsing.

    Returns:
        Tuple of ``(station_icao, issuer_name)``.

    Raises:
        ValueError: malformed contract id, unknown issuer, or a Kalshi id
            that does not start with a supported prefix.
        NotImplementedError: Polymarket contract resolution (deferred).
    """
    if not (isinstance(contract_id, str) and ":" in contract_id):
        raise ValueError(f"contract id must be `<issuer>:<id>`; got {contract_id!r}")

    issuer_part, _, raw = contract_id.partition(":")
    issuer = issuer_part.lower()

    if issuer == "polymarket":
        raise NotImplementedError(
            "polymarket contract resolution requires event_id → station lookup "
            "via polymarket_discover() or polymarket_settle(); Phase 10 v0.2 "
            "defers this integration to v0.3. Use `city='nyc'` or pass the "
            "station explicitly via `station_override=` until then."
        )
    if issuer != "kalshi":
        raise ValueError(f"unknown issuer prefix: {issuer!r}; expected kalshi or polymarket")

    # Imported lazily so the non-Kalshi paths above never need the markets
    # package.
    from datetime import date as _date

    from mostlyright.markets.catalog import kalshi_nhigh, kalshi_nlow

    # Modern tickers use the KX exchange prefix (KXHIGH<CITY>-<DATE>-<STRIKE>)
    # while the resolvers are fed the legacy KHIGH<CITY> / KLOW<CITY> shape:
    # drop only the 'X' so that KXHIGHNYC becomes KHIGHNYC.
    ticker = raw.upper()
    if ticker.startswith("KX"):
        ticker = "K" + ticker[2:]
    # Trim any trailing -DATE-STRIKE suffix at the first '-'.
    series = ticker.partition("-")[0]

    # KHIGH* goes to the daily-high resolver, KLOW* to the daily-low one.
    for prefix, resolver in (("KHIGH", kalshi_nhigh), ("KLOW", kalshi_nlow)):
        if series.startswith(prefix) and len(series) > len(prefix):
            suffix = series[len(prefix):]
            # Normalize the variable-length city suffix (e.g. NY -> NYC).
            canonical = _KALSHI_TICKER_ALIASES.get(suffix, suffix)
            resolved = resolver.resolve(f"{prefix}{canonical}", _date.today())
            return resolved.settlement_station, "kalshi"

    raise ValueError(
        f"unsupported kalshi contract format: {raw!r}; "
        "expected KHIGH<CITY>* / KXHIGH<CITY>* / KLOW<CITY>* / "
        "KXLOW<CITY>* prefix"
    )
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def resolve_city(city: str) -> tuple[str, ...]:
    """Resolve a city slug to all stations any issuer settles against.

    Returns a deduplicated tuple in stable order:

    1. Kalshi's settlement station (if the city is in the Kalshi catalog).
    2. Polymarket's ``default``, ``high`` and ``low`` stations, in that
       order (if the city is in the Polymarket catalog).
    3. Polymarket per-city denylist entries, appended in sorted order.
       These are forbidden-but-known stations, surfaced so callers can see
       the full neighborhood before an explicit ``station_override=``.

    For example, ``"NYC"`` is expected to yield Kalshi's ``KNYC``, then
    Polymarket's ``KLGA``, then the denylisted ``KEWR`` / ``KJFK``. The
    exact values depend on the catalogs, which are not defined in this
    module.

    Args:
        city: city slug. Accepts ``"NYC"`` (Kalshi upper) or ``"nyc"``
            (Polymarket lower); both are normalized.

    Returns:
        Tuple of station ICAOs.

    Raises:
        ValueError: ``city`` is not a non-empty string, or it is found in
            neither catalog nor the denylist.
    """
    if not (isinstance(city, str) and city):
        raise ValueError(f"city must be a non-empty str; got {city!r}")

    from mostlyright.markets._per_event_station import load_polymarket_city_stations
    from mostlyright.markets.catalog.kalshi_stations import (
        KALSHI_SETTLEMENT_STATIONS,
    )
    from mostlyright.markets.polymarket import KNOWN_WRONG_STATIONS as POLY_WRONG

    # Normalize through the cross-issuer alias table so either slug form
    # probes BOTH catalogs.
    poly_slug, kalshi_slug = _normalize_city_slugs(city)

    # Gather every candidate in priority order first; duplicates are removed
    # afterwards.
    candidates: list[str] = []
    if kalshi_slug in KALSHI_SETTLEMENT_STATIONS:
        candidates.append(KALSHI_SETTLEMENT_STATIONS[kalshi_slug].station)

    poly_catalog = load_polymarket_city_stations()
    if poly_slug in poly_catalog:
        per_measure = poly_catalog[poly_slug]
        for measure in ("default", "high", "low"):
            code = per_measure.get(measure)
            if code:
                candidates.append(code)

    candidates.extend(sorted(POLY_WRONG.get(poly_slug, frozenset())))

    # Order-preserving de-duplication: the first occurrence wins.
    stations: list[str] = []
    for code in candidates:
        if code not in stations:
            stations.append(code)

    if not stations:
        raise ValueError(f"unknown city {city!r}; not in kalshi or polymarket catalogs")
    return tuple(stations)
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
def annotate_settles_for(station: str, city: str | None) -> list[str]:
|
|
279
|
+
"""Return the list of ``"<issuer>:<ticker>"`` markers that settle
|
|
280
|
+
against ``station`` for ``city``.
|
|
281
|
+
|
|
282
|
+
Empty list means no known issuer settles against this station for
|
|
283
|
+
this city (typically a denylist entry surfaced by
|
|
284
|
+
:func:`resolve_city` for the caller's awareness).
|
|
285
|
+
|
|
286
|
+
Args:
|
|
287
|
+
station: 4-char K-prefix ICAO.
|
|
288
|
+
city: city slug (optional; when None, returns empty list).
|
|
289
|
+
|
|
290
|
+
Returns:
|
|
291
|
+
Sorted list of ``"kalshi:CITY"`` / ``"polymarket:city"`` markers.
|
|
292
|
+
"""
|
|
293
|
+
out: list[str] = []
|
|
294
|
+
if city is None:
|
|
295
|
+
return out
|
|
296
|
+
from mostlyright.markets._per_event_station import load_polymarket_city_stations
|
|
297
|
+
from mostlyright.markets.catalog.kalshi_stations import (
|
|
298
|
+
KALSHI_SETTLEMENT_STATIONS,
|
|
299
|
+
)
|
|
300
|
+
|
|
301
|
+
# Iter-1 python-architect HIGH: use cross-issuer slug alias so the
|
|
302
|
+
# annotation works regardless of which slug-form the caller passed.
|
|
303
|
+
poly_slug, kalshi_slug = _normalize_city_slugs(city)
|
|
304
|
+
if (
|
|
305
|
+
kalshi_slug in KALSHI_SETTLEMENT_STATIONS
|
|
306
|
+
and KALSHI_SETTLEMENT_STATIONS[kalshi_slug].station == station
|
|
307
|
+
):
|
|
308
|
+
out.append(f"kalshi:{kalshi_slug}")
|
|
309
|
+
poly = load_polymarket_city_stations()
|
|
310
|
+
if poly_slug in poly and station in poly[poly_slug].values():
|
|
311
|
+
out.append(f"polymarket:{poly_slug}")
|
|
312
|
+
return sorted(out)
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
def emit_override_warning(contract_station: str, override_station: str) -> None:
    """Emit a :class:`StationOverrideWarning` for a deliberate mismatch.

    Args:
        contract_station: the contract's canonical settlement station.
        override_station: the station the caller supplied instead.
    """
    message = (
        f"station_override={override_station!r} differs from contract's "
        f"canonical settlement station {contract_station!r}; output row will "
        "carry settlement_mismatch=True"
    )
    # stacklevel=3 attributes the warning two frames above this helper.
    warnings.warn(message, category=StationOverrideWarning, stacklevel=3)
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
__all__ = [
|
|
327
|
+
"StationOverrideWarning",
|
|
328
|
+
"annotate_settles_for",
|
|
329
|
+
"emit_override_warning",
|
|
330
|
+
"resolve_city",
|
|
331
|
+
"resolve_contract",
|
|
332
|
+
"validate_selectors",
|
|
333
|
+
]
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
# Silence the `Any` import warning — kept for ruff future-proofing if/when
|
|
337
|
+
# the dispatch layer needs to type DataFrame returns.
|
|
338
|
+
_ = Any
|
mostlyright/_exact_fetch.py
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
"""Exact-window obs fetcher — bypasses year-aligned monthly cache.
|
|
2
|
+
|
|
3
|
+
Used by `mostlyright.weather.obs(strategy="exact_window")` to serve small,
|
|
4
|
+
caller-bounded windows (e.g. 1-month backtest replays) without pulling a
|
|
5
|
+
full calendar year of IEM CSV.
|
|
6
|
+
|
|
7
|
+
DOES NOT WRITE to the canonical `observations/{STATION}/{YYYY}/{MM}.parquet`
|
|
8
|
+
cache — exact_window queries are treated as transient. Callers who need
|
|
9
|
+
warm-cache speedups for repeated calls should use `strategy="warm_cache"`.
|
|
10
|
+
|
|
11
|
+
Source filtering is enforced at the FETCHER BOUNDARY (not post-merge):
|
|
12
|
+
post-merge filtering would silently drop rows where the named source lost
|
|
13
|
+
the priority tie to a HIGHER-priority source that this call also fetched.
|
|
14
|
+
By gating each fetcher behind `source in (None, "<name>")`, the merge sees
|
|
15
|
+
only rows from the requested source(s) and the priority resolution is
|
|
16
|
+
semantically correct.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
from datetime import UTC, date, datetime, timedelta
|
|
22
|
+
from typing import TYPE_CHECKING, Any, Literal
|
|
23
|
+
|
|
24
|
+
from mostlyright._internal.merge import merge_observations
|
|
25
|
+
from mostlyright.weather._awc import awc_to_observation
|
|
26
|
+
from mostlyright.weather._fetchers.awc import fetch_awc_metars
|
|
27
|
+
from mostlyright.weather._fetchers.ghcnh import download_ghcnh
|
|
28
|
+
from mostlyright.weather._fetchers.iem_asos import download_iem_asos
|
|
29
|
+
from mostlyright.weather._ghcnh import parse_ghcnh_file
|
|
30
|
+
from mostlyright.weather._iem import parse_iem_file
|
|
31
|
+
|
|
32
|
+
if TYPE_CHECKING:
|
|
33
|
+
from mostlyright._internal.models.station import StationInfo
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
Source = Literal["iem", "ghcnh", "awc"]
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _exact_fetch_observations(
    info: StationInfo,
    from_date_iso: str,
    to_date_iso: str,
    *,
    source: Source | None = None,
) -> list[dict[str, Any]]:
    """Fetch and merge observation rows for exactly ``[from_date, to_date]``.

    IEM is queried with ``exact_window=True`` and its downloads go to a
    dedicated ``iem_asos_exact`` directory under the sources root. Source
    selection is applied before fetching: a fetcher runs only when
    ``source`` is ``None`` or names it.

    Parameters
    ----------
    info : StationInfo
        Resolved station metadata; ``icao``, ``code`` and ``ghcnh_id`` are
        read here.
    from_date_iso, to_date_iso : str
        ISO ``YYYY-MM-DD`` strings, inclusive bounds of the window.
    source : {"iem", "ghcnh", "awc"} | None
        If set, only that source is queried. If ``None``, all three are
        queried and the combined rows are passed to ``merge_observations``.

    Returns
    -------
    list[dict]
        The result of ``merge_observations`` over the collected rows.
    """
    # Imported here rather than at module level to keep the dependency on
    # ``mostlyright.research`` one-way; only the path helper is needed.
    from mostlyright.research import _sources_root

    window_start = date.fromisoformat(from_date_iso)
    window_end = date.fromisoformat(to_date_iso)
    # Fetch one extra day so the UTC tail of the last LST settlement window
    # is included in the date-ranged downloads.
    fetch_end = window_end + timedelta(days=1)

    root = _sources_root()
    collected: list[dict[str, Any]] = []

    # --- IEM ASOS ----------------------------------------------------------
    # Exact-window CSVs live in their own `iem_asos_exact` directory, never
    # in `iem_asos`.
    if source in (None, "iem"):
        iem_dir = root / "iem_asos_exact"
        # Two IEM report types are fetched: 3 is tagged METAR, 4 is tagged
        # SPECI, so the merge sees both.
        for report_type, obs_type in ((3, "METAR"), (4, "SPECI")):
            csv_paths = download_iem_asos(
                info,
                window_start,
                fetch_end,
                iem_dir,
                report_type=report_type,
                exact_window=True,
            )
            for csv_path in csv_paths:
                collected.extend(parse_iem_file(csv_path, observation_type_override=obs_type))

    # --- AWC METAR (live 168h only) ----------------------------------------
    # `fetch_awc_metars` takes a look-back in hours, not a date range, so the
    # HTTP call is skipped when `to_date` is more than 7 days before today
    # (UTC). Rows are NOT filtered by UTC date here; per the original notes,
    # window filtering is left to settlement-date bucketing downstream.
    if source in (None, "awc"):
        earliest_awc_day = datetime.now(UTC).date() - timedelta(days=7)
        if window_end >= earliest_awc_day:
            for metar in fetch_awc_metars([info.icao], hours=168):
                converted = awc_to_observation(metar)
                # Skip rows that could not be converted and, defensively,
                # rows that belong to another station.
                if converted is None or converted.get("station_code") != info.code:
                    continue
                collected.append(converted)

    # --- GHCNh (per-station-year) ------------------------------------------
    if source in (None, "ghcnh"):
        import httpx

        from mostlyright.weather.cache import _is_current_lst_year

        ghcnh_dir = root / "ghcnh"
        # One file per station-year: walk every calendar year the extended
        # window touches. The cache is bypassed whenever
        # `_is_current_lst_year` reports true for that year.
        for year in range(window_start.year, fetch_end.year + 1):
            force_refresh = _is_current_lst_year(info.icao, year)
            try:
                psv_path = download_ghcnh(
                    info.ghcnh_id, year, ghcnh_dir, skip_cache=force_refresh
                )
            except httpx.HTTPStatusError as err:
                # A 404 is treated as "no data for this station-year" and
                # skipped; any other HTTP error propagates.
                if err.response.status_code != 404:
                    raise
                continue
            for record in parse_ghcnh_file(psv_path):
                if record.get("station_code") == info.code:
                    collected.append(record)

    # Sort by (observed_at, source) BEFORE merging. Per the original notes,
    # merge_observations is first-seen-wins at equal priority and returns
    # survivors in insertion order, so input order is load-bearing.
    def _merge_order(row: dict[str, Any]) -> tuple[Any, Any]:
        return (row.get("observed_at") or "", row.get("source") or "")

    collected.sort(key=_merge_order)

    # merge_observations takes the single row list; source filtering already
    # happened above, so the merged rows are not filtered by source again.
    return merge_observations(collected)
|
mostlyright/_internal/__init__.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""mostlyright._internal — shared utilities lifted from monorepo-v0.14.1.
|
|
2
|
+
|
|
3
|
+
NOT a public API. Module names start with underscore to discourage downstream
|
|
4
|
+
use; rely on ``mostlyright.research()`` / ``mostlyright.snapshot.*`` instead.
|
|
5
|
+
|
|
6
|
+
Lift inventory (provenance for parity-critical code). Source SHA refers to the
|
|
7
|
+
v0.14.1 release tag of ``Tarabcak/monorepo`` (commit
|
|
8
|
+
``514fcdab227e845145ca32b989355647466231d9``); ``_pairs.py`` additionally
|
|
9
|
+
pins the exact source-file blob SHA from that tree.
|
|
10
|
+
|
|
11
|
+
| Module | Source path | Source SHA | Lift date | Modifications |
|
|
12
|
+
|-----------------------|----------------------------------------------------------|--------------|------------|------------------------------------------------------------------------|
|
|
13
|
+
| _http.py | monorepo-v0.14.1/src/mostlyright/_http.py | 514fcda | 2026-05-21 | namespace rename only (mostlyright -> mostlyright._internal) |
|
|
14
|
+
| _convert.py | monorepo-v0.14.1/src/mostlyright/_convert.py | 514fcda | 2026-05-21 | namespace rename only |
|
|
15
|
+
| _bounds.py | monorepo-v0.14.1/src/mostlyright/_bounds.py | 514fcda | 2026-05-21 | namespace rename only |
|
|
16
|
+
| _capabilities.py | monorepo-v0.14.1/src/mostlyright/_capabilities.py | 514fcda | 2026-05-21 | namespace rename only |
|
|
17
|
+
| _toon.py | monorepo-v0.14.1/src/mostlyright/_toon.py | 514fcda | 2026-05-22 | ruff-clean RUF002/003 (replace EN DASH in inline comments); body identical |
|
|
18
|
+
| exceptions.py | monorepo-v0.14.1/src/mostlyright/exceptions.py | 514fcda | 2026-05-21 | namespace rename only |
|
|
19
|
+
| versioning.py | monorepo-v0.14.1/src/mostlyright/versioning.py | 514fcda | 2026-05-21 | namespace rename only |
|
|
20
|
+
| models/ | monorepo-v0.14.1/src/mostlyright/models/ | 514fcda | 2026-05-21 | namespace rename only |
|
|
21
|
+
| specs/*.json | monorepo-v0.14.1/src/mostlyright/specs/ | 514fcda | 2026-05-21 | none (data-only) |
|
|
22
|
+
| _stations.py | monorepo-v0.14.1/src/mostlyright/_stations.py | 514fcda | 2026-05-22 | none (pure-data module; no imports to rename) |
|
|
23
|
+
| _pairs.py | monorepo-v0.14.1/src/mostlyright/pairs.py | e78eed5 (blob, in tree 514fcda) | 2026-05-22 | TOON imports + ``to_toon`` function excised; namespace rename |
|
|
24
|
+
| merge/observations.py | monorepo-v0.14.1/ingest/storage/parquet.py:47-48,246-261 | 514fcda | 2026-05-21 | rename ``_dedup_rows`` -> ``merge_observations`` (public API) |
|
|
25
|
+
| merge/climate.py | monorepo-v0.14.1/ingest/storage/parquet.py:477-494 | 514fcda | 2026-05-21 | rename ``_dedup_climate_rows`` -> ``merge_climate`` (public API) |
|
|
26
|
+
| merge/_schemas.py | monorepo-v0.14.1/ingest/storage/parquet.py:50-103 | 514fcda | 2026-05-21 | none (verbatim lift; field order + dtypes preserved) |
|
|
27
|
+
|
|
28
|
+
Any drift in ``merge/`` or ``_pairs.py`` invalidates every historical Kalshi
|
|
29
|
+
NHIGH/NLOW settlement — treat as load-bearing and re-run the parity gate
|
|
30
|
+
(``tests/test_parity.py``) before merging changes here.
|
|
31
|
+
"""
|