PyPI - mostlyrightmd - Versions diffs - 1.2.0__tar.gz → 1.3.0__tar.gz - Mend

mostlyrightmd 1.2.0tar.gz → 1.3.0tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (156) hide show

{mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mostlyrightmd
-Version: 1.2.0
+Version: 1.3.0
 Summary: Python SDK for quants, ML engineers, and AI agents — one interface to public data. Adapters ship weather + prediction-market settlements (Kalshi NHIGH/NLOW, Polymarket) today; SEC filings, Federal Reserve series, court filings, FDA approvals, and equities are next. Schema-versioned, leakage-free, local-first. Imports as `mostlyright`.
 Project-URL: Homepage, https://mostlyright.md
 Project-URL: Documentation, https://mostlyright.md/docs/sdk/

{mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "mostlyrightmd"
-version = "1.2.0"
+version = "1.3.0"
 description = "Python SDK for quants, ML engineers, and AI agents — one interface to public data. Adapters ship weather + prediction-market settlements (Kalshi NHIGH/NLOW, Polymarket) today; SEC filings, Federal Reserve series, court filings, FDA approvals, and equities are next. Schema-versioned, leakage-free, local-first. Imports as `mostlyright`."
 readme = "README.md"
 license = "MIT"

{mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/core/exceptions.py RENAMED Viewed

@@ -31,6 +31,7 @@ __all__ = [
     "DeprecatedModelWarning",
     "GribIntegrityError",
     "HistoricalDepthError",
+    "IssuedAtMissingError",
     "LeakageError",
     "LiveStreamError",
     "NoLiveDataError",
@@ -38,6 +39,7 @@ __all__ = [
     "NwpError",
     "NwpModelNotAvailableError",
     "NwpModelRetiredError",
+    "OpenMeteoSeamlessLeakageError",
     "PayloadTooLargeError",
     "SchemaValidationError",
     "SourceMismatchError",
@@ -738,6 +740,113 @@ class LiveStreamError(TradewindsError):
     default_error_code = "LIVE_STREAM_ERROR"
+#: Sentinel ``as_of`` value used when raising OpenMeteoSeamlessLeakageError
+#: before the offending DataFrame is known (e.g. seamless-endpoint refusal at
+#: fetch time, where no rows have been retrieved yet). LeakageError requires a
+#: non-empty ``as_of``, so OpenMeteoSeamlessLeakageError supplies this sentinel
+#: rather than forging a timestamp. IssuedAtMissingError does not use it: it is
+#: a SchemaValidationError and carries no ``as_of``.
+_NO_AS_OF_SENTINEL = "(seamless-endpoint-refused-before-fetch)"
+class IssuedAtMissingError(SchemaValidationError):
+    """A forecast row is missing the ``issued_at`` field.
+    Raised when a forecast row would land in the DataFrame with
+    ``issued_at IS NULL``. For Open-Meteo Previous Runs API this should be
+    impossible by construction (the fetcher derives ``issued_at`` per row
+    via the conservative lower-bound formula). For Live mode, this is
+    raised when cycle-math fallback cannot derive a non-null cycle.
+    Origin: ``Tarabcak/mostlyright#70`` — the legacy seamless-feed bug
+    where ``/forecast_series`` proxied Open-Meteo's seamless endpoint
+    without preserving ``issued_at``, causing post-snapshot model runs to
+    silently leak into training data.
+    Phase 20 OM-04.
+    Args:
+        message: Human-readable description, forwarded to the base class.
+        source: Forwarded unchanged to :class:`SchemaValidationError`.
+        violating_count: Number of offending rows; stored on the instance
+            and echoed by :meth:`_payload`.
+        sample_violations: Forwarded unchanged to the base class.
+        request_id: Forwarded unchanged to the base class.
+        error_code: Forwarded unchanged to the base class.
+    """
+    default_error_code = "ISSUED_AT_MISSING"
+    def __init__(
+        self,
+        message: str = "",
+        *,
+        source: str | None = None,
+        violating_count: int = 0,
+        sample_violations: list[dict[str, Any]] | None = None,
+        request_id: str | None = None,
+        error_code: str | None = None,
+    ) -> None:
+        # The schema id and the single ``issued_at`` / ``non_null`` violation
+        # are fixed here; callers only supply counts, samples and provenance.
+        super().__init__(
+            message,
+            schema_id="schema.forecast.station.v1",
+            violations=[{"column": "issued_at", "rule": "non_null"}],
+            sample_violations=sample_violations,
+            source=source,
+            request_id=request_id,
+            error_code=error_code,
+        )
+        self.violating_count: int = violating_count
+    def _payload(self) -> dict[str, Any]:
+        """Return the base payload extended with name, row count and origin issue."""
+        payload = super()._payload()
+        # Overwrite/add the keys specific to this error on top of the base payload.
+        payload["name"] = "IssuedAtMissingError"
+        payload["violating_count"] = self.violating_count
+        payload["origin_issue"] = "Tarabcak/mostlyright#70"
+        return payload
+class OpenMeteoSeamlessLeakageError(LeakageError):
+    """The Open-Meteo Historical Forecast (seamless) endpoint was used
+    without ``allow_leakage=True`` opt-in.
+    Per Phase 20 D-01 (locked decision): the seamless endpoint silently
+    stitches forecasts from multiple model cycles into a continuous
+    timeseries; the cycle that produced each value is unrecoverable from
+    the response. :class:`LeakageDetector` rejects rows tagged
+    ``source="open_meteo.seamless"`` whenever ``as_of`` is asserted.
+    Origin: ``Tarabcak/mostlyright#70``.
+    Phase 20 OM-04.
+    Args:
+        message: Human-readable description, forwarded to the base class.
+        model: Stored on the instance and echoed by :meth:`_payload`.
+        endpoint_url: Stored on the instance and echoed by :meth:`_payload`.
+        as_of: Cutoff forwarded to :class:`LeakageError`; a falsy value
+            (``None`` or ``""``) is replaced by ``_NO_AS_OF_SENTINEL``.
+        violating_count: Forwarded unchanged to the base class.
+        sample_violations: Forwarded unchanged to the base class.
+        source: Forwarded unchanged to the base class.
+        request_id: Forwarded unchanged to the base class.
+        error_code: Forwarded unchanged to the base class.
+    """
+    default_error_code = "OPEN_METEO_SEAMLESS_LEAKAGE"
+    def __init__(
+        self,
+        message: str = "",
+        *,
+        model: str = "",
+        endpoint_url: str = "",
+        as_of: str | None = None,
+        violating_count: int = 0,
+        sample_violations: list[dict[str, Any]] | None = None,
+        source: str | None = None,
+        request_id: str | None = None,
+        error_code: str | None = None,
+    ) -> None:
+        super().__init__(
+            message,
+            # ``or`` (not ``is None``): an empty string also falls back to the
+            # module-level sentinel instead of reaching LeakageError.
+            as_of=as_of or _NO_AS_OF_SENTINEL,
+            violating_count=violating_count,
+            sample_violations=sample_violations,
+            source=source,
+            request_id=request_id,
+            error_code=error_code,
+        )
+        self.model: str = model
+        self.endpoint_url: str = endpoint_url
+    def _payload(self) -> dict[str, Any]:
+        """Return the base payload extended with name, model, endpoint and origin issue."""
+        payload = super()._payload()
+        # Overwrite/add the keys specific to this error on top of the base payload.
+        payload["name"] = "OpenMeteoSeamlessLeakageError"
+        payload["model"] = self.model
+        payload["endpoint_url"] = self.endpoint_url
+        payload["origin_issue"] = "Tarabcak/mostlyright#70"
+        return payload
 class NoLiveDataError(LiveStreamError):
     """:func:`mostlyright.live.latest` returned no observations for the station.

{mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/core/schemas/__init__.py RENAMED Viewed

@@ -10,9 +10,9 @@ Each schema is eagerly registered with the Validator at import time so
 without any explicit register-call boilerplate.
 """
-from mostlyright.core.validator import register_schema
+from mostlyright.core.validator import _SCHEMA_REGISTRY, register_schema
-from .forecast import ForecastSchema
+from .forecast import ForecastSchema, StationForecastSchema
 from .forecast_nwp import NwpForecastSchema
 from .observation import ObservationSchema
 from .observation_ledger import ObservationLedgerSchema
@@ -21,6 +21,11 @@ from .settlement import SettlementSchema
 # Eager registration — Validator can look up each schema by ID immediately.
 register_schema(ObservationSchema)
+# Phase 20 OM-02: register canonical StationForecastSchema FIRST so the
+# canonical schema_id wins on any registry-iteration that visits in
+# insertion order; ForecastSchema (back-compat alias to
+# schema.forecast.iem_mos.v1) registers second.
+register_schema(StationForecastSchema)
 register_schema(ForecastSchema)
 register_schema(SettlementSchema)
 # Phase 2.1 additions.
@@ -29,11 +34,18 @@ register_schema(ObservationQCSchema)
 # Phase 3.2 addition.
 register_schema(NwpForecastSchema)
+#: Public alias for the validator's registry dict, so callers and tests
+#: can look up schemas by id without reaching into ``core.validator``'s
+#: underscored internal. Phase 20 OM-02.
+SCHEMA_REGISTRY = _SCHEMA_REGISTRY
 __all__ = [
+    "SCHEMA_REGISTRY",
     "ForecastSchema",
     "NwpForecastSchema",
     "ObservationLedgerSchema",
     "ObservationQCSchema",
     "ObservationSchema",
     "SettlementSchema",
+    "StationForecastSchema",
 ]

mostlyrightmd-1.3.0/src/mostlyright/core/schemas/forecast.py ADDED Viewed

@@ -0,0 +1,177 @@
+"""Phase 20: Unified per-station forecast schema.
+``schema.forecast.station.v1`` covers both IEM MOS rows and Open-Meteo rows
+in a single column set. ``schema.forecast.iem_mos.v1`` is retained as a
+back-compat alias (same class semantics, different ``schema_id``). Both
+register via ``core/schemas/__init__.py``.
+The unified schema marks IEM MOS core columns (``temp_c``, ``dew_point_c``,
+etc.) as nullable because Open-Meteo may not provide all of them. Open-Meteo
+extras (``apparent_temp_c``, ``shortwave_radiation_wm2``, ``cape_jkg``, etc.)
+are always nullable — IEM MOS rows leave them null; Open-Meteo rows populate
+them.
+Source discrimination is via the ``source`` column (e.g. ``iem.archive``,
+``open_meteo.previous_runs``, ``open_meteo.live``).
+Temporal mapping (design.md §A):
+- ``event_time = valid_at``
+- ``knowledge_time = issued_at``
+"""
+from __future__ import annotations
+from typing import ClassVar
+from ..schema import ColumnSpec, Schema
+class StationForecastSchema(Schema):
+    """``schema.forecast.station.v1`` — unified per-station forecast schema.
+    Covers IEM MOS shared core + Open-Meteo extras. Source identity via
+    the ``source`` column. Two ``schema_id`` strings register against the
+    same column set: ``schema.forecast.station.v1`` (canonical) and
+    ``schema.forecast.iem_mos.v1`` (back-compat alias via subclass).
+    Phase 20 OM-02.
+    """
+    schema_id: ClassVar[str] = "schema.forecast.station.v1"
+    # Phase 20 PLAN-11 review (codex HIGH #1): the unified station-forecast
+    # schema is a union over 5 source identifiers — `iem.archive` (IEM MOS
+    # archive rows) plus the 4 Open-Meteo endpoints. Validator branches on
+    # `_registered_sources` (set form) when present and falls back to
+    # `_registered_source` (single-value form) for legacy schemas.
+    _registered_source: ClassVar[str] = "open_meteo.previous_runs"
+    _registered_sources: ClassVar[frozenset[str]] = frozenset(
+        {
+            "iem.archive",
+            "open_meteo.previous_runs",
+            "open_meteo.single_run",
+            "open_meteo.live",
+            "open_meteo.seamless",
+        }
+    )
+    COLUMNS: ClassVar[list[ColumnSpec]] = [
+        # === Identity (required columns; only issued_at is nullable) ===
+        ColumnSpec(name="station", dtype="string", units=None, nullable=False),
+        ColumnSpec(
+            name="issued_at",
+            dtype="timestamp_utc",
+            units=None,
+            nullable=True,
+            notes=(
+                "model run time (knowledge_time). Nullable to accommodate "
+                "Phase 20 open_meteo.seamless rows whose cycle is "
+                "unrecoverable from the response. LeakageDetector + "
+                "assert_issued_at_populated() are the runtime gates that "
+                "reject null issued_at in training-data paths."
+            ),
+        ),
+        ColumnSpec(
+            name="valid_at",
+            dtype="timestamp_utc",
+            units=None,
+            nullable=False,
+            notes="forecast target time (event_time)",
+        ),
+        # NOTE(review): forecast_hour is non-nullable but its documented
+        # derivation needs issued_at, which may be null for
+        # open_meteo.seamless rows — confirm what the seamless fetcher
+        # writes here before relying on it.
+        ColumnSpec(
+            name="forecast_hour",
+            dtype="int64",
+            units="hours",
+            nullable=False,
+            notes="(valid_at - issued_at).total_seconds() / 3600",
+        ),
+        ColumnSpec(
+            name="model",
+            dtype="string",
+            units=None,
+            nullable=False,
+            notes="e.g. NBE, GFS, LAV, MET, gfs_global, ecmwf_ifs025",
+        ),
+        # Keep this list in step with ``_registered_sources`` above.
+        ColumnSpec(
+            name="source",
+            dtype="string",
+            units=None,
+            nullable=False,
+            notes=(
+                "iem.archive | open_meteo.previous_runs | open_meteo.single_run "
+                "| open_meteo.live | open_meteo.seamless"
+            ),
+        ),
+        # === IEM MOS core (nullable because Open-Meteo may not supply all) ===
+        ColumnSpec(name="temp_c", dtype="float64", units="celsius", nullable=True),
+        ColumnSpec(name="dew_point_c", dtype="float64", units="celsius", nullable=True),
+        ColumnSpec(name="wind_speed_ms", dtype="float64", units="m/s", nullable=True),
+        ColumnSpec(name="wind_dir_deg", dtype="int64", units="degrees", nullable=True),
+        ColumnSpec(
+            name="precip_probability",
+            dtype="float64",
+            units="probability",
+            nullable=True,
+            notes="bounded [0, 1]",
+        ),
+        ColumnSpec(
+            name="sky_cover_pct",
+            dtype="int64",
+            units="percent",
+            nullable=True,
+            notes="bounded [0, 100]",
+        ),
+        # === Open-Meteo extras (always nullable; null for iem.archive rows) ===
+        ColumnSpec(name="apparent_temp_c", dtype="float64", units="celsius", nullable=True),
+        ColumnSpec(name="shortwave_radiation_wm2", dtype="float64", units="W/m^2", nullable=True),
+        ColumnSpec(name="direct_radiation_wm2", dtype="float64", units="W/m^2", nullable=True),
+        ColumnSpec(name="cape_jkg", dtype="float64", units="J/kg", nullable=True),
+        ColumnSpec(name="precipitation_mm", dtype="float64", units="mm", nullable=True),
+        ColumnSpec(name="cloud_cover_pct", dtype="int64", units="percent", nullable=True),
+        ColumnSpec(name="surface_pressure_hpa", dtype="float64", units="hPa", nullable=True),
+        ColumnSpec(name="pressure_msl_hpa", dtype="float64", units="hPa", nullable=True),
+        ColumnSpec(name="freezing_level_m", dtype="int64", units="meters", nullable=True),
+        ColumnSpec(name="snow_depth_m", dtype="float64", units="meters", nullable=True),
+        ColumnSpec(name="visibility_m", dtype="int64", units="meters", nullable=True),
+        ColumnSpec(name="wind_gusts_ms", dtype="float64", units="m/s", nullable=True),
+        ColumnSpec(
+            name="weather_code",
+            dtype="int64",
+            units="WMO 4677",
+            nullable=True,
+            notes="WMO weather code (clear, fog, rain, snow, etc.)",
+        ),
+        # === Provenance ===
+        ColumnSpec(
+            name="retrieved_at",
+            dtype="timestamp_utc",
+            units=None,
+            nullable=False,
+            notes="wall-clock time the row was fetched from upstream",
+        ),
+    ]
+    #: Imperial-mode renames apply to temperature, wind speed, and wind gusts.
+    #: ``valid_at`` / ``issued_at`` / ``retrieved_at`` are model-internal
+    #: timestamps and keep their canonical names.
+    IMPERIAL_RENAMES: ClassVar[dict[str, str]] = {
+        "temp_c": "temp_F",
+        "dew_point_c": "dew_point_F",
+        "apparent_temp_c": "apparent_temp_F",
+        "wind_speed_ms": "wind_speed_kt",
+        "wind_gusts_ms": "wind_gusts_kt",
+    }
+class ForecastSchema(StationForecastSchema):
+    """Back-compat alias for ``schema.forecast.iem_mos.v1``.
+    Same class semantics as :class:`StationForecastSchema`. Retained so
+    existing IEM MOS parity fixtures and Phase 17 callers continue to work
+    unchanged. New code should reference :class:`StationForecastSchema` and
+    the canonical ``schema.forecast.station.v1`` ``schema_id``.
+    Phase 20 OM-02.
+    """
+    schema_id: ClassVar[str] = "schema.forecast.iem_mos.v1"
+    # NOTE(review): this class still inherits the five-entry
+    # ``_registered_sources`` from StationForecastSchema, and
+    # validate_dataframe consults ``_registered_sources`` before falling back
+    # to ``_registered_source``. The override below therefore does not appear
+    # to narrow the accepted sources to ``iem.archive`` on its own — confirm
+    # whether the alias is meant to accept Open-Meteo rows as well.
+    _registered_source: ClassVar[str] = "iem.archive"
+__all__ = ["ForecastSchema", "StationForecastSchema"]

{mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/core/temporal/leakage.py RENAMED Viewed

@@ -20,7 +20,11 @@ from typing import TYPE_CHECKING
 import pandas as pd
-from mostlyright.core.exceptions import LeakageError, SchemaValidationError
+from mostlyright.core.exceptions import (
+    IssuedAtMissingError,
+    LeakageError,
+    SchemaValidationError,
+)
 from mostlyright.core.result import TradewindsResult
 from mostlyright.core.temporal.timepoint import TimePoint
@@ -28,10 +32,19 @@ if TYPE_CHECKING:
     pass
-__all__ = ["LeakageDetector", "assert_no_leakage"]
+__all__ = [
+    "LeakageDetector",
+    "assert_issued_at_populated",
+    "assert_no_leakage",
+]
 _SAMPLE_CAP = 10
+#: Smaller cap for the issued_at assertion — leakage payloads should fit on
+#: one screen when surfaced through MCP, and a forecast frame missing
+#: ``issued_at`` is a structural bug rather than a per-row data issue. Phase
+#: 20 OM-04.
+_ISSUED_AT_SAMPLE_CAP = 5
 def assert_no_leakage(df: pd.DataFrame | TradewindsResult, as_of: TimePoint) -> None:
@@ -124,6 +137,52 @@ def assert_no_leakage(df: pd.DataFrame | TradewindsResult, as_of: TimePoint) ->
     )
+def _issued_at_sample_label(label: object) -> int | str:
+    """Return an index label as ``int`` when possible, else as ``str``.
+    Integer-convertible labels keep their historical ``int`` form; labels
+    such as timestamps, strings or MultiIndex tuples are stringified so
+    building the sample list can never mask the real error.
+    """
+    try:
+        return int(label)  # type: ignore[call-overload]
+    except (TypeError, ValueError, OverflowError):
+        return str(label)
+def assert_issued_at_populated(df: pd.DataFrame | TradewindsResult) -> None:
+    """Raise :class:`IssuedAtMissingError` if any row has null ``issued_at``.
+    Forecast rows MUST carry their model-run time to be leakage-safe; a
+    missing ``issued_at`` means we cannot verify the cycle predated the
+    ``as_of`` cutoff in :func:`research`. For Open-Meteo this should be
+    impossible by construction (the fetcher derives ``issued_at`` per row),
+    so this check is a defensive net.
+    Mirrors structural conventions of :func:`assert_no_leakage`:
+    :class:`TradewindsResult` (and any duck-typed ``.df`` carrier)
+    unwrap, column-existence guard, sample-cap.
+    Phase 20 OM-04.
+    Args:
+        df: A pandas DataFrame, a :class:`TradewindsResult`, or any object
+            exposing the frame as ``.df`` (and no ``.columns`` of its own).
+    Raises:
+        SchemaValidationError: The frame has no ``issued_at`` column.
+        IssuedAtMissingError: At least one row has a null ``issued_at``.
+            ``sample_violations`` holds at most ``_ISSUED_AT_SAMPLE_CAP``
+            entries of the form ``{"row_idx": <index label>}``.
+    """
+    if isinstance(df, TradewindsResult):
+        df = df.frame_as_pandas()
+    elif hasattr(df, "df") and not hasattr(df, "columns"):
+        # Duck-type for non-TradewindsResult wrappers (e.g. test doubles).
+        df = df.df
+    if "issued_at" not in df.columns:
+        raise SchemaValidationError(
+            "assert_issued_at_populated requires 'issued_at' column",
+            schema_id="schema.forecast.station.v1",
+            violations=[{"column": "issued_at", "rule": "required"}],
+        )
+    if len(df) == 0:
+        return  # empty frame vacuously satisfies
+    nulls_mask = df["issued_at"].isna()
+    violating_count = int(nulls_mask.sum())
+    if violating_count == 0:
+        return
+    # Slice before materialising so only the capped sample is converted, and
+    # tolerate non-integer index labels (a frame indexed by timestamp or
+    # station would otherwise fail in ``int(...)`` and hide the real error).
+    sample_labels = df.index[nulls_mask][:_ISSUED_AT_SAMPLE_CAP].tolist()
+    samples = [{"row_idx": _issued_at_sample_label(label)} for label in sample_labels]
+    raise IssuedAtMissingError(
+        f"{violating_count} row(s) have null issued_at; cannot verify leakage-safety",
+        violating_count=violating_count,
+        sample_violations=samples,
+    )
 class LeakageDetector:
     """Convenience wrapper for repeated detection against a fixed ``as_of``."""
@@ -145,3 +204,12 @@ class LeakageDetector:
         wrapper (unwrapped inside :func:`assert_no_leakage`).
         """
         assert_no_leakage(df, self._as_of)
+    def check_issued_at(self, df: pd.DataFrame | TradewindsResult) -> None:
+        """Raise :class:`IssuedAtMissingError` if any row has null ``issued_at``.
+        Phase 20 OM-04 extension. Independent of ``as_of`` — the bound
+        cutoff is irrelevant when the row carries no model-run time at
+        all.
+        Args:
+            df: Frame (or wrapper) passed straight through to
+                :func:`assert_issued_at_populated`.
+        """
+        # Pure delegation: no detector state is read here.
+        assert_issued_at_populated(df)

{mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/core/validator.py RENAMED Viewed

@@ -291,8 +291,27 @@ def validate_dataframe(
         )
     registered_source = getattr(schema_cls, "_registered_source", None)
+    # Phase 20 PLAN-11 review (codex HIGH #1): support union schemas via
+    # _registered_sources — a frozenset of permitted source identifiers.
+    # schema.forecast.station.v1 covers IEM MOS + 4 Open-Meteo endpoints;
+    # _registered_source (singular) is the legacy single-source guard.
+    registered_sources = getattr(schema_cls, "_registered_sources", None)
     if (
-        registered_source is not None
+        registered_sources is not None
+        and data_source not in registered_sources
+        and allow_source_drift is None
+    ):
+        raise SourceMismatchError(
+            f"Source drift: data is {data_source!r}, schema permits "
+            f"{sorted(registered_sources)!r}",
+            schema_source=",".join(sorted(registered_sources)),
+            data_source=data_source,
+            role=None,
+            catalog_warning=None,
+        )
+    elif (
+        registered_sources is None
+        and registered_source is not None
         and data_source != registered_source
         and allow_source_drift is None
     ):

{mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/research.py RENAMED Viewed

@@ -1379,6 +1379,109 @@ def _validate_research_kwargs(
         )
+#: Values accepted for the ``forecast_source`` kwarg; anything else is
+#: rejected by ``_normalize_forecast_source``.
+_FORECAST_SOURCES_ALLOWED: frozenset[str] = frozenset({"iem_mos", "open_meteo"})
+def _fetch_open_meteo_range(
+    info: StationInfo,
+    from_date: str,
+    to_date: str,
+    *,
+    model: str,
+) -> dict[str, list[dict[str, Any]]]:
+    """Phase 20 OM-05 — fetch Open-Meteo forecasts grouped by settlement date.
+    Wraps ``mostlyright.weather._fetchers._open_meteo.fetch_open_meteo`` in
+    training mode (Previous Runs API) and pivots its tabular DataFrame
+    into the ``{date_iso: [forecast_row, ...]}`` shape that
+    ``build_pairs(forecasts_by_date=...)`` expects. Each row carries
+    ``model`` / ``issued_at`` / ``valid_at`` / ``temperature_f`` /
+    ``pop_6hr_pct`` / ``qpf_6hr_in`` / ``source`` keys for build_pairs_row
+    compatibility.
+    Args:
+        info: Station record; ``info.icao`` is passed to the fetcher and
+            ``info.code`` to ``settlement_date_for``.
+        from_date: Start of the range, passed through to the fetcher.
+        to_date: End of the range, passed through to the fetcher.
+        model: Open-Meteo model key, passed through to the fetcher.
+    Returns:
+        Mapping of settlement date to forecast-row dicts. Rows whose
+        ``valid_at`` is missing (``None`` / NaN / ``NaT`` / ``pd.NA``) or
+        unparseable are skipped; an empty or ``None`` frame yields ``{}``.
+    """
+    import pandas as pd
+    from mostlyright.weather._fetchers._open_meteo import fetch_open_meteo
+    iso_fmt = "%Y-%m-%dT%H:%M:%SZ"
+    def _is_missing(value: Any) -> bool:
+        # pd.isna covers None, float/NumPy NaN, pd.NA and NaT; the is_scalar
+        # guard keeps an unexpected list-like cell from yielding an array.
+        return value is None or (pd.api.types.is_scalar(value) and bool(pd.isna(value)))
+    def _convert(value: Any, formula: Any) -> float | None:
+        # Apply ``formula`` to float(value); missing or non-numeric -> None.
+        if _is_missing(value):
+            return None
+        try:
+            return formula(float(value))
+        except (TypeError, ValueError):
+            return None
+    df = fetch_open_meteo(info.icao, from_date, to_date, model=model, mode="training")
+    groups: dict[str, list[dict[str, Any]]] = {}
+    if df is None or df.empty:
+        return groups
+    for _, row in df.iterrows():
+        valid_raw = row.get("valid_at")
+        if _is_missing(valid_raw):
+            continue
+        try:
+            valid_dt = pd.to_datetime(valid_raw, utc=True)
+        except Exception:
+            # Deliberate best-effort: an unparseable row is dropped, not fatal.
+            continue
+        if _is_missing(valid_dt):
+            # to_datetime can return NaT without raising; NaT.strftime would
+            # raise below, outside any handler, and abort the whole fetch.
+            continue
+        valid_iso = valid_dt.strftime(iso_fmt)
+        try:
+            date_iso = settlement_date_for(valid_iso, info.code)
+        except Exception:
+            # Best-effort fallback: bucket by the UTC calendar date.
+            date_iso = valid_dt.strftime("%Y-%m-%d")
+        issued_raw = row.get("issued_at")
+        issued_iso: str | None = None
+        if not _is_missing(issued_raw):
+            try:
+                issued_dt = pd.to_datetime(issued_raw, utc=True)
+                if not _is_missing(issued_dt):
+                    issued_iso = issued_dt.strftime(iso_fmt)
+            except Exception:
+                issued_iso = None
+        fcst_row: dict[str, Any] = {
+            "model": row.get("model"),
+            "issued_at": issued_iso,
+            "valid_at": valid_iso,
+            # Celsius -> Fahrenheit.
+            "temperature_f": _convert(row.get("temp_c"), lambda c: c * 9.0 / 5.0 + 32.0),
+            # Probability in [0, 1] -> percent.
+            "pop_6hr_pct": _convert(row.get("precip_probability"), lambda p: p * 100.0),
+            # Millimetres -> inches.
+            "qpf_6hr_in": _convert(row.get("precipitation_mm"), lambda mm: mm / 25.4),
+            "source": row.get("source"),
+        }
+        groups.setdefault(date_iso, []).append(fcst_row)
+    return groups
+def _normalize_forecast_source(
+    forecast_source: str | list[str] | tuple[str, ...] | None,
+) -> tuple[str, ...]:
+    """Normalize the forecast_source kwarg to a tuple of allowed values.
+    ``None`` becomes ``("iem_mos",)``; a bare string becomes a one-element
+    tuple; any other iterable is converted with ``tuple(...)``. Caller
+    order is preserved — the result is neither sorted nor de-duplicated.
+    Raises:
+        ValueError: One or more entries are not in
+            ``_FORECAST_SOURCES_ALLOWED``.
+    """
+    if forecast_source is None:
+        return ("iem_mos",)
+    if isinstance(forecast_source, str):
+        items: tuple[str, ...] = (forecast_source,)
+    else:
+        items = tuple(forecast_source)
+    # NOTE(review): an empty list/tuple passes validation and returns ``()``,
+    # so a caller checking membership would select no source at all —
+    # confirm whether that should raise instead.
+    bad = [s for s in items if s not in _FORECAST_SOURCES_ALLOWED]
+    if bad:
+        raise ValueError(
+            f"forecast_source: unknown value(s) {bad}; "
+            f"allowed = {sorted(_FORECAST_SOURCES_ALLOWED)}"
+        )
+    return items
 def research(
     station: str | None = None,
     from_date: str | None = None,
@@ -1394,6 +1497,7 @@ def research(
     include_forecast: bool = False,
     forecast_model: str | None = None,
     forecast_models: list[str] | None = None,
+    forecast_source: str | list[str] | tuple[str, ...] = "iem_mos",
     as_dataframe: bool = True,
     tz_override: str | None = None,
     qc: bool = False,
@@ -1584,15 +1688,48 @@ def research(
     iem_mos_by_date: dict[str, list[dict[str, Any]]] = {}
     nwp_by_model_date: dict[str, dict[str, list[dict[str, Any]]]] = {}
     if include_forecast:
+        fcst_sources = _normalize_forecast_source(forecast_source)
         # Phase 17 Wave 4 iter-3 review HIGH: thread forecast_model through to
         # the fetcher so callers asking for ``forecast_model="gfs"`` get a
         # GFS pull, not a default-NBE pull whose rows then get filtered out
-        # by build_pairs_row's model-name match. ``forecast_model`` is the
-        # user-facing case (lowercase IEM MOS model id); _fetch_iem_mos_range
-        # passes it directly to fetch_iem_mos which validates against
-        # SUPPORTED_MOS_MODELS.
-        iem_model = (forecast_model or "nbe").lower()
-        iem_mos_by_date = _fetch_iem_mos_range(info, from_date, to_date, model=iem_model)
+        # by build_pairs_row's model-name match.
+        if "iem_mos" in fcst_sources:
+            iem_model = (forecast_model or "nbe").lower()
+            iem_mos_by_date = _fetch_iem_mos_range(info, from_date, to_date, model=iem_model)
+        if "open_meteo" in fcst_sources:
+            # Phase 20 OM-05: Open-Meteo forecast source. Default model
+            # gfs_global matches the IEM MOS "nbe" parity-default ethos:
+            # the most-traded prediction-market model for US stations.
+            om_model = forecast_model or "gfs_global"
+            from mostlyright.weather._fetchers._open_meteo_models import (
+                OPEN_METEO_MODELS,
+            )
+            if om_model not in OPEN_METEO_MODELS:
+                # Phase 20 PLAN-11 review (codex HIGH #3): a typo or legacy
+                # IEM MOS model id like "nbe" would otherwise silently drop
+                # Open-Meteo forecasts and leave fcst_* columns null — the
+                # caller would not learn the source failed. Hard-fail with
+                # a hint so the typo surfaces immediately.
+                raise ValueError(
+                    f"forecast_source=\"open_meteo\" requires forecast_model "
+                    f"in OPEN_METEO_MODELS (36 keys); got {om_model!r}. "
+                    f"Pick one of the 36 registered Open-Meteo keys (e.g. "
+                    f"'gfs_global', 'ecmwf_ifs_hres', 'dwd_icon_global') "
+                    f"or drop forecast_source=\"open_meteo\" to use the "
+                    f"default IEM MOS path."
+                )
+            om_by_date = _fetch_open_meteo_range(
+                info, from_date, to_date, model=om_model
+            )
+            # Concatenate: never silently merge — every row carries its
+            # source identity. build_pairs accepts a single dict so we
+            # merge OM rows into iem_mos_by_date when both sources are
+            # selected (build_pairs_row already discriminates via
+            # row.get("source")).
+            for date_iso, rows in om_by_date.items():
+                iem_mos_by_date.setdefault(date_iso, []).extend(rows)
         if forecast_models:
             nwp_by_model_date = _fetch_nwp_models_range(
                 info, from_date, to_date, list(forecast_models)

mostlyrightmd 1.2.0__tar.gz → 1.3.0__tar.gz

mostlyrightmd 1.2.0tar.gz → 1.3.0tar.gz