mostlyrightmd 1.2.0__tar.gz → 1.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/PKG-INFO +1 -1
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/pyproject.toml +1 -1
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/core/exceptions.py +109 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/core/schemas/__init__.py +14 -2
- mostlyrightmd-1.3.0/src/mostlyright/core/schemas/forecast.py +177 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/core/temporal/leakage.py +70 -2
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/core/validator.py +20 -1
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/research.py +143 -6
- mostlyrightmd-1.3.0/tests/core/test_schemas/test_forecast.py +113 -0
- mostlyrightmd-1.3.0/tests/test_exceptions_phase18.py +79 -0
- mostlyrightmd-1.3.0/tests/test_leakage_issued_at.py +97 -0
- mostlyrightmd-1.3.0/tests/test_research_forecast_source.py +39 -0
- mostlyrightmd-1.3.0/tests/test_schemas_codegen.py +111 -0
- mostlyrightmd-1.3.0/tests/test_schemas_station_forecast.py +98 -0
- mostlyrightmd-1.2.0/src/mostlyright/core/schemas/forecast.py +0 -122
- mostlyrightmd-1.2.0/tests/core/test_schemas/test_forecast.py +0 -104
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/.gitignore +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/README.md +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/__init__.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_compose.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_exact_fetch.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/__init__.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/_bounds.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/_cache_dir.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/_capabilities.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/_convert.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/_http.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/_pairs.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/_pandas_compat.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/_stations.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/_toon.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/exceptions.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/merge/__init__.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/merge/_schemas.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/merge/climate.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/merge/observations.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/models/__init__.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/models/_base.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/models/availability.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/models/observation.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/models/station.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/specs/book_snapshot.json +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/specs/brackets.json +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/specs/candle.json +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/specs/climate.json +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/specs/daily_extreme.json +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/specs/data_version.json +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/specs/event.json +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/specs/forecast.json +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/specs/forecast_series.json +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/specs/market.json +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/specs/market_unified.json +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/specs/observation.json +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/specs/observation_ledger.json +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/specs/observation_qc.json +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/specs/omo.json +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/specs/series.json +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/specs/settlement-join.json +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/specs/settlement_record.json +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/specs/snapshot.json +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/specs/synoptic_extremes.json +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/_internal/versioning.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/core/__init__.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/core/_backend_dispatch.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/core/_json_safe.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/core/_narwhals_compat.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/core/_polars_compat.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/core/formats/__init__.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/core/formats/_toon.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/core/formats/_toon_list_codec.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/core/formats/csv.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/core/formats/dataframe.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/core/formats/json.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/core/formats/parquet.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/core/formats/toon.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/core/merge.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/core/result.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/core/schema.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/core/schemas/forecast_nwp.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/core/schemas/observation.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/core/schemas/observation_ledger.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/core/schemas/observation_qc.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/core/schemas/settlement.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/core/temporal/__init__.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/core/temporal/knowledge_view.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/core/temporal/timepoint.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/discover.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/discovery.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/forecasts.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/international.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/live/__init__.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/live/_latest.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/live/_sources.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/live/_stream.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/mode2.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/preprocessing.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/qc.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/snapshot.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/src/mostlyright/transforms.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/_internal/__init__.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/_internal/merge/__init__.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/_internal/merge/test_awc_gap_filled_by_iem.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/_internal/merge/test_climate.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/_internal/merge/test_observations.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/_internal/models/__init__.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/_internal/models/test_availability.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/_internal/models/test_base.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/_internal/models/test_observation.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/_internal/models/test_station.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/_internal/test_bounds.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/_internal/test_capabilities.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/_internal/test_convert.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/_internal/test_exceptions.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/_internal/test_http.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/_internal/test_pairs.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/_internal/test_stations.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/_internal/test_versioning.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/core/__init__.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/core/temporal/__init__.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/core/temporal/test_knowledge_view.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/core/temporal/test_leakage.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/core/test_exceptions.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/core/test_formats.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/core/test_json_safe.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/core/test_merge.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/core/test_result.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/core/test_schema.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/core/test_schemas/__init__.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/core/test_schemas/test_observation.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/core/test_schemas/test_settlement.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/core/test_timepoint.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/core/test_validator.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/test_backend_dispatch.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/test_cache_env_back_compat.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/test_compose.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/test_discover.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/test_discovery_real.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/test_exact_fetch.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/test_exceptions_data_availability.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/test_exceptions_phase17.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/test_exceptions_phase17_plan06.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/test_forecast_nwp_schema_phase17.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/test_international.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/test_live_latest.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/test_live_stream.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/test_mode2.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/test_namespace.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/test_phase_3x.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/test_polars_cross_backend.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/test_polars_invariants.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/test_qc_wired.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/test_research.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/test_research_kwarg_validation.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/test_research_prefetch.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/test_snapshot.py +0 -0
- {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.0}/tests/test_transforms_preprocessing.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mostlyrightmd
|
|
3
|
-
Version: 1.2.0
|
|
3
|
+
Version: 1.3.0
|
|
4
4
|
Summary: Python SDK for quants, ML engineers, and AI agents — one interface to public data. Adapters ship weather + prediction-market settlements (Kalshi NHIGH/NLOW, Polymarket) today; SEC filings, Federal Reserve series, court filings, FDA approvals, and equities are next. Schema-versioned, leakage-free, local-first. Imports as `mostlyright`.
|
|
5
5
|
Project-URL: Homepage, https://mostlyright.md
|
|
6
6
|
Project-URL: Documentation, https://mostlyright.md/docs/sdk/
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "mostlyrightmd"
|
|
3
|
-
version = "1.2.0"
|
|
3
|
+
version = "1.3.0"
|
|
4
4
|
description = "Python SDK for quants, ML engineers, and AI agents — one interface to public data. Adapters ship weather + prediction-market settlements (Kalshi NHIGH/NLOW, Polymarket) today; SEC filings, Federal Reserve series, court filings, FDA approvals, and equities are next. Schema-versioned, leakage-free, local-first. Imports as `mostlyright`."
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
license = "MIT"
|
|
@@ -31,6 +31,7 @@ __all__ = [
|
|
|
31
31
|
"DeprecatedModelWarning",
|
|
32
32
|
"GribIntegrityError",
|
|
33
33
|
"HistoricalDepthError",
|
|
34
|
+
"IssuedAtMissingError",
|
|
34
35
|
"LeakageError",
|
|
35
36
|
"LiveStreamError",
|
|
36
37
|
"NoLiveDataError",
|
|
@@ -38,6 +39,7 @@ __all__ = [
|
|
|
38
39
|
"NwpError",
|
|
39
40
|
"NwpModelNotAvailableError",
|
|
40
41
|
"NwpModelRetiredError",
|
|
42
|
+
"OpenMeteoSeamlessLeakageError",
|
|
41
43
|
"PayloadTooLargeError",
|
|
42
44
|
"SchemaValidationError",
|
|
43
45
|
"SourceMismatchError",
|
|
@@ -738,6 +740,113 @@ class LiveStreamError(TradewindsError):
|
|
|
738
740
|
default_error_code = "LIVE_STREAM_ERROR"
|
|
739
741
|
|
|
740
742
|
|
|
743
|
+
#: Sentinel ``as_of`` value used when raising OpenMeteoSeamlessLeakageError
|
|
744
|
+
#: before the offending DataFrame is known (e.g. seamless-endpoint refusal at fetch
|
|
745
|
+
#: time, where no rows have been retrieved yet). LeakageError requires non-empty
|
|
746
|
+
#: ``as_of`` so OpenMeteoSeamlessLeakageError supplies this sentinel rather than
|
|
747
|
+
#: forging a timestamp.
|
|
748
|
+
_NO_AS_OF_SENTINEL = "(seamless-endpoint-refused-before-fetch)"
|
|
749
|
+
|
|
750
|
+
|
|
751
|
+
class IssuedAtMissingError(SchemaValidationError):
|
|
752
|
+
"""A forecast row is missing the ``issued_at`` field.
|
|
753
|
+
|
|
754
|
+
Raised when a forecast row would land in the DataFrame with
|
|
755
|
+
``issued_at IS NULL``. For Open-Meteo Previous Runs API this should be
|
|
756
|
+
impossible by construction (the fetcher derives ``issued_at`` per row
|
|
757
|
+
via the conservative lower-bound formula). For Live mode, this is
|
|
758
|
+
raised when cycle-math fallback cannot derive a non-null cycle.
|
|
759
|
+
|
|
760
|
+
Origin: ``Tarabcak/mostlyright#70`` — the legacy seamless-feed bug
|
|
761
|
+
where ``/forecast_series`` proxied Open-Meteo's seamless endpoint
|
|
762
|
+
without preserving ``issued_at``, causing post-snapshot model runs to
|
|
763
|
+
silently leak into training data.
|
|
764
|
+
|
|
765
|
+
Phase 20 OM-04.
|
|
766
|
+
"""
|
|
767
|
+
|
|
768
|
+
default_error_code = "ISSUED_AT_MISSING"
|
|
769
|
+
|
|
770
|
+
def __init__(
|
|
771
|
+
self,
|
|
772
|
+
message: str = "",
|
|
773
|
+
*,
|
|
774
|
+
source: str | None = None,
|
|
775
|
+
violating_count: int = 0,
|
|
776
|
+
sample_violations: list[dict[str, Any]] | None = None,
|
|
777
|
+
request_id: str | None = None,
|
|
778
|
+
error_code: str | None = None,
|
|
779
|
+
) -> None:
|
|
780
|
+
super().__init__(
|
|
781
|
+
message,
|
|
782
|
+
schema_id="schema.forecast.station.v1",
|
|
783
|
+
violations=[{"column": "issued_at", "rule": "non_null"}],
|
|
784
|
+
sample_violations=sample_violations,
|
|
785
|
+
source=source,
|
|
786
|
+
request_id=request_id,
|
|
787
|
+
error_code=error_code,
|
|
788
|
+
)
|
|
789
|
+
self.violating_count: int = violating_count
|
|
790
|
+
|
|
791
|
+
def _payload(self) -> dict[str, Any]:
|
|
792
|
+
payload = super()._payload()
|
|
793
|
+
payload["name"] = "IssuedAtMissingError"
|
|
794
|
+
payload["violating_count"] = self.violating_count
|
|
795
|
+
payload["origin_issue"] = "Tarabcak/mostlyright#70"
|
|
796
|
+
return payload
|
|
797
|
+
|
|
798
|
+
|
|
799
|
+
class OpenMeteoSeamlessLeakageError(LeakageError):
|
|
800
|
+
"""The Open-Meteo Historical Forecast (seamless) endpoint was used
|
|
801
|
+
without ``allow_leakage=True`` opt-in.
|
|
802
|
+
|
|
803
|
+
Per Phase 20 D-01 (locked decision): the seamless endpoint silently
|
|
804
|
+
stitches forecasts from multiple model cycles into a continuous
|
|
805
|
+
timeseries; the cycle that produced each value is unrecoverable from
|
|
806
|
+
the response. :class:`LeakageDetector` rejects rows tagged
|
|
807
|
+
``source="open_meteo.seamless"`` whenever ``as_of`` is asserted.
|
|
808
|
+
|
|
809
|
+
Origin: ``Tarabcak/mostlyright#70``.
|
|
810
|
+
|
|
811
|
+
Phase 20 OM-04.
|
|
812
|
+
"""
|
|
813
|
+
|
|
814
|
+
default_error_code = "OPEN_METEO_SEAMLESS_LEAKAGE"
|
|
815
|
+
|
|
816
|
+
def __init__(
|
|
817
|
+
self,
|
|
818
|
+
message: str = "",
|
|
819
|
+
*,
|
|
820
|
+
model: str = "",
|
|
821
|
+
endpoint_url: str = "",
|
|
822
|
+
as_of: str | None = None,
|
|
823
|
+
violating_count: int = 0,
|
|
824
|
+
sample_violations: list[dict[str, Any]] | None = None,
|
|
825
|
+
source: str | None = None,
|
|
826
|
+
request_id: str | None = None,
|
|
827
|
+
error_code: str | None = None,
|
|
828
|
+
) -> None:
|
|
829
|
+
super().__init__(
|
|
830
|
+
message,
|
|
831
|
+
as_of=as_of or _NO_AS_OF_SENTINEL,
|
|
832
|
+
violating_count=violating_count,
|
|
833
|
+
sample_violations=sample_violations,
|
|
834
|
+
source=source,
|
|
835
|
+
request_id=request_id,
|
|
836
|
+
error_code=error_code,
|
|
837
|
+
)
|
|
838
|
+
self.model: str = model
|
|
839
|
+
self.endpoint_url: str = endpoint_url
|
|
840
|
+
|
|
841
|
+
def _payload(self) -> dict[str, Any]:
|
|
842
|
+
payload = super()._payload()
|
|
843
|
+
payload["name"] = "OpenMeteoSeamlessLeakageError"
|
|
844
|
+
payload["model"] = self.model
|
|
845
|
+
payload["endpoint_url"] = self.endpoint_url
|
|
846
|
+
payload["origin_issue"] = "Tarabcak/mostlyright#70"
|
|
847
|
+
return payload
|
|
848
|
+
|
|
849
|
+
|
|
741
850
|
class NoLiveDataError(LiveStreamError):
|
|
742
851
|
""":func:`mostlyright.live.latest` returned no observations for the station.
|
|
743
852
|
|
|
@@ -10,9 +10,9 @@ Each schema is eagerly registered with the Validator at import time so
|
|
|
10
10
|
without any explicit register-call boilerplate.
|
|
11
11
|
"""
|
|
12
12
|
|
|
13
|
-
from mostlyright.core.validator import register_schema
|
|
13
|
+
from mostlyright.core.validator import _SCHEMA_REGISTRY, register_schema
|
|
14
14
|
|
|
15
|
-
from .forecast import ForecastSchema
|
|
15
|
+
from .forecast import ForecastSchema, StationForecastSchema
|
|
16
16
|
from .forecast_nwp import NwpForecastSchema
|
|
17
17
|
from .observation import ObservationSchema
|
|
18
18
|
from .observation_ledger import ObservationLedgerSchema
|
|
@@ -21,6 +21,11 @@ from .settlement import SettlementSchema
|
|
|
21
21
|
|
|
22
22
|
# Eager registration — Validator can look up each schema by ID immediately.
|
|
23
23
|
register_schema(ObservationSchema)
|
|
24
|
+
# Phase 20 OM-02: register canonical StationForecastSchema FIRST so the
|
|
25
|
+
# canonical schema_id wins on any registry-iteration that visits in
|
|
26
|
+
# insertion order; ForecastSchema (back-compat alias to
|
|
27
|
+
# schema.forecast.iem_mos.v1) registers second.
|
|
28
|
+
register_schema(StationForecastSchema)
|
|
24
29
|
register_schema(ForecastSchema)
|
|
25
30
|
register_schema(SettlementSchema)
|
|
26
31
|
# Phase 2.1 additions.
|
|
@@ -29,11 +34,18 @@ register_schema(ObservationQCSchema)
|
|
|
29
34
|
# Phase 3.2 addition.
|
|
30
35
|
register_schema(NwpForecastSchema)
|
|
31
36
|
|
|
37
|
+
#: Public alias for the validator's registry dict, so callers and tests
|
|
38
|
+
#: can look up schemas by id without reaching into ``core.validator``'s
|
|
39
|
+
#: underscored internal. Phase 20 OM-02.
|
|
40
|
+
SCHEMA_REGISTRY = _SCHEMA_REGISTRY
|
|
41
|
+
|
|
32
42
|
__all__ = [
|
|
43
|
+
"SCHEMA_REGISTRY",
|
|
33
44
|
"ForecastSchema",
|
|
34
45
|
"NwpForecastSchema",
|
|
35
46
|
"ObservationLedgerSchema",
|
|
36
47
|
"ObservationQCSchema",
|
|
37
48
|
"ObservationSchema",
|
|
38
49
|
"SettlementSchema",
|
|
50
|
+
"StationForecastSchema",
|
|
39
51
|
]
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
"""Phase 20: Unified per-station forecast schema.
|
|
2
|
+
|
|
3
|
+
``schema.forecast.station.v1`` covers both IEM MOS rows and Open-Meteo rows
|
|
4
|
+
in a single column set. ``schema.forecast.iem_mos.v1`` is retained as a
|
|
5
|
+
back-compat alias (same class semantics, different ``schema_id``). Both
|
|
6
|
+
register via ``core/schemas/__init__.py``.
|
|
7
|
+
|
|
8
|
+
The unified schema marks IEM MOS core columns (``temp_c``, ``dew_point_c``,
|
|
9
|
+
etc.) as nullable because Open-Meteo may not provide all of them. Open-Meteo
|
|
10
|
+
extras (``apparent_temp_c``, ``shortwave_radiation_wm2``, ``cape_jkg``, etc.)
|
|
11
|
+
are always nullable — IEM MOS rows leave them null; Open-Meteo rows populate
|
|
12
|
+
them.
|
|
13
|
+
|
|
14
|
+
Source discrimination is via the ``source`` column (e.g. ``iem.archive``,
|
|
15
|
+
``open_meteo.previous_runs``, ``open_meteo.live``).
|
|
16
|
+
|
|
17
|
+
Temporal mapping (design.md §A):
|
|
18
|
+
- ``event_time = valid_at``
|
|
19
|
+
- ``knowledge_time = issued_at``
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
from typing import ClassVar
|
|
25
|
+
|
|
26
|
+
from ..schema import ColumnSpec, Schema
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class StationForecastSchema(Schema):
|
|
30
|
+
"""``schema.forecast.station.v1`` — unified per-station forecast schema.
|
|
31
|
+
|
|
32
|
+
Covers IEM MOS shared core + Open-Meteo extras. Source identity via
|
|
33
|
+
the ``source`` column. Two ``schema_id`` strings register against the
|
|
34
|
+
same column set: ``schema.forecast.station.v1`` (canonical) and
|
|
35
|
+
``schema.forecast.iem_mos.v1`` (back-compat alias via subclass).
|
|
36
|
+
|
|
37
|
+
Phase 20 OM-02.
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
schema_id: ClassVar[str] = "schema.forecast.station.v1"
|
|
41
|
+
# Phase 20 PLAN-11 review (codex HIGH #1): the unified station-forecast
|
|
42
|
+
# schema is a union over 5 source identifiers — `iem.archive` (IEM MOS
|
|
43
|
+
# archive rows) plus the 4 Open-Meteo endpoints. Validator branches on
|
|
44
|
+
# `_registered_sources` (set form) when present and falls back to
|
|
45
|
+
# `_registered_source` (single-value form) for legacy schemas.
|
|
46
|
+
_registered_source: ClassVar[str] = "open_meteo.previous_runs"
|
|
47
|
+
_registered_sources: ClassVar[frozenset[str]] = frozenset(
|
|
48
|
+
{
|
|
49
|
+
"iem.archive",
|
|
50
|
+
"open_meteo.previous_runs",
|
|
51
|
+
"open_meteo.single_run",
|
|
52
|
+
"open_meteo.live",
|
|
53
|
+
"open_meteo.seamless",
|
|
54
|
+
}
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
COLUMNS: ClassVar[list[ColumnSpec]] = [
|
|
58
|
+
# === Identity (required columns; nullable=False except issued_at, which is nullable) ===
|
|
59
|
+
ColumnSpec(name="station", dtype="string", units=None, nullable=False),
|
|
60
|
+
ColumnSpec(
|
|
61
|
+
name="issued_at",
|
|
62
|
+
dtype="timestamp_utc",
|
|
63
|
+
units=None,
|
|
64
|
+
nullable=True,
|
|
65
|
+
notes=(
|
|
66
|
+
"model run time (knowledge_time). Nullable to accommodate "
|
|
67
|
+
"Phase 20 open_meteo.seamless rows whose cycle is "
|
|
68
|
+
"unrecoverable from the response. LeakageDetector + "
|
|
69
|
+
"assert_issued_at_populated() are the runtime gates that "
|
|
70
|
+
"reject null issued_at in training-data paths."
|
|
71
|
+
),
|
|
72
|
+
),
|
|
73
|
+
ColumnSpec(
|
|
74
|
+
name="valid_at",
|
|
75
|
+
dtype="timestamp_utc",
|
|
76
|
+
units=None,
|
|
77
|
+
nullable=False,
|
|
78
|
+
notes="forecast target time (event_time)",
|
|
79
|
+
),
|
|
80
|
+
ColumnSpec(
|
|
81
|
+
name="forecast_hour",
|
|
82
|
+
dtype="int64",
|
|
83
|
+
units="hours",
|
|
84
|
+
nullable=False,
|
|
85
|
+
notes="(valid_at - issued_at).total_seconds() / 3600",
|
|
86
|
+
),
|
|
87
|
+
ColumnSpec(
|
|
88
|
+
name="model",
|
|
89
|
+
dtype="string",
|
|
90
|
+
units=None,
|
|
91
|
+
nullable=False,
|
|
92
|
+
notes="e.g. NBE, GFS, LAV, MET, gfs_global, ecmwf_ifs025",
|
|
93
|
+
),
|
|
94
|
+
ColumnSpec(
|
|
95
|
+
name="source",
|
|
96
|
+
dtype="string",
|
|
97
|
+
units=None,
|
|
98
|
+
nullable=False,
|
|
99
|
+
notes="iem.archive | open_meteo.previous_runs | open_meteo.single_run | open_meteo.live",
|
|
100
|
+
),
|
|
101
|
+
# === IEM MOS core (nullable because Open-Meteo may not supply all) ===
|
|
102
|
+
ColumnSpec(name="temp_c", dtype="float64", units="celsius", nullable=True),
|
|
103
|
+
ColumnSpec(name="dew_point_c", dtype="float64", units="celsius", nullable=True),
|
|
104
|
+
ColumnSpec(name="wind_speed_ms", dtype="float64", units="m/s", nullable=True),
|
|
105
|
+
ColumnSpec(name="wind_dir_deg", dtype="int64", units="degrees", nullable=True),
|
|
106
|
+
ColumnSpec(
|
|
107
|
+
name="precip_probability",
|
|
108
|
+
dtype="float64",
|
|
109
|
+
units="probability",
|
|
110
|
+
nullable=True,
|
|
111
|
+
notes="bounded [0, 1]",
|
|
112
|
+
),
|
|
113
|
+
ColumnSpec(
|
|
114
|
+
name="sky_cover_pct",
|
|
115
|
+
dtype="int64",
|
|
116
|
+
units="percent",
|
|
117
|
+
nullable=True,
|
|
118
|
+
notes="bounded [0, 100]",
|
|
119
|
+
),
|
|
120
|
+
# === Open-Meteo extras (always nullable; null for iem.archive rows) ===
|
|
121
|
+
ColumnSpec(name="apparent_temp_c", dtype="float64", units="celsius", nullable=True),
|
|
122
|
+
ColumnSpec(name="shortwave_radiation_wm2", dtype="float64", units="W/m^2", nullable=True),
|
|
123
|
+
ColumnSpec(name="direct_radiation_wm2", dtype="float64", units="W/m^2", nullable=True),
|
|
124
|
+
ColumnSpec(name="cape_jkg", dtype="float64", units="J/kg", nullable=True),
|
|
125
|
+
ColumnSpec(name="precipitation_mm", dtype="float64", units="mm", nullable=True),
|
|
126
|
+
ColumnSpec(name="cloud_cover_pct", dtype="int64", units="percent", nullable=True),
|
|
127
|
+
ColumnSpec(name="surface_pressure_hpa", dtype="float64", units="hPa", nullable=True),
|
|
128
|
+
ColumnSpec(name="pressure_msl_hpa", dtype="float64", units="hPa", nullable=True),
|
|
129
|
+
ColumnSpec(name="freezing_level_m", dtype="int64", units="meters", nullable=True),
|
|
130
|
+
ColumnSpec(name="snow_depth_m", dtype="float64", units="meters", nullable=True),
|
|
131
|
+
ColumnSpec(name="visibility_m", dtype="int64", units="meters", nullable=True),
|
|
132
|
+
ColumnSpec(name="wind_gusts_ms", dtype="float64", units="m/s", nullable=True),
|
|
133
|
+
ColumnSpec(
|
|
134
|
+
name="weather_code",
|
|
135
|
+
dtype="int64",
|
|
136
|
+
units="WMO 4677",
|
|
137
|
+
nullable=True,
|
|
138
|
+
notes="WMO weather code (clear, fog, rain, snow, etc.)",
|
|
139
|
+
),
|
|
140
|
+
# === Provenance ===
|
|
141
|
+
ColumnSpec(
|
|
142
|
+
name="retrieved_at",
|
|
143
|
+
dtype="timestamp_utc",
|
|
144
|
+
units=None,
|
|
145
|
+
nullable=False,
|
|
146
|
+
notes="wall-clock time the row was fetched from upstream",
|
|
147
|
+
),
|
|
148
|
+
]
|
|
149
|
+
|
|
150
|
+
#: Imperial-mode renames apply to temperature, wind speed, and wind gusts.
|
|
151
|
+
#: ``valid_at`` / ``issued_at`` / ``retrieved_at`` are model-internal
|
|
152
|
+
#: timestamps and keep their canonical names.
|
|
153
|
+
IMPERIAL_RENAMES: ClassVar[dict[str, str]] = {
|
|
154
|
+
"temp_c": "temp_F",
|
|
155
|
+
"dew_point_c": "dew_point_F",
|
|
156
|
+
"apparent_temp_c": "apparent_temp_F",
|
|
157
|
+
"wind_speed_ms": "wind_speed_kt",
|
|
158
|
+
"wind_gusts_ms": "wind_gusts_kt",
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
class ForecastSchema(StationForecastSchema):
|
|
163
|
+
"""Back-compat alias for ``schema.forecast.iem_mos.v1``.
|
|
164
|
+
|
|
165
|
+
Same class semantics as :class:`StationForecastSchema`. Retained so
|
|
166
|
+
existing IEM MOS parity fixtures and Phase 17 callers continue to work
|
|
167
|
+
unchanged. New code should reference :class:`StationForecastSchema` and
|
|
168
|
+
the canonical ``schema.forecast.station.v1`` ``schema_id``.
|
|
169
|
+
|
|
170
|
+
Phase 20 OM-02.
|
|
171
|
+
"""
|
|
172
|
+
|
|
173
|
+
schema_id: ClassVar[str] = "schema.forecast.iem_mos.v1"
|
|
174
|
+
_registered_source: ClassVar[str] = "iem.archive"
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
__all__ = ["ForecastSchema", "StationForecastSchema"]
|
|
@@ -20,7 +20,11 @@ from typing import TYPE_CHECKING
|
|
|
20
20
|
|
|
21
21
|
import pandas as pd
|
|
22
22
|
|
|
23
|
-
from mostlyright.core.exceptions import
|
|
23
|
+
from mostlyright.core.exceptions import (
|
|
24
|
+
IssuedAtMissingError,
|
|
25
|
+
LeakageError,
|
|
26
|
+
SchemaValidationError,
|
|
27
|
+
)
|
|
24
28
|
from mostlyright.core.result import TradewindsResult
|
|
25
29
|
from mostlyright.core.temporal.timepoint import TimePoint
|
|
26
30
|
|
|
@@ -28,10 +32,19 @@ if TYPE_CHECKING:
|
|
|
28
32
|
pass
|
|
29
33
|
|
|
30
34
|
|
|
31
|
-
__all__ = [
|
|
35
|
+
__all__ = [
|
|
36
|
+
"LeakageDetector",
|
|
37
|
+
"assert_issued_at_populated",
|
|
38
|
+
"assert_no_leakage",
|
|
39
|
+
]
|
|
32
40
|
|
|
33
41
|
|
|
34
42
|
_SAMPLE_CAP = 10
|
|
43
|
+
#: Smaller cap for the issued_at assertion — leakage payloads should fit on
|
|
44
|
+
#: one screen when surfaced through MCP, and a forecast frame missing
|
|
45
|
+
#: ``issued_at`` is a structural bug rather than a per-row data issue. Phase
|
|
46
|
+
#: 20 OM-04.
|
|
47
|
+
_ISSUED_AT_SAMPLE_CAP = 5
|
|
35
48
|
|
|
36
49
|
|
|
37
50
|
def assert_no_leakage(df: pd.DataFrame | TradewindsResult, as_of: TimePoint) -> None:
|
|
@@ -124,6 +137,52 @@ def assert_no_leakage(df: pd.DataFrame | TradewindsResult, as_of: TimePoint) ->
|
|
|
124
137
|
)
|
|
125
138
|
|
|
126
139
|
|
|
140
|
+
def assert_issued_at_populated(df: pd.DataFrame | TradewindsResult) -> None:
    """Raise :class:`IssuedAtMissingError` if any row has null ``issued_at``.

    Forecast rows MUST carry their model-run time to be leakage-safe; a
    missing ``issued_at`` means we cannot verify the cycle predated the
    ``as_of`` cutoff in :func:`research`. For Open-Meteo this should be
    impossible by construction (the fetcher derives ``issued_at`` per row),
    so this check is a defensive net.

    Mirrors structural conventions of :func:`assert_no_leakage`:
    :class:`TradewindsResult` (and any duck-typed ``.df`` carrier)
    unwrap, column-existence guard, sample-cap.

    Args:
        df: A pandas frame, a :class:`TradewindsResult`, or any object
            exposing the frame as ``.df`` (and no ``.columns`` of its own).

    Raises:
        SchemaValidationError: The frame has no ``issued_at`` column.
        IssuedAtMissingError: At least one row has a null ``issued_at``.
            ``sample_violations`` holds at most ``_ISSUED_AT_SAMPLE_CAP``
            entries of the form ``{"row_idx": int}``. ``row_idx`` is the
            index label when it converts to ``int`` (the default
            ``RangeIndex`` case) and the positional row offset otherwise.

    Phase 20 OM-04.
    """
    if isinstance(df, TradewindsResult):
        df = df.frame_as_pandas()
    elif hasattr(df, "df") and not hasattr(df, "columns"):
        # Duck-type for non-TradewindsResult wrappers (e.g. test doubles).
        df = df.df

    if "issued_at" not in df.columns:
        raise SchemaValidationError(
            "assert_issued_at_populated requires 'issued_at' column",
            schema_id="schema.forecast.station.v1",
            violations=[{"column": "issued_at", "rule": "required"}],
        )

    # An empty frame produces an all-False mask and passes vacuously.
    nulls_mask = df["issued_at"].isna()
    violating_count = int(nulls_mask.sum())
    if violating_count == 0:
        return

    # Cap the sample *before* touching labels so a large frame does not
    # materialise every violating label just to keep a handful.
    positions = nulls_mask.to_numpy().nonzero()[0][:_ISSUED_AT_SAMPLE_CAP]
    samples = []
    for position in positions:
        label = df.index[position]
        try:
            row_idx = int(label)
        except (TypeError, ValueError):
            # Non-integer labels (timestamps, strings, MultiIndex tuples)
            # must not mask the real error; report the row position.
            row_idx = int(position)
        samples.append({"row_idx": row_idx})

    raise IssuedAtMissingError(
        f"{violating_count} row(s) have null issued_at; cannot verify leakage-safety",
        violating_count=violating_count,
        sample_violations=samples,
    )
|
|
184
|
+
|
|
185
|
+
|
|
127
186
|
class LeakageDetector:
|
|
128
187
|
"""Convenience wrapper for repeated detection against a fixed ``as_of``."""
|
|
129
188
|
|
|
@@ -145,3 +204,12 @@ class LeakageDetector:
|
|
|
145
204
|
wrapper (unwrapped inside :func:`assert_no_leakage`).
|
|
146
205
|
"""
|
|
147
206
|
assert_no_leakage(df, self._as_of)
|
|
207
|
+
|
|
208
|
+
def check_issued_at(self, df: pd.DataFrame | TradewindsResult) -> None:
    """Verify that every row of ``df`` carries a non-null ``issued_at``.

    Thin delegate to :func:`assert_issued_at_populated`, which raises
    :class:`IssuedAtMissingError` when any row lacks a model-run time.
    The detector's bound ``as_of`` is not consulted: a cutoff cannot be
    applied to a row that has no ``issued_at`` in the first place.

    Phase 20 OM-04 extension.
    """
    assert_issued_at_populated(df)
|
|
@@ -291,8 +291,27 @@ def validate_dataframe(
|
|
|
291
291
|
)
|
|
292
292
|
|
|
293
293
|
registered_source = getattr(schema_cls, "_registered_source", None)
|
|
294
|
+
# Phase 20 PLAN-11 review (codex HIGH #1): support union schemas via
|
|
295
|
+
# _registered_sources — a frozenset of permitted source identifiers.
|
|
296
|
+
# schema.forecast.station.v1 covers IEM MOS + 4 Open-Meteo endpoints;
|
|
297
|
+
# _registered_source (singular) is the legacy single-source guard.
|
|
298
|
+
registered_sources = getattr(schema_cls, "_registered_sources", None)
|
|
294
299
|
if (
|
|
295
|
-
|
|
300
|
+
registered_sources is not None
|
|
301
|
+
and data_source not in registered_sources
|
|
302
|
+
and allow_source_drift is None
|
|
303
|
+
):
|
|
304
|
+
raise SourceMismatchError(
|
|
305
|
+
f"Source drift: data is {data_source!r}, schema permits "
|
|
306
|
+
f"{sorted(registered_sources)!r}",
|
|
307
|
+
schema_source=",".join(sorted(registered_sources)),
|
|
308
|
+
data_source=data_source,
|
|
309
|
+
role=None,
|
|
310
|
+
catalog_warning=None,
|
|
311
|
+
)
|
|
312
|
+
elif (
|
|
313
|
+
registered_sources is None
|
|
314
|
+
and registered_source is not None
|
|
296
315
|
and data_source != registered_source
|
|
297
316
|
and allow_source_drift is None
|
|
298
317
|
):
|
|
@@ -1379,6 +1379,109 @@ def _validate_research_kwargs(
|
|
|
1379
1379
|
)
|
|
1380
1380
|
|
|
1381
1381
|
|
|
1382
|
+
_FORECAST_SOURCES_ALLOWED: frozenset[str] = frozenset({"iem_mos", "open_meteo"})
|
|
1383
|
+
|
|
1384
|
+
|
|
1385
|
+
def _om_optional_float(value: Any) -> float | None:
    """Coerce ``value`` to ``float``; return ``None`` if missing or non-numeric."""
    try:
        number = float(value)
    except (TypeError, ValueError):
        # None, pd.NA, pd.NaT and unparseable strings all land here.
        return None
    # NaN is the only float that is unequal to itself; treat it as missing
    # regardless of the concrete float type it arrived as.
    return None if number != number else number


def _fetch_open_meteo_range(
    info: StationInfo,
    from_date: str,
    to_date: str,
    *,
    model: str,
) -> dict[str, list[dict[str, Any]]]:
    """Phase 20 OM-05 — fetch Open-Meteo forecasts grouped by settlement date.

    Wraps ``mostlyright.weather._fetchers._open_meteo.fetch_open_meteo`` in
    training mode (Previous Runs API) and pivots its tabular DataFrame
    into the ``{date_iso: [forecast_row, ...]}`` shape that
    ``build_pairs(forecasts_by_date=...)`` expects. Each row carries
    ``model`` / ``issued_at`` / ``valid_at`` / ``temperature_f`` /
    ``pop_6hr_pct`` / ``qpf_6hr_in`` / ``source`` keys for build_pairs_row
    compatibility.

    Args:
        info: Station descriptor; ``info.icao`` is passed to the fetcher and
            ``info.code`` to ``settlement_date_for``.
        from_date: Start of the requested range, forwarded to the fetcher.
        to_date: End of the requested range, forwarded to the fetcher.
        model: Open-Meteo model key, forwarded to the fetcher.

    Returns:
        Mapping of settlement-date string to the forecast rows valid on that
        date. Empty when the fetcher returns ``None`` or an empty frame.
        Rows whose ``valid_at`` is missing or unparseable (including
        ``NaT``) are skipped; a missing or unparseable ``issued_at`` is kept
        as ``None``.
    """
    import pandas as pd

    from mostlyright.weather._fetchers._open_meteo import fetch_open_meteo

    iso_format = "%Y-%m-%dT%H:%M:%SZ"

    def _utc_timestamp(value: Any) -> Any:
        """Parse ``value`` into a UTC ``Timestamp``; ``None`` when unusable."""
        if value is None:
            return None
        try:
            parsed = pd.to_datetime(value, utc=True)
        except (ValueError, TypeError, OverflowError):
            return None
        # pd.to_datetime maps NaN/NaT to NaT instead of raising, and NaT
        # does not support strftime, so it has to be rejected explicitly.
        if parsed is pd.NaT or not isinstance(parsed, pd.Timestamp):
            return None
        return parsed

    df = fetch_open_meteo(info.icao, from_date, to_date, model=model, mode="training")
    groups: dict[str, list[dict[str, Any]]] = {}
    if df is None or df.empty:
        return groups

    for _, row in df.iterrows():
        valid_ts = _utc_timestamp(row.get("valid_at"))
        if valid_ts is None:
            continue
        valid_iso = valid_ts.strftime(iso_format)
        try:
            date_iso = settlement_date_for(valid_iso, info.code)
        except Exception:
            # Deliberate best-effort: if the settlement lookup fails for any
            # reason, bucket the row by its UTC calendar date instead.
            date_iso = valid_ts.strftime("%Y-%m-%d")

        issued_ts = _utc_timestamp(row.get("issued_at"))
        issued_iso = issued_ts.strftime(iso_format) if issued_ts is not None else None

        temp_c = _om_optional_float(row.get("temp_c"))
        # NOTE(review): the x100 scaling assumes the fetcher reports
        # precip_probability as a 0-1 fraction -- confirm against the fetcher.
        pop_prob = _om_optional_float(row.get("precip_probability"))
        precip_mm = _om_optional_float(row.get("precipitation_mm"))

        fcst_row: dict[str, Any] = {
            "model": row.get("model"),
            "issued_at": issued_iso,
            "valid_at": valid_iso,
            # Celsius -> Fahrenheit.
            "temperature_f": None if temp_c is None else temp_c * 9.0 / 5.0 + 32.0,
            "pop_6hr_pct": None if pop_prob is None else pop_prob * 100.0,
            # Millimetres -> inches.
            "qpf_6hr_in": None if precip_mm is None else precip_mm / 25.4,
            "source": row.get("source"),
        }
        groups.setdefault(date_iso, []).append(fcst_row)
    return groups
|
|
1464
|
+
|
|
1465
|
+
|
|
1466
|
+
def _normalize_forecast_source(
|
|
1467
|
+
forecast_source: str | list[str] | tuple[str, ...] | None,
|
|
1468
|
+
) -> tuple[str, ...]:
|
|
1469
|
+
"""Normalize the forecast_source kwarg to a sorted tuple of allowed values."""
|
|
1470
|
+
if forecast_source is None:
|
|
1471
|
+
return ("iem_mos",)
|
|
1472
|
+
if isinstance(forecast_source, str):
|
|
1473
|
+
items: tuple[str, ...] = (forecast_source,)
|
|
1474
|
+
else:
|
|
1475
|
+
items = tuple(forecast_source)
|
|
1476
|
+
bad = [s for s in items if s not in _FORECAST_SOURCES_ALLOWED]
|
|
1477
|
+
if bad:
|
|
1478
|
+
raise ValueError(
|
|
1479
|
+
f"forecast_source: unknown value(s) {bad}; "
|
|
1480
|
+
f"allowed = {sorted(_FORECAST_SOURCES_ALLOWED)}"
|
|
1481
|
+
)
|
|
1482
|
+
return items
|
|
1483
|
+
|
|
1484
|
+
|
|
1382
1485
|
def research(
|
|
1383
1486
|
station: str | None = None,
|
|
1384
1487
|
from_date: str | None = None,
|
|
@@ -1394,6 +1497,7 @@ def research(
|
|
|
1394
1497
|
include_forecast: bool = False,
|
|
1395
1498
|
forecast_model: str | None = None,
|
|
1396
1499
|
forecast_models: list[str] | None = None,
|
|
1500
|
+
forecast_source: str | list[str] | tuple[str, ...] = "iem_mos",
|
|
1397
1501
|
as_dataframe: bool = True,
|
|
1398
1502
|
tz_override: str | None = None,
|
|
1399
1503
|
qc: bool = False,
|
|
@@ -1584,15 +1688,48 @@ def research(
|
|
|
1584
1688
|
iem_mos_by_date: dict[str, list[dict[str, Any]]] = {}
|
|
1585
1689
|
nwp_by_model_date: dict[str, dict[str, list[dict[str, Any]]]] = {}
|
|
1586
1690
|
if include_forecast:
|
|
1691
|
+
fcst_sources = _normalize_forecast_source(forecast_source)
|
|
1587
1692
|
# Phase 17 Wave 4 iter-3 review HIGH: thread forecast_model through to
|
|
1588
1693
|
# the fetcher so callers asking for ``forecast_model="gfs"`` get a
|
|
1589
1694
|
# GFS pull, not a default-NBE pull whose rows then get filtered out
|
|
1590
|
-
# by build_pairs_row's model-name match.
|
|
1591
|
-
|
|
1592
|
-
|
|
1593
|
-
|
|
1594
|
-
|
|
1595
|
-
|
|
1695
|
+
# by build_pairs_row's model-name match.
|
|
1696
|
+
if "iem_mos" in fcst_sources:
|
|
1697
|
+
iem_model = (forecast_model or "nbe").lower()
|
|
1698
|
+
iem_mos_by_date = _fetch_iem_mos_range(info, from_date, to_date, model=iem_model)
|
|
1699
|
+
if "open_meteo" in fcst_sources:
|
|
1700
|
+
# Phase 20 OM-05: Open-Meteo forecast source. Default model
|
|
1701
|
+
# gfs_global matches the IEM MOS "nbe" parity-default ethos:
|
|
1702
|
+
# the most-traded prediction-market model for US stations.
|
|
1703
|
+
om_model = forecast_model or "gfs_global"
|
|
1704
|
+
from mostlyright.weather._fetchers._open_meteo_models import (
|
|
1705
|
+
OPEN_METEO_MODELS,
|
|
1706
|
+
)
|
|
1707
|
+
|
|
1708
|
+
if om_model not in OPEN_METEO_MODELS:
|
|
1709
|
+
# Phase 20 PLAN-11 review (codex HIGH #3): a typo or legacy
|
|
1710
|
+
# IEM MOS model id like "nbe" would otherwise silently drop
|
|
1711
|
+
# Open-Meteo forecasts and leave fcst_* columns null — the
|
|
1712
|
+
# caller would not learn the source failed. Hard-fail with
|
|
1713
|
+
# a hint so the typo surfaces immediately.
|
|
1714
|
+
raise ValueError(
|
|
1715
|
+
f"forecast_source=\"open_meteo\" requires forecast_model "
|
|
1716
|
+
f"in OPEN_METEO_MODELS (36 keys); got {om_model!r}. "
|
|
1717
|
+
f"Pick one of the 36 registered Open-Meteo keys (e.g. "
|
|
1718
|
+
f"'gfs_global', 'ecmwf_ifs_hres', 'dwd_icon_global') "
|
|
1719
|
+
f"or drop forecast_source=\"open_meteo\" to use the "
|
|
1720
|
+
f"default IEM MOS path."
|
|
1721
|
+
)
|
|
1722
|
+
|
|
1723
|
+
om_by_date = _fetch_open_meteo_range(
|
|
1724
|
+
info, from_date, to_date, model=om_model
|
|
1725
|
+
)
|
|
1726
|
+
# Concatenate: never silently merge — every row carries its
|
|
1727
|
+
# source identity. build_pairs accepts a single dict so we
|
|
1728
|
+
# merge OM rows into iem_mos_by_date when both sources are
|
|
1729
|
+
# selected (build_pairs_row already discriminates via
|
|
1730
|
+
# row.get("source")).
|
|
1731
|
+
for date_iso, rows in om_by_date.items():
|
|
1732
|
+
iem_mos_by_date.setdefault(date_iso, []).extend(rows)
|
|
1596
1733
|
if forecast_models:
|
|
1597
1734
|
nwp_by_model_date = _fetch_nwp_models_range(
|
|
1598
1735
|
info, from_date, to_date, list(forecast_models)
|