mostlyrightmd 1.2.0__tar.gz → 1.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156)
  1. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/PKG-INFO +1 -1
  2. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/pyproject.toml +1 -1
  3. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/core/exceptions.py +109 -0
  4. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/core/schemas/__init__.py +14 -2
  5. mostlyrightmd-1.3.1/src/mostlyright/core/schemas/forecast.py +177 -0
  6. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/core/temporal/leakage.py +70 -2
  7. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/core/validator.py +19 -1
  8. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/research.py +141 -6
  9. mostlyrightmd-1.3.1/tests/core/test_schemas/test_forecast.py +113 -0
  10. mostlyrightmd-1.3.1/tests/test_exceptions_phase18.py +79 -0
  11. mostlyrightmd-1.3.1/tests/test_leakage_issued_at.py +97 -0
  12. mostlyrightmd-1.3.1/tests/test_research_forecast_source.py +39 -0
  13. mostlyrightmd-1.3.1/tests/test_schemas_codegen.py +111 -0
  14. mostlyrightmd-1.3.1/tests/test_schemas_station_forecast.py +98 -0
  15. mostlyrightmd-1.2.0/src/mostlyright/core/schemas/forecast.py +0 -122
  16. mostlyrightmd-1.2.0/tests/core/test_schemas/test_forecast.py +0 -104
  17. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/.gitignore +0 -0
  18. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/README.md +0 -0
  19. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/__init__.py +0 -0
  20. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_compose.py +0 -0
  21. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_exact_fetch.py +0 -0
  22. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/__init__.py +0 -0
  23. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/_bounds.py +0 -0
  24. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/_cache_dir.py +0 -0
  25. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/_capabilities.py +0 -0
  26. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/_convert.py +0 -0
  27. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/_http.py +0 -0
  28. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/_pairs.py +0 -0
  29. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/_pandas_compat.py +0 -0
  30. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/_stations.py +0 -0
  31. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/_toon.py +0 -0
  32. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/exceptions.py +0 -0
  33. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/merge/__init__.py +0 -0
  34. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/merge/_schemas.py +0 -0
  35. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/merge/climate.py +0 -0
  36. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/merge/observations.py +0 -0
  37. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/models/__init__.py +0 -0
  38. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/models/_base.py +0 -0
  39. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/models/availability.py +0 -0
  40. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/models/observation.py +0 -0
  41. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/models/station.py +0 -0
  42. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/specs/book_snapshot.json +0 -0
  43. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/specs/brackets.json +0 -0
  44. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/specs/candle.json +0 -0
  45. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/specs/climate.json +0 -0
  46. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/specs/daily_extreme.json +0 -0
  47. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/specs/data_version.json +0 -0
  48. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/specs/event.json +0 -0
  49. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/specs/forecast.json +0 -0
  50. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/specs/forecast_series.json +0 -0
  51. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/specs/market.json +0 -0
  52. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/specs/market_unified.json +0 -0
  53. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/specs/observation.json +0 -0
  54. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/specs/observation_ledger.json +0 -0
  55. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/specs/observation_qc.json +0 -0
  56. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/specs/omo.json +0 -0
  57. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/specs/series.json +0 -0
  58. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/specs/settlement-join.json +0 -0
  59. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/specs/settlement_record.json +0 -0
  60. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/specs/snapshot.json +0 -0
  61. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/specs/synoptic_extremes.json +0 -0
  62. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/_internal/versioning.py +0 -0
  63. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/core/__init__.py +0 -0
  64. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/core/_backend_dispatch.py +0 -0
  65. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/core/_json_safe.py +0 -0
  66. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/core/_narwhals_compat.py +0 -0
  67. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/core/_polars_compat.py +0 -0
  68. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/core/formats/__init__.py +0 -0
  69. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/core/formats/_toon.py +0 -0
  70. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/core/formats/_toon_list_codec.py +0 -0
  71. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/core/formats/csv.py +0 -0
  72. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/core/formats/dataframe.py +0 -0
  73. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/core/formats/json.py +0 -0
  74. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/core/formats/parquet.py +0 -0
  75. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/core/formats/toon.py +0 -0
  76. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/core/merge.py +0 -0
  77. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/core/result.py +0 -0
  78. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/core/schema.py +0 -0
  79. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/core/schemas/forecast_nwp.py +0 -0
  80. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/core/schemas/observation.py +0 -0
  81. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/core/schemas/observation_ledger.py +0 -0
  82. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/core/schemas/observation_qc.py +0 -0
  83. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/core/schemas/settlement.py +0 -0
  84. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/core/temporal/__init__.py +0 -0
  85. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/core/temporal/knowledge_view.py +0 -0
  86. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/core/temporal/timepoint.py +0 -0
  87. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/discover.py +0 -0
  88. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/discovery.py +0 -0
  89. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/forecasts.py +0 -0
  90. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/international.py +0 -0
  91. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/live/__init__.py +0 -0
  92. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/live/_latest.py +0 -0
  93. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/live/_sources.py +0 -0
  94. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/live/_stream.py +0 -0
  95. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/mode2.py +0 -0
  96. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/preprocessing.py +0 -0
  97. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/qc.py +0 -0
  98. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/snapshot.py +0 -0
  99. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/src/mostlyright/transforms.py +0 -0
  100. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/_internal/__init__.py +0 -0
  101. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/_internal/merge/__init__.py +0 -0
  102. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/_internal/merge/test_awc_gap_filled_by_iem.py +0 -0
  103. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/_internal/merge/test_climate.py +0 -0
  104. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/_internal/merge/test_observations.py +0 -0
  105. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/_internal/models/__init__.py +0 -0
  106. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/_internal/models/test_availability.py +0 -0
  107. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/_internal/models/test_base.py +0 -0
  108. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/_internal/models/test_observation.py +0 -0
  109. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/_internal/models/test_station.py +0 -0
  110. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/_internal/test_bounds.py +0 -0
  111. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/_internal/test_capabilities.py +0 -0
  112. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/_internal/test_convert.py +0 -0
  113. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/_internal/test_exceptions.py +0 -0
  114. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/_internal/test_http.py +0 -0
  115. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/_internal/test_pairs.py +0 -0
  116. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/_internal/test_stations.py +0 -0
  117. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/_internal/test_versioning.py +0 -0
  118. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/core/__init__.py +0 -0
  119. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/core/temporal/__init__.py +0 -0
  120. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/core/temporal/test_knowledge_view.py +0 -0
  121. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/core/temporal/test_leakage.py +0 -0
  122. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/core/test_exceptions.py +0 -0
  123. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/core/test_formats.py +0 -0
  124. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/core/test_json_safe.py +0 -0
  125. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/core/test_merge.py +0 -0
  126. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/core/test_result.py +0 -0
  127. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/core/test_schema.py +0 -0
  128. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/core/test_schemas/__init__.py +0 -0
  129. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/core/test_schemas/test_observation.py +0 -0
  130. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/core/test_schemas/test_settlement.py +0 -0
  131. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/core/test_timepoint.py +0 -0
  132. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/core/test_validator.py +0 -0
  133. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/test_backend_dispatch.py +0 -0
  134. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/test_cache_env_back_compat.py +0 -0
  135. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/test_compose.py +0 -0
  136. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/test_discover.py +0 -0
  137. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/test_discovery_real.py +0 -0
  138. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/test_exact_fetch.py +0 -0
  139. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/test_exceptions_data_availability.py +0 -0
  140. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/test_exceptions_phase17.py +0 -0
  141. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/test_exceptions_phase17_plan06.py +0 -0
  142. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/test_forecast_nwp_schema_phase17.py +0 -0
  143. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/test_international.py +0 -0
  144. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/test_live_latest.py +0 -0
  145. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/test_live_stream.py +0 -0
  146. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/test_mode2.py +0 -0
  147. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/test_namespace.py +0 -0
  148. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/test_phase_3x.py +0 -0
  149. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/test_polars_cross_backend.py +0 -0
  150. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/test_polars_invariants.py +0 -0
  151. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/test_qc_wired.py +0 -0
  152. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/test_research.py +0 -0
  153. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/test_research_kwarg_validation.py +0 -0
  154. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/test_research_prefetch.py +0 -0
  155. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/test_snapshot.py +0 -0
  156. {mostlyrightmd-1.2.0 → mostlyrightmd-1.3.1}/tests/test_transforms_preprocessing.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mostlyrightmd
3
- Version: 1.2.0
3
+ Version: 1.3.1
4
4
  Summary: Python SDK for quants, ML engineers, and AI agents — one interface to public data. Adapters ship weather + prediction-market settlements (Kalshi NHIGH/NLOW, Polymarket) today; SEC filings, Federal Reserve series, court filings, FDA approvals, and equities are next. Schema-versioned, leakage-free, local-first. Imports as `mostlyright`.
5
5
  Project-URL: Homepage, https://mostlyright.md
6
6
  Project-URL: Documentation, https://mostlyright.md/docs/sdk/
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "mostlyrightmd"
3
- version = "1.2.0"
3
+ version = "1.3.1"
4
4
  description = "Python SDK for quants, ML engineers, and AI agents — one interface to public data. Adapters ship weather + prediction-market settlements (Kalshi NHIGH/NLOW, Polymarket) today; SEC filings, Federal Reserve series, court filings, FDA approvals, and equities are next. Schema-versioned, leakage-free, local-first. Imports as `mostlyright`."
5
5
  readme = "README.md"
6
6
  license = "MIT"
@@ -31,6 +31,7 @@ __all__ = [
31
31
  "DeprecatedModelWarning",
32
32
  "GribIntegrityError",
33
33
  "HistoricalDepthError",
34
+ "IssuedAtMissingError",
34
35
  "LeakageError",
35
36
  "LiveStreamError",
36
37
  "NoLiveDataError",
@@ -38,6 +39,7 @@ __all__ = [
38
39
  "NwpError",
39
40
  "NwpModelNotAvailableError",
40
41
  "NwpModelRetiredError",
42
+ "OpenMeteoSeamlessLeakageError",
41
43
  "PayloadTooLargeError",
42
44
  "SchemaValidationError",
43
45
  "SourceMismatchError",
@@ -738,6 +740,113 @@ class LiveStreamError(TradewindsError):
738
740
  default_error_code = "LIVE_STREAM_ERROR"
739
741
 
740
742
 
743
+ #: Sentinel ``as_of`` value used when raising OpenMeteoSeamlessLeakageError
744
+ #: before the offending DataFrame is known (e.g. seamless-endpoint refusal at fetch
745
+ #: time, where no rows have been retrieved yet). LeakageError requires non-empty
746
+ #: ``as_of`` so OpenMeteoSeamlessLeakageError supplies this sentinel rather than
747
+ #: forging a timestamp.
748
+ _NO_AS_OF_SENTINEL = "(seamless-endpoint-refused-before-fetch)"
749
+
750
+
751
+ class IssuedAtMissingError(SchemaValidationError):
752
+ """A forecast row is missing the ``issued_at`` field.
753
+
754
+ Raised when a forecast row would land in the DataFrame with
755
+ ``issued_at IS NULL``. For Open-Meteo Previous Runs API this should be
756
+ impossible by construction (the fetcher derives ``issued_at`` per row
757
+ via the conservative lower-bound formula). For Live mode, this is
758
+ raised when cycle-math fallback cannot derive a non-null cycle.
759
+
760
+ Origin: ``Tarabcak/mostlyright#70`` — the legacy seamless-feed bug
761
+ where ``/forecast_series`` proxied Open-Meteo's seamless endpoint
762
+ without preserving ``issued_at``, causing post-snapshot model runs to
763
+ silently leak into training data.
764
+
765
+ Phase 20 OM-04.
766
+ """
767
+
768
+ default_error_code = "ISSUED_AT_MISSING"
769
+
770
+ def __init__(
771
+ self,
772
+ message: str = "",
773
+ *,
774
+ source: str | None = None,
775
+ violating_count: int = 0,
776
+ sample_violations: list[dict[str, Any]] | None = None,
777
+ request_id: str | None = None,
778
+ error_code: str | None = None,
779
+ ) -> None:
780
+ super().__init__(
781
+ message,
782
+ schema_id="schema.forecast.station.v1",
783
+ violations=[{"column": "issued_at", "rule": "non_null"}],
784
+ sample_violations=sample_violations,
785
+ source=source,
786
+ request_id=request_id,
787
+ error_code=error_code,
788
+ )
789
+ self.violating_count: int = violating_count
790
+
791
+ def _payload(self) -> dict[str, Any]:
792
+ payload = super()._payload()
793
+ payload["name"] = "IssuedAtMissingError"
794
+ payload["violating_count"] = self.violating_count
795
+ payload["origin_issue"] = "Tarabcak/mostlyright#70"
796
+ return payload
797
+
798
+
799
+ class OpenMeteoSeamlessLeakageError(LeakageError):
800
+ """The Open-Meteo Historical Forecast (seamless) endpoint was used
801
+ without ``allow_leakage=True`` opt-in.
802
+
803
+ Per Phase 20 D-01 (locked decision): the seamless endpoint silently
804
+ stitches forecasts from multiple model cycles into a continuous
805
+ timeseries; the cycle that produced each value is unrecoverable from
806
+ the response. :class:`LeakageDetector` rejects rows tagged
807
+ ``source="open_meteo.seamless"`` whenever ``as_of`` is asserted.
808
+
809
+ Origin: ``Tarabcak/mostlyright#70``.
810
+
811
+ Phase 20 OM-04.
812
+ """
813
+
814
+ default_error_code = "OPEN_METEO_SEAMLESS_LEAKAGE"
815
+
816
+ def __init__(
817
+ self,
818
+ message: str = "",
819
+ *,
820
+ model: str = "",
821
+ endpoint_url: str = "",
822
+ as_of: str | None = None,
823
+ violating_count: int = 0,
824
+ sample_violations: list[dict[str, Any]] | None = None,
825
+ source: str | None = None,
826
+ request_id: str | None = None,
827
+ error_code: str | None = None,
828
+ ) -> None:
829
+ super().__init__(
830
+ message,
831
+ as_of=as_of or _NO_AS_OF_SENTINEL,
832
+ violating_count=violating_count,
833
+ sample_violations=sample_violations,
834
+ source=source,
835
+ request_id=request_id,
836
+ error_code=error_code,
837
+ )
838
+ self.model: str = model
839
+ self.endpoint_url: str = endpoint_url
840
+
841
+ def _payload(self) -> dict[str, Any]:
842
+ payload = super()._payload()
843
+ payload["name"] = "OpenMeteoSeamlessLeakageError"
844
+ payload["model"] = self.model
845
+ payload["endpoint_url"] = self.endpoint_url
846
+ payload["origin_issue"] = "Tarabcak/mostlyright#70"
847
+ return payload
848
+
849
+
741
850
  class NoLiveDataError(LiveStreamError):
742
851
  """:func:`mostlyright.live.latest` returned no observations for the station.
743
852
 
@@ -10,9 +10,9 @@ Each schema is eagerly registered with the Validator at import time so
10
10
  without any explicit register-call boilerplate.
11
11
  """
12
12
 
13
- from mostlyright.core.validator import register_schema
13
+ from mostlyright.core.validator import _SCHEMA_REGISTRY, register_schema
14
14
 
15
- from .forecast import ForecastSchema
15
+ from .forecast import ForecastSchema, StationForecastSchema
16
16
  from .forecast_nwp import NwpForecastSchema
17
17
  from .observation import ObservationSchema
18
18
  from .observation_ledger import ObservationLedgerSchema
@@ -21,6 +21,11 @@ from .settlement import SettlementSchema
21
21
 
22
22
  # Eager registration — Validator can look up each schema by ID immediately.
23
23
  register_schema(ObservationSchema)
24
+ # Phase 20 OM-02: register canonical StationForecastSchema FIRST so the
25
+ # canonical schema_id wins on any registry-iteration that visits in
26
+ # insertion order; ForecastSchema (back-compat alias to
27
+ # schema.forecast.iem_mos.v1) registers second.
28
+ register_schema(StationForecastSchema)
24
29
  register_schema(ForecastSchema)
25
30
  register_schema(SettlementSchema)
26
31
  # Phase 2.1 additions.
@@ -29,11 +34,18 @@ register_schema(ObservationQCSchema)
29
34
  # Phase 3.2 addition.
30
35
  register_schema(NwpForecastSchema)
31
36
 
37
+ #: Public alias for the validator's registry dict, so callers and tests
38
+ #: can look up schemas by id without reaching into ``core.validator``'s
39
+ #: underscored internal. Phase 20 OM-02.
40
+ SCHEMA_REGISTRY = _SCHEMA_REGISTRY
41
+
32
42
  __all__ = [
43
+ "SCHEMA_REGISTRY",
33
44
  "ForecastSchema",
34
45
  "NwpForecastSchema",
35
46
  "ObservationLedgerSchema",
36
47
  "ObservationQCSchema",
37
48
  "ObservationSchema",
38
49
  "SettlementSchema",
50
+ "StationForecastSchema",
39
51
  ]
@@ -0,0 +1,177 @@
1
+ """Phase 20: Unified per-station forecast schema.
2
+
3
+ ``schema.forecast.station.v1`` covers both IEM MOS rows and Open-Meteo rows
4
+ in a single column set. ``schema.forecast.iem_mos.v1`` is retained as a
5
+ back-compat alias (same class semantics, different ``schema_id``). Both
6
+ register via ``core/schemas/__init__.py``.
7
+
8
+ The unified schema marks IEM MOS core columns (``temp_c``, ``dew_point_c``,
9
+ etc.) as nullable because Open-Meteo may not provide all of them. Open-Meteo
10
+ extras (``apparent_temp_c``, ``shortwave_radiation_wm2``, ``cape_jkg``, etc.)
11
+ are always nullable — IEM MOS rows leave them null; Open-Meteo rows populate
12
+ them.
13
+
14
+ Source discrimination is via the ``source`` column (e.g. ``iem.archive``,
15
+ ``open_meteo.previous_runs``, ``open_meteo.live``).
16
+
17
+ Temporal mapping (design.md §A):
18
+ - ``event_time = valid_at``
19
+ - ``knowledge_time = issued_at``
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ from typing import ClassVar
25
+
26
+ from ..schema import ColumnSpec, Schema
27
+
28
+
29
+ class StationForecastSchema(Schema):
30
+ """``schema.forecast.station.v1`` — unified per-station forecast schema.
31
+
32
+ Covers IEM MOS shared core + Open-Meteo extras. Source identity via
33
+ the ``source`` column. Two ``schema_id`` strings register against the
34
+ same column set: ``schema.forecast.station.v1`` (canonical) and
35
+ ``schema.forecast.iem_mos.v1`` (back-compat alias via subclass).
36
+
37
+ Phase 20 OM-02.
38
+ """
39
+
40
+ schema_id: ClassVar[str] = "schema.forecast.station.v1"
41
+ # Phase 20 PLAN-11 review (codex HIGH #1): the unified station-forecast
42
+ # schema is a union over 5 source identifiers — `iem.archive` (IEM MOS
43
+ # archive rows) plus the 4 Open-Meteo endpoints. Validator branches on
44
+ # `_registered_sources` (set form) when present and falls back to
45
+ # `_registered_source` (single-value form) for legacy schemas.
46
+ _registered_source: ClassVar[str] = "open_meteo.previous_runs"
47
+ _registered_sources: ClassVar[frozenset[str]] = frozenset(
48
+ {
49
+ "iem.archive",
50
+ "open_meteo.previous_runs",
51
+ "open_meteo.single_run",
52
+ "open_meteo.live",
53
+ "open_meteo.seamless",
54
+ }
55
+ )
56
+
57
+ COLUMNS: ClassVar[list[ColumnSpec]] = [
58
+ # === Identity (required columns; non-nullable except issued_at) ===
59
+ ColumnSpec(name="station", dtype="string", units=None, nullable=False),
60
+ ColumnSpec(
61
+ name="issued_at",
62
+ dtype="timestamp_utc",
63
+ units=None,
64
+ nullable=True,
65
+ notes=(
66
+ "model run time (knowledge_time). Nullable to accommodate "
67
+ "Phase 20 open_meteo.seamless rows whose cycle is "
68
+ "unrecoverable from the response. LeakageDetector + "
69
+ "assert_issued_at_populated() are the runtime gates that "
70
+ "reject null issued_at in training-data paths."
71
+ ),
72
+ ),
73
+ ColumnSpec(
74
+ name="valid_at",
75
+ dtype="timestamp_utc",
76
+ units=None,
77
+ nullable=False,
78
+ notes="forecast target time (event_time)",
79
+ ),
80
+ ColumnSpec(
81
+ name="forecast_hour",
82
+ dtype="int64",
83
+ units="hours",
84
+ nullable=False,
85
+ notes="(valid_at - issued_at).total_seconds() / 3600",
86
+ ),
87
+ ColumnSpec(
88
+ name="model",
89
+ dtype="string",
90
+ units=None,
91
+ nullable=False,
92
+ notes="e.g. NBE, GFS, LAV, MET, gfs_global, ecmwf_ifs025",
93
+ ),
94
+ ColumnSpec(
95
+ name="source",
96
+ dtype="string",
97
+ units=None,
98
+ nullable=False,
99
+ notes="iem.archive | open_meteo.previous_runs | open_meteo.single_run | open_meteo.live",
100
+ ),
101
+ # === IEM MOS core (nullable because Open-Meteo may not supply all) ===
102
+ ColumnSpec(name="temp_c", dtype="float64", units="celsius", nullable=True),
103
+ ColumnSpec(name="dew_point_c", dtype="float64", units="celsius", nullable=True),
104
+ ColumnSpec(name="wind_speed_ms", dtype="float64", units="m/s", nullable=True),
105
+ ColumnSpec(name="wind_dir_deg", dtype="int64", units="degrees", nullable=True),
106
+ ColumnSpec(
107
+ name="precip_probability",
108
+ dtype="float64",
109
+ units="probability",
110
+ nullable=True,
111
+ notes="bounded [0, 1]",
112
+ ),
113
+ ColumnSpec(
114
+ name="sky_cover_pct",
115
+ dtype="int64",
116
+ units="percent",
117
+ nullable=True,
118
+ notes="bounded [0, 100]",
119
+ ),
120
+ # === Open-Meteo extras (always nullable; null for iem.archive rows) ===
121
+ ColumnSpec(name="apparent_temp_c", dtype="float64", units="celsius", nullable=True),
122
+ ColumnSpec(name="shortwave_radiation_wm2", dtype="float64", units="W/m^2", nullable=True),
123
+ ColumnSpec(name="direct_radiation_wm2", dtype="float64", units="W/m^2", nullable=True),
124
+ ColumnSpec(name="cape_jkg", dtype="float64", units="J/kg", nullable=True),
125
+ ColumnSpec(name="precipitation_mm", dtype="float64", units="mm", nullable=True),
126
+ ColumnSpec(name="cloud_cover_pct", dtype="int64", units="percent", nullable=True),
127
+ ColumnSpec(name="surface_pressure_hpa", dtype="float64", units="hPa", nullable=True),
128
+ ColumnSpec(name="pressure_msl_hpa", dtype="float64", units="hPa", nullable=True),
129
+ ColumnSpec(name="freezing_level_m", dtype="int64", units="meters", nullable=True),
130
+ ColumnSpec(name="snow_depth_m", dtype="float64", units="meters", nullable=True),
131
+ ColumnSpec(name="visibility_m", dtype="int64", units="meters", nullable=True),
132
+ ColumnSpec(name="wind_gusts_ms", dtype="float64", units="m/s", nullable=True),
133
+ ColumnSpec(
134
+ name="weather_code",
135
+ dtype="int64",
136
+ units="WMO 4677",
137
+ nullable=True,
138
+ notes="WMO weather code (clear, fog, rain, snow, etc.)",
139
+ ),
140
+ # === Provenance ===
141
+ ColumnSpec(
142
+ name="retrieved_at",
143
+ dtype="timestamp_utc",
144
+ units=None,
145
+ nullable=False,
146
+ notes="wall-clock time the row was fetched from upstream",
147
+ ),
148
+ ]
149
+
150
+ #: Imperial-mode renames apply to temperature, wind speed, and wind gusts.
151
+ #: ``valid_at`` / ``issued_at`` / ``retrieved_at`` are model-internal
152
+ #: timestamps and keep their canonical names.
153
+ IMPERIAL_RENAMES: ClassVar[dict[str, str]] = {
154
+ "temp_c": "temp_F",
155
+ "dew_point_c": "dew_point_F",
156
+ "apparent_temp_c": "apparent_temp_F",
157
+ "wind_speed_ms": "wind_speed_kt",
158
+ "wind_gusts_ms": "wind_gusts_kt",
159
+ }
160
+
161
+
162
+ class ForecastSchema(StationForecastSchema):
163
+ """Back-compat alias for ``schema.forecast.iem_mos.v1``.
164
+
165
+ Same class semantics as :class:`StationForecastSchema`. Retained so
166
+ existing IEM MOS parity fixtures and Phase 17 callers continue to work
167
+ unchanged. New code should reference :class:`StationForecastSchema` and
168
+ the canonical ``schema.forecast.station.v1`` ``schema_id``.
169
+
170
+ Phase 20 OM-02.
171
+ """
172
+
173
+ schema_id: ClassVar[str] = "schema.forecast.iem_mos.v1"
174
+ _registered_source: ClassVar[str] = "iem.archive"
175
+
176
+
177
+ __all__ = ["ForecastSchema", "StationForecastSchema"]
@@ -20,7 +20,11 @@ from typing import TYPE_CHECKING
20
20
 
21
21
  import pandas as pd
22
22
 
23
- from mostlyright.core.exceptions import LeakageError, SchemaValidationError
23
+ from mostlyright.core.exceptions import (
24
+ IssuedAtMissingError,
25
+ LeakageError,
26
+ SchemaValidationError,
27
+ )
24
28
  from mostlyright.core.result import TradewindsResult
25
29
  from mostlyright.core.temporal.timepoint import TimePoint
26
30
 
@@ -28,10 +32,19 @@ if TYPE_CHECKING:
28
32
  pass
29
33
 
30
34
 
31
- __all__ = ["LeakageDetector", "assert_no_leakage"]
35
+ __all__ = [
36
+ "LeakageDetector",
37
+ "assert_issued_at_populated",
38
+ "assert_no_leakage",
39
+ ]
32
40
 
33
41
 
34
42
  _SAMPLE_CAP = 10
43
+ #: Smaller cap for the issued_at assertion — leakage payloads should fit on
44
+ #: one screen when surfaced through MCP, and a forecast frame missing
45
+ #: ``issued_at`` is a structural bug rather than a per-row data issue. Phase
46
+ #: 20 OM-04.
47
+ _ISSUED_AT_SAMPLE_CAP = 5
35
48
 
36
49
 
37
50
  def assert_no_leakage(df: pd.DataFrame | TradewindsResult, as_of: TimePoint) -> None:
@@ -124,6 +137,52 @@ def assert_no_leakage(df: pd.DataFrame | TradewindsResult, as_of: TimePoint) ->
124
137
  )
125
138
 
126
139
 
140
+ def assert_issued_at_populated(df: pd.DataFrame | TradewindsResult) -> None:
141
+ """Raise :class:`IssuedAtMissingError` if any row has null ``issued_at``.
142
+
143
+ Forecast rows MUST carry their model-run time to be leakage-safe; a
144
+ missing ``issued_at`` means we cannot verify the cycle predated the
145
+ ``as_of`` cutoff in :func:`research`. For Open-Meteo Previous Runs this should be
146
+ impossible by construction (the fetcher derives ``issued_at`` per row),
147
+ so this check is a defensive net.
148
+
149
+ Mirrors structural conventions of :func:`assert_no_leakage`:
150
+ :class:`TradewindsResult` (and any duck-typed ``.df`` carrier)
151
+ unwrap, column-existence guard, sample-cap.
152
+
153
+ Phase 20 OM-04.
154
+ """
155
+ if isinstance(df, TradewindsResult):
156
+ df = df.frame_as_pandas()
157
+ elif hasattr(df, "df") and not hasattr(df, "columns"):
158
+ # Duck-type for non-TradewindsResult wrappers (e.g. test doubles).
159
+ df = df.df
160
+
161
+ if "issued_at" not in df.columns:
162
+ raise SchemaValidationError(
163
+ "assert_issued_at_populated requires 'issued_at' column",
164
+ schema_id="schema.forecast.station.v1",
165
+ violations=[{"column": "issued_at", "rule": "required"}],
166
+ )
167
+
168
+ if len(df) == 0:
169
+ return # empty frame vacuously satisfies
170
+
171
+ nulls_mask = df["issued_at"].isna()
172
+ violating_count = int(nulls_mask.sum())
173
+ if violating_count == 0:
174
+ return
175
+
176
+ null_indices = df.index[nulls_mask].tolist()
177
+ samples = [{"row_idx": int(idx)} for idx in null_indices[:_ISSUED_AT_SAMPLE_CAP]]
178
+
179
+ raise IssuedAtMissingError(
180
+ f"{violating_count} row(s) have null issued_at; cannot verify leakage-safety",
181
+ violating_count=violating_count,
182
+ sample_violations=samples,
183
+ )
184
+
185
+
127
186
  class LeakageDetector:
128
187
  """Convenience wrapper for repeated detection against a fixed ``as_of``."""
129
188
 
@@ -145,3 +204,12 @@ class LeakageDetector:
145
204
  wrapper (unwrapped inside :func:`assert_no_leakage`).
146
205
  """
147
206
  assert_no_leakage(df, self._as_of)
207
+
208
def check_issued_at(self, df: pd.DataFrame | TradewindsResult) -> None:
    """Raise :class:`IssuedAtMissingError` if any row has null ``issued_at``.

    Phase 20 OM-04 extension. Independent of ``as_of`` — the bound
    cutoff is irrelevant when the row carries no model-run time at
    all.

    Thin delegate to :func:`assert_issued_at_populated`; ``df`` is passed
    through unchanged, so the same inputs are accepted and the same
    exceptions propagate (including :class:`SchemaValidationError` when
    the ``issued_at`` column is absent).
    """
    assert_issued_at_populated(df)
@@ -291,8 +291,26 @@ def validate_dataframe(
291
291
  )
292
292
 
293
293
  registered_source = getattr(schema_cls, "_registered_source", None)
294
+ # Phase 20 PLAN-11 review (codex HIGH #1): support union schemas via
295
+ # _registered_sources — a frozenset of permitted source identifiers.
296
+ # schema.forecast.station.v1 covers IEM MOS + 4 Open-Meteo endpoints;
297
+ # _registered_source (singular) is the legacy single-source guard.
298
+ registered_sources = getattr(schema_cls, "_registered_sources", None)
294
299
  if (
295
- registered_source is not None
300
+ registered_sources is not None
301
+ and data_source not in registered_sources
302
+ and allow_source_drift is None
303
+ ):
304
+ raise SourceMismatchError(
305
+ f"Source drift: data is {data_source!r}, schema permits {sorted(registered_sources)!r}",
306
+ schema_source=",".join(sorted(registered_sources)),
307
+ data_source=data_source,
308
+ role=None,
309
+ catalog_warning=None,
310
+ )
311
+ elif (
312
+ registered_sources is None
313
+ and registered_source is not None
296
314
  and data_source != registered_source
297
315
  and allow_source_drift is None
298
316
  ):
@@ -1379,6 +1379,109 @@ def _validate_research_kwargs(
1379
1379
  )
1380
1380
 
1381
1381
 
1382
# Closed set of identifiers accepted by the ``forecast_source`` kwarg of
# ``research``; ``_normalize_forecast_source`` rejects anything not in it.
_FORECAST_SOURCES_ALLOWED: frozenset[str] = frozenset({"iem_mos", "open_meteo"})
1383
+
1384
+
1385
def _fetch_open_meteo_range(
    info: StationInfo,
    from_date: str,
    to_date: str,
    *,
    model: str,
) -> dict[str, list[dict[str, Any]]]:
    """Phase 20 OM-05 — fetch Open-Meteo forecasts grouped by settlement date.

    Wraps ``mostlyright.weather._fetchers._open_meteo.fetch_open_meteo`` in
    training mode (Previous Runs API) and pivots its tabular DataFrame
    into the ``{date_iso: [forecast_row, ...]}`` shape that
    ``build_pairs(forecasts_by_date=...)`` expects. Each row carries
    ``model`` / ``issued_at`` / ``valid_at`` / ``temperature_f`` /
    ``pop_6hr_pct`` / ``qpf_6hr_in`` / ``source`` keys for build_pairs_row
    compatibility.

    Per-row handling:

    * Rows whose ``valid_at`` is missing (``None``, NaN, ``NaT``, ``pd.NA``)
      or cannot be parsed to a timestamp are skipped.
    * The grouping key comes from ``settlement_date_for(valid_at, info.code)``;
      if that call fails, the UTC calendar date of ``valid_at`` is used.
    * ``issued_at`` is emitted as an ISO-8601 ``...Z`` string, or ``None``
      when missing or unparseable.
    * ``temp_c`` is converted with ``c * 9 / 5 + 32``; ``precip_probability``
      is multiplied by 100 (assumes the fetcher supplies a 0-1 fraction —
      TODO confirm); ``precipitation_mm`` is divided by 25.4. A missing or
      non-numeric input yields ``None`` for that field.

    :param info: Station descriptor; ``info.icao`` is passed to the fetcher
        and ``info.code`` to ``settlement_date_for``.
    :param from_date: Start of the range, forwarded to the fetcher as-is.
    :param to_date: End of the range, forwarded to the fetcher as-is.
    :param model: Open-Meteo model key, forwarded to the fetcher.
    :returns: Mapping of settlement date to its forecast rows; empty when the
        fetcher returns ``None`` or an empty frame.
    """
    import pandas as pd

    from mostlyright.weather._fetchers._open_meteo import fetch_open_meteo

    iso_format = "%Y-%m-%dT%H:%M:%SZ"

    def _is_missing(value: Any) -> bool:
        # None / NaN / NaT / pd.NA are all "missing". A datetime column
        # yields NaT rather than a float NaN, so a plain ``x != x`` float
        # test is not sufficient. Non-scalars are never treated as missing.
        return value is None or (pd.api.types.is_scalar(value) and bool(pd.isna(value)))

    def _convert(value: Any, transform: Any) -> float | None:
        # Apply ``transform`` to ``float(value)``; None when the cell is
        # missing or not numeric.
        if _is_missing(value):
            return None
        try:
            return transform(float(value))
        except (TypeError, ValueError):
            return None

    df = fetch_open_meteo(info.icao, from_date, to_date, model=model, mode="training")
    groups: dict[str, list[dict[str, Any]]] = {}
    if df is None or df.empty:
        return groups

    for _, row in df.iterrows():
        valid_raw = row.get("valid_at")
        if _is_missing(valid_raw):
            continue
        try:
            valid_dt = pd.to_datetime(valid_raw, utc=True)
        except Exception:
            # Best-effort: an unparseable timestamp drops the row.
            continue
        if _is_missing(valid_dt):
            # Parsed to NaT; without this guard the strftime calls below
            # would raise and abort the whole fetch.
            continue
        valid_iso = valid_dt.strftime(iso_format)

        try:
            date_iso = settlement_date_for(valid_iso, info.code)
        except Exception:
            # Best-effort fallback: bucket by the UTC calendar date.
            date_iso = valid_dt.strftime("%Y-%m-%d")

        issued_raw = row.get("issued_at")
        issued_iso: str | None = None
        if not _is_missing(issued_raw):
            try:
                issued_iso = pd.to_datetime(issued_raw, utc=True).strftime(iso_format)
            except Exception:
                issued_iso = None

        fcst_row: dict[str, Any] = {
            "model": row.get("model"),
            "issued_at": issued_iso,
            "valid_at": valid_iso,
            "temperature_f": _convert(row.get("temp_c"), lambda c: c * 9.0 / 5.0 + 32.0),
            "pop_6hr_pct": _convert(row.get("precip_probability"), lambda p: p * 100.0),
            "qpf_6hr_in": _convert(row.get("precipitation_mm"), lambda mm: mm / 25.4),
            "source": row.get("source"),
        }
        groups.setdefault(date_iso, []).append(fcst_row)
    return groups
1464
+
1465
+
1466
+ def _normalize_forecast_source(
1467
+ forecast_source: str | list[str] | tuple[str, ...] | None,
1468
+ ) -> tuple[str, ...]:
1469
+ """Normalize the forecast_source kwarg to a sorted tuple of allowed values."""
1470
+ if forecast_source is None:
1471
+ return ("iem_mos",)
1472
+ if isinstance(forecast_source, str):
1473
+ items: tuple[str, ...] = (forecast_source,)
1474
+ else:
1475
+ items = tuple(forecast_source)
1476
+ bad = [s for s in items if s not in _FORECAST_SOURCES_ALLOWED]
1477
+ if bad:
1478
+ raise ValueError(
1479
+ f"forecast_source: unknown value(s) {bad}; "
1480
+ f"allowed = {sorted(_FORECAST_SOURCES_ALLOWED)}"
1481
+ )
1482
+ return items
1483
+
1484
+
1382
1485
  def research(
1383
1486
  station: str | None = None,
1384
1487
  from_date: str | None = None,
@@ -1394,6 +1497,7 @@ def research(
1394
1497
  include_forecast: bool = False,
1395
1498
  forecast_model: str | None = None,
1396
1499
  forecast_models: list[str] | None = None,
1500
+ forecast_source: str | list[str] | tuple[str, ...] = "iem_mos",
1397
1501
  as_dataframe: bool = True,
1398
1502
  tz_override: str | None = None,
1399
1503
  qc: bool = False,
@@ -1584,15 +1688,46 @@ def research(
1584
1688
  iem_mos_by_date: dict[str, list[dict[str, Any]]] = {}
1585
1689
  nwp_by_model_date: dict[str, dict[str, list[dict[str, Any]]]] = {}
1586
1690
  if include_forecast:
1691
+ fcst_sources = _normalize_forecast_source(forecast_source)
1587
1692
  # Phase 17 Wave 4 iter-3 review HIGH: thread forecast_model through to
1588
1693
  # the fetcher so callers asking for ``forecast_model="gfs"`` get a
1589
1694
  # GFS pull, not a default-NBE pull whose rows then get filtered out
1590
- # by build_pairs_row's model-name match. ``forecast_model`` is the
1591
- # user-facing case (lowercase IEM MOS model id); _fetch_iem_mos_range
1592
- # passes it directly to fetch_iem_mos which validates against
1593
- # SUPPORTED_MOS_MODELS.
1594
- iem_model = (forecast_model or "nbe").lower()
1595
- iem_mos_by_date = _fetch_iem_mos_range(info, from_date, to_date, model=iem_model)
1695
+ # by build_pairs_row's model-name match.
1696
+ if "iem_mos" in fcst_sources:
1697
+ iem_model = (forecast_model or "nbe").lower()
1698
+ iem_mos_by_date = _fetch_iem_mos_range(info, from_date, to_date, model=iem_model)
1699
+ if "open_meteo" in fcst_sources:
1700
+ # Phase 20 OM-05: Open-Meteo forecast source. Default model
1701
+ # gfs_global matches the IEM MOS "nbe" parity-default ethos:
1702
+ # the most-traded prediction-market model for US stations.
1703
+ om_model = forecast_model or "gfs_global"
1704
+ from mostlyright.weather._fetchers._open_meteo_models import (
1705
+ OPEN_METEO_MODELS,
1706
+ )
1707
+
1708
+ if om_model not in OPEN_METEO_MODELS:
1709
+ # Phase 20 PLAN-11 review (codex HIGH #3): a typo or legacy
1710
+ # IEM MOS model id like "nbe" would otherwise silently drop
1711
+ # Open-Meteo forecasts and leave fcst_* columns null — the
1712
+ # caller would not learn the source failed. Hard-fail with
1713
+ # a hint so the typo surfaces immediately.
1714
+ raise ValueError(
1715
+ f'forecast_source="open_meteo" requires forecast_model '
1716
+ f"in OPEN_METEO_MODELS (36 keys); got {om_model!r}. "
1717
+ f"Pick one of the 36 registered Open-Meteo keys (e.g. "
1718
+ f"'gfs_global', 'ecmwf_ifs_hres', 'dwd_icon_global') "
1719
+ f'or drop forecast_source="open_meteo" to use the '
1720
+ f"default IEM MOS path."
1721
+ )
1722
+
1723
+ om_by_date = _fetch_open_meteo_range(info, from_date, to_date, model=om_model)
1724
+ # Concatenate: never silently merge — every row carries its
1725
+ # source identity. build_pairs accepts a single dict so we
1726
+ # merge OM rows into iem_mos_by_date when both sources are
1727
+ # selected (build_pairs_row already discriminates via
1728
+ # row.get("source")).
1729
+ for date_iso, rows in om_by_date.items():
1730
+ iem_mos_by_date.setdefault(date_iso, []).extend(rows)
1596
1731
  if forecast_models:
1597
1732
  nwp_by_model_date = _fetch_nwp_models_range(
1598
1733
  info, from_date, to_date, list(forecast_models)