mostlyrightmd 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mostlyright/__init__.py +46 -0
- mostlyright/_compose.py +338 -0
- mostlyright/_exact_fetch.py +162 -0
- mostlyright/_internal/__init__.py +31 -0
- mostlyright/_internal/_bounds.py +167 -0
- mostlyright/_internal/_cache_dir.py +90 -0
- mostlyright/_internal/_capabilities.py +274 -0
- mostlyright/_internal/_convert.py +241 -0
- mostlyright/_internal/_http.py +71 -0
- mostlyright/_internal/_pairs.py +522 -0
- mostlyright/_internal/_pandas_compat.py +75 -0
- mostlyright/_internal/_stations.py +692 -0
- mostlyright/_internal/_toon.py +350 -0
- mostlyright/_internal/exceptions.py +69 -0
- mostlyright/_internal/merge/__init__.py +29 -0
- mostlyright/_internal/merge/_schemas.py +120 -0
- mostlyright/_internal/merge/climate.py +68 -0
- mostlyright/_internal/merge/observations.py +43 -0
- mostlyright/_internal/models/__init__.py +7 -0
- mostlyright/_internal/models/_base.py +49 -0
- mostlyright/_internal/models/availability.py +69 -0
- mostlyright/_internal/models/observation.py +129 -0
- mostlyright/_internal/models/station.py +115 -0
- mostlyright/_internal/specs/book_snapshot.json +81 -0
- mostlyright/_internal/specs/brackets.json +63 -0
- mostlyright/_internal/specs/candle.json +66 -0
- mostlyright/_internal/specs/climate.json +60 -0
- mostlyright/_internal/specs/daily_extreme.json +56 -0
- mostlyright/_internal/specs/data_version.json +38 -0
- mostlyright/_internal/specs/event.json +62 -0
- mostlyright/_internal/specs/forecast.json +177 -0
- mostlyright/_internal/specs/forecast_series.json +128 -0
- mostlyright/_internal/specs/market.json +63 -0
- mostlyright/_internal/specs/market_unified.json +92 -0
- mostlyright/_internal/specs/observation.json +144 -0
- mostlyright/_internal/specs/observation_ledger.json +50 -0
- mostlyright/_internal/specs/observation_qc.json +24 -0
- mostlyright/_internal/specs/omo.json +78 -0
- mostlyright/_internal/specs/series.json +58 -0
- mostlyright/_internal/specs/settlement-join.json +76 -0
- mostlyright/_internal/specs/settlement_record.json +56 -0
- mostlyright/_internal/specs/snapshot.json +72 -0
- mostlyright/_internal/specs/synoptic_extremes.json +75 -0
- mostlyright/_internal/versioning.py +132 -0
- mostlyright/core/__init__.py +62 -0
- mostlyright/core/_backend_dispatch.py +174 -0
- mostlyright/core/_json_safe.py +177 -0
- mostlyright/core/_narwhals_compat.py +132 -0
- mostlyright/core/_polars_compat.py +55 -0
- mostlyright/core/exceptions.py +704 -0
- mostlyright/core/formats/__init__.py +42 -0
- mostlyright/core/formats/_toon.py +344 -0
- mostlyright/core/formats/_toon_list_codec.py +213 -0
- mostlyright/core/formats/csv.py +57 -0
- mostlyright/core/formats/dataframe.py +34 -0
- mostlyright/core/formats/json.py +83 -0
- mostlyright/core/formats/parquet.py +56 -0
- mostlyright/core/formats/toon.py +434 -0
- mostlyright/core/merge.py +129 -0
- mostlyright/core/result.py +192 -0
- mostlyright/core/schema.py +334 -0
- mostlyright/core/schemas/__init__.py +39 -0
- mostlyright/core/schemas/forecast.py +122 -0
- mostlyright/core/schemas/forecast_nwp.py +192 -0
- mostlyright/core/schemas/observation.py +201 -0
- mostlyright/core/schemas/observation_ledger.py +117 -0
- mostlyright/core/schemas/observation_qc.py +75 -0
- mostlyright/core/schemas/settlement.py +164 -0
- mostlyright/core/temporal/__init__.py +18 -0
- mostlyright/core/temporal/knowledge_view.py +109 -0
- mostlyright/core/temporal/leakage.py +147 -0
- mostlyright/core/temporal/timepoint.py +253 -0
- mostlyright/core/validator.py +465 -0
- mostlyright/discover.py +100 -0
- mostlyright/discovery.py +273 -0
- mostlyright/forecasts.py +267 -0
- mostlyright/international.py +423 -0
- mostlyright/live/__init__.py +39 -0
- mostlyright/live/_latest.py +194 -0
- mostlyright/live/_sources.py +106 -0
- mostlyright/live/_stream.py +108 -0
- mostlyright/mode2.py +235 -0
- mostlyright/preprocessing.py +173 -0
- mostlyright/qc.py +240 -0
- mostlyright/research.py +1669 -0
- mostlyright/snapshot.py +504 -0
- mostlyright/transforms.py +201 -0
- mostlyrightmd-0.1.0.dist-info/METADATA +48 -0
- mostlyrightmd-0.1.0.dist-info/RECORD +90 -0
- mostlyrightmd-0.1.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
"""Schema-derived bounds and validation helpers.
|
|
2
|
+
|
|
3
|
+
Constants from specs/observation.json. Shared by AWC, GHCNh, and IEM parsers.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
import re
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
# Module-level logger for this helper module.
log = logging.getLogger(__name__)

# --- Numeric limits mirrored from specs/observation.json -------------------

# sea_level_pressure_mb: accepted pressure range (millibars).
SLP_MIN_MB = 870.0
SLP_MAX_MB = 1084.0

# Air temperature range in °C. It brackets the world records:
# -89.2°C (Vostok) and 56.7°C (Death Valley).
TEMP_MIN_C = -90.0
TEMP_MAX_C = 60.0

# Maximum lengths for free-text string fields.
MAX_RAW_METAR_LEN = 2048
MAX_WX_CODES_LEN = 256

# visibility_miles: schema maximum.
MAX_VISIBILITY_MILES = 99.99

# wind_dir_degrees, wind_speed_kt and wind_gust_kt limits.
WIND_DIR_BOUNDS = (0, 360)
WIND_SPEED_MAX = 200
WIND_GUST_MAX = 250

# sky_base: schema maximum.
SKY_BASE_MAX_FT = 60000
|
|
36
|
+
|
|
37
|
+
# Station codes end up inside Hive partition paths, so this pattern is a
# security boundary. It is anchored with `\A...\Z` rather than `^...$`:
# Python's `$` also matches just BEFORE a trailing newline, which would let
# inputs such as "KJFK\n" through, whereas `\Z` only matches at the absolute
# end of the string. (Codex review fix.)
STATION_CODE_RE = re.compile(r"\A[A-Z]{3,4}\Z")

# GHCNh station identifiers. The NCEI archive uses two id flavors:
#   - the ICAO-derived joined USAF-WBAN form, e.g. ``"744860-94789"`` for KJFK
#   - 11-character alphanumeric NCEI station ids
# Both are covered by "alphanumeric + hyphen, length-bounded, anchored".
# Like STATION_CODE_RE this is a SECURITY BOUNDARY: ids flow into URL params
# and cache paths, so path-separator characters (/, \, ., space) must never
# match. (Codex/Rob H8 fix.)
GHCNH_STATION_ID_RE = re.compile(r"\A[A-Z0-9][A-Z0-9-]{0,31}\Z")

# Year range used for timestamp validation.
MIN_YEAR = 1940
MAX_YEAR = 2100
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def bounded_int(val: int | None, lo: int, hi: int) -> int | None:
    """Pass ``val`` through when ``lo <= val <= hi``; otherwise yield ``None``.

    ``None`` input is returned as ``None`` without any comparison.
    """
    if val is not None and lo <= val <= hi:
        return val
    return None
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def bounded_float(val: float | None, lo: float, hi: float, *, field: str = "") -> float | None:
    """Return ``val`` if it lies within ``[lo, hi]``, else ``None``.

    Args:
        val: Value to check; ``None`` is passed through as ``None``.
        lo: Inclusive lower bound.
        hi: Inclusive upper bound.
        field: Optional field name included in the warning for context.

    Returns:
        ``val`` unchanged when ``lo <= val <= hi``; otherwise ``None``.
        A value that fails the comparison (including NaN, which compares
        false against everything) is logged at WARNING level and dropped.
    """
    if val is None:
        return None
    if lo <= val <= hi:
        return val
    ctx = f" ({field})" if field else ""
    # Log the value and bounds unrounded. A fixed "%.1f" rendered a bound such
    # as 99.99 as "100.0", producing contradictory messages like
    # "100.0000 outside [0.0, 100.0]".
    log.warning(
        "bounded_float%s: %s outside [%s, %s], setting to None",
        ctx,
        val,
        lo,
        hi,
    )
    return None
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def bounded_float_min(val: float | None, lo: float) -> float | None:
    """Pass ``val`` through when ``val >= lo``; otherwise yield ``None``.

    ``None`` input is returned as ``None`` without any comparison. Nothing is
    logged for rejected values.
    """
    if val is not None and val >= lo:
        return val
    return None
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
# ---------------------------------------------------------------------------
|
|
89
|
+
# Path-boundary validators (Rob PR #2 C1/H8 - path traversal hardening)
|
|
90
|
+
# ---------------------------------------------------------------------------
|
|
91
|
+
#
|
|
92
|
+
# Every fetcher and cache helper that uses a caller-supplied station string
|
|
93
|
+
# inside a URL parameter OR a filesystem path goes through one of these
|
|
94
|
+
# validators FIRST. The downstream parsers already use STATION_CODE_RE; the
|
|
95
|
+
# boundary fix is making sure raw fetcher/cache entry points do too.
|
|
96
|
+
#
|
|
97
|
+
# A station value like ``"../../../tmp/evil"`` would otherwise resolve outside
|
|
98
|
+
# the cache root via ``dest_dir / station / file`` -- the validators reject
|
|
99
|
+
# anything that does not match the strict regex.
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def validate_icao_for_path(value: object, *, field: str = "station") -> str:
    """Validate ``value`` as a 3-4 letter uppercase ICAO/IATA station code.

    Only ``str`` inputs that match ``STATION_CODE_RE`` are accepted and
    returned unchanged. Everything else (bytes, ``None``, ints, and strings
    containing path separators, whitespace, lowercase or non-ASCII characters)
    raises ``ValueError`` whose message names ``field``.

    Intended for fetcher and cache entry points that place the station value
    into a URL parameter or a filesystem path (Rob PR #2 C1/H8).
    """
    if isinstance(value, str):
        if STATION_CODE_RE.match(value) is not None:
            return value
        problem = (
            f"{field}={value!r} does not match STATION_CODE_RE "
            "(3-4 uppercase letters); refusing to use as URL or path component"
        )
    else:
        problem = (
            f"{field} must be a str (got {type(value).__name__}); "
            "unsafe to use in URL or cache path"
        )
    raise ValueError(problem)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def validate_ghcnh_id_for_path(value: object, *, field: str = "station_id") -> str:
    """Validate ``value`` as a GHCNh station identifier and return it.

    A valid id is a ``str`` matching ``GHCNH_STATION_ID_RE``: alphanumeric
    characters and hyphens, 1-32 characters long, starting with an
    alphanumeric character. That covers both NCEI flavors (the ICAO-derived
    ``"744860-94789"`` form and the 11-character native ids) while rejecting
    path separators, whitespace, and quoting characters (Rob PR #2 H8).

    Raises:
        ValueError: If ``value`` is not a ``str`` or does not match the
            pattern; the message names ``field``.
    """
    if not isinstance(value, str):
        type_name = type(value).__name__
        raise ValueError(
            f"{field} must be a str (got {type_name}); "
            "unsafe to use in URL or cache path"
        )
    if GHCNH_STATION_ID_RE.match(value) is None:
        raise ValueError(
            f"{field}={value!r} does not match GHCNH_STATION_ID_RE "
            "(alphanumeric + hyphen, 1-32 chars); refusing to use as URL "
            "or path component"
        )
    return value
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def assert_path_under(path: Path, root: Path, *, field: str = "path") -> Path:
    """Defense-in-depth check that ``path`` resolves inside ``root``.

    Meant to run after the regex validators as a second barrier against path
    traversal. Both arguments are passed through ``Path.resolve()`` (which
    follows symlinks and collapses ``..`` segments), and the resolved path
    must then be relative to the resolved root.

    Returns:
        The resolved ``path``.

    Raises:
        ValueError: If the resolved path lies outside the resolved root
            (Rob PR #2 C1). The message names ``field``.
    """
    resolved = path.resolve()
    rroot = root.resolve()
    if resolved.is_relative_to(rroot):
        return resolved
    raise ValueError(
        f"{field}={path!r} resolves to {resolved!r}, outside root {rroot!r}; "
        "refusing path-traversal"
    )
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"""Resolve the on-disk cache directory.
|
|
2
|
+
|
|
3
|
+
Resolution order (highest precedence first):
|
|
4
|
+
1. ``MOSTLYRIGHT_CACHE_DIR`` env var (canonical, post-Phase-12).
|
|
5
|
+
2. ``TRADEWINDS_CACHE_DIR`` env var (legacy; emits DeprecationWarning;
|
|
6
|
+
scheduled for removal in v0.3).
|
|
7
|
+
3. Default: ``~/.mostlyright/cache/v1/``.
|
|
8
|
+
|
|
9
|
+
In v0.3 the ``TRADEWINDS_CACHE_DIR`` branch will be removed; users on v0.2.x get
|
|
10
|
+
one full release to migrate. Migration is byte-equivalent: ``mv ~/.tradewinds
|
|
11
|
+
~/.mostlyright`` works without schema change.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import os
|
|
17
|
+
import warnings
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import Final
|
|
20
|
+
|
|
21
|
+
# Environment variables consulted for an override, in precedence order.
_CANONICAL_ENV: Final[str] = "MOSTLYRIGHT_CACHE_DIR"
_LEGACY_ENV: Final[str] = "TRADEWINDS_CACHE_DIR"

# Default locations: the cache root (no version segment) and the full
# versioned cache directory beneath it.
_DEFAULT_ROOT: Final[Path] = Path.home() / ".mostlyright" / "cache"  # without /v1
_DEFAULT: Final[Path] = _DEFAULT_ROOT / "v1"

# Text of the DeprecationWarning emitted when the legacy variable is used.
_DEPRECATION_MESSAGE: Final[str] = (
    f"{_LEGACY_ENV} is deprecated; use {_CANONICAL_ENV}. "
    "Support will be removed in v0.3. Run: mv ~/.tradewinds ~/.mostlyright"
)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _resolve_env_value(stacklevel: int = 2) -> str | None:
    """Return the env-var cache override, or ``None`` when none is set.

    The canonical variable wins; the legacy variable is honored next and
    triggers a ``DeprecationWarning``. Empty values count as unset.

    This is the single source of truth for the resolution order and the
    deprecation warning. It backs :func:`resolve_cache_dir` (whose default
    ends in ``/v1``) and :func:`resolve_cache_root_without_v1` (used by the
    legacy ``_cache_root()`` helpers, which treat the env var as the cache
    ROOT and append ``CACHE_VERSION`` themselves), so a change to either
    reaches every consumer at once.

    Args:
        stacklevel: Stack level of the caller as seen from this function; one
            more frame is added so the warning is attributed one level above.
    """
    if canonical := os.environ.get(_CANONICAL_ENV):
        return canonical
    legacy = os.environ.get(_LEGACY_ENV)
    if not legacy:
        return None
    warnings.warn(_DEPRECATION_MESSAGE, DeprecationWarning, stacklevel=stacklevel + 1)
    return legacy
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def resolve_cache_dir() -> Path:
    """Return the full cache directory (with ``/v1`` default).

    Resolution order: ``MOSTLYRIGHT_CACHE_DIR`` env > ``TRADEWINDS_CACHE_DIR`` env
    (DeprecationWarning, removed v0.3) > ``~/.mostlyright/cache/v1``.

    An env-var override is used as given (no ``/v1`` is appended), with a
    leading ``~`` expanded to the user's home directory.

    New callers that want the canonical full cache directory should use this
    function. Existing callers that build paths via ``_cache_root() / CACHE_VERSION``
    use :func:`resolve_cache_root_without_v1` to preserve the legacy contract.
    """
    value = _resolve_env_value(stacklevel=2)
    if value is not None:
        # Expand "~" exactly like resolve_cache_root_without_v1() does;
        # otherwise MOSTLYRIGHT_CACHE_DIR=~/cache would create a directory
        # literally named "~" relative to the working directory.
        return Path(value).expanduser()
    return _DEFAULT
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def resolve_cache_root_without_v1() -> Path:
    """Return the cache ROOT (no ``/v1`` segment), i.e. the legacy contract.

    Serves the ``_cache_root()`` helpers that still append
    ``CACHE_VERSION='v1'`` themselves. The resolution order matches
    :func:`resolve_cache_dir` (canonical env, then legacy env with a warning,
    then the default), with two differences: the default is
    ``~/.mostlyright/cache`` without ``/v1``, and an env-var override is taken
    as the cache root itself (only ``~`` is expanded).

    v0.3 work: collapse the two helpers into one when the legacy contract is
    retired alongside ``TRADEWINDS_CACHE_DIR``.
    """
    override = _resolve_env_value(stacklevel=2)
    return _DEFAULT_ROOT if override is None else Path(override).expanduser()
|
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
"""Capabilities and schema introspection for the mostlyright SDK.
|
|
2
|
+
|
|
3
|
+
Provides:
|
|
4
|
+
- _METHOD_INDEX: structured summary of all public SDK methods
|
|
5
|
+
- _SCHEMA_FILES: mapping of entity names to spec filenames
|
|
6
|
+
- _SCHEMA_CACHE: in-process cache of loaded JSON schemas
|
|
7
|
+
- load_schema(): load a JSON schema from specs/ by entity name
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import copy
|
|
13
|
+
import json
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Any
|
|
16
|
+
|
|
17
|
+
# ---------------------------------------------------------------------------
|
|
18
|
+
# Schema file mapping
|
|
19
|
+
# "pairs" is intentionally omitted — specs/settlement-join.json describes the
|
|
20
|
+
# observation-settlement join schema, not the pairs() output columns.
|
|
21
|
+
# A specs/pairs.json spec is deferred to Sprint 4. Until then, use
|
|
22
|
+
# feature_catalog(source_filter="pairs") to inspect pairs columns.
|
|
23
|
+
# ---------------------------------------------------------------------------
|
|
24
|
+
|
|
25
|
+
# Maps each public entity name to its spec filename; every exposed spec is
# stored as ``<entity>.json``.
_SCHEMA_FILES: dict[str, str] = {
    entity: f"{entity}.json"
    for entity in (
        "observation",
        "climate",
        "snapshot",
        "data_version",
        "forecast",
        "forecast_series",
        "candle",
        "market",
        "market_unified",
        "synoptic_extremes",
        "omo",
        # brackets.json is available for bracket/range queries
        "brackets",
        # Phase 3.1 — daily_extreme.v1 resolution schema for daily_extremes() rollup.
        "daily_extreme",
    )
}
|
|
42
|
+
|
|
43
|
+
# Additional specs ship in ``mostlyright/_internal/specs/`` but are intentionally
# not exposed via ``client.schema()``:
#   - event.json              — Kalshi event metadata (Sprint 3)
#   - series.json             — Kalshi series (Sprint 3)
#   - settlement_record.json  — Kalshi settlement records (Sprint 3)
#   - settlement-join.json    — internal observation↔settlement join contract
#   - book_snapshot.json      — order-book snapshot (Sprint 3+)
#
# NOTE(review): observation_ledger.json and observation_qc.json also ship in
# specs/ and are likewise absent from ``_SCHEMA_FILES``; confirm whether that
# is intentional.
#
# They ride in the wheel because validation/ and ingest/ test suites use
# them, and that's cheaper than maintaining a separate "tests-only" resource
# bundle. When a Sprint 3 API surfaces them, lift them into ``_SCHEMA_FILES``
# so ``client.schema()`` can reach them publicly. Until then they're
# inspectable but not part of the documented SDK contract.
|
|
56
|
+
|
|
57
|
+
# In-process cache of parsed schemas, keyed by entity name.
_SCHEMA_CACHE: dict[str, dict[str, Any]] = {}

# Location of the specs/ directory bundled inside the ``mostlyright`` wheel.
#
# History: before 0.14.1 this pointed at the repo-root ``specs/`` via
# ``parent.parent.parent``. That works for editable / source installs but
# broke in every pip-installed environment, because the wheel never shipped
# the specs (Vu hit FileNotFoundError from ``client.schema('observation')``
# in a clean PyPI install).
#
# With the specs living in the package (``mostlyright/_internal/specs/``), a
# sibling-of-this-module lookup resolves to the packaged copy in both wheel
# and source layouts — single path, no fallback logic.
SPECS_DIR: Path = Path(__file__).with_name("specs")

# Backward-compat alias for the pre-0.14.1 private name. Downstream code
# still imports it; new code should use ``SPECS_DIR``.
_SPECS_DIR: Path = SPECS_DIR
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def load_schema(entity: str) -> dict[str, Any]:
    """Load and return the JSON Schema for a data entity.

    Schemas are parsed once per process and kept in ``_SCHEMA_CACHE``; every
    call returns an independent deep copy.

    Args:
        entity: Entity name (e.g. "observation", "climate").

    Returns:
        Parsed JSON Schema dict. Mutating it does not affect later calls.

    Raises:
        ValueError: If entity is not in the supported set.
    """
    if entity not in _SCHEMA_FILES:
        valid = sorted(_SCHEMA_FILES)
        raise ValueError(
            f"Unknown entity {entity!r}. "
            f"Supported: {valid}. "
            f"Note: 'pairs' is not supported — see feature_catalog(source_filter='pairs')."
        )

    if entity not in _SCHEMA_CACHE:
        spec_path = SPECS_DIR / _SCHEMA_FILES[entity]
        # Read as UTF-8 explicitly. The platform default encoding (e.g.
        # cp1252 on Windows) could mis-decode or reject non-ASCII characters
        # in a spec file.
        with spec_path.open(encoding="utf-8") as f:
            _SCHEMA_CACHE[entity] = json.load(f)

    # Codex review W2-C P2 fix: dict(...) only copies the top-level mapping;
    # nested objects (properties, $ref blocks) would remain shared with
    # _SCHEMA_CACHE, so a caller-side mutation would corrupt every later
    # load_schema() call process-wide. Deep-copy so the contract holds.
    return copy.deepcopy(_SCHEMA_CACHE[entity])
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
# ---------------------------------------------------------------------------
|
|
110
|
+
# Method index — structured summary of all public SDK methods
|
|
111
|
+
# Used by capabilities() to return a machine-readable method index.
|
|
112
|
+
# ---------------------------------------------------------------------------
|
|
113
|
+
|
|
114
|
+
# One entry per public SDK method, in listing order. Every entry carries the
# same four keys: name, description, required_params, optional_params.
_METHOD_INDEX: list[dict[str, Any]] = [
    # --- Observations / climate ---------------------------------------------
    {
        "name": "observations",
        "description": "Get weather observations with optional temporal transform DSL.",
        "required_params": ["station"],
        "optional_params": [
            "from_date", "to_date", "as_of", "obs_type", "resolution", "units", "tz",
            "format", "columns", "features", "limit", "offset", "as_dataframe", "save_path",
        ],
    },
    {
        "name": "climate",
        "description": "Get daily climate reports.",
        "required_params": ["station"],
        "optional_params": [
            "from_date", "to_date", "units", "format", "columns",
            "limit", "offset", "as_dataframe", "save_path",
        ],
    },
    {
        "name": "climate_gaps",
        "description": "Find gaps in climate history.",
        "required_params": ["station"],
        "optional_params": ["from_date", "to_date", "as_dataframe"],
    },
    # --- Point-in-time views ------------------------------------------------
    {
        "name": "data_version",
        "description": "Return a reproducible version token for the dataset at a point in time.",
        "required_params": ["station", "as_of"],
        "optional_params": ["from_date", "to_date"],
    },
    {
        "name": "snapshot",
        "description": "Return everything an AI agent would have known at UTC moment as_of.",
        "required_params": ["station", "as_of"],
        "optional_params": [
            "cli_publication_delay_hours", "include_forecast", "tz_override", "format",
        ],
    },
    {
        "name": "pairs",
        "description": "Return one row per settlement date joining observations + climate + forecast.",
        "required_params": ["station", "from_date", "to_date"],
        "optional_params": ["include_forecast", "forecast_model", "as_dataframe", "format"],
    },
    # --- Forecasts ----------------------------------------------------------
    {
        "name": "forecasts",
        "description": "Historical IEM MOS forecasts (discrete runs with issued_at).",
        "required_params": ["station"],
        "optional_params": [
            "from_date", "to_date", "model", "format", "columns", "as_dataframe", "save_path",
        ],
    },
    {
        "name": "forecast_series",
        "description": "Historical Open-Meteo forecast series (seamless hourly).",
        "required_params": ["station"],
        "optional_params": [
            "from_date", "to_date", "model", "format", "columns", "as_dataframe", "save_path",
        ],
    },
    # --- Discovery / introspection -----------------------------------------
    {
        "name": "feature_catalog",
        "description": "Return all available features with descriptions and metadata.",
        "required_params": [],
        "optional_params": ["include_transforms", "source_filter", "format"],
    },
    {
        "name": "describe",
        "description": "Return an LLM-ready summary of available features for a station.",
        "required_params": ["station"],
        "optional_params": [],
    },
    {
        "name": "availability",
        "description": "Return actual data availability for a station.",
        "required_params": ["station"],
        "optional_params": ["as_of", "data_type"],
    },
    {
        "name": "stations",
        "description": "Return all stations with active Kalshi markets and SDK support.",
        "required_params": [],
        "optional_params": [],
    },
    {
        "name": "station",
        "description": "Return metadata for a single station by code.",
        "required_params": ["code"],
        "optional_params": [],
    },
    {
        "name": "schema",
        "description": "Return the JSON Schema for a data entity.",
        "required_params": ["entity"],
        "optional_params": [],
    },
    {
        "name": "as_tools",
        "description": "Return Anthropic-compatible tool definitions for all callable SDK methods.",
        "required_params": [],
        "optional_params": ["include_market", "include_experimental"],
    },
    {
        "name": "capabilities",
        "description": "Return a structured summary of all SDK methods and their parameters.",
        "required_params": [],
        "optional_params": [],
    },
    {
        "name": "estimate_tokens",
        "description": "Estimate token count for a data query without fetching records.",
        "required_params": ["station", "from_date", "to_date"],
        "optional_params": ["method", "format", "columns"],
    },
    # --- Live ---------------------------------------------------------------
    {
        "name": "stream",
        "description": "Async generator yielding new METAR observations from AWC.",
        "required_params": ["station"],
        "optional_params": ["interval", "max_obs", "units", "timeout"],
    },
]
|