parsimony-eia 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ """US Energy Information Administration (EIA): fetch + catalog enumeration.
2
+
3
+ API docs: https://www.eia.gov/opendata/documentation.php
4
+
5
+ The EIA v2 Open Data API is keyed. Supply ``EIA_API_KEY`` (or bind it via
6
+ ``load(api_key=...)`` / ``Connector.bind``); a missing key fast-fails with
7
+ :class:`~parsimony.errors.UnauthorizedError`. Register a free key at
8
+ https://www.eia.gov/opendata/register.php.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from parsimony_eia.connectors import CONNECTORS, load
14
+
15
+ __all__ = ["CONNECTORS", "load"]
parsimony_eia/_http.py ADDED
@@ -0,0 +1,96 @@
1
+ """US EIA v2 Open Data API transport: key resolution, client, typed-error chokepoint.
2
+
3
+ The EIA v2 API is a single keyed REST host. The key rides as an ``?api_key=``
4
+ query param (``api_key`` is in parsimony-core's sensitive-param set, so it is
5
+ auto-redacted from logs) and is also stripped from provenance via
6
+ ``secrets=("api_key",)`` on every verb.
7
+
8
+ ``eia_get`` is the per-package mapper chokepoint: EIA returns a clean HTTP 400
9
+ with a useful JSON body for a bad measure / frequency / facet argument
10
+ (``{"error": "Invalid data 'x' ... valid data are 'value'", "code": 400}``).
11
+ ``fetch_json`` would collapse that into a generic ``ProviderError(400)`` and drop
12
+ the actionable text, so this chokepoint reads the 400 body and raises
13
+ ``InvalidParameterError`` preserving EIA's message; every other status falls
14
+ through to the canonical kernel mappers.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import os
20
+ from typing import Any
21
+
22
+ import httpx
23
+ from parsimony.errors import InvalidParameterError, UnauthorizedError
24
+ from parsimony.transport import HttpClient, map_http_error, map_timeout_error
25
+ from parsimony.transport.helpers import make_http_client
26
+
27
+ PROVIDER = "eia"
28
+ ENV_VAR = "EIA_API_KEY"
29
+ BASE_URL = "https://api.eia.gov/v2"
30
+
31
+ # EIA caps every /data page at 5000 rows: a request with ``length`` > 5000 is
32
+ # silently clamped back to 5000 (verified live), so a single call can never
33
+ # return more than 5000 of a dataset's ``total`` rows. The fetch connector pages
34
+ # through with ``offset`` to reach completeness.
35
+ PAGE_SIZE = 5000
36
+
37
+ # Concurrency for the route-tree enumeration fan-out (~272 nodes). EIA enforces
38
+ # a short-window per-second cap and 429s a 6-wide fan-out; 4 keeps the crawl
39
+ # under that ceiling (the pooled-client retry policy still absorbs the rare 429)
40
+ # so a node is never dropped to a silently-shrunk catalog.
41
+ ENUMERATE_CONCURRENCY = 4
42
+
43
+
44
def resolve_key(api_key: str) -> str:
    """Resolve the API key (arg → ``EIA_API_KEY`` env fallback) or fast-fail.

    Surrounding whitespace is stripped from both sources, so a blank or
    whitespace-only value counts as "not supplied" rather than being passed on
    as a key.

    Args:
        api_key: Explicitly supplied key; an empty or blank value defers to the
            ``EIA_API_KEY`` environment variable.

    Returns:
        The stripped, non-empty API key.

    Raises:
        UnauthorizedError: Neither the argument nor the environment holds a key.
    """
    # ``or`` short-circuits: the environment is only consulted when the
    # argument carries nothing usable.
    key = (api_key or "").strip() or os.environ.get(ENV_VAR, "").strip()
    if not key:
        raise UnauthorizedError(PROVIDER, env_var=ENV_VAR)
    return key
50
+
51
+
52
def make_eia_client(api_key: str, *, timeout: float = 60.0) -> HttpClient:
    """Create the ``HttpClient`` for ``BASE_URL`` with the key as a default query param.

    The key is resolved through ``resolve_key`` first, so a missing key raises
    before the client is constructed. ``timeout`` is forwarded unchanged to
    ``make_http_client``.
    """
    default_query = {"api_key": resolve_key(api_key)}
    return make_http_client(BASE_URL, query_params=default_query, timeout=timeout)
56
+
57
+
58
+ def _extract_400_message(response: httpx.Response) -> str:
59
+ """Pull EIA's human-readable error string out of a 400 body (bounded length)."""
60
+ try:
61
+ err = response.json().get("error")
62
+ except ValueError:
63
+ err = None
64
+ if isinstance(err, str) and err.strip():
65
+ return err.strip()[:300]
66
+ return "invalid request parameter"
67
+
68
+
69
def eia_get(
    http: HttpClient,
    path: str,
    *,
    params: dict[str, Any] | None = None,
    op_name: str,
) -> dict[str, Any]:
    """Issue a GET for ``path`` and hand back the body's ``response`` object.

    ``None``-valued params are dropped before the call. The status is checked
    here (``raise_for_status``): a 400 becomes an ``InvalidParameterError``
    carrying the message from ``_extract_400_message``; any other HTTP status
    error goes to ``map_http_error`` and a timeout to ``map_timeout_error``.

    Returns the ``response`` member of the JSON body when both the body and
    that member are dicts, otherwise an empty dict.
    """
    query: dict[str, Any] = {}
    for name, value in (params or {}).items():
        if value is not None:
            query[name] = value

    # Normalise to exactly one leading slash regardless of how the caller wrote it.
    url_path = f"/{path.lstrip('/')}"
    try:
        response = http.request("GET", url_path, params=query if query else None)
        response.raise_for_status()
    except httpx.HTTPStatusError as exc:
        if exc.response.status_code == 400:
            raise InvalidParameterError(PROVIDER, _extract_400_message(exc.response)) from exc
        map_http_error(exc, provider=PROVIDER, op_name=op_name)
    except httpx.TimeoutException as exc:
        map_timeout_error(exc, provider=PROVIDER, op_name=op_name)

    payload = response.json()
    if not isinstance(payload, dict):
        return {}
    envelope = payload.get("response")
    if isinstance(envelope, dict):
        return envelope
    return {}
@@ -0,0 +1,37 @@
1
+ """Build the EIA dataset catalog snapshot.
2
+
3
+ Maintainer tooling, not part of the plugin contract: ``enumerate_eia`` walks the
4
+ v2 route tree to one row per leaf dataset (with its measure/facet manifest), the
5
+ rows become catalog entities, and the catalog is indexed and built. The series
6
+ within a dataset are not catalogued (EIA's ~2M-series universe is the facet
7
+ cartesian product) — they stay fetchable by route+facets or legacy series id.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from parsimony.catalog import Catalog
13
+ from parsimony.catalog.policy import discovery_indexes
14
+ from parsimony.catalog.source import entities_from_raw
15
+
16
+ from parsimony_eia.connectors.enumerate import enumerate_eia
17
+ from parsimony_eia.outputs import EIA_ENUMERATE_OUTPUT
18
+
19
+ CATALOG_NAMESPACE = "eia"
20
+
21
+
22
def build_eia_catalog(*, api_key: str | None = None) -> Catalog:
    """Run the EIA enumeration and turn its rows into a built ``Catalog``.

    A ``None`` or empty ``api_key`` is passed to ``enumerate_eia`` as ``""``;
    any other value is passed stripped of surrounding whitespace. The
    enumerated result is converted to entities with ``EIA_ENUMERATE_OUTPUT``,
    loaded into a catalog under ``CATALOG_NAMESPACE`` (default field
    ``title``), and built before being returned.
    """
    key = api_key.strip() if api_key else ""
    enumerated = enumerate_eia(api_key=key)
    entities = entities_from_raw(enumerated, EIA_ENUMERATE_OUTPUT)
    snapshot = Catalog(
        CATALOG_NAMESPACE,
        indexes=discovery_indexes(entities),
        default_field="title",
    )
    snapshot.set_entities(entities)
    snapshot.build()
    return snapshot
35
+
36
+
37
+ __all__ = ["CATALOG_NAMESPACE", "build_eia_catalog"]
@@ -0,0 +1,19 @@
1
+ """eia connector registry."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from parsimony.connector import Connectors
6
+
7
+ from parsimony_eia.connectors.enumerate import enumerate_eia
8
+ from parsimony_eia.connectors.fetch import eia_facets, eia_fetch, eia_fetch_series
9
+ from parsimony_eia.search import eia_search
10
+
11
+ CONNECTORS = Connectors([eia_fetch, eia_fetch_series, eia_facets, enumerate_eia, eia_search])
12
+
13
+
14
def load(*, api_key: str) -> Connectors:
    """Bind ``api_key`` onto :data:`CONNECTORS` and return the bound collection."""
    bound = CONNECTORS.bind(api_key=api_key)
    return bound
17
+
18
+
19
+ __all__ = ["CONNECTORS", "load"]
@@ -0,0 +1,209 @@
1
+ """EIA catalog enumeration: walk the v2 route tree to one row per leaf dataset.
2
+
3
+ The EIA v2 API is a hierarchy of *route nodes*. A node either lists child
4
+ ``routes`` (a category) or terminates as a *leaf dataset* carrying ``data`` (the
5
+ measures), ``facets`` (the dimensions), ``frequency`` and a ``startPeriod`` /
6
+ ``endPeriod``. Walking the tree from ``/v2/`` and stopping at the leaves yields
7
+ the authoritative full universe of addressable datasets (archetype B fan-out;
8
+ ~232 leaves at the time of writing, ~272 node fetches). Route-node child lists
9
+ are inline and never paginated, so the walk is complete with no list-pagination
10
+ trap.
11
+
12
+ Each leaf becomes one catalog row whose metadata is a *dimension manifest*: the
13
+ measures it accepts as ``data[0]=`` and the facet ids it accepts as ``facets[]``
14
+ filters, with the measure/facet vocabulary also folded into the indexed
15
+ ``description`` so it is BM25-findable. The series within a dataset (the facet
16
+ cartesian product) are not catalogued — they are fetchable by id via
17
+ ``eia_fetch`` facet filters, so discovery is at the dataset tier and access is
18
+ total (the BLS/SDMX two-tier split).
19
+
20
+ Best-effort: a node whose fetch fails is logged and skipped (its subtree is lost
21
+ for that run) so a transient blip yields a partial — not empty — catalog.
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import logging
27
+ from typing import Any
28
+
29
+ import httpx
30
+ import pandas as pd
31
+ from parsimony.connector import enumerator
32
+ from parsimony.transport import HttpClient, pooled_client
33
+
34
+ from parsimony_eia._http import make_eia_client
35
+ from parsimony_eia.outputs import EIA_ENUMERATE_OUTPUT, ENUMERATE_COLUMNS
36
+
37
+ logger = logging.getLogger(__name__)
38
+
39
+ # Cap the embedded raw description so a verbose dataset blurb doesn't dominate the
40
+ # indexed text (mirrors parsimony_shared's embedder-friendly cap without adding a
41
+ # dependency for one constant).
42
+ DESCRIPTION_CHAR_CAP = 1500
43
+
44
+
45
+ def _get_node(
46
+ client: HttpClient,
47
+ route: str,
48
+ ) -> dict[str, Any] | None:
49
+ """Best-effort GET of one route node's metadata. ``None`` on any failure."""
50
+ path = f"/{route}" if route else "/"
51
+ try:
52
+ resp = client.request("GET", path)
53
+ resp.raise_for_status()
54
+ body = resp.json()
55
+ except (httpx.HTTPError, ValueError) as exc:
56
+ logger.warning("EIA enumerate: node %r failed: %s", route or "<root>", exc)
57
+ return None
58
+ inner = body.get("response") if isinstance(body, dict) else None
59
+ return inner if isinstance(inner, dict) else None
60
+
61
+
62
def _load_top_routes(client: HttpClient) -> list[str]:
    """List the ids of the root node's child routes.

    Kept as its own function so live tests can monkeypatch it down to a small
    slice of routes and avoid crawling the whole tree; ``enumerate_eia`` looks
    it up by bare name, so such a patch takes effect.

    Returns an empty list when the root node cannot be fetched. Children that
    are not dicts, or have no truthy ``id``, are ignored.
    """
    root = _get_node(client, "")
    if root is None:
        return []

    top_ids: list[str] = []
    for child in root.get("routes") or []:
        if isinstance(child, dict) and child.get("id"):
            top_ids.append(str(child["id"]))
    return top_ids
73
+
74
+
75
def _walk(
    client: HttpClient,
    routes: list[str],
    leaves: list[dict[str, Any]],
) -> None:
    """Walk the route tree level by level, appending leaf nodes to ``leaves``.

    Nodes are fetched one at a time. A node with a non-empty ``routes`` list
    contributes ``{route}/{child id}`` paths to the next level; any other
    fetched node is treated as a leaf, tagged with its path under
    ``__route__`` and collected. Nodes that fail to fetch are skipped.
    """
    level = routes
    while level:
        upcoming: list[str] = []
        for route in level:
            node = _get_node(client, route)
            if node is None:
                continue
            children = node.get("routes")
            if isinstance(children, list) and children:
                for child in children:
                    if isinstance(child, dict) and child.get("id"):
                        upcoming.append(f"{route}/{child['id']}")
            else:
                node["__route__"] = route
                leaves.append(node)
        level = upcoming
97
+
98
+
99
+ def _measure_units(data: dict[str, Any], measures: list[str]) -> list[str]:
100
+ seen: list[str] = []
101
+ for m in measures:
102
+ units = str((data.get(m) or {}).get("units") or "").strip() if isinstance(data.get(m), dict) else ""
103
+ if units and units not in seen:
104
+ seen.append(units)
105
+ return seen
106
+
107
+
108
+ def _synthesize_description(
109
+ *,
110
+ name: str,
111
+ category: str,
112
+ raw_description: str,
113
+ measures: list[str],
114
+ units: list[str],
115
+ facet_pairs: list[tuple[str, str]],
116
+ frequencies: list[str],
117
+ start: str,
118
+ end: str,
119
+ ) -> str:
120
+ """Fold the dataset's query vocabulary into one indexed description string."""
121
+ parts: list[str] = []
122
+ if name:
123
+ parts.append(name)
124
+ parts.append(f"{category} energy data from the U.S. Energy Information Administration (EIA)")
125
+ if raw_description:
126
+ parts.append(raw_description[:DESCRIPTION_CHAR_CAP])
127
+ if measures:
128
+ m = ", ".join(measures)
129
+ parts.append(f"Measures: {m}" + (f" ({', '.join(units)})" if units else ""))
130
+ if facet_pairs:
131
+ rendered = ", ".join(f"{fid} ({desc})" if desc else fid for fid, desc in facet_pairs)
132
+ parts.append(f"Facets: {rendered}")
133
+ if frequencies:
134
+ parts.append(f"Frequencies: {', '.join(frequencies)}")
135
+ if start or end:
136
+ parts.append(f"Coverage {start or '?'}..{end or '?'}")
137
+ return ". ".join(p for p in parts if p)
138
+
139
+
140
def _dataset_row(node: dict[str, Any]) -> dict[str, str]:
    """Flatten one leaf node into the string-valued catalog row.

    ``code`` is the node's ``__route__``; ``title`` is its ``name`` (falling
    back to the route); ``category`` is the first path segment of the route.
    Measures are the keys of the node's ``data`` dict, facets and frequencies
    are the ``id`` values of the respective lists; each is emitted
    comma-joined. A ``data`` / ``facets`` / ``frequency`` member of the wrong
    type is treated as empty.
    """
    route = str(node.get("__route__") or "")
    title = str(node.get("name") or route)
    category = route.split("/", 1)[0]

    data_spec = node.get("data")
    if not isinstance(data_spec, dict):
        data_spec = {}
    measures = [str(key) for key in data_spec]
    units = _measure_units(data_spec, measures)

    facet_spec = node.get("facets")
    facet_pairs: list[tuple[str, str]] = [
        (str(item["id"]), str(item.get("description") or "").strip())
        for item in (facet_spec if isinstance(facet_spec, list) else [])
        if isinstance(item, dict) and item.get("id")
    ]

    freq_spec = node.get("frequency")
    frequencies = [
        str(item["id"])
        for item in (freq_spec if isinstance(freq_spec, list) else [])
        if isinstance(item, dict) and item.get("id")
    ]

    start = str(node.get("startPeriod") or "")
    end = str(node.get("endPeriod") or "")

    description = _synthesize_description(
        name=title,
        category=category,
        raw_description=str(node.get("description") or ""),
        measures=measures,
        units=units,
        facet_pairs=facet_pairs,
        frequencies=frequencies,
        start=start,
        end=end,
    )
    return {
        "code": route,
        "title": title,
        "description": description,
        "category": category,
        "measures": ",".join(measures),
        "facets": ",".join(pair[0] for pair in facet_pairs),
        "frequencies": ",".join(frequencies),
        "default_frequency": str(node.get("defaultFrequency") or ""),
        "start": start,
        "end": end,
        "units": ",".join(units),
    }
187
+
188
+
189
@enumerator(output=EIA_ENUMERATE_OUTPUT, tags=["macro", "energy", "us"], secrets=("api_key",))
def enumerate_eia(api_key: str = "") -> pd.DataFrame:
    """Walk the EIA v2 route tree and return one row per leaf dataset.

    The client is built first (``make_eia_client`` raises on a missing key, so
    that failure precedes any request). The top-level routes are loaded, the
    tree below them is walked, and every collected leaf is converted with
    ``_dataset_row``. When no top-level routes come back, a warning is logged
    and an empty frame with the ``ENUMERATE_COLUMNS`` columns is returned.
    """
    columns = list(ENUMERATE_COLUMNS)
    http = make_eia_client(api_key)
    collected: list[dict[str, Any]] = []

    with pooled_client(http) as shared:
        top_routes = _load_top_routes(shared)
        if not top_routes:
            logger.warning("EIA enumerate: /v2/ returned no top-level routes")
            return pd.DataFrame(columns=columns)
        _walk(shared, top_routes, collected)

    rows = list(map(_dataset_row, collected))
    logger.info("EIA enumerate: %d leaf datasets across %d top-level routes", len(rows), len(top_routes))
    return pd.DataFrame(rows, columns=columns)
@@ -0,0 +1,321 @@
1
+ """EIA data-fetch connectors.
2
+
3
+ Three verbs over the EIA v2 API:
4
+
5
+ * ``eia_fetch`` — fetch a dataset by its route path (``petroleum/pri/spt``) with
6
+ an optional measure, facet filters and a date window.
7
+ * ``eia_fetch_series`` — fetch by a legacy APIv1 series id (``PET.RWTC.D``,
8
+ ``ELEC.SALES.CO-RES.A``) via the ``/v2/seriesid/{id}`` path. This addressing
9
+ scheme lives **outside** the route tree the catalog enumerates, so it is the
10
+ only connector that reaches a famous series straight from its well-known id.
11
+ * ``eia_facets`` — list the valid values of one facet dimension, so a fetch can
12
+ be narrowed to a specific series (essential on huge datasets).
13
+
14
+ **Pagination is mandatory, not optional.** EIA caps every ``/data`` (and
15
+ ``/seriesid``) response at 5,000 rows; a dataset like ``petroleum/pri/spt`` daily
16
+ has 91,285 rows and ``electricity/rto/region-data`` hourly has ~18.7M. The
17
+ single-page predecessor silently returned the first 5,000 of whatever matched.
18
+ Both fetch verbs read ``response.total`` and page through with ``offset`` until
19
+ they have it all — guarded by a pre-pagination row-count ceiling that raises an
20
+ actionable ``InvalidParameterError`` (echoing EIA's own "constrain with facet,
21
+ start, end" guidance) so an unbounded request fails loud instead of truncating.
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import re
27
+ from typing import Annotated, Any
28
+
29
+ import pandas as pd
30
+ from parsimony.connector import connector
31
+ from parsimony.errors import EmptyDataError, InvalidParameterError
32
+ from parsimony.transport import HttpClient
33
+
34
+ from parsimony_eia._http import PAGE_SIZE, eia_get, make_eia_client
35
+ from parsimony_eia.outputs import EIA_FACETS_OUTPUT, EIA_FETCH_OUTPUT, EIA_SERIES_OUTPUT
36
+
37
+ # A single fetch is bounded to this many rows. Above it, EIA's universe is too
38
+ # large to pull whole (electricity hourly alone is ~18.7M rows); we refuse with
39
+ # an actionable narrowing message rather than either truncate silently (the bug
40
+ # we are fixing) or page through millions of rows. ~60 pages.
41
+ MAX_FETCH_ROWS = 300_000
42
+
43
+
44
+ def _to_int(value: Any) -> int:
45
+ try:
46
+ return int(str(value))
47
+ except (TypeError, ValueError):
48
+ return 0
49
+
50
+
51
+ def _facet_params(facets: dict[str, Any] | None) -> dict[str, Any]:
52
+ """Render a ``{facet_id: value | [values]}`` dict into EIA query params.
53
+
54
+ ``{"duoarea": "NUS", "product": ["EPCBRENT", "EPD2DC"]}`` →
55
+ ``facets[duoarea][]=NUS`` and a repeated ``facets[product][]=...``. httpx
56
+ expands a list-valued param into repeated keys.
57
+ """
58
+ out: dict[str, Any] = {}
59
+ for raw_id, val in (facets or {}).items():
60
+ fid = str(raw_id).strip()
61
+ if not fid:
62
+ continue
63
+ key = f"facets[{fid}][]"
64
+ if isinstance(val, (list, tuple, set)):
65
+ out[key] = [str(v) for v in val]
66
+ else:
67
+ out[key] = str(val)
68
+ return out
69
+
70
+
71
+ def _normalize_periods(raw: pd.Series) -> pd.Series:
72
+ """Coerce EIA period strings (any frequency) to datetimes.
73
+
74
+ EIA period shapes by frequency: ``2024`` (annual), ``2024-03`` (monthly),
75
+ ``2024-03-15`` (daily/weekly), ``2025-Q4`` (quarterly), ``2026-06-10T07``
76
+ (hourly), ``2026-06-10T03-04`` (local-hourly, trailing TZ band). pandas can
77
+ parse all but the quarter form, which we expand to its quarter-start month,
78
+ and the local-hourly TZ band, which we trim to the bare hour.
79
+ """
80
+ s = raw.astype("string").str.strip()
81
+ s = s.str.replace(r"^(\d{4})-Q1$", r"\1-01-01", regex=True)
82
+ s = s.str.replace(r"^(\d{4})-Q2$", r"\1-04-01", regex=True)
83
+ s = s.str.replace(r"^(\d{4})-Q3$", r"\1-07-01", regex=True)
84
+ s = s.str.replace(r"^(\d{4})-Q4$", r"\1-10-01", regex=True)
85
+ s = s.str.replace(r"(T\d{2})-\d{2}$", r"\1", regex=True)
86
+ return pd.to_datetime(s, errors="coerce", format="mixed")
87
+
88
+
89
+ def _detect_measure_col(df: pd.DataFrame) -> str | None:
90
+ """Find the measure column when the caller didn't name one (seriesid path).
91
+
92
+ EIA names the measure column inconsistently: petroleum uses ``value`` +
93
+ ``units``; electricity uses ``sales``/``price``/... + a ``<measure>-units``
94
+ sibling. Prefer ``value``; otherwise the column with a ``{col}-units`` twin.
95
+ """
96
+ if "value" in df.columns:
97
+ return "value"
98
+ for col in df.columns:
99
+ if isinstance(col, str) and f"{col}-units" in df.columns:
100
+ return col
101
+ return None
102
+
103
+
104
+ _DUP_LABEL_RE = re.compile(r"-(name|units|description)$")
105
+
106
+
107
+ def _natural_key_columns(df: pd.DataFrame) -> list[str]:
108
+ """Dimensional columns that identify a row (for boundary-dup-safe dedup).
109
+
110
+ EIA offset pagination over an unsorted result is lossless but can repeat a
111
+ row at a page boundary, so we dedup on the dimensional key: ``period`` plus
112
+ the facet code columns (dropping the value, the ``*-units``/``*-name``/
113
+ ``*-description`` label columns, and ``series-description``).
114
+ """
115
+ if "series" in df.columns and "period" in df.columns:
116
+ return ["period", "series"]
117
+ keys = [
118
+ c
119
+ for c in df.columns
120
+ if isinstance(c, str) and c not in ("value", "units") and not _DUP_LABEL_RE.search(c)
121
+ ]
122
+ return keys or list(df.columns)
123
+
124
+
125
def _shape_observations(
    data: list[dict[str, Any]],
    *,
    key_column: str,
    key_value: str,
    title: str,
    measure: str | None = None,
) -> pd.DataFrame:
    """Build the long-format observation frame from raw EIA rows.

    Steps, in order: parse ``period`` (or fill it with ``NaT`` when absent);
    rename the measure column — the named ``measure`` if it exists, else the
    one found by ``_detect_measure_col`` — to ``value`` and coerce only that
    column to numeric (``NA`` when there is none); drop rows that repeat the
    key from ``_natural_key_columns``, keeping the first; finally stamp
    ``key_column`` and ``title`` on every row. All other columns pass through
    untouched.
    """
    frame = pd.DataFrame(data)
    frame["period"] = _normalize_periods(frame["period"]) if "period" in frame.columns else pd.NaT

    if measure and measure in frame.columns:
        source_col = measure
    else:
        source_col = _detect_measure_col(frame)
    if source_col and source_col != "value":
        frame = frame.rename(columns={source_col: "value"})

    frame["value"] = pd.to_numeric(frame["value"], errors="coerce") if "value" in frame.columns else pd.NA

    dedup_keys = _natural_key_columns(frame)
    frame = frame.drop_duplicates(subset=dedup_keys, keep="first")
    frame = frame.reset_index(drop=True)

    frame[key_column] = key_value
    frame["title"] = title
    return frame
160
+
161
+
162
def _paginate(
    http: HttpClient,
    path: str,
    base_params: dict[str, Any],
    *,
    op_name: str,
) -> tuple[list[dict[str, Any]], dict[str, Any]]:
    """Page through ALL rows for a /data or /seriesid request.

    The first page's ``total`` drives the loop. When ``total`` is missing or
    unparseable (``_to_int`` gives ``0``), paging no longer stops after one
    page: it continues for as long as pages come back full
    (``PAGE_SIZE`` rows), so a full first page is never mistaken for the whole
    result. An empty page always ends the loop.

    Args:
        http: Client used for every page request.
        path: Request path passed to ``eia_get``.
        base_params: Query params shared by every page; ``offset`` and
            ``length`` are added per request.
        op_name: Operation name forwarded to ``eia_get``.

    Returns:
        ``(rows, first_response)`` — the full row list plus the first page's
        response envelope (for its ``description``).

    Raises:
        InvalidParameterError: The reported ``total`` exceeds
            ``MAX_FETCH_ROWS`` (raised before further paging), or — when no
            usable ``total`` was reported — the accumulated rows exceed it.
    """

    def fetch_page(offset: int) -> dict[str, Any]:
        return eia_get(
            http, path, params={**base_params, "offset": offset, "length": PAGE_SIZE}, op_name=op_name
        )

    narrowing_hint = (
        "narrow it with facets=, frequency=, start= or end= "
        f"(EIA caps every page at {PAGE_SIZE} rows)."
    )

    first = fetch_page(0)
    total = _to_int(first.get("total"))
    rows: list[dict[str, Any]] = list(first.get("data") or [])

    if total > MAX_FETCH_ROWS:
        raise InvalidParameterError(
            "eia",
            f"this request matches {total} rows, above the {MAX_FETCH_ROWS}-row fetch ceiling; "
            + narrowing_hint,
        )

    total_known = total > 0
    last_page_len = len(rows)
    # With a usable total, page until it is reached; without one, a full page
    # is the only signal that more rows may follow.
    while (len(rows) < total) if total_known else (last_page_len >= PAGE_SIZE):
        chunk = list(fetch_page(len(rows)).get("data") or [])
        if not chunk:
            break
        rows.extend(chunk)
        last_page_len = len(chunk)
        if not total_known and len(rows) > MAX_FETCH_ROWS:
            # No total to pre-check against, so enforce the ceiling as we go.
            raise InvalidParameterError(
                "eia",
                f"this request matches more than {MAX_FETCH_ROWS} rows, above the fetch ceiling; "
                + narrowing_hint,
            )

    return rows, first
201
+
202
+
203
@connector(output=EIA_FETCH_OUTPUT, tags=["macro", "energy", "us"], secrets=("api_key",))
def eia_fetch(
    route: Annotated[str, "ns:eia"],
    measure: str = "value",
    facets: dict[str, Any] | None = None,
    frequency: str | None = None,
    start: str | None = None,
    end: str | None = None,
    api_key: str = "",
) -> pd.DataFrame:
    """Fetch an EIA dataset by route path (e.g. ``petroleum/pri/spt``).

    ``measure`` selects EIA's required ``data[0]=`` facet — it is route-specific
    (``value`` for most series; ``price``/``sales``/``revenue``/``customers`` for
    electricity, ``heat-content`` etc. for coal). ``facets`` is a
    ``{facet_id: value | [values]}`` dict (read the dataset's facet ids from the
    catalog, and their valid values from ``eia_facets``) that narrows the result
    to a series. The full result is paged in (EIA's 5,000-row page cap is handled
    internally); a match above the row ceiling raises ``InvalidParameterError``
    asking you to narrow it.

    ``route`` is normalized by trimming whitespace and any leading/trailing
    ``/``, so ``/petroleum/pri/spt/`` addresses the same dataset — and yields
    the same ``route`` key column — as ``petroleum/pri/spt``.

    Raises:
        InvalidParameterError: ``route`` or ``measure`` is empty after trimming.
        EmptyDataError: The request matched no rows.
    """
    # Trim slashes too: a trailing one would produce a "//data" request path
    # and a leading one a key that differs from the slash-free catalog code.
    r = route.strip().strip("/")
    if not r:
        raise InvalidParameterError("eia", "route must be non-empty")
    m = measure.strip()
    if not m:
        raise InvalidParameterError("eia", "measure must be non-empty")

    http = make_eia_client(api_key)
    base_params: dict[str, Any] = {
        "data[0]": m,
        "frequency": frequency,
        "start": start,
        "end": end,
        **_facet_params(facets),
    }
    rows, first = _paginate(http, f"{r}/data", base_params, op_name="eia_fetch")
    if not rows:
        raise EmptyDataError("eia", query_params={"route": r, "measure": m})

    # Prefer the response's own description as the title; fall back to the route.
    title = str(first.get("description") or r)
    return _shape_observations(rows, key_column="route", key_value=r, title=title, measure=m)
245
+
246
+
247
@connector(output=EIA_SERIES_OUTPUT, tags=["macro", "energy", "us"], secrets=("api_key",))
def eia_fetch_series(
    series_id: Annotated[str, "ns:eia"],
    start: str | None = None,
    end: str | None = None,
    api_key: str = "",
) -> pd.DataFrame:
    """Fetch one EIA series by its legacy APIv1 id (e.g. ``PET.RWTC.D``).

    The request goes to the ``seriesid/{id}`` path and is paged in full by
    ``_paginate``, optionally bounded by ``start`` / ``end``. The title is the
    first non-blank ``series-description`` found in the rows, else the
    response-level ``description``, else the id itself.

    Raises:
        InvalidParameterError: ``series_id`` is empty after trimming.
        EmptyDataError: The request matched no rows.
    """
    sid = series_id.strip()
    if not sid:
        raise InvalidParameterError("eia", "series_id must be non-empty")

    window: dict[str, Any] = {"start": start, "end": end}
    client = make_eia_client(api_key)
    rows, first = _paginate(client, f"seriesid/{sid}", window, op_name="eia_fetch_series")
    if not rows:
        raise EmptyDataError("eia", query_params={"series_id": sid})

    # The per-row series description names the specific series, so it is
    # preferred over the dataset-level description.
    series_title = ""
    for row in rows:
        if str(row.get("series-description") or "").strip():
            series_title = str(row["series-description"])
            break
    title = series_title if series_title else str(first.get("description") or sid)
    return _shape_observations(rows, key_column="series_id", key_value=sid, title=title)
282
+
283
+
284
@connector(output=EIA_FACETS_OUTPUT, tags=["macro", "energy", "us"], secrets=("api_key",))
def eia_facets(
    route: Annotated[str, "ns:eia"],
    facet: str,
    api_key: str = "",
) -> pd.DataFrame:
    """List the valid values of one facet dimension of an EIA dataset.

    Given a dataset ``route`` and one of its facet ids (from the catalog's
    ``facets`` metadata), returns the ``{id, name}`` value vocabulary so a fetch
    can be narrowed to a specific series. This is the bridge that makes huge
    datasets usable — e.g. ``electricity/rto/region-data`` is ~18.7M rows, so an
    agent must narrow by ``respondent``/``fueltype`` facet values, which it
    discovers here rather than by a blind (ceiling-rejected) full fetch.

    ``route`` and ``facet`` are normalized by trimming whitespace and any
    leading/trailing ``/``, so the request path never contains an empty
    segment and the ``route`` column is slash-free at both ends.

    Returns:
        One row per facet value with columns ``facet_value``, ``name`` (the
        value's name, or its id when it has none), ``facet`` and ``route``.

    Raises:
        InvalidParameterError: ``route`` or ``facet`` is empty after trimming.
        EmptyDataError: The response lists no usable facet values.
    """
    r = route.strip().strip("/")
    f = facet.strip().strip("/")
    if not r:
        raise InvalidParameterError("eia", "route must be non-empty")
    if not f:
        raise InvalidParameterError("eia", "facet must be non-empty")

    http = make_eia_client(api_key)
    resp = eia_get(http, f"{r}/facet/{f}", op_name="eia_facets")
    values = resp.get("facets") or []
    rows = [
        {
            "facet_value": str(v.get("id", "")),
            "name": str(v.get("name") or v.get("id", "")),
            "facet": f,
            "route": r,
        }
        for v in values
        # Skip malformed entries and values without an id.
        if isinstance(v, dict) and v.get("id")
    ]
    if not rows:
        raise EmptyDataError("eia", query_params={"route": r, "facet": f})
    return pd.DataFrame(rows)
@@ -0,0 +1,81 @@
1
+ """EIA connector output schemas.
2
+
3
+ The catalog row is one per **leaf dataset** (the route-tree terminal node). Its
4
+ metadata carries the dataset's *query vocabulary* — the measures it accepts as
5
+ ``data[0]=`` and the facet dimensions it accepts as ``facets[...]`` filters — so
6
+ an agent can read a search hit and construct a precise ``eia_fetch`` /
7
+ ``eia_facets`` call without a blind round-trip (the SDMX/BLS dimension-manifest
8
+ pattern). The ``code`` KEY holds the route path (e.g. ``petroleum/pri/spt``).
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from parsimony.result import Column, ColumnRole, OutputConfig
14
+
15
+ EIA_ENUMERATE_OUTPUT = OutputConfig(
16
+ columns=[
17
+ Column(name="code", role=ColumnRole.KEY, namespace="eia"),
18
+ Column(name="title", role=ColumnRole.TITLE),
19
+ Column(name="description", role=ColumnRole.METADATA),
20
+ Column(name="category", role=ColumnRole.METADATA),
21
+ Column(name="measures", role=ColumnRole.METADATA),
22
+ Column(name="facets", role=ColumnRole.METADATA),
23
+ Column(name="frequencies", role=ColumnRole.METADATA),
24
+ Column(name="default_frequency", role=ColumnRole.METADATA),
25
+ Column(name="start", role=ColumnRole.METADATA),
26
+ Column(name="end", role=ColumnRole.METADATA),
27
+ Column(name="units", role=ColumnRole.METADATA),
28
+ ]
29
+ )
30
+
31
+ ENUMERATE_COLUMNS: tuple[str, ...] = (
32
+ "code",
33
+ "title",
34
+ "description",
35
+ "category",
36
+ "measures",
37
+ "facets",
38
+ "frequencies",
39
+ "default_frequency",
40
+ "start",
41
+ "end",
42
+ "units",
43
+ )
44
+
45
+ # Fetch returns long-format observations. ``route`` is the KEY (param_key=route
46
+ # links a catalog hit back to the fetch parameter); the selected measure is
47
+ # normalized to ``value``; every other EIA column (facet codes + their ``-name``
48
+ # labels, ``series``, ``series-description``, ``units``) folds in as DATA so a
49
+ # multi-series fetch stays disambiguated.
50
+ EIA_FETCH_OUTPUT = OutputConfig(
51
+ columns=[
52
+ Column(name="route", role=ColumnRole.KEY, namespace="eia"),
53
+ Column(name="title", role=ColumnRole.TITLE),
54
+ Column(name="period", dtype="datetime", role=ColumnRole.DATA),
55
+ Column(name="value", dtype="numeric", role=ColumnRole.DATA),
56
+ ]
57
+ )
58
+
59
+ # Fetch by legacy APIv1 series id (the out-of-tree `/v2/seriesid/{id}` path —
60
+ # e.g. `PET.RWTC.D`, `ELEC.SALES.CO-RES.A`). Same observation shape as the route
61
+ # fetch; the series id fully specifies the measure, which is normalized to value.
62
+ EIA_SERIES_OUTPUT = OutputConfig(
63
+ columns=[
64
+ Column(name="series_id", role=ColumnRole.KEY, namespace="eia"),
65
+ Column(name="title", role=ColumnRole.TITLE),
66
+ Column(name="period", dtype="datetime", role=ColumnRole.DATA),
67
+ Column(name="value", dtype="numeric", role=ColumnRole.DATA),
68
+ ]
69
+ )
70
+
71
+ # Facet-value discovery: one row per valid value of a dataset's facet dimension,
72
+ # so an agent can narrow a fetch to a specific series (essential on huge datasets
73
+ # — electricity hourly is ~18M rows). KEY=facet_value, TITLE=name.
74
+ EIA_FACETS_OUTPUT = OutputConfig(
75
+ columns=[
76
+ Column(name="facet_value", role=ColumnRole.KEY, namespace="eia"),
77
+ Column(name="name", role=ColumnRole.TITLE),
78
+ Column(name="facet", role=ColumnRole.METADATA),
79
+ Column(name="route", role=ColumnRole.METADATA),
80
+ ]
81
+ )
parsimony_eia/py.typed ADDED
File without changes
@@ -0,0 +1,39 @@
1
+ """Semantic search over the published EIA dataset catalog."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from parsimony.catalog.search import CatalogSearchParams, make_local_search_connector
6
+ from parsimony.result import Column, ColumnRole, OutputConfig
7
+
8
+ from parsimony_eia.catalog_build import build_eia_catalog
9
+
10
+ EiaSearchParams = CatalogSearchParams  # alias: EIA search takes the generic catalog search params unchanged
11
+
12
+ PARSIMONY_EIA_CATALOG_URL_ENV = "PARSIMONY_EIA_CATALOG_URL"  # env var name handed to make_local_search_connector as catalog_url_env_var
13
+
14
+ EIA_SEARCH_OUTPUT = OutputConfig(  # column schema whose .columns is passed to eia_search as output_columns
15
+ columns=[
16
+ Column(name="code", role=ColumnRole.KEY, namespace="eia"),  # NOTE(review): presumably holds the dataset route that eia_fetch(route=...) expects — confirm
17
+ Column(name="title", role=ColumnRole.TITLE),
18
+ Column(name="score", role=ColumnRole.DATA),  # no dtype declared; presumably the search relevance score — confirm
19
+ ]
20
+ )
21
+
22
+ eia_search = make_local_search_connector(  # search connector built by parsimony's factory rather than hand-written
23
+ provider="eia",
24
+ default_url="hf://parsimony-dev/eia",  # default catalog location; presumably a Hugging Face repo — confirm
25
+ catalog_url_env_var=PARSIMONY_EIA_CATALOG_URL_ENV,  # presumably overrides default_url when the env var is set — confirm
26
+ build_catalog=build_eia_catalog,  # catalog builder imported from parsimony_eia.catalog_build
27
+ tags=["macro", "energy", "us", "tool"],
28
+ description=(
29
+ "Semantic-search the U.S. Energy Information Administration (EIA) Open Data catalog of "
30
+ "energy datasets (petroleum, natural gas, electricity, coal, nuclear, renewables, "
31
+ "emissions, international). Each hit is a dataset route plus its measure and facet "
32
+ "vocabulary; pass the route to eia_fetch(route=..., measure=..., facets=...) for "
33
+ "observations, eia_facets(route=..., facet=...) to list a facet's values, or use "
34
+ "eia_fetch_series(series_id=...) for a known legacy series id like PET.RWTC.D."
35
+ ),
36
+ output_columns=EIA_SEARCH_OUTPUT.columns,  # reuse the column list declared in EIA_SEARCH_OUTPUT
37
+ )
38
+
39
+ __all__ = ["PARSIMONY_EIA_CATALOG_URL_ENV", "EiaSearchParams", "eia_search"]  # NOTE(review): EIA_SEARCH_OUTPUT is not exported — confirm that is intentional
@@ -0,0 +1,112 @@
1
+ Metadata-Version: 2.4
2
+ Name: parsimony-eia
3
+ Version: 0.0.1
4
+ Summary: U.S. Energy Information Administration connector for the parsimony framework
5
+ Project-URL: Homepage, https://www.eia.gov
6
+ Project-URL: Repository, https://github.com/ockham-sh/parsimony-connectors
7
+ Project-URL: Issues, https://github.com/ockham-sh/parsimony-connectors/issues
8
+ Author-email: "Ockham.sh" <team@ockham.sh>
9
+ License-Expression: Apache-2.0
10
+ License-File: LICENSE
11
+ Keywords: connectors,data,eia,finance,parsimony
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Intended Audience :: Financial and Insurance Industry
15
+ Classifier: License :: OSI Approved :: Apache Software License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Office/Business :: Financial
21
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
22
+ Classifier: Typing :: Typed
23
+ Requires-Python: >=3.11
24
+ Requires-Dist: pandas<3,>=2.3.0
25
+ Requires-Dist: parsimony-core>=0.0.1
26
+ Requires-Dist: pydantic<3,>=2.11.1
27
+ Provides-Extra: dev
28
+ Requires-Dist: mypy>=1.10; extra == 'dev'
29
+ Requires-Dist: pytest-cov>=5.0; extra == 'dev'
30
+ Requires-Dist: pytest>=9.0.3; extra == 'dev'
31
+ Requires-Dist: respx>=0.22.0; extra == 'dev'
32
+ Requires-Dist: ruff>=0.15.10; extra == 'dev'
33
+ Description-Content-Type: text/markdown
34
+
35
+ # parsimony-eia
36
+
37
+ U.S. Energy Information Administration (EIA) connector — fetches energy data (petroleum, electricity, natural gas, coal, renewables) from the EIA v2 Open Data API.
38
+
39
+ Part of the [parsimony-connectors](https://github.com/ockham-sh/parsimony-connectors) monorepo. Distributed standalone on PyPI as `parsimony-eia`.
40
+
41
+ ## Connectors
42
+
43
+ | Name | Kind | Description |
44
+ |---|---|---|
45
+ | `eia_search` | tool | Semantic-search the dataset catalog; returns a dataset `route` to pass to `eia_fetch`. |
46
+ | `eia_fetch` | fetch | Fetch a dataset by `route` (e.g. `petroleum/pri/spt`) with optional `measure`, `facets`, `frequency`, `start`, `end`. Paged in full. |
47
+ | `eia_fetch_series` | fetch | Fetch by a legacy APIv1 series id (e.g. `PET.RWTC.D`) via the `/v2/seriesid/{id}` path. |
48
+ | `eia_facets` | tool | List the valid `{id, name}` values of a dataset's facet dimension, to narrow a fetch to a series. |
49
+ | `enumerate_eia` | enumerator | Walk the v2 route tree to one row per leaf dataset (the catalog feed). |
50
+
51
+ ## Coverage
52
+
53
+ The catalog indexes **all 232 leaf datasets** across EIA's 14 top-level
54
+ categories (petroleum, natural gas, electricity, coal, nuclear,
55
+ renewables/densified-biomass, total energy, emissions, international, SEDS, STEO, AEO, IEO,
56
+ crude-oil imports), each carrying its measure and facet-dimension manifest. EIA's
57
+ full series universe (~2M series) is the facet cartesian product of those
58
+ datasets — too large to catalog individually, but every series is **fetchable**:
59
+ by `route` + `facets` filters, or by its legacy series id via `eia_fetch_series`.
60
+ So discovery is at the dataset tier and access is total.
61
+
62
+ EIA caps every data response at 5,000 rows; the fetch connectors page through to
63
+ completeness automatically (a request matching more than 300,000 rows is refused
64
+ with guidance to narrow it). Data is U.S. federal public domain — cite as
65
+ "Source: U.S. Energy Information Administration".
66
+
67
+ ## Install
68
+
69
+ ```bash
70
+ pip install parsimony-eia
71
+ ```
72
+
73
+ Pulls in a compatible `parsimony-core` automatically. Verify discovery:
74
+
75
+ ```bash
76
+ python -c "from parsimony import discover; print([p.name for p in discover.iter_providers()])"
77
+ ```
78
+
79
+ ## Configuration
80
+
81
+ Set the following environment variable:
82
+
83
+ ```bash
84
+ export EIA_API_KEY="<your-key>"
85
+ ```
86
+
87
+ Get a free key at https://www.eia.gov/opendata/register.php.
88
+
89
+ ## Quick start
90
+
91
+ ```python
92
+ from parsimony_eia import CONNECTORS
93
+
94
+ result = CONNECTORS["eia_fetch"](route="petroleum/pri/spt", frequency="monthly")
95
+ print(result.data.head())
96
+ ```
97
+
98
+ For multi-plugin composition:
99
+
100
+ ```python
101
+ from parsimony import discover
102
+ connectors = discover.load_all()
103
+ ```
104
+
105
+ ## Provider
106
+
107
+ - Homepage: https://www.eia.gov
108
+ - API docs: https://www.eia.gov/opendata/documentation.php
109
+
110
+ ## License
111
+
112
+ See [LICENSE](./LICENSE).
@@ -0,0 +1,14 @@
1
+ parsimony_eia/__init__.py,sha256=9ASwtwLQ4K69zxYaTMFj02s7OzAr3ZvhqgRC6jXcKNw,525
2
+ parsimony_eia/_http.py,sha256=D0z4fBy1PipVVOcBtuOaSc3Pv_UgYUeX07e6LfUSoDk,3991
3
+ parsimony_eia/catalog_build.py,sha256=VUphJj3j7wLK45clGX5hGO5oyn5fLn21Q0c1fJYxErc,1508
4
+ parsimony_eia/outputs.py,sha256=FsqADK6fkSmwTt9Lq-Cnez0nIr5j7x9-MIXg5tltQqo,3303
5
+ parsimony_eia/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ parsimony_eia/search.py,sha256=vNsl4xl1-P9jApXKc5Au2DlDuvW79jWKkOn7S88t3-I,1579
7
+ parsimony_eia/connectors/__init__.py,sha256=ikVfyurz9wNFSqktx1qZ8MIpQRRXZIfwgg8zfCF_98E,601
8
+ parsimony_eia/connectors/enumerate.py,sha256=lErSaOrfMYc9z0NIlt4-6mydBsaXrj-ZE34navJfgxM,8085
9
+ parsimony_eia/connectors/fetch.py,sha256=jWpRaxXzNvk6Jo3sy-x1k_Z6eCHpUAluz4RQQpfozV4,12772
10
+ parsimony_eia-0.0.1.dist-info/METADATA,sha256=E2qizIje_f83954IxF7QRPEWSJi5pUJjPswRuwZA6lU,4216
11
+ parsimony_eia-0.0.1.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
12
+ parsimony_eia-0.0.1.dist-info/entry_points.txt,sha256=qJPVlobYtY-spSyALdzvxR-aFGE43NXOPopKOA-Fluw,42
13
+ parsimony_eia-0.0.1.dist-info/licenses/LICENSE,sha256=PtHUFTCSwal_QX2Ijk2cx_bpsPV6ooZUMCYAxKBHNu0,10760
14
+ parsimony_eia-0.0.1.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [parsimony.providers]
2
+ eia = parsimony_eia
@@ -0,0 +1,190 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to the Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by the Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding any notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ Copyright 2026 Ockham.sh
179
+
180
+ Licensed under the Apache License, Version 2.0 (the "License");
181
+ you may not use this file except in compliance with the License.
182
+ You may obtain a copy of the License at
183
+
184
+ http://www.apache.org/licenses/LICENSE-2.0
185
+
186
+ Unless required by applicable law or agreed to in writing, software
187
+ distributed under the License is distributed on an "AS IS" BASIS,
188
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
189
+ See the License for the specific language governing permissions and
190
+ limitations under the License.