python-eia 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eia/.agents/skills/eia/SKILL.md +172 -0
- eia/__init__.py +3 -1
- eia/cache.py +399 -0
- eia/catalog.py +137 -0
- eia/catalog_manager.py +464 -0
- eia/cli/app.py +4 -0
- eia/cli/cache_cmd.py +53 -0
- eia/cli/catalog_cmd.py +186 -0
- eia/client.py +309 -19
- {python_eia-0.2.0.dist-info → python_eia-0.3.0.dist-info}/METADATA +7 -3
- python_eia-0.3.0.dist-info/RECORD +22 -0
- python_eia-0.2.0.dist-info/RECORD +0 -16
- {python_eia-0.2.0.dist-info → python_eia-0.3.0.dist-info}/WHEEL +0 -0
- {python_eia-0.2.0.dist-info → python_eia-0.3.0.dist-info}/entry_points.txt +0 -0
eia/catalog.py
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
"""Built-in data catalog and recipes for the EIA API v2.
|
|
2
|
+
|
|
3
|
+
The EIA API is a tree of routes. This module provides:
|
|
4
|
+
- Curated route metadata with descriptions and key facets
|
|
5
|
+
- Named "recipes" — pre-configured queries for common use cases
|
|
6
|
+
- Facet cheat-sheets so users don't have to discover facet values every time
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from dataclasses import dataclass, field
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass(frozen=True)
class DataColumn:
    """Immutable description of one data column taken from a route's API schema."""

    # Column identifier; the only field without a default.
    id: str
    # The remaining descriptors fall back to an empty string when not supplied.
    units: str = ""
    aggregation_method: str = ""
    alias: str = ""
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass(frozen=True)
class Frequency:
    """Immutable description of one frequency option taken from a route's API schema."""

    # Frequency identifier; the only field without a default.
    id: str
    # Optional descriptors; each defaults to an empty string.
    description: str = ""
    query: str = ""
    format: str = ""
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass(frozen=True)
class FacetHint:
    """Describes a facet and its known values so no API call is needed to look them up.

    NOTE: the instance is frozen, but the two mapping fields are plain dicts,
    so ``hash()`` on an instance raises ``TypeError``.
    """

    id: str
    description: str
    # Hand-curated subset of values: value_id -> human label.
    common_values: dict[str, str]
    # Full set of API values: value_id -> name. A fresh empty dict per instance.
    values: dict[str, str] = field(default_factory=dict)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass(frozen=True)
class RouteInfo:
    """Immutable catalog record holding the curated metadata of one data route."""

    # -- Curated fields ---------------------------------------------------
    route: str
    name: str
    description: str
    frequency: str  # the route's default frequency
    facets: tuple[FacetHint, ...]
    notes: str = ""
    # -- Schema fetched from the API (optional; filled in by a refresh) ----
    data_columns: tuple[DataColumn, ...] = ()
    frequencies: tuple[Frequency, ...] = ()
    start_period: str = ""
    end_period: str = ""
    default_date_format: str = ""
    api_hash: str = ""
    last_refreshed: str = ""
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@dataclass(frozen=True)
class Recipe:
    """Immutable, named query preset covering a common use case."""

    id: str
    name: str
    description: str
    route: str
    # Facet filters: facet id -> a single value or a list of values.
    facets: dict[str, str | list[str]]
    frequency: str
    # Optional free-text extras; each defaults to an empty string.
    notes: str = ""
    cli_example: str = ""
    python_example: str = ""
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
# ── Route & Recipe Catalog (loaded from YAML) ─────────────────────────
|
|
80
|
+
|
|
81
|
+
# Imported here rather than at the top of the file: eia.catalog_manager itself
# imports the dataclasses defined above, so they have to exist before this runs.
from eia.catalog_manager import EIACatalogManager as _EIACatalogManager

# Client-less manager; it is only used to parse the packaged YAML files.
_mgr = _EIACatalogManager()

# Lookup tables built once at import time: route path -> RouteInfo, recipe id -> Recipe.
ROUTES: dict[str, RouteInfo] = {
    info.route: info for info in _mgr._load_routes()
}
RECIPES: dict[str, Recipe] = {
    recipe.id: recipe for recipe in _mgr._load_recipes()
}
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
# ── Convenience functions ──────────────────────────────────────────────
|
|
90
|
+
|
|
91
|
+
def get_route(route: str) -> RouteInfo:
    """Return the cataloged metadata for ``route``.

    Raises
    ------
    KeyError
        If ``route`` is not a key of ``ROUTES``.
    """
    try:
        return ROUTES[route]
    except KeyError:
        # Replace the bare-key KeyError with a message that points at the listing helper.
        raise KeyError(
            f"Unknown route '{route}'. Use catalog.list_routes() to see available routes."
        ) from None
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def get_recipe(recipe_id: str) -> Recipe:
    """Return the recipe registered under ``recipe_id``.

    Raises
    ------
    KeyError
        If ``recipe_id`` is not a key of ``RECIPES``; the message lists the known IDs.
    """
    try:
        return RECIPES[recipe_id]
    except KeyError:
        known_ids = ", ".join(RECIPES)
        raise KeyError(
            f"Unknown recipe '{recipe_id}'. Available: {known_ids}"
        ) from None
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def list_routes() -> list[str]:
    """Return every cataloged route path, sorted ascending."""
    # Iterating a dict yields its keys, so no explicit .keys() call is needed.
    return sorted(ROUTES)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def list_recipes() -> list[str]:
    """Return every recipe ID, sorted ascending."""
    # Iterating a dict yields its keys, so no explicit .keys() call is needed.
    return sorted(RECIPES)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def summary() -> str:
    """Build a plain-text overview of the catalog.

    The text has a header, then one entry per route (path, name and description,
    default frequency, optional note), then one entry per recipe (ID, name,
    description). Routes and recipes are each listed in sorted key order.
    """
    out = ["EIA Data Catalog", "=" * 50, "", "Routes:"]

    for route_path in sorted(ROUTES):
        info = ROUTES[route_path]
        out += [
            f" {route_path}",
            f" {info.name}: {info.description}",
            f" Default frequency: {info.frequency}",
        ]
        # Routes without notes get no "Note:" line.
        if info.notes:
            out.append(f" Note: {info.notes}")

    out += ["", "Recipes (pre-configured queries):"]
    for recipe_id in sorted(RECIPES):
        recipe = RECIPES[recipe_id]
        out += [f" {recipe_id}: {recipe.name}", f" {recipe.description}"]

    return "\n".join(out)
|
eia/catalog_manager.py
ADDED
|
@@ -0,0 +1,464 @@
|
|
|
1
|
+
"""YAML-backed catalog manager for EIA routes and recipes.
|
|
2
|
+
|
|
3
|
+
Loads curated route metadata and recipes from YAML files shipped with
|
|
4
|
+
the package, exposing them as dataclass instances and DataFrames.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import hashlib
|
|
10
|
+
import importlib.resources
|
|
11
|
+
import json
|
|
12
|
+
import logging
|
|
13
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
14
|
+
from dataclasses import dataclass, field
|
|
15
|
+
from datetime import datetime, timezone
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import TYPE_CHECKING, Any, Optional
|
|
18
|
+
|
|
19
|
+
import pandas as pd
|
|
20
|
+
import yaml
|
|
21
|
+
|
|
22
|
+
from eia.catalog import DataColumn, FacetHint, Frequency, Recipe, RouteInfo
|
|
23
|
+
|
|
24
|
+
if TYPE_CHECKING:
|
|
25
|
+
from eia.client import EIAClient
|
|
26
|
+
|
|
27
|
+
logger = logging.getLogger(__name__)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
class RefreshResult:
    """Outcome of one catalog refresh, bucketed by what happened to each route."""

    # Every bucket is a list of strings; each instance gets its own empty lists.
    added: list[str] = field(default_factory=list)
    updated: list[str] = field(default_factory=list)
    removed: list[str] = field(default_factory=list)
    unchanged: list[str] = field(default_factory=list)
    # Human-readable error messages collected while fetching.
    errors: list[str] = field(default_factory=list)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class EIACatalogManager:
|
|
42
|
+
"""Manages the YAML-backed EIA data catalog.
|
|
43
|
+
|
|
44
|
+
Parameters
|
|
45
|
+
----------
|
|
46
|
+
client : EIAClient, optional
|
|
47
|
+
An EIA client instance, needed only for ``refresh()``.
|
|
48
|
+
"""
|
|
49
|
+
|
|
50
|
+
def __init__(self, client: Optional[EIAClient] = None) -> None:
|
|
51
|
+
self._client = client
|
|
52
|
+
self._routes: list[RouteInfo] | None = None
|
|
53
|
+
self._recipes: list[Recipe] | None = None
|
|
54
|
+
|
|
55
|
+
# ── YAML loading ──────────────────────────────────────────────────
|
|
56
|
+
|
|
57
|
+
@staticmethod
def _read_yaml(filename: str) -> dict:
    """Read and parse a YAML file shipped in the ``eia.data`` package.

    Parameters
    ----------
    filename : str
        Name of the resource inside ``eia.data`` (e.g. ``"routes.yaml"``).

    Returns
    -------
    dict
        The parsed document. An empty file (which ``yaml.safe_load`` parses
        to ``None``) is returned as ``{}`` so callers can use ``.get`` safely.
    """
    ref = importlib.resources.files("eia.data").joinpath(filename)
    # Traversable.read_text works for on-disk and zipped packages alike,
    # so no temporary extraction via as_file() is needed just to read.
    text = ref.read_text(encoding="utf-8")
    return yaml.safe_load(text) or {}
|
|
63
|
+
|
|
64
|
+
def _load_routes(self) -> list[RouteInfo]:
|
|
65
|
+
"""Parse ``routes.yaml`` into a list of :class:`RouteInfo`."""
|
|
66
|
+
if self._routes is not None:
|
|
67
|
+
return self._routes
|
|
68
|
+
|
|
69
|
+
data = self._read_yaml("routes.yaml")
|
|
70
|
+
routes: list[RouteInfo] = []
|
|
71
|
+
for entry in data.get("routes", []):
|
|
72
|
+
facets = tuple(
|
|
73
|
+
FacetHint(
|
|
74
|
+
id=f["id"],
|
|
75
|
+
description=f.get("description", ""),
|
|
76
|
+
common_values=f.get("common_values") or {},
|
|
77
|
+
values=f.get("values") or {},
|
|
78
|
+
)
|
|
79
|
+
for f in entry.get("facets", [])
|
|
80
|
+
)
|
|
81
|
+
data_columns = tuple(
|
|
82
|
+
DataColumn(
|
|
83
|
+
id=c["id"],
|
|
84
|
+
units=c.get("units", ""),
|
|
85
|
+
aggregation_method=c.get("aggregation_method", ""),
|
|
86
|
+
alias=c.get("alias", ""),
|
|
87
|
+
)
|
|
88
|
+
for c in entry.get("data_columns", [])
|
|
89
|
+
)
|
|
90
|
+
frequencies = tuple(
|
|
91
|
+
Frequency(
|
|
92
|
+
id=f["id"],
|
|
93
|
+
description=f.get("description", ""),
|
|
94
|
+
query=f.get("query", ""),
|
|
95
|
+
format=f.get("format", ""),
|
|
96
|
+
)
|
|
97
|
+
for f in entry.get("frequencies", [])
|
|
98
|
+
)
|
|
99
|
+
routes.append(
|
|
100
|
+
RouteInfo(
|
|
101
|
+
route=entry["route"],
|
|
102
|
+
name=entry["name"],
|
|
103
|
+
description=entry.get("description", ""),
|
|
104
|
+
frequency=entry.get("frequency", ""),
|
|
105
|
+
facets=facets,
|
|
106
|
+
notes=entry.get("notes") or "",
|
|
107
|
+
data_columns=data_columns,
|
|
108
|
+
frequencies=frequencies,
|
|
109
|
+
start_period=entry.get("start_period", ""),
|
|
110
|
+
end_period=entry.get("end_period", ""),
|
|
111
|
+
default_date_format=entry.get("default_date_format", ""),
|
|
112
|
+
api_hash=entry.get("api_hash", ""),
|
|
113
|
+
last_refreshed=entry.get("last_refreshed", ""),
|
|
114
|
+
)
|
|
115
|
+
)
|
|
116
|
+
self._routes = routes
|
|
117
|
+
return routes
|
|
118
|
+
|
|
119
|
+
def _load_recipes(self) -> list[Recipe]:
|
|
120
|
+
"""Parse ``recipes.yaml`` into a list of :class:`Recipe`."""
|
|
121
|
+
if self._recipes is not None:
|
|
122
|
+
return self._recipes
|
|
123
|
+
|
|
124
|
+
data = self._read_yaml("recipes.yaml")
|
|
125
|
+
recipes: list[Recipe] = []
|
|
126
|
+
for entry in data.get("recipes", []):
|
|
127
|
+
facets: dict[str, str | list[str]] = {}
|
|
128
|
+
for k, v in (entry.get("facets") or {}).items():
|
|
129
|
+
facets[k] = v
|
|
130
|
+
|
|
131
|
+
recipes.append(
|
|
132
|
+
Recipe(
|
|
133
|
+
id=entry["id"],
|
|
134
|
+
name=entry["name"],
|
|
135
|
+
description=entry.get("description", ""),
|
|
136
|
+
route=entry["route"],
|
|
137
|
+
facets=facets,
|
|
138
|
+
frequency=entry.get("frequency", ""),
|
|
139
|
+
notes=entry.get("notes") or "",
|
|
140
|
+
cli_example=entry.get("cli_example") or "",
|
|
141
|
+
python_example=entry.get("python_example") or "",
|
|
142
|
+
)
|
|
143
|
+
)
|
|
144
|
+
self._recipes = recipes
|
|
145
|
+
return recipes
|
|
146
|
+
|
|
147
|
+
# ── Public API ────────────────────────────────────────────────────
|
|
148
|
+
|
|
149
|
+
def list_routes(self, query: str | None = None) -> pd.DataFrame:
    """Return a DataFrame of all cataloged routes.

    Parameters
    ----------
    query : str, optional
        Case-insensitive substring filter applied to route, name, and
        description. It is matched literally, not as a regular expression,
        so characters such as ``(`` or ``+`` are safe.

    Returns
    -------
    pandas.DataFrame
        Columns ``route``, ``name``, ``description``, ``frequency``. The
        columns are present even when the catalog is empty.
    """
    columns = ["route", "name", "description", "frequency"]
    rows = [
        {"route": r.route, "name": r.name, "description": r.description, "frequency": r.frequency}
        for r in self._load_routes()
    ]
    df = pd.DataFrame(rows, columns=columns)
    if not query or df.empty:
        return df

    q = query.lower()
    # regex=False: the query is user text, not a pattern.
    mask = (
        df["route"].str.lower().str.contains(q, regex=False, na=False)
        | df["name"].str.lower().str.contains(q, regex=False, na=False)
        | df["description"].str.lower().str.contains(q, regex=False, na=False)
    )
    return df[mask]
|
|
172
|
+
|
|
173
|
+
def list_recipes(self, query: str | None = None) -> pd.DataFrame:
    """Return a DataFrame of all cataloged recipes.

    Parameters
    ----------
    query : str, optional
        Case-insensitive substring filter applied to id, name, and
        description. It is matched literally, not as a regular expression,
        so characters such as ``(`` or ``+`` are safe.

    Returns
    -------
    pandas.DataFrame
        Columns ``id``, ``name``, ``description``, ``route``, ``frequency``.
        The columns are present even when there are no recipes.
    """
    columns = ["id", "name", "description", "route", "frequency"]
    rows = [
        {"id": r.id, "name": r.name, "description": r.description, "route": r.route, "frequency": r.frequency}
        for r in self._load_recipes()
    ]
    df = pd.DataFrame(rows, columns=columns)
    if not query or df.empty:
        return df

    q = query.lower()
    # regex=False: the query is user text, not a pattern.
    mask = (
        df["id"].str.lower().str.contains(q, regex=False, na=False)
        | df["name"].str.lower().str.contains(q, regex=False, na=False)
        | df["description"].str.lower().str.contains(q, regex=False, na=False)
    )
    return df[mask]
|
|
196
|
+
|
|
197
|
+
def get_route(self, route: str) -> RouteInfo:
    """Return the first cataloged route whose path equals ``route``.

    Raises
    ------
    KeyError
        If no cataloged route has that path.
    """
    found = next(
        (candidate for candidate in self._load_routes() if candidate.route == route),
        None,
    )
    if found is None:
        raise KeyError(
            f"Unknown route '{route}'. Use catalog.list_routes() to see available routes."
        )
    return found
|
|
211
|
+
|
|
212
|
+
def get_recipe(self, recipe_id: str) -> Recipe:
    """Return the first cataloged recipe whose ID equals ``recipe_id``.

    Raises
    ------
    KeyError
        If no recipe has that ID; the message lists every known ID.
    """
    found = next(
        (candidate for candidate in self._load_recipes() if candidate.id == recipe_id),
        None,
    )
    if found is not None:
        return found

    available = ", ".join(candidate.id for candidate in self._load_recipes())
    raise KeyError(f"Unknown recipe '{recipe_id}'. Available: {available}")
|
|
225
|
+
|
|
226
|
+
@staticmethod
def _routes_yaml_path() -> Path:
    """Return a filesystem path for the packaged ``routes.yaml``."""
    resource = importlib.resources.files("eia.data").joinpath("routes.yaml")
    # NOTE(review): the path leaves the as_file() context before it is used.
    # That is harmless when the package lives on disk (e.g. an editable
    # install, where as_file yields the real file). For a zipped package it is
    # presumably a temporary copy that is removed on exit - confirm before
    # relying on this for anything but on-disk installs.
    with importlib.resources.as_file(resource) as extracted:
        resolved = Path(extracted)
    return resolved
|
|
233
|
+
|
|
234
|
+
@staticmethod
|
|
235
|
+
def _compute_api_hash(api_meta: dict[str, Any]) -> str:
|
|
236
|
+
"""SHA-256 of the API metadata response (deterministic)."""
|
|
237
|
+
return hashlib.sha256(
|
|
238
|
+
json.dumps(api_meta, sort_keys=True).encode()
|
|
239
|
+
).hexdigest()
|
|
240
|
+
|
|
241
|
+
@staticmethod
|
|
242
|
+
def _extract_schema_from_api(
|
|
243
|
+
api_meta: dict[str, Any],
|
|
244
|
+
) -> dict[str, Any]:
|
|
245
|
+
"""Extract schema fields from an API metadata response.
|
|
246
|
+
|
|
247
|
+
Returns a dict with keys matching RouteInfo API-fetched fields.
|
|
248
|
+
"""
|
|
249
|
+
data_columns = []
|
|
250
|
+
for col_id, col_data in (api_meta.get("data", {}) or {}).items():
|
|
251
|
+
if isinstance(col_data, dict):
|
|
252
|
+
data_columns.append({
|
|
253
|
+
"id": col_id,
|
|
254
|
+
"units": col_data.get("units", ""),
|
|
255
|
+
"aggregation_method": col_data.get("aggregation-method", ""),
|
|
256
|
+
"alias": col_data.get("alias", ""),
|
|
257
|
+
})
|
|
258
|
+
|
|
259
|
+
frequencies = []
|
|
260
|
+
for freq in api_meta.get("frequency", []):
|
|
261
|
+
if isinstance(freq, dict) and "id" in freq:
|
|
262
|
+
frequencies.append({
|
|
263
|
+
"id": freq["id"],
|
|
264
|
+
"description": freq.get("description", ""),
|
|
265
|
+
"query": freq.get("query", ""),
|
|
266
|
+
"format": freq.get("format", ""),
|
|
267
|
+
})
|
|
268
|
+
|
|
269
|
+
facets = []
|
|
270
|
+
for facet in api_meta.get("facets", []):
|
|
271
|
+
if isinstance(facet, dict) and "id" in facet:
|
|
272
|
+
facets.append({
|
|
273
|
+
"id": facet["id"],
|
|
274
|
+
"description": facet.get("description", ""),
|
|
275
|
+
})
|
|
276
|
+
|
|
277
|
+
return {
|
|
278
|
+
"name": api_meta.get("name", ""),
|
|
279
|
+
"description": api_meta.get("description", ""),
|
|
280
|
+
"default_frequency": api_meta.get("defaultFrequency", ""),
|
|
281
|
+
"start_period": api_meta.get("startPeriod", ""),
|
|
282
|
+
"end_period": api_meta.get("endPeriod", ""),
|
|
283
|
+
"default_date_format": api_meta.get("defaultDateFormat", ""),
|
|
284
|
+
"data_columns": data_columns,
|
|
285
|
+
"frequencies": frequencies,
|
|
286
|
+
"facets": facets,
|
|
287
|
+
}
|
|
288
|
+
|
|
289
|
+
@staticmethod
|
|
290
|
+
def _merge_route_entry(
|
|
291
|
+
existing: dict[str, Any],
|
|
292
|
+
schema: dict[str, Any],
|
|
293
|
+
api_hash: str,
|
|
294
|
+
facet_values: dict[str, dict[str, str]] | None = None,
|
|
295
|
+
) -> dict[str, Any]:
|
|
296
|
+
"""Merge API-fetched schema into an existing YAML route entry.
|
|
297
|
+
|
|
298
|
+
Preserves hand-curated fields: ``notes``, facets' ``common_values``.
|
|
299
|
+
Overwrites facets' ``values`` with full API-fetched values.
|
|
300
|
+
"""
|
|
301
|
+
merged = dict(existing)
|
|
302
|
+
|
|
303
|
+
# Overwrite API-fetched scalar fields
|
|
304
|
+
for key in ("name", "description", "start_period", "end_period",
|
|
305
|
+
"default_date_format", "data_columns"):
|
|
306
|
+
merged[key] = schema[key]
|
|
307
|
+
|
|
308
|
+
merged["frequency"] = schema.get("default_frequency", existing.get("frequency", ""))
|
|
309
|
+
merged["frequencies"] = schema["frequencies"]
|
|
310
|
+
merged["api_hash"] = api_hash
|
|
311
|
+
merged["last_refreshed"] = datetime.now(timezone.utc).isoformat(timespec="seconds")
|
|
312
|
+
|
|
313
|
+
# Merge facets: API IDs + descriptions overwritten, common_values preserved,
|
|
314
|
+
# values overwritten with full API-fetched values
|
|
315
|
+
existing_facets_by_id: dict[str, dict] = {
|
|
316
|
+
f["id"]: f for f in existing.get("facets", [])
|
|
317
|
+
}
|
|
318
|
+
facet_values = facet_values or {}
|
|
319
|
+
merged_facets = []
|
|
320
|
+
for api_facet in schema["facets"]:
|
|
321
|
+
fid = api_facet["id"]
|
|
322
|
+
old = existing_facets_by_id.get(fid, {})
|
|
323
|
+
merged_facet: dict[str, Any] = {
|
|
324
|
+
"id": fid,
|
|
325
|
+
"description": api_facet["description"],
|
|
326
|
+
}
|
|
327
|
+
# Preserve hand-curated common_values
|
|
328
|
+
if old.get("common_values"):
|
|
329
|
+
merged_facet["common_values"] = old["common_values"]
|
|
330
|
+
# Store full API-fetched values
|
|
331
|
+
if fid in facet_values:
|
|
332
|
+
merged_facet["values"] = facet_values[fid]
|
|
333
|
+
merged_facets.append(merged_facet)
|
|
334
|
+
merged["facets"] = merged_facets
|
|
335
|
+
|
|
336
|
+
return merged
|
|
337
|
+
|
|
338
|
+
def _save_yaml(self, route_entries: list[dict[str, Any]]) -> None:
    """Overwrite ``routes.yaml`` with ``route_entries`` and reset the route cache.

    The file is written as a mapping with ``version: 1`` and the entries
    under ``routes``, keeping the given key order.
    """
    target = self._routes_yaml_path()
    document = {"version": 1, "routes": route_entries}
    serialized = yaml.dump(
        document,
        default_flow_style=False,
        sort_keys=False,
        allow_unicode=True,
    )
    target.write_text(serialized, encoding="utf-8")
    # Force the next _load_routes() call to re-read the file.
    self._routes = None
|
|
348
|
+
|
|
349
|
+
def _fetch_route_data(
|
|
350
|
+
self, route_path: str,
|
|
351
|
+
) -> tuple[dict[str, Any], dict[str, dict[str, str]], list[str]]:
|
|
352
|
+
"""Fetch metadata and all facet values for a single route.
|
|
353
|
+
|
|
354
|
+
Returns (api_meta, facet_values, errors).
|
|
355
|
+
"""
|
|
356
|
+
errors: list[str] = []
|
|
357
|
+
api_meta = self._client.get_metadata(route_path)
|
|
358
|
+
|
|
359
|
+
# Collect facet IDs, then fetch values in parallel
|
|
360
|
+
facet_ids = [
|
|
361
|
+
f["id"] for f in api_meta.get("facets", [])
|
|
362
|
+
if isinstance(f, dict) and "id" in f
|
|
363
|
+
]
|
|
364
|
+
|
|
365
|
+
facet_values: dict[str, dict[str, str]] = {}
|
|
366
|
+
|
|
367
|
+
def fetch_facet(fid: str) -> tuple[str, dict[str, str] | None, str | None]:
|
|
368
|
+
try:
|
|
369
|
+
fv_response = self._client.get_facet_values(route_path, fid)
|
|
370
|
+
values = {
|
|
371
|
+
item["id"]: item.get("name", item.get("description", ""))
|
|
372
|
+
for item in fv_response.get("facets", [])
|
|
373
|
+
if isinstance(item, dict) and "id" in item
|
|
374
|
+
}
|
|
375
|
+
return fid, values, None
|
|
376
|
+
except Exception as e:
|
|
377
|
+
return fid, None, f"Error fetching facet {fid} for {route_path}: {e}"
|
|
378
|
+
|
|
379
|
+
with ThreadPoolExecutor(max_workers=5) as pool:
|
|
380
|
+
for fid, values, err in pool.map(fetch_facet, facet_ids):
|
|
381
|
+
if err:
|
|
382
|
+
errors.append(err)
|
|
383
|
+
elif values is not None:
|
|
384
|
+
facet_values[fid] = values
|
|
385
|
+
|
|
386
|
+
return api_meta, facet_values, errors
|
|
387
|
+
|
|
388
|
+
def refresh(self, dry_run: bool = False) -> RefreshResult:
    """Fetch full API schema for each cataloged route and persist to YAML.

    Routes are fetched on a pool of up to 4 threads, and each route's facet
    values are fetched in parallel as well. Hand-curated fields (notes,
    common_values) are preserved by the merge step.

    A route counts as *unchanged* when the hash of its fetched metadata plus
    facet values equals the ``api_hash`` stored in its YAML entry; otherwise
    it is merged and counted as *updated*. A route whose metadata fetch
    failed keeps its YAML entry as-is and appears only in ``errors``.

    Parameters
    ----------
    dry_run : bool
        If True, report what would change without modifying files.

    Returns
    -------
    RefreshResult
        ``updated``, ``unchanged`` and ``errors`` are filled in; ``added``
        and ``removed`` are left empty by this method.

    Raises
    ------
    RuntimeError
        If the manager was constructed without a client.
    """
    if self._client is None:
        raise RuntimeError(
            "Cannot refresh without an EIA client. "
            "Pass client= when constructing EIACatalogManager."
        )

    result = RefreshResult()

    # Work on raw YAML dicts (not dataclasses) so entries can be merged and
    # written back. An empty file or `routes: null` means "no routes".
    raw_data = self._read_yaml("routes.yaml") or {}
    raw_entries: list[dict[str, Any]] = raw_data.get("routes") or []

    def fetch_one(entry: dict[str, Any]) -> tuple[dict | None, dict | None, list[str]]:
        route_path = entry["route"]
        try:
            return self._fetch_route_data(route_path)
        except Exception as e:
            # Record the failure; the entry is then kept unchanged below.
            return None, None, [f"Error fetching {route_path}: {e}"]

    with ThreadPoolExecutor(max_workers=4) as pool:
        # map() keeps input order, so results pair up with raw_entries
        # one-to-one even if two entries share a route path.
        fetched = list(pool.map(fetch_one, raw_entries))

    updated_entries: list[dict[str, Any]] = []
    for entry, (api_meta, facet_values, errors) in zip(raw_entries, fetched):
        route_path = entry["route"]
        result.errors.extend(errors)

        if api_meta is None:
            updated_entries.append(entry)
            continue

        hash_input = {"meta": api_meta, "facet_values": facet_values}
        new_hash = self._compute_api_hash(hash_input)

        if new_hash == entry.get("api_hash", ""):
            result.unchanged.append(route_path)
            updated_entries.append(entry)
            continue

        schema = self._extract_schema_from_api(api_meta)
        merged = self._merge_route_entry(entry, schema, new_hash, facet_values)
        result.updated.append(route_path)
        updated_entries.append(merged)

    # Write only when something changed and this is not a dry run.
    if not dry_run and result.updated:
        self._save_yaml(updated_entries)
        logger.info(
            "Refresh updated %d routes in routes.yaml.",
            len(result.updated),
        )

    return result
|
eia/cli/app.py
CHANGED
|
@@ -40,6 +40,8 @@ from eia.cli.facets_cmd import facets_command # noqa: E402
|
|
|
40
40
|
from eia.cli.get_cmd import get_command # noqa: E402
|
|
41
41
|
from eia.cli.exec_cmd import exec_command # noqa: E402
|
|
42
42
|
from eia.cli.config_cmd import config_app # noqa: E402
|
|
43
|
+
from eia.cli.cache_cmd import cache_app # noqa: E402
|
|
44
|
+
from eia.cli.catalog_cmd import catalog_app # noqa: E402
|
|
43
45
|
|
|
44
46
|
app.command(name="routes")(routes_command)
|
|
45
47
|
app.command(name="meta")(meta_command)
|
|
@@ -47,6 +49,8 @@ app.command(name="facets")(facets_command)
|
|
|
47
49
|
app.command(name="get")(get_command)
|
|
48
50
|
app.command(name="exec")(exec_command)
|
|
49
51
|
app.add_typer(config_app, name="config", help="Configuration management")
|
|
52
|
+
app.add_typer(cache_app, name="cache", help="Cache management")
|
|
53
|
+
app.add_typer(catalog_app, name="catalog", help="Browse available data routes and pre-configured recipes")
|
|
50
54
|
|
|
51
55
|
|
|
52
56
|
def main() -> None:
|
eia/cli/cache_cmd.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
"""CLI commands for cache management."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
import typer
|
|
8
|
+
|
|
9
|
+
cache_app = typer.Typer(help="Manage the local parquet cache.")
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@cache_app.command(name="status")
def cache_status():
    """Show cache statistics (files, size, routes)."""
    # Imported lazily, inside the command.
    from eia.cache import CacheConfig, CacheStore

    info = CacheStore(CacheConfig()).status()

    typer.echo(f"Cache path: {info['path']}")
    typer.echo(f"Files: {info['files']}")
    typer.echo(f"Size: {info['size_mb']} MB")

    per_route = info.get("routes", {})
    if not per_route:
        return

    # Per-route file counts, in sorted route order.
    typer.echo("\nRoutes:")
    for route in sorted(per_route):
        typer.echo(f" {route}: {per_route[route]} files")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@cache_app.command(name="path")
def cache_path():
    """Print the cache directory path."""
    # Imported lazily, inside the command.
    from eia.cache import CacheConfig

    config = CacheConfig()
    typer.echo(str(config.cache_dir))
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@cache_app.command(name="clear")
def cache_clear(
    route: Optional[str] = typer.Option(None, "--route", "-r", help="Route to clear (e.g. electricity/rto/fuel-type-data)"),
    frequency: Optional[str] = typer.Option(None, "--frequency", "-f", help="Frequency to clear (e.g. hourly)"),
):
    """Clear cached data.

    Without flags, clears the entire cache. Use --route and/or --frequency
    to target specific partitions.
    """
    # Imported lazily, inside the command.
    from eia.cache import CacheConfig, CacheStore

    cache_store = CacheStore(CacheConfig())
    # Both filters are passed through as-is; None means the flag was not given.
    removed_count = cache_store.clear(route=route, frequency=frequency)
    typer.echo(f"Removed {removed_count} cached file(s).")
|