python-eia 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eia/.agents/skills/eia/SKILL.md +172 -0
- eia/__init__.py +3 -1
- eia/cache.py +399 -0
- eia/catalog.py +137 -0
- eia/catalog_manager.py +464 -0
- eia/cli/app.py +4 -0
- eia/cli/cache_cmd.py +53 -0
- eia/cli/catalog_cmd.py +186 -0
- eia/client.py +309 -19
- {python_eia-0.2.0.dist-info → python_eia-0.3.0.dist-info}/METADATA +7 -3
- python_eia-0.3.0.dist-info/RECORD +22 -0
- python_eia-0.2.0.dist-info/RECORD +0 -16
- {python_eia-0.2.0.dist-info → python_eia-0.3.0.dist-info}/WHEEL +0 -0
- {python_eia-0.2.0.dist-info → python_eia-0.3.0.dist-info}/entry_points.txt +0 -0
eia/cli/catalog_cmd.py
ADDED
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
"""CLI command: browse the built-in data catalog and recipes."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
import typer
|
|
8
|
+
from rich.console import Console
|
|
9
|
+
from rich.table import Table
|
|
10
|
+
from rich.panel import Panel
|
|
11
|
+
|
|
12
|
+
from eia.catalog import ROUTES, RECIPES
|
|
13
|
+
|
|
14
|
+
# Typer sub-application that the commands in this module register on via
# @catalog_app.command(...); no_args_is_help=True is passed so that invoking it
# with no arguments shows the help text.
catalog_app = typer.Typer(no_args_is_help=True)
|
|
15
|
+
# Module-wide rich console; every command in this file prints through it.
console = Console()
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@catalog_app.command("refresh")
def catalog_refresh(
    dry_run: bool = typer.Option(True, "--dry-run/--apply", help="Show what would change without modifying files"),
):
    """Discover new routes from the live EIA API and compare with the catalog.

    Builds an ``EIACatalogManager`` around the CLI client, calls its
    ``refresh`` method and prints the errors, updated, added and removed
    routes it reports, followed by the number of unchanged routes.

    Args:
        dry_run: Forwarded unchanged to ``EIACatalogManager.refresh``.
            Defaults to ``True`` (``--dry-run``); ``--apply`` sets it to
            ``False``.
    """
    # Kept as function-local imports, as in the original module layout.
    from eia.cli.app import get_client
    from eia.catalog_manager import EIACatalogManager
    from rich.markup import escape

    result = EIACatalogManager(get_client()).refresh(dry_run=dry_run)

    if result.errors:
        for err in result.errors:
            # Error text is arbitrary; escape it so bracketed fragments are
            # printed literally instead of being parsed as rich markup tags.
            console.print(f"[red]Error:[/red] {escape(str(err))}")

    # (colour, heading, bullet marker, routes) for each change category.
    sections = (
        ("blue", "Updated schema", "~", result.updated),
        ("green", "New routes", "+", result.added),
        ("yellow", "Routes in catalog but not discovered", "-", result.removed),
    )
    for colour, heading, marker, routes in sections:
        if not routes:
            continue
        console.print(f"\n[{colour}]{heading} ({len(routes)}):[/{colour}]")
        for route in routes:
            console.print(f" {marker} {escape(str(route))}")

    # Only claim success when nothing changed AND the refresh reported no
    # errors; with errors the comparison may be incomplete.
    if not result.updated and not result.added and not result.errors:
        console.print("[green]All routes up to date.[/green]")

    console.print(f"\n[dim]Unchanged: {len(result.unchanged)} routes[/dim]")
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@catalog_app.command("routes")
def catalog_routes(
    query: Optional[str] = typer.Argument(None, help="Filter routes by keyword"),
):
    """List all cataloged data routes with descriptions.

    Prints one table row (route path, name, frequency) per entry of
    ``ROUTES``, ordered by route path. When ``query`` is given, only routes
    whose path, name or description contains it (case-insensitively) are
    listed; the description is searched but not displayed.

    Args:
        query: Optional keyword used to filter the listing.
    """
    needle = query.lower() if query else None

    listing = Table(title="EIA Data Routes", show_header=True, padding=(0, 1))
    for header, style in (("Route", "cyan"), ("Name", None), ("Frequency", "green")):
        listing.add_column(header, style=style)

    for route_path, info in sorted(ROUTES.items()):
        # Skip rows where the keyword appears in none of the searched fields.
        if needle is not None and not (
            needle in route_path.lower()
            or needle in info.name.lower()
            or needle in info.description.lower()
        ):
            continue
        listing.add_row(route_path, info.name, info.frequency)

    console.print(listing)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@catalog_app.command("show")
def catalog_show(
    route: str = typer.Argument(..., help="Route path (e.g. natural-gas/move/expc)"),
):
    """Show detailed info for a specific route, including facets and hints.

    Looks the route up with ``eia.catalog.get_route`` and prints its name,
    description and default frequency, followed by the optional notes,
    period, date format, refresh timestamp, data columns, frequencies and
    per-facet common values when they are present.

    Args:
        route: Route path to look up in the catalog.

    Raises:
        typer.Exit: With code 1 when ``get_route`` raises ``KeyError``.
    """
    from eia.catalog import get_route
    from rich.markup import escape

    try:
        info = get_route(route)
    except KeyError as e:
        # str(KeyError) is the repr of its argument (wrapped in quotes), so
        # print the message itself, escaped so it is not parsed as markup.
        message = e.args[0] if e.args else e
        console.print(f"[red]{escape(str(message))}[/red]")
        raise typer.Exit(1)

    console.print(f"\n[bold]{info.name}[/bold]")
    console.print(f" Route: {info.route}")
    console.print(f" {info.description}")
    console.print(f" Default frequency: {info.frequency}")

    if info.notes:
        console.print(f"\n [yellow]Note:[/yellow] {info.notes}")

    if info.start_period or info.end_period:
        console.print(f"\n Period: {info.start_period} → {info.end_period}")
    if info.default_date_format:
        console.print(f" Date format: {info.default_date_format}")
    if info.last_refreshed:
        console.print(f" [dim]Last refreshed: {info.last_refreshed}[/dim]")

    if info.data_columns:
        console.print("\n [bold]Data Columns:[/bold]")
        col_table = Table(show_header=True, padding=(0, 1))
        col_table.add_column("Column", style="green")
        col_table.add_column("Alias")
        col_table.add_column("Units")
        col_table.add_column("Aggregation")
        for col in info.data_columns:
            col_table.add_row(col.id, col.alias, col.units, col.aggregation_method)
        console.print(col_table)

    if info.frequencies:
        console.print("\n [bold]Frequencies:[/bold]")
        freq_table = Table(show_header=True, padding=(0, 1))
        freq_table.add_column("ID", style="green")
        freq_table.add_column("Description")
        freq_table.add_column("Query")
        freq_table.add_column("Format")
        for freq in info.frequencies:
            freq_table.add_row(freq.id, freq.description, freq.query, freq.format)
        console.print(freq_table)

    for facet in info.facets:
        console.print(f"\n [cyan]Facet: {facet.id}[/cyan] — {facet.description}")
        if facet.common_values:
            # One row per known value, ordered by value id.
            table = Table(show_header=True, padding=(0, 1))
            table.add_column("Value", style="green")
            table.add_column("Description")
            for val_id, val_desc in sorted(facet.common_values.items()):
                table.add_row(val_id, val_desc)
            console.print(table)

    # Trailing blank line after the report.
    console.print()
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
@catalog_app.command("recipes")
def catalog_recipes(
    query: Optional[str] = typer.Argument(None, help="Filter recipes by keyword"),
):
    """List pre-configured query recipes for common use cases.

    Prints one table row (recipe id, name, route) per entry of ``RECIPES``,
    ordered by recipe id. When ``query`` is given, only recipes whose id,
    name or description contains it (case-insensitively) are listed.

    Args:
        query: Optional keyword used to filter the listing.
    """
    needle = query.lower() if query else None

    listing = Table(title="EIA Recipes", show_header=True, padding=(0, 1))
    for header, style in (("ID", "cyan"), ("Name", None), ("Route", "green")):
        listing.add_column(header, style=style)

    for recipe_id, recipe in sorted(RECIPES.items()):
        # Skip rows where the keyword appears in none of the searched fields.
        if needle is not None and not (
            needle in recipe_id.lower()
            or needle in recipe.name.lower()
            or needle in recipe.description.lower()
        ):
            continue
        listing.add_row(recipe_id, recipe.name, recipe.route)

    console.print(listing)
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
@catalog_app.command("recipe")
def catalog_recipe(
    recipe_id: str = typer.Argument(..., help="Recipe ID (e.g. lng-exports-europe)"),
):
    """Show detailed info for a specific recipe, including code examples.

    Looks the recipe up with ``eia.catalog.get_recipe`` and prints its name,
    description, route, frequency and facets, then the optional note and the
    CLI / Python example panels when they are set.

    Args:
        recipe_id: Identifier of the recipe to display.

    Raises:
        typer.Exit: With code 1 when ``get_recipe`` raises ``KeyError``.
    """
    from eia.catalog import get_recipe
    from rich.markup import escape

    def literal(example):
        # Plain strings handed to rich are parsed as console markup, which
        # would swallow bracketed spans in example code; escape them so the
        # example is shown verbatim. Non-string renderables pass through.
        return escape(example) if isinstance(example, str) else example

    try:
        recipe = get_recipe(recipe_id)
    except KeyError as e:
        # str(KeyError) is the repr of its argument (wrapped in quotes), so
        # print the message itself, escaped so it is not parsed as markup.
        message = e.args[0] if e.args else e
        console.print(f"[red]{escape(str(message))}[/red]")
        raise typer.Exit(1)

    console.print(f"\n[bold]{recipe.name}[/bold]")
    console.print(f" {recipe.description}")
    console.print(f" Route: {recipe.route}")
    console.print(f" Frequency: {recipe.frequency}")
    console.print(f" Facets: {recipe.facets}")

    if recipe.notes:
        console.print(f"\n [yellow]Note:[/yellow] {recipe.notes}")

    if recipe.cli_example:
        console.print(Panel(literal(recipe.cli_example), title="CLI Example", border_style="green"))

    if recipe.python_example:
        console.print(Panel(literal(recipe.python_example), title="Python Example", border_style="blue"))

    # Trailing blank line after the report.
    console.print()
|
eia/client.py
CHANGED
|
@@ -2,8 +2,10 @@ import requests
|
|
|
2
2
|
import logging
|
|
3
3
|
import pandas as pd
|
|
4
4
|
import re
|
|
5
|
+
from pathlib import Path
|
|
5
6
|
from urllib.parse import urlparse, parse_qs
|
|
6
7
|
from typing import (
|
|
8
|
+
TYPE_CHECKING,
|
|
7
9
|
List,
|
|
8
10
|
Dict,
|
|
9
11
|
Optional,
|
|
@@ -16,9 +18,14 @@ from typing import (
|
|
|
16
18
|
Protocol,
|
|
17
19
|
runtime_checkable,
|
|
18
20
|
)
|
|
21
|
+
|
|
22
|
+
if TYPE_CHECKING:
|
|
23
|
+
from eia.catalog import RouteInfo
|
|
19
24
|
import os
|
|
20
25
|
from dataclasses import dataclass, field
|
|
21
26
|
|
|
27
|
+
from eia.cache import CacheConfig, CacheStore, _facets_key
|
|
28
|
+
|
|
22
29
|
# Configure logging
|
|
23
30
|
logging.basicConfig(
|
|
24
31
|
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
|
|
@@ -87,9 +94,16 @@ class FacetInfo:
|
|
|
87
94
|
# Store the route slug for potential API calls
|
|
88
95
|
_route_slug: Optional[str] = field(default=None, repr=False)
|
|
89
96
|
_client: Optional["EIAClient"] = field(default=None, repr=False)
|
|
97
|
+
_cached_values: Optional[List["FacetValue"]] = field(default=None, repr=False)
|
|
90
98
|
|
|
91
99
|
def get_values(self) -> List[FacetValue]:
|
|
92
|
-
"""
|
|
100
|
+
"""Returns all possible values for this facet.
|
|
101
|
+
|
|
102
|
+
Uses cached catalog values when available, otherwise fetches from the API.
|
|
103
|
+
"""
|
|
104
|
+
if self._cached_values is not None:
|
|
105
|
+
return self._cached_values
|
|
106
|
+
|
|
93
107
|
if not self._client or not self._route_slug:
|
|
94
108
|
raise ValueError("Client and route slug must be set to fetch facet values.")
|
|
95
109
|
|
|
@@ -258,10 +272,17 @@ class FacetContainer(BaseFacetContainer):
|
|
|
258
272
|
class Data:
|
|
259
273
|
"""Represents a data endpoint in the EIA API with its metadata and query capabilities."""
|
|
260
274
|
|
|
261
|
-
def __init__(
|
|
275
|
+
def __init__(
|
|
276
|
+
self,
|
|
277
|
+
client: "EIAClient",
|
|
278
|
+
route: str,
|
|
279
|
+
metadata: Dict[str, Any],
|
|
280
|
+
cache: Optional[CacheStore] = None,
|
|
281
|
+
):
|
|
262
282
|
self._client = client
|
|
263
283
|
self._route = route
|
|
264
284
|
self._metadata = metadata
|
|
285
|
+
self._cache = cache
|
|
265
286
|
self.id = metadata.get("id", route.split("/")[-1])
|
|
266
287
|
self.name = metadata.get("name", "")
|
|
267
288
|
self.description = metadata.get("description", "")
|
|
@@ -281,16 +302,26 @@ class Data:
|
|
|
281
302
|
if isinstance(freq, dict) and "id" in freq
|
|
282
303
|
]
|
|
283
304
|
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
305
|
+
cached_facet_values = metadata.get("_facet_values", {})
|
|
306
|
+
facet_dict = {}
|
|
307
|
+
for facet_data in metadata.get("facets", []):
|
|
308
|
+
if not isinstance(facet_data, dict) or "id" not in facet_data:
|
|
309
|
+
continue
|
|
310
|
+
fid = facet_data["id"]
|
|
311
|
+
# Build cached FacetValue list from catalog values if available
|
|
312
|
+
cached_values = None
|
|
313
|
+
if fid in cached_facet_values:
|
|
314
|
+
cached_values = [
|
|
315
|
+
FacetValue(id=vid, name=vname)
|
|
316
|
+
for vid, vname in cached_facet_values[fid].items()
|
|
317
|
+
]
|
|
318
|
+
facet_dict[fid] = FacetInfo(
|
|
319
|
+
id=fid,
|
|
287
320
|
description=facet_data.get("description"),
|
|
288
|
-
_route_slug=route,
|
|
289
|
-
_client=client,
|
|
321
|
+
_route_slug=route,
|
|
322
|
+
_client=client,
|
|
323
|
+
_cached_values=cached_values,
|
|
290
324
|
)
|
|
291
|
-
for facet_data in metadata.get("facets", [])
|
|
292
|
-
if isinstance(facet_data, dict) and "id" in facet_data
|
|
293
|
-
}
|
|
294
325
|
# Use FacetContainer for attribute-based access
|
|
295
326
|
self.facets = FacetContainer(facet_dict)
|
|
296
327
|
|
|
@@ -327,8 +358,11 @@ class Data:
|
|
|
327
358
|
paginate: bool = True,
|
|
328
359
|
) -> pd.DataFrame:
|
|
329
360
|
"""
|
|
330
|
-
Retrieves data from this endpoint
|
|
331
|
-
|
|
361
|
+
Retrieves data from this endpoint with transparent caching.
|
|
362
|
+
|
|
363
|
+
On first call, fetches from the API and persists to a local parquet
|
|
364
|
+
cache. Subsequent calls for the same (or overlapping) date range
|
|
365
|
+
return cached data instantly, only fetching gaps.
|
|
332
366
|
|
|
333
367
|
Args:
|
|
334
368
|
data_columns: List of data column IDs to retrieve. If None, all available columns are fetched.
|
|
@@ -342,6 +376,175 @@ class Data:
|
|
|
342
376
|
output_format: Response format ('json' or 'xml'). Must be 'json' for DataFrame conversion.
|
|
343
377
|
paginate: Whether to automatically paginate through results (default: True).
|
|
344
378
|
|
|
379
|
+
Returns:
|
|
380
|
+
A pandas DataFrame containing the requested data
|
|
381
|
+
"""
|
|
382
|
+
# Cache requires start/end and must be json format
|
|
383
|
+
can_cache = (
|
|
384
|
+
self._cache is not None
|
|
385
|
+
and start is not None
|
|
386
|
+
and end is not None
|
|
387
|
+
and output_format == "json"
|
|
388
|
+
and offset is None
|
|
389
|
+
and length is None
|
|
390
|
+
)
|
|
391
|
+
|
|
392
|
+
if not can_cache:
|
|
393
|
+
return self._fetch(
|
|
394
|
+
data_columns=data_columns,
|
|
395
|
+
facets=facets,
|
|
396
|
+
frequency=frequency,
|
|
397
|
+
start=start,
|
|
398
|
+
end=end,
|
|
399
|
+
sort=sort,
|
|
400
|
+
length=length,
|
|
401
|
+
offset=offset,
|
|
402
|
+
output_format=output_format,
|
|
403
|
+
paginate=paginate,
|
|
404
|
+
)
|
|
405
|
+
|
|
406
|
+
# -- Cache path --
|
|
407
|
+
route = self._route.strip("/")
|
|
408
|
+
freq_key = frequency or "_default_"
|
|
409
|
+
fk = _facets_key(facets)
|
|
410
|
+
|
|
411
|
+
start_ts = pd.Timestamp(start)
|
|
412
|
+
end_ts = pd.Timestamp(end)
|
|
413
|
+
|
|
414
|
+
# 1. Read cached data
|
|
415
|
+
cached = self._cache.read(route, freq_key, fk, start_ts, end_ts)
|
|
416
|
+
logging.debug(
|
|
417
|
+
"Cache read: %d rows for %s/%s/%s [%s → %s]",
|
|
418
|
+
len(cached), route, freq_key, fk, start, end,
|
|
419
|
+
)
|
|
420
|
+
|
|
421
|
+
# 2. Find gaps
|
|
422
|
+
gaps = self._cache.find_gaps(cached, start_ts, end_ts)
|
|
423
|
+
|
|
424
|
+
if not gaps:
|
|
425
|
+
logging.info("Cache hit — no gaps for %s/%s/%s", route, freq_key, fk)
|
|
426
|
+
df = cached
|
|
427
|
+
else:
|
|
428
|
+
logging.info(
|
|
429
|
+
"Cache gaps: %s — fetching %d range(s)",
|
|
430
|
+
[(str(g.start), str(g.end)) for g in gaps],
|
|
431
|
+
len(gaps),
|
|
432
|
+
)
|
|
433
|
+
# 3. Fetch each gap (always fetch ALL columns for cache reuse)
|
|
434
|
+
fetched_parts = []
|
|
435
|
+
for gap in gaps:
|
|
436
|
+
# Format gap dates to match the endpoint's frequency
|
|
437
|
+
# Monthly endpoints expect YYYY-MM, daily expects YYYY-MM-DD, etc.
|
|
438
|
+
gap_start = self._format_gap_date(gap.start, frequency)
|
|
439
|
+
gap_end = self._format_gap_date(gap.end, frequency)
|
|
440
|
+
gap_df = self._fetch(
|
|
441
|
+
data_columns=None, # all columns → maximise cache reuse
|
|
442
|
+
facets=facets,
|
|
443
|
+
frequency=frequency,
|
|
444
|
+
start=gap_start,
|
|
445
|
+
end=gap_end,
|
|
446
|
+
sort=sort,
|
|
447
|
+
paginate=paginate,
|
|
448
|
+
)
|
|
449
|
+
if not gap_df.empty:
|
|
450
|
+
fetched_parts.append(gap_df)
|
|
451
|
+
|
|
452
|
+
# 4. Concat cached + new, deduplicate, sort
|
|
453
|
+
all_parts = [cached] + fetched_parts if not cached.empty else fetched_parts
|
|
454
|
+
if all_parts:
|
|
455
|
+
df = pd.concat(all_parts, ignore_index=False)
|
|
456
|
+
# Ensure period is the index for dedup/sort
|
|
457
|
+
if "period" in df.columns and not isinstance(df.index, pd.DatetimeIndex):
|
|
458
|
+
df = df.set_index("period")
|
|
459
|
+
df.index.name = "period"
|
|
460
|
+
# For long-format data: deduplicate keeping latest fetch
|
|
461
|
+
# Group by index + all non-numeric columns to identify unique rows
|
|
462
|
+
facet_cols = [c for c in df.columns if c not in list(self.data_columns.keys())]
|
|
463
|
+
if facet_cols:
|
|
464
|
+
df = df.reset_index()
|
|
465
|
+
df = df.drop_duplicates(
|
|
466
|
+
subset=["period"] + facet_cols,
|
|
467
|
+
keep="last",
|
|
468
|
+
)
|
|
469
|
+
df = df.set_index("period")
|
|
470
|
+
else:
|
|
471
|
+
df = df[~df.index.duplicated(keep="last")]
|
|
472
|
+
df = df.sort_index()
|
|
473
|
+
else:
|
|
474
|
+
df = pd.DataFrame()
|
|
475
|
+
|
|
476
|
+
# 5. Persist to cache
|
|
477
|
+
if not df.empty:
|
|
478
|
+
self._cache.write(route, freq_key, fk, df)
|
|
479
|
+
|
|
480
|
+
# 6. Filter to requested data_columns before returning
|
|
481
|
+
if data_columns and not df.empty:
|
|
482
|
+
existing = [c for c in data_columns if c in df.columns]
|
|
483
|
+
# Always keep facet columns alongside requested data columns
|
|
484
|
+
facet_cols = [c for c in df.columns if c not in list(self.data_columns.keys())]
|
|
485
|
+
keep = list(dict.fromkeys(facet_cols + existing)) # preserve order, dedupe
|
|
486
|
+
df = df[keep]
|
|
487
|
+
|
|
488
|
+
# Reset index so 'period' is a regular column (matches _fetch output)
|
|
489
|
+
if isinstance(df.index, pd.DatetimeIndex) and df.index.name == "period":
|
|
490
|
+
df = df.reset_index()
|
|
491
|
+
|
|
492
|
+
# Slice to requested range (handle tz-aware vs tz-naive)
|
|
493
|
+
if not df.empty and "period" in df.columns:
|
|
494
|
+
period_tz = getattr(df["period"].dt, "tz", None)
|
|
495
|
+
if period_tz is not None:
|
|
496
|
+
start_ts = start_ts.tz_localize(period_tz) if start_ts.tzinfo is None else start_ts.tz_convert(period_tz)
|
|
497
|
+
end_ts = end_ts.tz_localize(period_tz) if end_ts.tzinfo is None else end_ts.tz_convert(period_tz)
|
|
498
|
+
df = df[(df["period"] >= start_ts) & (df["period"] <= end_ts + pd.Timedelta(days=1))]
|
|
499
|
+
|
|
500
|
+
self.dataframe = df
|
|
501
|
+
return df
|
|
502
|
+
|
|
503
|
+
@staticmethod
def _format_gap_date(ts: "pd.Timestamp", frequency: Optional[str] = None) -> str:
    """Format a gap timestamp to match the API frequency.

    The frequency id is matched case-insensitively by substring:

    * ``annual`` / ``yearly`` -> ``YYYY``
    * ``quarter``             -> ``YYYY-Qn`` (EIA quarterly period notation)
    * ``month``               -> ``YYYY-MM``
    * anything else (daily, hourly, unknown, ``None``) -> ``YYYY-MM-DD``

    Args:
        ts: Timestamp marking one end of the gap to fetch.
        frequency: Frequency id of the request, or ``None``.

    Returns:
        The timestamp rendered as a period string for that frequency.
    """
    freq_lower = (frequency or "").lower()
    if "annual" in freq_lower or "yearly" in freq_lower:
        return str(ts.year)
    if "quarter" in freq_lower:
        # Previously fell through to a full date string for quarterly data.
        return f"{ts.year}-Q{ts.quarter}"
    if "month" in freq_lower:
        return ts.strftime("%Y-%m")
    # For daily, hourly, or unknown: use date string
    return str(ts.date())
|
|
518
|
+
|
|
519
|
+
def _fetch(
|
|
520
|
+
self,
|
|
521
|
+
data_columns: Optional[List[str]] = None,
|
|
522
|
+
facets: Optional[Dict[str, Union[str, List[str]]]] = None,
|
|
523
|
+
frequency: Optional[str] = None,
|
|
524
|
+
start: Optional[str] = None,
|
|
525
|
+
end: Optional[str] = None,
|
|
526
|
+
sort: Optional[List[Dict[str, str]]] = None,
|
|
527
|
+
length: Optional[int] = None,
|
|
528
|
+
offset: Optional[int] = None,
|
|
529
|
+
output_format: Optional[Literal["json", "xml"]] = "json",
|
|
530
|
+
paginate: bool = True,
|
|
531
|
+
) -> pd.DataFrame:
|
|
532
|
+
"""
|
|
533
|
+
Fetches data from the EIA API (no caching). This is the original
|
|
534
|
+
get() logic extracted verbatim.
|
|
535
|
+
|
|
536
|
+
Args:
|
|
537
|
+
data_columns: List of data column IDs to retrieve. If None, all available columns are fetched.
|
|
538
|
+
facets: Dictionary of facet filters.
|
|
539
|
+
frequency: Data frequency ID (e.g., 'daily', 'monthly')
|
|
540
|
+
start: Start date/period
|
|
541
|
+
end: End date/period
|
|
542
|
+
sort: List of sort specifications
|
|
543
|
+
length: Maximum number of rows to return *if paginate=False*.
|
|
544
|
+
offset: Starting row offset for the first request.
|
|
545
|
+
output_format: Response format ('json' or 'xml').
|
|
546
|
+
paginate: Whether to automatically paginate through results.
|
|
547
|
+
|
|
345
548
|
Returns:
|
|
346
549
|
A pandas DataFrame containing the requested data
|
|
347
550
|
"""
|
|
@@ -349,6 +552,11 @@ class Data:
|
|
|
349
552
|
column_ids_to_fetch = (
|
|
350
553
|
data_columns if data_columns is not None else list(self.data_columns.keys())
|
|
351
554
|
)
|
|
555
|
+
# Some endpoints report no data columns in metadata but DO return
|
|
556
|
+
# 'value' when explicitly requested. Fall back to ['value'] so the
|
|
557
|
+
# response includes actual data instead of metadata-only rows.
|
|
558
|
+
if not column_ids_to_fetch:
|
|
559
|
+
column_ids_to_fetch = ["value"]
|
|
352
560
|
|
|
353
561
|
# Ensure output is json if we want a DataFrame
|
|
354
562
|
if output_format != "json":
|
|
@@ -497,7 +705,8 @@ class Route:
|
|
|
497
705
|
|
|
498
706
|
# If response doesn't contain routes, it means this endpoint has data
|
|
499
707
|
if "routes" not in response_data:
|
|
500
|
-
|
|
708
|
+
cache = getattr(self._client, "_cache", None)
|
|
709
|
+
self._data = Data(self._client, self._slug, response_data, cache=cache)
|
|
501
710
|
|
|
502
711
|
def __getattr__(self, name: str) -> Union["Route", Any]:
|
|
503
712
|
"""
|
|
@@ -597,7 +806,13 @@ class EIAClient:
|
|
|
597
806
|
)
|
|
598
807
|
|
|
599
808
|
def __init__(
|
|
600
|
-
self,
|
|
809
|
+
self,
|
|
810
|
+
api_key: Optional[str] = None,
|
|
811
|
+
session: Optional[requests.Session] = None,
|
|
812
|
+
*,
|
|
813
|
+
cache: bool = True,
|
|
814
|
+
cache_dir: Optional[Union[str, Path]] = None,
|
|
815
|
+
cache_recent_ttl: int = 48,
|
|
601
816
|
):
|
|
602
817
|
"""
|
|
603
818
|
Initializes the EIAClient.
|
|
@@ -605,6 +820,9 @@ class EIAClient:
|
|
|
605
820
|
Args:
|
|
606
821
|
api_key: Your EIA API key. If None, it will try to read from the EIA_API_KEY environment variable.
|
|
607
822
|
session: An optional requests.Session object for persistent connections.
|
|
823
|
+
cache: Enable/disable local parquet caching (default: True).
|
|
824
|
+
cache_dir: Custom cache directory. Defaults to ~/.cache/eia.
|
|
825
|
+
cache_recent_ttl: Hours before recent data is re-fetched (default: 48).
|
|
608
826
|
"""
|
|
609
827
|
resolved_api_key = api_key or os.environ.get("EIA_API_KEY")
|
|
610
828
|
if not resolved_api_key:
|
|
@@ -614,7 +832,21 @@ class EIAClient:
|
|
|
614
832
|
self.api_key = resolved_api_key
|
|
615
833
|
self.session = session or requests.Session()
|
|
616
834
|
self.session.headers.update({"User-Agent": "Python EIAClient"})
|
|
617
|
-
|
|
835
|
+
|
|
836
|
+
# Cache setup
|
|
837
|
+
config = CacheConfig(
|
|
838
|
+
enabled=cache,
|
|
839
|
+
cache_dir=Path(cache_dir) if cache_dir else CacheConfig().cache_dir,
|
|
840
|
+
recent_ttl_hours=cache_recent_ttl,
|
|
841
|
+
)
|
|
842
|
+
self._cache: Optional[CacheStore] = CacheStore(config) if config.enabled else None
|
|
843
|
+
|
|
844
|
+
# Catalog manager (lazy-loads YAML on first access)
|
|
845
|
+
from eia.catalog_manager import EIACatalogManager
|
|
846
|
+
|
|
847
|
+
self.catalog = EIACatalogManager(self)
|
|
848
|
+
|
|
849
|
+
logging.info("EIAClient initialized (cache=%s).", "enabled" if cache else "disabled")
|
|
618
850
|
|
|
619
851
|
def route(self, slug: str) -> Route:
|
|
620
852
|
"""
|
|
@@ -812,11 +1044,59 @@ class EIAClient:
|
|
|
812
1044
|
|
|
813
1045
|
return response_data.get("response", {})
|
|
814
1046
|
|
|
1047
|
+
@staticmethod
def _route_info_to_metadata(route_info: "RouteInfo") -> Dict[str, Any]:
    """Reconstruct API-style metadata dict from a RouteInfo with cached schema.

    Args:
        route_info: Catalog entry whose scalar fields, ``frequencies``,
            ``facets`` and ``data_columns`` are copied into the result.

    Returns:
        A dict with the keys ``id``, ``name``, ``description``,
        ``defaultFrequency``, ``startPeriod``, ``endPeriod``,
        ``defaultDateFormat``, ``frequency``, ``facets``, ``_facet_values``
        and ``data`` (in that order).
    """
    info = route_info
    return {
        # The last path segment doubles as the endpoint id.
        "id": info.route.split("/")[-1],
        "name": info.name,
        "description": info.description,
        "defaultFrequency": info.frequency,
        "startPeriod": info.start_period,
        "endPeriod": info.end_period,
        "defaultDateFormat": info.default_date_format,
        # Frequency list, one dict per catalogued frequency.
        "frequency": [
            {
                "id": freq.id,
                "description": freq.description,
                "query": freq.query,
                "format": freq.format,
            }
            for freq in info.frequencies
        ],
        # Facet list (id + description only).
        "facets": [
            {"id": hint.id, "description": hint.description}
            for hint in info.facets
        ],
        # Cached facet values, only for facets that actually carry some.
        "_facet_values": {
            hint.id: hint.values
            for hint in info.facets
            if hint.values
        },
        # Data columns keyed by column id.
        "data": {
            column.id: {
                "units": column.units,
                "aggregation-method": column.aggregation_method,
                "alias": column.alias,
            }
            for column in info.data_columns
        },
    }
|
|
1093
|
+
|
|
815
1094
|
def get_data_endpoint(self, route_string: str) -> Data:
|
|
816
1095
|
"""
|
|
817
1096
|
Directly retrieves the Data object for a known, complete data route string.
|
|
818
1097
|
|
|
819
|
-
|
|
1098
|
+
If the route exists in the catalog with cached API schema, uses that
|
|
1099
|
+
to avoid an API metadata call. Otherwise falls back to fetching from the API.
|
|
820
1100
|
|
|
821
1101
|
Args:
|
|
822
1102
|
route_string: The full route path to the data endpoint
|
|
@@ -829,9 +1109,19 @@ class EIAClient:
|
|
|
829
1109
|
EIAError: If the route does not exist or does not contain data.
|
|
830
1110
|
"""
|
|
831
1111
|
route_string = route_string.strip("/")
|
|
832
|
-
logging.info(f"Directly accessing data endpoint metadata for: {route_string}")
|
|
833
1112
|
|
|
834
|
-
#
|
|
1113
|
+
# Try catalog first — skip API call if schema is cached
|
|
1114
|
+
try:
|
|
1115
|
+
route_info = self.catalog.get_route(route_string)
|
|
1116
|
+
if route_info.data_columns: # has cached schema
|
|
1117
|
+
logging.info(f"Using cached catalog schema for: {route_string}")
|
|
1118
|
+
metadata = self._route_info_to_metadata(route_info)
|
|
1119
|
+
return Data(self, route_string, metadata, cache=self._cache)
|
|
1120
|
+
except KeyError:
|
|
1121
|
+
pass
|
|
1122
|
+
|
|
1123
|
+
# Fallback: hit API as before
|
|
1124
|
+
logging.info(f"Directly accessing data endpoint metadata for: {route_string}")
|
|
835
1125
|
metadata = self.get_metadata(route_string)
|
|
836
1126
|
|
|
837
1127
|
# Check if the route actually contains data (basic check)
|
|
@@ -857,7 +1147,7 @@ class EIAClient:
|
|
|
857
1147
|
)
|
|
858
1148
|
|
|
859
1149
|
# Instantiate and return the Data object
|
|
860
|
-
return Data(self, route_string, metadata)
|
|
1150
|
+
return Data(self, route_string, metadata, cache=self._cache)
|
|
861
1151
|
|
|
862
1152
|
def get_data_from_url(self, url: str) -> Dict[str, Any]:
|
|
863
1153
|
"""
|
|
@@ -1,12 +1,14 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: python-eia
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: A Python client for the U.S. Energy Information Administration (EIA) API v2
|
|
5
5
|
Project-URL: Homepage, https://github.com/datons/python-eia
|
|
6
|
-
Project-URL: Repository, https://github.com/datons/python-eia
|
|
6
|
+
Project-URL: Repository, https://github.com/datons/python-eia
|
|
7
7
|
Project-URL: Issues, https://github.com/datons/python-eia/issues
|
|
8
|
-
|
|
8
|
+
Project-URL: Changelog, https://github.com/datons/python-eia/releases
|
|
9
|
+
Author-email: Jesús López <jesus.lopez@datons.com>
|
|
9
10
|
License-Expression: MIT
|
|
11
|
+
Keywords: api,eia,electricity,energy,gas,oil,usa
|
|
10
12
|
Classifier: Development Status :: 3 - Alpha
|
|
11
13
|
Classifier: Intended Audience :: Developers
|
|
12
14
|
Classifier: Intended Audience :: Science/Research
|
|
@@ -20,6 +22,8 @@ Classifier: Programming Language :: Python :: 3.13
|
|
|
20
22
|
Classifier: Topic :: Scientific/Engineering
|
|
21
23
|
Requires-Python: >=3.10
|
|
22
24
|
Requires-Dist: pandas>=2.0
|
|
25
|
+
Requires-Dist: pyarrow>=14.0
|
|
26
|
+
Requires-Dist: pyyaml>=6.0
|
|
23
27
|
Requires-Dist: requests>=2.31.0
|
|
24
28
|
Requires-Dist: rich>=13.0
|
|
25
29
|
Requires-Dist: typer>=0.9
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
eia/__init__.py,sha256=nRNJ61aw4Y8PwNPAeyd27-cLdCPui4px0NntDFVdhvk,302
|
|
2
|
+
eia/cache.py,sha256=R8TuAeBRLQW2IjfOwxRIVOuRpxhTsJ-utlkrwiBZLGo,13124
|
|
3
|
+
eia/catalog.py,sha256=ISYEwTqcAOGGdSdGB0xC0mhdZj5bCO2Rovf9tNQvuHw,3910
|
|
4
|
+
eia/catalog_manager.py,sha256=C6vNvKd3mfXj0EbrFaP6Y2nPd3x5TadiTtllIuAv6Fc,17446
|
|
5
|
+
eia/client.py,sha256=C7izyxHkG62ZOEz4xIB8y-6ZmY3lg0-4eRay11-7SZM,44980
|
|
6
|
+
eia/.agents/skills/eia/SKILL.md,sha256=UhGQCEnc38yCrog2NTkJGgiJokHX9skR38BXSkUe900,6294
|
|
7
|
+
eia/cli/__init__.py,sha256=KCvyQU45hrJdWk3cra4mSHhW-u_5qFeQDfeE5KkDdN4,243
|
|
8
|
+
eia/cli/_output.py,sha256=mhD1j5TyuHtw5BcjwUvHZ8-a87xpBPcEKtoPqMk0-pA,2618
|
|
9
|
+
eia/cli/app.py,sha256=0Gch8mErIdlkkDuopoQ0XWrqI2m7WU_2AvgrcXoMC5g,1843
|
|
10
|
+
eia/cli/cache_cmd.py,sha256=QUK-pPWVE86_6kgIjax0C0XivZDbazOb4R1XOmmhHBU,1563
|
|
11
|
+
eia/cli/catalog_cmd.py,sha256=2piq4i46F4gq0jh51uEVi81iqXRm3lQ6P8_AdDSvodw,6533
|
|
12
|
+
eia/cli/config.py,sha256=V_1wiXixPPIgVivIUDuAIS9aL9JcPu0qTshlVw9XDxQ,1657
|
|
13
|
+
eia/cli/config_cmd.py,sha256=rUdnM-G3UHSoiBqfqxHXrRDdf7TFjAD6t8QLkuTJ0Ao,917
|
|
14
|
+
eia/cli/exec_cmd.py,sha256=DkP8Bcm9RNjdC-D1E6k0skRJmGH1uRy8ALpyXFdr488,2776
|
|
15
|
+
eia/cli/facets_cmd.py,sha256=urpbAtqsZKnNn-Hiqf8hR8tAyZCd9Hm7xJmSH3YONuo,2208
|
|
16
|
+
eia/cli/get_cmd.py,sha256=MtHQ4yM_mHqAf7YdeYZUqL7lfd6w3-tSVMQPSHIVM94,3036
|
|
17
|
+
eia/cli/meta_cmd.py,sha256=p8JObyCMG-VnPnMjKVYpVGTJh38SU_eQ9zp2x7ET6LA,3112
|
|
18
|
+
eia/cli/routes_cmd.py,sha256=FuK90wjFklUb5KtUWDoufEYjc2b5KhE-KpsPWXc4iFM,2427
|
|
19
|
+
python_eia-0.3.0.dist-info/METADATA,sha256=dvmN68A5trEUhSvHb58kXWS8MCWq4rZ0MoYz1ofqjn0,2257
|
|
20
|
+
python_eia-0.3.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
21
|
+
python_eia-0.3.0.dist-info/entry_points.txt,sha256=oi4FTIzWeuoYDkpi_yywwRlEMBMLceEVJQKKD7zrRO0,36
|
|
22
|
+
python_eia-0.3.0.dist-info/RECORD,,
|