python-eia 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
eia/cli/catalog_cmd.py ADDED
@@ -0,0 +1,186 @@
1
+ """CLI command: browse the built-in data catalog and recipes."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Optional
6
+
7
+ import typer
8
+ from rich.console import Console
9
+ from rich.table import Table
10
+ from rich.panel import Panel
11
+
12
+ from eia.catalog import ROUTES, RECIPES
13
+
14
+ catalog_app = typer.Typer(no_args_is_help=True)
15
+ console = Console()
16
+
17
+
18
@catalog_app.command("refresh")
def catalog_refresh(
    dry_run: bool = typer.Option(True, "--dry-run/--apply", help="Show what would change without modifying files"),
):
    """Discover new routes from the live EIA API and compare with the catalog."""
    # NOTE: the docstring above is left verbatim on purpose -- Typer surfaces a
    # command's docstring as its --help text, so it is user-facing output.
    #
    # Parameters:
    #   dry_run: forwarded unchanged to EIACatalogManager.refresh(); the CLI
    #            exposes it as the --dry-run/--apply flag pair (default: dry run).
    #
    # Output: one section per non-empty result list (errors, updated, added,
    # removed), an optional "up to date" line, and a trailing unchanged count.

    # Imported at call time rather than module import time
    # (presumably to avoid an import cycle with eia.cli.app -- confirm).
    from eia.cli.app import get_client
    from eia.catalog_manager import EIACatalogManager

    client = get_client()
    mgr = EIACatalogManager(client)
    result = mgr.refresh(dry_run=dry_run)

    if result.errors:
        for err in result.errors:
            console.print(f"[red]Error:[/red] {err}")

    if result.updated:
        console.print(f"\n[blue]Updated schema ({len(result.updated)}):[/blue]")
        for r in result.updated:
            console.print(f" ~ {r}")

    if result.added:
        console.print(f"\n[green]New routes ({len(result.added)}):[/green]")
        for r in result.added:
            console.print(f" + {r}")

    if result.removed:
        console.print(f"\n[yellow]Routes in catalog but not discovered ({len(result.removed)}):[/yellow]")
        for r in result.removed:
            console.print(f" - {r}")

    # Only claim success when the refresh itself reported no errors: a run that
    # failed part-way also has nothing "updated" or "added", and printing a green
    # "up to date" line right after the error lines would contradict them.
    nothing_changed = not result.updated and not result.added
    if nothing_changed and not result.errors:
        console.print("[green]All routes up to date.[/green]")

    console.print(f"\n[dim]Unchanged: {len(result.unchanged)} routes[/dim]")
53
+
54
+
55
@catalog_app.command("routes")
def catalog_routes(
    query: Optional[str] = typer.Argument(None, help="Filter routes by keyword"),
):
    """List all cataloged data routes with descriptions."""
    # NOTE: docstring kept verbatim -- Typer shows it as the command's help text.
    #
    # Prints a single table of every entry in ROUTES (sorted by route path).
    # When `query` is given, only entries whose path, name or description
    # contains it (case-insensitively) are listed.

    # Lower-case the keyword once; None means "no filtering".
    needle = query.lower() if query else None

    def _is_listed(path, entry) -> bool:
        """Return True when the route passes the optional keyword filter."""
        if needle is None:
            return True
        # Fields are checked lazily, in this order, stopping at the first hit.
        candidates = (
            lambda: path,
            lambda: entry.name,
            lambda: entry.description,
        )
        return any(needle in read().lower() for read in candidates)

    listing = Table(title="EIA Data Routes", show_header=True, padding=(0, 1))
    listing.add_column("Route", style="cyan")
    listing.add_column("Name")
    listing.add_column("Frequency", style="green")

    for path, entry in sorted(ROUTES.items()):
        if _is_listed(path, entry):
            listing.add_row(path, entry.name, entry.frequency)

    console.print(listing)
73
+
74
+
75
@catalog_app.command("show")
def catalog_show(
    route: str = typer.Argument(..., help="Route path (e.g. natural-gas/move/expc)"),
):
    """Show detailed info for a specific route, including facets and hints."""
    # NOTE: docstring kept verbatim -- Typer shows it as the command's help text.
    #
    # Looks the route up in the catalog and prints, in order: a header, the
    # optional note, period / date-format / refresh metadata, then tables for
    # data columns, frequencies and each facet's common values.
    # Exits with status 1 when get_route() raises KeyError.
    from eia.catalog import get_route

    def _detail_table(lead_header, *other_headers):
        """Build an untitled table whose first column is green, the rest plain."""
        built = Table(show_header=True, padding=(0, 1))
        built.add_column(lead_header, style="green")
        for header in other_headers:
            built.add_column(header)
        return built

    try:
        info = get_route(route)
    except KeyError as lookup_error:
        console.print(f"[red]{lookup_error}[/red]")
        raise typer.Exit(1)

    # -- Header ---------------------------------------------------------
    console.print(f"\n[bold]{info.name}[/bold]")
    console.print(f" Route: {info.route}")
    console.print(f" {info.description}")
    console.print(f" Default frequency: {info.frequency}")

    if info.notes:
        console.print(f"\n [yellow]Note:[/yellow] {info.notes}")

    # -- Optional metadata (each line shown only when its value is set) --
    has_period = bool(info.start_period or info.end_period)
    if has_period:
        console.print(f"\n Period: {info.start_period} → {info.end_period}")
    if info.default_date_format:
        console.print(f" Date format: {info.default_date_format}")
    if info.last_refreshed:
        console.print(f" [dim]Last refreshed: {info.last_refreshed}[/dim]")

    # -- Data columns -----------------------------------------------------
    if info.data_columns:
        console.print("\n [bold]Data Columns:[/bold]")
        columns_view = _detail_table("Column", "Alias", "Units", "Aggregation")
        for column in info.data_columns:
            columns_view.add_row(column.id, column.alias, column.units, column.aggregation_method)
        console.print(columns_view)

    # -- Frequencies ------------------------------------------------------
    if info.frequencies:
        console.print("\n [bold]Frequencies:[/bold]")
        frequencies_view = _detail_table("ID", "Description", "Query", "Format")
        for option in info.frequencies:
            frequencies_view.add_row(option.id, option.description, option.query, option.format)
        console.print(frequencies_view)

    # -- Facets: always a heading, plus a value table when values are known --
    for facet in info.facets:
        console.print(f"\n [cyan]Facet: {facet.id}[/cyan] — {facet.description}")
        if not facet.common_values:
            continue
        values_view = _detail_table("Value", "Description")
        for value_id, value_label in sorted(facet.common_values.items()):
            values_view.add_row(value_id, value_label)
        console.print(values_view)

    # Trailing blank line separates the output from the next shell prompt.
    console.print()
136
+
137
+
138
@catalog_app.command("recipes")
def catalog_recipes(
    query: Optional[str] = typer.Argument(None, help="Filter recipes by keyword"),
):
    """List pre-configured query recipes for common use cases."""
    # NOTE: docstring kept verbatim -- Typer shows it as the command's help text.
    #
    # Prints a single table of every entry in RECIPES (sorted by recipe ID).
    # When `query` is given, only recipes whose ID, name or description
    # contains it (case-insensitively) are listed.

    # Lower-case the keyword once; None means "no filtering".
    needle = query.lower() if query else None

    def _is_listed(key, entry) -> bool:
        """Return True when the recipe passes the optional keyword filter."""
        if needle is None:
            return True
        # Fields are checked lazily, in this order, stopping at the first hit.
        candidates = (
            lambda: key,
            lambda: entry.name,
            lambda: entry.description,
        )
        return any(needle in read().lower() for read in candidates)

    listing = Table(title="EIA Recipes", show_header=True, padding=(0, 1))
    listing.add_column("ID", style="cyan")
    listing.add_column("Name")
    listing.add_column("Route", style="green")

    for key, entry in sorted(RECIPES.items()):
        if _is_listed(key, entry):
            listing.add_row(key, entry.name, entry.route)

    console.print(listing)
156
+
157
+
158
@catalog_app.command("recipe")
def catalog_recipe(
    recipe_id: str = typer.Argument(..., help="Recipe ID (e.g. lng-exports-europe)"),
):
    """Show detailed info for a specific recipe, including code examples."""
    # NOTE: docstring kept verbatim -- Typer shows it as the command's help text.
    #
    # Looks the recipe up in the catalog and prints its summary, the optional
    # note, and the CLI / Python example panels when present.
    # Exits with status 1 when get_recipe() raises KeyError.
    from rich.text import Text

    from eia.catalog import get_recipe

    try:
        recipe = get_recipe(recipe_id)
    except KeyError as e:
        console.print(f"[red]{e}[/red]")
        raise typer.Exit(1)

    console.print(f"\n[bold]{recipe.name}[/bold]")
    console.print(f" {recipe.description}")
    console.print(f" Route: {recipe.route}")
    console.print(f" Frequency: {recipe.frequency}")
    console.print(f" Facets: {recipe.facets}")

    if recipe.notes:
        console.print(f"\n [yellow]Note:[/yellow] {recipe.notes}")

    # The examples are source code, not Rich markup. A plain str handed to
    # Panel is parsed as console markup, so bracketed code such as `df[cols]`
    # would be swallowed as a style tag (or raise on something like `[/x]`).
    # Wrapping in Text renders the example literally.
    if recipe.cli_example:
        console.print(Panel(Text(recipe.cli_example), title="CLI Example", border_style="green"))

    if recipe.python_example:
        console.print(Panel(Text(recipe.python_example), title="Python Example", border_style="blue"))

    # Trailing blank line separates the output from the next shell prompt.
    console.print()
eia/client.py CHANGED
@@ -2,8 +2,10 @@ import requests
2
2
  import logging
3
3
  import pandas as pd
4
4
  import re
5
+ from pathlib import Path
5
6
  from urllib.parse import urlparse, parse_qs
6
7
  from typing import (
8
+ TYPE_CHECKING,
7
9
  List,
8
10
  Dict,
9
11
  Optional,
@@ -16,9 +18,14 @@ from typing import (
16
18
  Protocol,
17
19
  runtime_checkable,
18
20
  )
21
+
22
+ if TYPE_CHECKING:
23
+ from eia.catalog import RouteInfo
19
24
  import os
20
25
  from dataclasses import dataclass, field
21
26
 
27
+ from eia.cache import CacheConfig, CacheStore, _facets_key
28
+
22
29
  # Configure logging
23
30
  logging.basicConfig(
24
31
  level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
@@ -87,9 +94,16 @@ class FacetInfo:
87
94
  # Store the route slug for potential API calls
88
95
  _route_slug: Optional[str] = field(default=None, repr=False)
89
96
  _client: Optional["EIAClient"] = field(default=None, repr=False)
97
+ _cached_values: Optional[List["FacetValue"]] = field(default=None, repr=False)
90
98
 
91
99
  def get_values(self) -> List[FacetValue]:
92
- """Fetches and returns all possible values for this facet."""
100
+ """Returns all possible values for this facet.
101
+
102
+ Uses cached catalog values when available, otherwise fetches from the API.
103
+ """
104
+ if self._cached_values is not None:
105
+ return self._cached_values
106
+
93
107
  if not self._client or not self._route_slug:
94
108
  raise ValueError("Client and route slug must be set to fetch facet values.")
95
109
 
@@ -258,10 +272,17 @@ class FacetContainer(BaseFacetContainer):
258
272
  class Data:
259
273
  """Represents a data endpoint in the EIA API with its metadata and query capabilities."""
260
274
 
261
- def __init__(self, client: "EIAClient", route: str, metadata: Dict[str, Any]):
275
+ def __init__(
276
+ self,
277
+ client: "EIAClient",
278
+ route: str,
279
+ metadata: Dict[str, Any],
280
+ cache: Optional[CacheStore] = None,
281
+ ):
262
282
  self._client = client
263
283
  self._route = route
264
284
  self._metadata = metadata
285
+ self._cache = cache
265
286
  self.id = metadata.get("id", route.split("/")[-1])
266
287
  self.name = metadata.get("name", "")
267
288
  self.description = metadata.get("description", "")
@@ -281,16 +302,26 @@ class Data:
281
302
  if isinstance(freq, dict) and "id" in freq
282
303
  ]
283
304
 
284
- facet_dict = {
285
- facet_data["id"]: FacetInfo(
286
- id=facet_data["id"],
305
+ cached_facet_values = metadata.get("_facet_values", {})
306
+ facet_dict = {}
307
+ for facet_data in metadata.get("facets", []):
308
+ if not isinstance(facet_data, dict) or "id" not in facet_data:
309
+ continue
310
+ fid = facet_data["id"]
311
+ # Build cached FacetValue list from catalog values if available
312
+ cached_values = None
313
+ if fid in cached_facet_values:
314
+ cached_values = [
315
+ FacetValue(id=vid, name=vname)
316
+ for vid, vname in cached_facet_values[fid].items()
317
+ ]
318
+ facet_dict[fid] = FacetInfo(
319
+ id=fid,
287
320
  description=facet_data.get("description"),
288
- _route_slug=route, # Pass route slug
289
- _client=client, # Pass client instance
321
+ _route_slug=route,
322
+ _client=client,
323
+ _cached_values=cached_values,
290
324
  )
291
- for facet_data in metadata.get("facets", [])
292
- if isinstance(facet_data, dict) and "id" in facet_data
293
- }
294
325
  # Use FacetContainer for attribute-based access
295
326
  self.facets = FacetContainer(facet_dict)
296
327
 
@@ -327,8 +358,11 @@ class Data:
327
358
  paginate: bool = True,
328
359
  ) -> pd.DataFrame:
329
360
  """
330
- Retrieves data from this endpoint, stores it and metadata internally,
331
- and returns the data as a pandas DataFrame. Handles pagination automatically by default.
361
+ Retrieves data from this endpoint with transparent caching.
362
+
363
+ On first call, fetches from the API and persists to a local parquet
364
+ cache. Subsequent calls for the same (or overlapping) date range
365
+ return cached data instantly, only fetching gaps.
332
366
 
333
367
  Args:
334
368
  data_columns: List of data column IDs to retrieve. If None, all available columns are fetched.
@@ -342,6 +376,175 @@ class Data:
342
376
  output_format: Response format ('json' or 'xml'). Must be 'json' for DataFrame conversion.
343
377
  paginate: Whether to automatically paginate through results (default: True).
344
378
 
379
+ Returns:
380
+ A pandas DataFrame containing the requested data
381
+ """
382
+ # Cache requires start/end and must be json format
383
+ can_cache = (
384
+ self._cache is not None
385
+ and start is not None
386
+ and end is not None
387
+ and output_format == "json"
388
+ and offset is None
389
+ and length is None
390
+ )
391
+
392
+ if not can_cache:
393
+ return self._fetch(
394
+ data_columns=data_columns,
395
+ facets=facets,
396
+ frequency=frequency,
397
+ start=start,
398
+ end=end,
399
+ sort=sort,
400
+ length=length,
401
+ offset=offset,
402
+ output_format=output_format,
403
+ paginate=paginate,
404
+ )
405
+
406
+ # -- Cache path --
407
+ route = self._route.strip("/")
408
+ freq_key = frequency or "_default_"
409
+ fk = _facets_key(facets)
410
+
411
+ start_ts = pd.Timestamp(start)
412
+ end_ts = pd.Timestamp(end)
413
+
414
+ # 1. Read cached data
415
+ cached = self._cache.read(route, freq_key, fk, start_ts, end_ts)
416
+ logging.debug(
417
+ "Cache read: %d rows for %s/%s/%s [%s → %s]",
418
+ len(cached), route, freq_key, fk, start, end,
419
+ )
420
+
421
+ # 2. Find gaps
422
+ gaps = self._cache.find_gaps(cached, start_ts, end_ts)
423
+
424
+ if not gaps:
425
+ logging.info("Cache hit — no gaps for %s/%s/%s", route, freq_key, fk)
426
+ df = cached
427
+ else:
428
+ logging.info(
429
+ "Cache gaps: %s — fetching %d range(s)",
430
+ [(str(g.start), str(g.end)) for g in gaps],
431
+ len(gaps),
432
+ )
433
+ # 3. Fetch each gap (always fetch ALL columns for cache reuse)
434
+ fetched_parts = []
435
+ for gap in gaps:
436
+ # Format gap dates to match the endpoint's frequency
437
+ # Monthly endpoints expect YYYY-MM, daily expects YYYY-MM-DD, etc.
438
+ gap_start = self._format_gap_date(gap.start, frequency)
439
+ gap_end = self._format_gap_date(gap.end, frequency)
440
+ gap_df = self._fetch(
441
+ data_columns=None, # all columns → maximise cache reuse
442
+ facets=facets,
443
+ frequency=frequency,
444
+ start=gap_start,
445
+ end=gap_end,
446
+ sort=sort,
447
+ paginate=paginate,
448
+ )
449
+ if not gap_df.empty:
450
+ fetched_parts.append(gap_df)
451
+
452
+ # 4. Concat cached + new, deduplicate, sort
453
+ all_parts = [cached] + fetched_parts if not cached.empty else fetched_parts
454
+ if all_parts:
455
+ df = pd.concat(all_parts, ignore_index=False)
456
+ # Ensure period is the index for dedup/sort
457
+ if "period" in df.columns and not isinstance(df.index, pd.DatetimeIndex):
458
+ df = df.set_index("period")
459
+ df.index.name = "period"
460
+ # For long-format data: deduplicate keeping latest fetch
461
+ # Group by index + all non-numeric columns to identify unique rows
462
+ facet_cols = [c for c in df.columns if c not in list(self.data_columns.keys())]
463
+ if facet_cols:
464
+ df = df.reset_index()
465
+ df = df.drop_duplicates(
466
+ subset=["period"] + facet_cols,
467
+ keep="last",
468
+ )
469
+ df = df.set_index("period")
470
+ else:
471
+ df = df[~df.index.duplicated(keep="last")]
472
+ df = df.sort_index()
473
+ else:
474
+ df = pd.DataFrame()
475
+
476
+ # 5. Persist to cache
477
+ if not df.empty:
478
+ self._cache.write(route, freq_key, fk, df)
479
+
480
+ # 6. Filter to requested data_columns before returning
481
+ if data_columns and not df.empty:
482
+ existing = [c for c in data_columns if c in df.columns]
483
+ # Always keep facet columns alongside requested data columns
484
+ facet_cols = [c for c in df.columns if c not in list(self.data_columns.keys())]
485
+ keep = list(dict.fromkeys(facet_cols + existing)) # preserve order, dedupe
486
+ df = df[keep]
487
+
488
+ # Reset index so 'period' is a regular column (matches _fetch output)
489
+ if isinstance(df.index, pd.DatetimeIndex) and df.index.name == "period":
490
+ df = df.reset_index()
491
+
492
+ # Slice to requested range (handle tz-aware vs tz-naive)
493
+ if not df.empty and "period" in df.columns:
494
+ period_tz = getattr(df["period"].dt, "tz", None)
495
+ if period_tz is not None:
496
+ start_ts = start_ts.tz_localize(period_tz) if start_ts.tzinfo is None else start_ts.tz_convert(period_tz)
497
+ end_ts = end_ts.tz_localize(period_tz) if end_ts.tzinfo is None else end_ts.tz_convert(period_tz)
498
+ df = df[(df["period"] >= start_ts) & (df["period"] <= end_ts + pd.Timedelta(days=1))]
499
+
500
+ self.dataframe = df
501
+ return df
502
+
503
+ @staticmethod
504
+ def _format_gap_date(ts: "pd.Timestamp", frequency: Optional[str] = None) -> str:
505
+ """Format a gap timestamp to match the API frequency.
506
+
507
+ Monthly endpoints expect YYYY-MM, annual expects YYYY,
508
+ daily/hourly expect YYYY-MM-DD. Falls back to date string.
509
+ """
510
+ if frequency is not None:
511
+ freq_lower = frequency.lower()
512
+ if "annual" in freq_lower or "yearly" in freq_lower:
513
+ return str(ts.year)
514
+ if "month" in freq_lower:
515
+ return ts.strftime("%Y-%m")
516
+ # For daily, hourly, or unknown: use date string
517
+ return str(ts.date())
518
+
519
+ def _fetch(
520
+ self,
521
+ data_columns: Optional[List[str]] = None,
522
+ facets: Optional[Dict[str, Union[str, List[str]]]] = None,
523
+ frequency: Optional[str] = None,
524
+ start: Optional[str] = None,
525
+ end: Optional[str] = None,
526
+ sort: Optional[List[Dict[str, str]]] = None,
527
+ length: Optional[int] = None,
528
+ offset: Optional[int] = None,
529
+ output_format: Optional[Literal["json", "xml"]] = "json",
530
+ paginate: bool = True,
531
+ ) -> pd.DataFrame:
532
+ """
533
+ Fetches data from the EIA API (no caching). This is the original
534
+ get() logic extracted verbatim.
535
+
536
+ Args:
537
+ data_columns: List of data column IDs to retrieve. If None, all available columns are fetched.
538
+ facets: Dictionary of facet filters.
539
+ frequency: Data frequency ID (e.g., 'daily', 'monthly')
540
+ start: Start date/period
541
+ end: End date/period
542
+ sort: List of sort specifications
543
+ length: Maximum number of rows to return *if paginate=False*.
544
+ offset: Starting row offset for the first request.
545
+ output_format: Response format ('json' or 'xml').
546
+ paginate: Whether to automatically paginate through results.
547
+
345
548
  Returns:
346
549
  A pandas DataFrame containing the requested data
347
550
  """
@@ -349,6 +552,11 @@ class Data:
349
552
  column_ids_to_fetch = (
350
553
  data_columns if data_columns is not None else list(self.data_columns.keys())
351
554
  )
555
+ # Some endpoints report no data columns in metadata but DO return
556
+ # 'value' when explicitly requested. Fall back to ['value'] so the
557
+ # response includes actual data instead of metadata-only rows.
558
+ if not column_ids_to_fetch:
559
+ column_ids_to_fetch = ["value"]
352
560
 
353
561
  # Ensure output is json if we want a DataFrame
354
562
  if output_format != "json":
@@ -497,7 +705,8 @@ class Route:
497
705
 
498
706
  # If response doesn't contain routes, it means this endpoint has data
499
707
  if "routes" not in response_data:
500
- self._data = Data(self._client, self._slug, response_data)
708
+ cache = getattr(self._client, "_cache", None)
709
+ self._data = Data(self._client, self._slug, response_data, cache=cache)
501
710
 
502
711
  def __getattr__(self, name: str) -> Union["Route", Any]:
503
712
  """
@@ -597,7 +806,13 @@ class EIAClient:
597
806
  )
598
807
 
599
808
  def __init__(
600
- self, api_key: Optional[str] = None, session: Optional[requests.Session] = None
809
+ self,
810
+ api_key: Optional[str] = None,
811
+ session: Optional[requests.Session] = None,
812
+ *,
813
+ cache: bool = True,
814
+ cache_dir: Optional[Union[str, Path]] = None,
815
+ cache_recent_ttl: int = 48,
601
816
  ):
602
817
  """
603
818
  Initializes the EIAClient.
@@ -605,6 +820,9 @@ class EIAClient:
605
820
  Args:
606
821
  api_key: Your EIA API key. If None, it will try to read from the EIA_API_KEY environment variable.
607
822
  session: An optional requests.Session object for persistent connections.
823
+ cache: Enable/disable local parquet caching (default: True).
824
+ cache_dir: Custom cache directory. Defaults to ~/.cache/eia.
825
+ cache_recent_ttl: Hours before recent data is re-fetched (default: 48).
608
826
  """
609
827
  resolved_api_key = api_key or os.environ.get("EIA_API_KEY")
610
828
  if not resolved_api_key:
@@ -614,7 +832,21 @@ class EIAClient:
614
832
  self.api_key = resolved_api_key
615
833
  self.session = session or requests.Session()
616
834
  self.session.headers.update({"User-Agent": "Python EIAClient"})
617
- logging.info("EIAClient initialized.")
835
+
836
+ # Cache setup
837
+ config = CacheConfig(
838
+ enabled=cache,
839
+ cache_dir=Path(cache_dir) if cache_dir else CacheConfig().cache_dir,
840
+ recent_ttl_hours=cache_recent_ttl,
841
+ )
842
+ self._cache: Optional[CacheStore] = CacheStore(config) if config.enabled else None
843
+
844
+ # Catalog manager (lazy-loads YAML on first access)
845
+ from eia.catalog_manager import EIACatalogManager
846
+
847
+ self.catalog = EIACatalogManager(self)
848
+
849
+ logging.info("EIAClient initialized (cache=%s).", "enabled" if cache else "disabled")
618
850
 
619
851
  def route(self, slug: str) -> Route:
620
852
  """
@@ -812,11 +1044,59 @@ class EIAClient:
812
1044
 
813
1045
  return response_data.get("response", {})
814
1046
 
1047
+ @staticmethod
1048
+ def _route_info_to_metadata(route_info: "RouteInfo") -> Dict[str, Any]:
1049
+ """Reconstruct API-style metadata dict from a RouteInfo with cached schema."""
1050
+ metadata: Dict[str, Any] = {
1051
+ "id": route_info.route.split("/")[-1],
1052
+ "name": route_info.name,
1053
+ "description": route_info.description,
1054
+ "defaultFrequency": route_info.frequency,
1055
+ "startPeriod": route_info.start_period,
1056
+ "endPeriod": route_info.end_period,
1057
+ "defaultDateFormat": route_info.default_date_format,
1058
+ }
1059
+
1060
+ # Reconstruct frequency list
1061
+ metadata["frequency"] = [
1062
+ {
1063
+ "id": f.id,
1064
+ "description": f.description,
1065
+ "query": f.query,
1066
+ "format": f.format,
1067
+ }
1068
+ for f in route_info.frequencies
1069
+ ]
1070
+
1071
+ # Reconstruct facets list, with cached values
1072
+ metadata["facets"] = [
1073
+ {"id": fh.id, "description": fh.description}
1074
+ for fh in route_info.facets
1075
+ ]
1076
+ metadata["_facet_values"] = {
1077
+ fh.id: fh.values
1078
+ for fh in route_info.facets
1079
+ if fh.values
1080
+ }
1081
+
1082
+ # Reconstruct data dict (keyed by column id)
1083
+ metadata["data"] = {
1084
+ col.id: {
1085
+ "units": col.units,
1086
+ "aggregation-method": col.aggregation_method,
1087
+ "alias": col.alias,
1088
+ }
1089
+ for col in route_info.data_columns
1090
+ }
1091
+
1092
+ return metadata
1093
+
815
1094
  def get_data_endpoint(self, route_string: str) -> Data:
816
1095
  """
817
1096
  Directly retrieves the Data object for a known, complete data route string.
818
1097
 
819
- This allows bypassing the chained route navigation if the exact data route is known.
1098
+ If the route exists in the catalog with cached API schema, uses that
1099
+ to avoid an API metadata call. Otherwise falls back to fetching from the API.
820
1100
 
821
1101
  Args:
822
1102
  route_string: The full route path to the data endpoint
@@ -829,9 +1109,19 @@ class EIAClient:
829
1109
  EIAError: If the route does not exist or does not contain data.
830
1110
  """
831
1111
  route_string = route_string.strip("/")
832
- logging.info(f"Directly accessing data endpoint metadata for: {route_string}")
833
1112
 
834
- # Fetch metadata for the route
1113
+ # Try the catalog first — skip the API call if the schema is cached
1114
+ try:
1115
+ route_info = self.catalog.get_route(route_string)
1116
+ if route_info.data_columns: # has cached schema
1117
+ logging.info(f"Using cached catalog schema for: {route_string}")
1118
+ metadata = self._route_info_to_metadata(route_info)
1119
+ return Data(self, route_string, metadata, cache=self._cache)
1120
+ except KeyError:
1121
+ pass
1122
+
1123
+ # Fallback: hit API as before
1124
+ logging.info(f"Directly accessing data endpoint metadata for: {route_string}")
835
1125
  metadata = self.get_metadata(route_string)
836
1126
 
837
1127
  # Check if the route actually contains data (basic check)
@@ -857,7 +1147,7 @@ class EIAClient:
857
1147
  )
858
1148
 
859
1149
  # Instantiate and return the Data object
860
- return Data(self, route_string, metadata)
1150
+ return Data(self, route_string, metadata, cache=self._cache)
861
1151
 
862
1152
  def get_data_from_url(self, url: str) -> Dict[str, Any]:
863
1153
  """
@@ -1,12 +1,14 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-eia
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: A Python client for the U.S. Energy Information Administration (EIA) API v2
5
5
  Project-URL: Homepage, https://github.com/datons/python-eia
6
- Project-URL: Repository, https://github.com/datons/python-eia.git
6
+ Project-URL: Repository, https://github.com/datons/python-eia
7
7
  Project-URL: Issues, https://github.com/datons/python-eia/issues
8
- Author-email: Jesus Lopez <jesus.lopez@datons.ai>
8
+ Project-URL: Changelog, https://github.com/datons/python-eia/releases
9
+ Author-email: Jesús López <jesus.lopez@datons.com>
9
10
  License-Expression: MIT
11
+ Keywords: api,eia,electricity,energy,gas,oil,usa
10
12
  Classifier: Development Status :: 3 - Alpha
11
13
  Classifier: Intended Audience :: Developers
12
14
  Classifier: Intended Audience :: Science/Research
@@ -20,6 +22,8 @@ Classifier: Programming Language :: Python :: 3.13
20
22
  Classifier: Topic :: Scientific/Engineering
21
23
  Requires-Python: >=3.10
22
24
  Requires-Dist: pandas>=2.0
25
+ Requires-Dist: pyarrow>=14.0
26
+ Requires-Dist: pyyaml>=6.0
23
27
  Requires-Dist: requests>=2.31.0
24
28
  Requires-Dist: rich>=13.0
25
29
  Requires-Dist: typer>=0.9
@@ -0,0 +1,22 @@
1
+ eia/__init__.py,sha256=nRNJ61aw4Y8PwNPAeyd27-cLdCPui4px0NntDFVdhvk,302
2
+ eia/cache.py,sha256=R8TuAeBRLQW2IjfOwxRIVOuRpxhTsJ-utlkrwiBZLGo,13124
3
+ eia/catalog.py,sha256=ISYEwTqcAOGGdSdGB0xC0mhdZj5bCO2Rovf9tNQvuHw,3910
4
+ eia/catalog_manager.py,sha256=C6vNvKd3mfXj0EbrFaP6Y2nPd3x5TadiTtllIuAv6Fc,17446
5
+ eia/client.py,sha256=C7izyxHkG62ZOEz4xIB8y-6ZmY3lg0-4eRay11-7SZM,44980
6
+ eia/.agents/skills/eia/SKILL.md,sha256=UhGQCEnc38yCrog2NTkJGgiJokHX9skR38BXSkUe900,6294
7
+ eia/cli/__init__.py,sha256=KCvyQU45hrJdWk3cra4mSHhW-u_5qFeQDfeE5KkDdN4,243
8
+ eia/cli/_output.py,sha256=mhD1j5TyuHtw5BcjwUvHZ8-a87xpBPcEKtoPqMk0-pA,2618
9
+ eia/cli/app.py,sha256=0Gch8mErIdlkkDuopoQ0XWrqI2m7WU_2AvgrcXoMC5g,1843
10
+ eia/cli/cache_cmd.py,sha256=QUK-pPWVE86_6kgIjax0C0XivZDbazOb4R1XOmmhHBU,1563
11
+ eia/cli/catalog_cmd.py,sha256=2piq4i46F4gq0jh51uEVi81iqXRm3lQ6P8_AdDSvodw,6533
12
+ eia/cli/config.py,sha256=V_1wiXixPPIgVivIUDuAIS9aL9JcPu0qTshlVw9XDxQ,1657
13
+ eia/cli/config_cmd.py,sha256=rUdnM-G3UHSoiBqfqxHXrRDdf7TFjAD6t8QLkuTJ0Ao,917
14
+ eia/cli/exec_cmd.py,sha256=DkP8Bcm9RNjdC-D1E6k0skRJmGH1uRy8ALpyXFdr488,2776
15
+ eia/cli/facets_cmd.py,sha256=urpbAtqsZKnNn-Hiqf8hR8tAyZCd9Hm7xJmSH3YONuo,2208
16
+ eia/cli/get_cmd.py,sha256=MtHQ4yM_mHqAf7YdeYZUqL7lfd6w3-tSVMQPSHIVM94,3036
17
+ eia/cli/meta_cmd.py,sha256=p8JObyCMG-VnPnMjKVYpVGTJh38SU_eQ9zp2x7ET6LA,3112
18
+ eia/cli/routes_cmd.py,sha256=FuK90wjFklUb5KtUWDoufEYjc2b5KhE-KpsPWXc4iFM,2427
19
+ python_eia-0.3.0.dist-info/METADATA,sha256=dvmN68A5trEUhSvHb58kXWS8MCWq4rZ0MoYz1ofqjn0,2257
20
+ python_eia-0.3.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
21
+ python_eia-0.3.0.dist-info/entry_points.txt,sha256=oi4FTIzWeuoYDkpi_yywwRlEMBMLceEVJQKKD7zrRO0,36
22
+ python_eia-0.3.0.dist-info/RECORD,,