PyPI - python-esios - Versions diffs - 2.2.0__py3-none-any.whl → 2.4.0__py3-none-any.whl - Mend

python-esios 2.2.0__py3-none-any.whl → 2.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

esios/constants.py CHANGED Viewed

@@ -14,7 +14,17 @@ MAX_RETRIES = 3
 RETRY_MIN_WAIT = 2  # seconds
 RETRY_MAX_WAIT = 10  # seconds
-# ESIOS API limits responses to ~3 weeks of data per request
-CHUNK_SIZE_DAYS = 21
+# ESIOS API chunk sizes for historical data fetching.
+# High-geo indicators (40+ geos) time out (504) at >21 days.
+# Low-geo indicators handle 6+ months per request in <0.1s.
+CHUNK_SIZE_DAYS = 21  # Legacy default, kept for backward compat
+CHUNK_SIZE_DAYS_LOW_GEO = 180  # 6 months for indicators with few geos
+CHUNK_SIZE_DAYS_HIGH_GEO = 21  # Conservative for indicators with many geos
+HIGH_GEO_THRESHOLD = 15  # Indicators with >= this many geos use smaller chunks
+# Concurrent chunk fetching within a single indicator.
+# 4 workers gives ~17-95x speedup over sequential with no errors.
+# Diminishing returns past 4 (ESIOS server becomes the bottleneck).
+DEFAULT_CHUNK_WORKERS = 4
 TIMEZONE = "Europe/Madrid"

esios/managers/indicators.py CHANGED Viewed

@@ -3,13 +3,20 @@
 from __future__ import annotations
 import logging
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from datetime import timedelta
 from typing import Any
 import pandas as pd
 from esios.cache import CacheStore
-from esios.constants import CHUNK_SIZE_DAYS, TIMEZONE
+from esios.constants import (
+    CHUNK_SIZE_DAYS_HIGH_GEO,
+    CHUNK_SIZE_DAYS_LOW_GEO,
+    DEFAULT_CHUNK_WORKERS,
+    HIGH_GEO_THRESHOLD,
+    TIMEZONE,
+)
 from esios.managers.base import BaseManager
 from esios.models.indicator import Indicator
 from esios.processing.dataframes import to_dataframe
@@ -131,6 +138,88 @@ class IndicatorHandle:
             f"Available: {', '.join(available)}"
         )
+    @property
+    def _chunk_days(self) -> int:
+        """Choose chunk size based on indicator's geo count.
+        ESIOS API times out (504) for high-geo indicators (40+ geos) with
+        windows larger than ~3 weeks. Low-geo indicators handle 6+ months
+        per request in <0.1s.
+        When geos are unknown (empty metadata), uses the conservative
+        chunk size to avoid timeouts on first fetch.
+        """
+        geo_count = len(self.geos)
+        if geo_count == 0:
+            # Unknown geo count — be conservative
+            return CHUNK_SIZE_DAYS_HIGH_GEO
+        if geo_count >= HIGH_GEO_THRESHOLD:
+            return CHUNK_SIZE_DAYS_HIGH_GEO
+        return CHUNK_SIZE_DAYS_LOW_GEO
+    def _fetch_one(
+        self, start: str, end: str, base_params: dict[str, Any],
+    ) -> list[dict]:
+        """Fetch a single date-range chunk from the ESIOS API."""
+        params = {
+            **base_params,
+            "start_date": start,
+            "end_date": end + "T23:59:59",
+        }
+        logger.debug("Fetch %s → %s", start, end)
+        data = self._manager._get(f"indicators/{self.id}", params=params)
+        return data.get("indicator", {}).get("values", [])
+    def _fetch_chunks(
+        self,
+        gaps: list,
+        base_params: dict[str, Any],
+        max_workers: int = DEFAULT_CHUNK_WORKERS,
+    ) -> list[dict]:
+        """Fetch all gap chunks concurrently, return values in order.
+        Builds a list of (start, end) chunks from the gaps, then fetches
+        them in parallel using a thread pool. Results are reassembled in
+        chronological order.
+        """
+        chunk_delta = timedelta(days=self._chunk_days)
+        # Build chunk list
+        chunks: list[tuple[str, str]] = []
+        for gap in gaps:
+            current = gap.start
+            while current <= gap.end:
+                chunk_end = min(current + chunk_delta, gap.end)
+                chunks.append((
+                    current.strftime("%Y-%m-%d"),
+                    chunk_end.strftime("%Y-%m-%d"),
+                ))
+                current = chunk_end + timedelta(days=1)
+        if not chunks:
+            return []
+        if len(chunks) == 1:
+            return self._fetch_one(chunks[0][0], chunks[0][1], base_params)
+        # Fetch concurrently, preserve order
+        results: list[list[dict] | None] = [None] * len(chunks)
+        with ThreadPoolExecutor(max_workers=max_workers) as pool:
+            futures = {
+                pool.submit(self._fetch_one, s, e, base_params): i
+                for i, (s, e) in enumerate(chunks)
+            }
+            for future in as_completed(futures):
+                idx = futures[future]
+                results[idx] = future.result()
+        # Flatten in chronological order
+        all_values: list[dict] = []
+        for chunk_values in results:
+            if chunk_values:
+                all_values.extend(chunk_values)
+        return all_values
     def historical(
         self,
         start: str,
@@ -142,15 +231,28 @@ class IndicatorHandle:
         geo_agg: str | None = None,
         time_trunc: str | None = None,
         geo_trunc: str | None = None,
+        column_name: str | None = None,
+        chunk_workers: int = DEFAULT_CHUNK_WORKERS,
     ) -> pd.DataFrame:
         """Fetch historical values as a DataFrame with DatetimeIndex.
         Uses local parquet cache when enabled. Only fetches missing date ranges
-        from the API. Automatically chunks requests exceeding ~3 weeks.
+        from the API. Automatically chunks requests and fetches concurrently.
+        Chunk size adapts to the indicator's geo count: 180 days for low-geo
+        indicators, 21 days for high-geo (≥15 geos) to avoid ESIOS timeouts.
         When multiple geo_ids are present (e.g. indicator 600 returns data for
         several countries), the result is pivoted so each geo becomes a column
         named by its geo_name. Use *geo_ids* to filter to specific geos.
+        Args:
+            column_name: If provided, rename the output column(s) to this name.
+                Useful for single-column results where a stable name like
+                ``"value"`` is preferred over the default geo_name or
+                indicator ID.
+            chunk_workers: Number of concurrent threads for fetching chunks.
+                Defaults to 4. Set to 1 for sequential fetching.
         """
         base_params: dict[str, Any] = {
             "locale": locale,
@@ -204,24 +306,8 @@ class IndicatorHandle:
             from esios.cache import DateRange
             gaps = [DateRange(start_date, end_date)]
-        # -- Fetch missing ranges ----------------------------------------------
-        all_values: list[dict] = []
-        chunk_delta = timedelta(days=CHUNK_SIZE_DAYS)
-        for gap in gaps:
-            current = gap.start
-            gap_end = gap.end
-            while current <= gap_end:
-                chunk_end = min(current + chunk_delta, gap_end)
-                params = {
-                    **base_params,
-                    "start_date": current.strftime("%Y-%m-%d"),
-                    "end_date": chunk_end.strftime("%Y-%m-%d") + "T23:59:59",
-                }
-                logger.debug("Fetch %s → %s", params["start_date"], params["end_date"])
-                data = self._manager._get(f"indicators/{self.id}", params=params)
-                all_values.extend(data.get("indicator", {}).get("values", []))
-                current = chunk_end + timedelta(days=1)
+        # -- Fetch missing ranges (concurrent + adaptive chunk size) -----------
+        all_values = self._fetch_chunks(gaps, base_params, max_workers=chunk_workers)
         # Learn any new geo mappings from the response
         self._enrich_geo_map(all_values)
@@ -250,7 +336,7 @@ class IndicatorHandle:
             if existing:
                 result = result[existing]
-        return self._finalize(result)
+        return self._finalize(result, column_name=column_name)
     def _to_wide(self, values: list[dict]) -> pd.DataFrame:
         """Convert raw API value dicts to wide-format DataFrame.
@@ -283,16 +369,26 @@ class IndicatorHandle:
         df = df.drop(columns=geo_drop, errors="ignore")
         return df
-    def _finalize(self, df: pd.DataFrame) -> pd.DataFrame:
+    def _finalize(
+        self, df: pd.DataFrame, *, column_name: str | None = None,
+    ) -> pd.DataFrame:
         """Prepare DataFrame for user-facing output.
         Cache stores columns as str(geo_id). This method renames them to
         human-readable geo_names at the very end, just before returning to
         the caller. Single-value/single-geo indicators get the indicator ID.
+        If ``column_name`` is provided and the result has a single column,
+        that column is renamed to ``column_name`` (e.g. ``"value"``).
         """
         if df.empty:
             return df
+        # If caller wants a specific column name and there's a single column, use it
+        if column_name and len(df.columns) == 1:
+            df = df.rename(columns={df.columns[0]: column_name})
+            return df
         if len(df.columns) == 1:
             col = df.columns[0]
             if col == "value":
@@ -305,11 +401,15 @@ class IndicatorHandle:
         if rename:
             df = df.rename(columns=rename)
+        # Apply the caller's column name only when a single column remains;
+        # multi-column results are skipped (ambiguous which column to rename)
+        if column_name and len(df.columns) == 1:
+            df = df.rename(columns={df.columns[0]: column_name})
+            return df
         # Single-geo after rename: use indicator ID as column name
         if len(df.columns) == 1:
             col = df.columns[0]
-            # If the single column is a geo_name, keep it (user filtered to one geo)
-            # If it's still a geo_id string, rename to indicator ID
             if col not in geo_map.values():
                 df = df.rename(columns={col: str(self.id)})

esios/processing/i90.py CHANGED Viewed

@@ -166,11 +166,15 @@ class I90Sheet:
     def _normalize_datetime_columns(self, columns: np.ndarray) -> np.ndarray:
         """Normalize time column headers to integer period indices.
-        Handles three column formats found in I90 files:
-        - Sequential integers 1–24 (hourly) or 1–96 (quarterly)
-        - H-Q format with dash notation: "1-1", "1-2", "1-3", "1-4", "2-1", …
-        - NaN-filler format: [1, NaN, NaN, NaN, 2, …] (one label per hour,
-          three trailing NaNs for quarters 2–4)
+        Handles four column formats found in I90 files:
+        1. Sequential integers: 1–24 (hourly) or 1–96 (quarterly)
+        2. H-Q format: "1-1", "1-2", "1-3", "1-4", "2-1", …
+        3. NaN-filler format: [1, NaN, NaN, NaN, 2, …]
+        4. Range format (DST days): "00-01", "01-02", "02-03a", "02-03b", …
+           where the first number is the start hour and a/b suffix marks
+           the repeated hour on fall-back days. Detected by the first
+           column starting with "0" and containing "-" (e.g. "00-01").
         """
         if any(pd.isna(columns)):
             self._n_columns_totals = 3
@@ -178,6 +182,17 @@ class I90Sheet:
             self._n_columns_totals = 2
         series = pd.Series(columns, dtype=str).ffill()
+        # Range format (DST): "00-01", "01-02", "02-03a", "02-03b", ...
+        # Detected by first column starting with "0" (sequential ints start at 1).
+        first_val = str(columns[0]).strip()
+        if first_val.startswith("0") and "-" in first_val:
+            # Simply assign sequential 1-based indices.
+            # The count of columns (23, 24, or 25 for hourly; 92, 96, or 100
+            # for QH) already encodes the DST information. The datetime builder
+            # in _preprocess uses these as offsets from Madrid midnight, in UTC.
+            return np.arange(1, len(columns) + 1)
         parts = series.str.split("-")
         hours = parts.str[0].astype(float).astype(int)
@@ -251,12 +266,18 @@ class I90Sheet:
                 self.frequency = "hourly"
                 time_deltas = columns_date * 60  # minutes
-            # Build datetime index
-            base_date = pd.to_datetime(self.metadata["date_data"])
-            columns_datetime = base_date + pd.to_timedelta(time_deltas, unit="m")
-            columns_datetime = pd.DatetimeIndex(columns_datetime).tz_localize(
-                "Europe/Madrid", ambiguous="infer"
-            )
+            # Build datetime index in UTC to avoid DST ambiguity.
+            # On fall-back days (Oct), I90 has 25 hourly periods (or 100 QH).
+            # Naïve offset arithmetic creates a single 02:00 that tz_localize
+            # cannot disambiguate.  By anchoring midnight in Europe/Madrid,
+            # converting to UTC, then adding offsets, each period maps to a
+            # unique UTC instant — no ambiguity.
+            # On spring-forward days (Mar), I90 has 23 periods (or 92 QH)
+            # and this approach naturally skips the non-existent hour.
+            midnight_utc = pd.Timestamp(
+                self.metadata["date_data"], tz="Europe/Madrid"
+            ).tz_convert("UTC")
+            columns_datetime = midnight_utc + pd.to_timedelta(time_deltas, unit="m")
             data = pd.DataFrame(self.rows[idx + 1 :], columns=columns)

{python_esios-2.2.0.dist-info → python_esios-2.4.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: python-esios
-Version: 2.2.0
+Version: 2.4.0
 Summary: A Python wrapper for the ESIOS API (Spanish electricity market)
 Project-URL: Homepage, https://github.com/datons/python-esios
 Project-URL: Repository, https://github.com/datons/python-esios

{python_esios-2.2.0.dist-info → python_esios-2.4.0.dist-info}/RECORD RENAMED Viewed

@@ -3,7 +3,7 @@ esios/async_client.py,sha256=OVNNZwFbvPyUnu7LVr7X5MdXlk_-AJ1lfkUE0OODlbQ,3452
 esios/cache.py,sha256=GgbrL9Rc9aLrEWHvXtQOCGQRgq2T4m6VBJDvBJfWMTk,18920
 esios/catalog.py,sha256=xWwMx5I32m34npjAXHh-Ua4e_0pfG89yxUC_Vy9VlAA,16811
 esios/client.py,sha256=rLgdyPFII6CC_TJwgkHaScJ7nBUpt85N94mujKAn0d0,5825
-esios/constants.py,sha256=pwB2UlBI96zYBA8wAbcCSHcm_E-aIj2hBarDA8t1Vp8,474
+esios/constants.py,sha256=yfxSNG37i4dkpa7x0CBvXTroyddn5jhNTuWGDhAq3-0,1074
 esios/exceptions.py,sha256=AiWLdRDWj50JEsld9CvVBsfLnZZKFmW62_bZmZ7Z_eA,899
 esios/.agents/skills/esios/SKILL.md,sha256=_5wCzMMB8FHWcAPeMA5vGklZFEGBEvU5wBOryNIogzM,6252
 esios/cli/__init__.py,sha256=9gd5ZDIH1-yNP_xcd60ethOFXm9w6un0CJ9CX0Qvb2A,256
@@ -24,7 +24,7 @@ esios/data/time_periods.yaml,sha256=oyisKYYyOGA57eEAqkFFx6B3x9rdSl0DokZe5gNZfMw,
 esios/managers/__init__.py,sha256=-1AwL7arUf7WEZn1RSiK_DZhY3j6U4GE9_dqjbukCJc,268
 esios/managers/archives.py,sha256=PG-1gQYEiJUVQQtTKIZeEoWIsS-gkWT3ZHy89c8tTW8,9293
 esios/managers/base.py,sha256=7XcdrUtUOPuqfHYlz4w562TD8o9cNdBWOgs4CHHonoo,835
-esios/managers/indicators.py,sha256=GEyHq09TCPnf3ARULS7olJYC6iiom2XrcyhDAf946po,16653
+esios/managers/indicators.py,sha256=4f1wLhT33Fc93ixHr51DIzIBqzznJSaoeLfWOT-2EQ0,20260
 esios/managers/offer_indicators.py,sha256=0MjEKkj77YC2fRSHVTEc7FW6E8AuwwciAXK-bOVEL5Q,4187
 esios/models/__init__.py,sha256=oppuTASpf0Dh2KbGMXInULT0F4sELjeo-9UhPiPOZiA,289
 esios/models/archive.py,sha256=P2LaT7_ff4ujwqVn_ofgQP3dbpf7jqON0R22dKwSJ_w,1062
@@ -32,10 +32,10 @@ esios/models/indicator.py,sha256=u1AJyEA3YeOqQFjV08_lzyMaofuCiMoLPjvosls9gfE,111
 esios/models/offer_indicator.py,sha256=nA80Y7Yp0utDaDOdZ-ObcWTsAdhvuXlfJjJBpdVQ7Lo,758
 esios/processing/__init__.py,sha256=1kLt_gO_wDhXM1BbY0zTyfAYo-CjYKW1ljgRRDZ7USM,278
 esios/processing/dataframes.py,sha256=OitzBvAerssGP2VXNC-sSO48XsHdIB2nKTUgByN5eYQ,2524
-esios/processing/i90.py,sha256=k4RH4lIwIm04ASYnubdQwJ3WM98iLj5l14zwxXBQEBo,10443
+esios/processing/i90.py,sha256=fI8DfY8CD2kF1_ZrAzuEDxN0m7Vh3CV3dIn32lxKffA,11687
 esios/processing/zip.py,sha256=12LbFHJTdX_h3JG-clEgQ4Haj-kw0UjfopGLlCRXfGM,1913
-python_esios-2.2.0.dist-info/METADATA,sha256=ZnkUUArmCnyVe_kDuOhjm-JzXWXeqKN9irD39vT0YpI,3169
-python_esios-2.2.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
-python_esios-2.2.0.dist-info/entry_points.txt,sha256=7ngseyIyvJ4buTHFL9htaZ4tTFHpG4zzJNkc8B5Jr8U,40
-python_esios-2.2.0.dist-info/licenses/LICENSE,sha256=LorLs1-VeBW70Wo9fLAtLJN7nNd6Poy0xzvqdWVqFlE,35128
-python_esios-2.2.0.dist-info/RECORD,,
+python_esios-2.4.0.dist-info/METADATA,sha256=STVMDUwpgk6ZOx79KXOMPwn-t1aIvhB8MdsBmQtfdkk,3169
+python_esios-2.4.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
+python_esios-2.4.0.dist-info/entry_points.txt,sha256=7ngseyIyvJ4buTHFL9htaZ4tTFHpG4zzJNkc8B5Jr8U,40
+python_esios-2.4.0.dist-info/licenses/LICENSE,sha256=LorLs1-VeBW70Wo9fLAtLJN7nNd6Poy0xzvqdWVqFlE,35128
+python_esios-2.4.0.dist-info/RECORD,,

{python_esios-2.2.0.dist-info → python_esios-2.4.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{python_esios-2.2.0.dist-info → python_esios-2.4.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{python_esios-2.2.0.dist-info → python_esios-2.4.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

python-esios 2.2.0__py3-none-any.whl → 2.4.0__py3-none-any.whl

python-esios 2.2.0__py3-none-any.whl → 2.4.0__py3-none-any.whl