python-esios 2.2.0__py3-none-any.whl → 2.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- esios/constants.py +12 -2
- esios/managers/indicators.py +124 -24
- esios/processing/i90.py +32 -11
- {python_esios-2.2.0.dist-info → python_esios-2.4.0.dist-info}/METADATA +1 -1
- {python_esios-2.2.0.dist-info → python_esios-2.4.0.dist-info}/RECORD +8 -8
- {python_esios-2.2.0.dist-info → python_esios-2.4.0.dist-info}/WHEEL +0 -0
- {python_esios-2.2.0.dist-info → python_esios-2.4.0.dist-info}/entry_points.txt +0 -0
- {python_esios-2.2.0.dist-info → python_esios-2.4.0.dist-info}/licenses/LICENSE +0 -0
esios/constants.py
CHANGED
|
@@ -14,7 +14,17 @@ MAX_RETRIES = 3
|
|
|
14
14
|
RETRY_MIN_WAIT = 2 # seconds
|
|
15
15
|
RETRY_MAX_WAIT = 10 # seconds
|
|
16
16
|
|
|
17
|
-
# ESIOS API
|
|
18
|
-
|
|
17
|
+
# ESIOS API chunk sizes for historical data fetching.
|
|
18
|
+
# High-geo indicators (40+ geos) time out (504) at >21 days.
|
|
19
|
+
# Low-geo indicators handle 6+ months per request in <0.1s.
|
|
20
|
+
CHUNK_SIZE_DAYS = 21 # Legacy default, kept for backward compat
|
|
21
|
+
CHUNK_SIZE_DAYS_LOW_GEO = 180 # 6 months for indicators with few geos
|
|
22
|
+
CHUNK_SIZE_DAYS_HIGH_GEO = 21 # Conservative for indicators with many geos
|
|
23
|
+
HIGH_GEO_THRESHOLD = 15 # Indicators with >= this many geos use smaller chunks
|
|
24
|
+
|
|
25
|
+
# Concurrent chunk fetching within a single indicator.
|
|
26
|
+
# 4 workers give ~17-95x speedup over sequential with no errors.
|
|
27
|
+
# Diminishing returns past 4 (ESIOS server becomes the bottleneck).
|
|
28
|
+
DEFAULT_CHUNK_WORKERS = 4
|
|
19
29
|
|
|
20
30
|
TIMEZONE = "Europe/Madrid"
|
esios/managers/indicators.py
CHANGED
|
@@ -3,13 +3,20 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
import logging
|
|
6
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
6
7
|
from datetime import timedelta
|
|
7
8
|
from typing import Any
|
|
8
9
|
|
|
9
10
|
import pandas as pd
|
|
10
11
|
|
|
11
12
|
from esios.cache import CacheStore
|
|
12
|
-
from esios.constants import CHUNK_SIZE_DAYS, TIMEZONE
|
|
13
|
+
from esios.constants import (
|
|
14
|
+
CHUNK_SIZE_DAYS_HIGH_GEO,
|
|
15
|
+
CHUNK_SIZE_DAYS_LOW_GEO,
|
|
16
|
+
DEFAULT_CHUNK_WORKERS,
|
|
17
|
+
HIGH_GEO_THRESHOLD,
|
|
18
|
+
TIMEZONE,
|
|
19
|
+
)
|
|
13
20
|
from esios.managers.base import BaseManager
|
|
14
21
|
from esios.models.indicator import Indicator
|
|
15
22
|
from esios.processing.dataframes import to_dataframe
|
|
@@ -131,6 +138,88 @@ class IndicatorHandle:
|
|
|
131
138
|
f"Available: {', '.join(available)}"
|
|
132
139
|
)
|
|
133
140
|
|
|
141
|
+
@property
|
|
142
|
+
def _chunk_days(self) -> int:
|
|
143
|
+
"""Choose chunk size based on indicator's geo count.
|
|
144
|
+
|
|
145
|
+
ESIOS API times out (504) for high-geo indicators (40+ geos) with
|
|
146
|
+
windows larger than ~3 weeks. Low-geo indicators handle 6+ months
|
|
147
|
+
per request in <0.1s.
|
|
148
|
+
|
|
149
|
+
When geos are unknown (empty metadata), uses the conservative
|
|
150
|
+
chunk size to avoid timeouts on first fetch.
|
|
151
|
+
"""
|
|
152
|
+
geo_count = len(self.geos)
|
|
153
|
+
if geo_count == 0:
|
|
154
|
+
# Unknown geo count — be conservative
|
|
155
|
+
return CHUNK_SIZE_DAYS_HIGH_GEO
|
|
156
|
+
if geo_count >= HIGH_GEO_THRESHOLD:
|
|
157
|
+
return CHUNK_SIZE_DAYS_HIGH_GEO
|
|
158
|
+
return CHUNK_SIZE_DAYS_LOW_GEO
|
|
159
|
+
|
|
160
|
+
def _fetch_one(
|
|
161
|
+
self, start: str, end: str, base_params: dict[str, Any],
|
|
162
|
+
) -> list[dict]:
|
|
163
|
+
"""Fetch a single date-range chunk from the ESIOS API."""
|
|
164
|
+
params = {
|
|
165
|
+
**base_params,
|
|
166
|
+
"start_date": start,
|
|
167
|
+
"end_date": end + "T23:59:59",
|
|
168
|
+
}
|
|
169
|
+
logger.debug("Fetch %s → %s", start, end)
|
|
170
|
+
data = self._manager._get(f"indicators/{self.id}", params=params)
|
|
171
|
+
return data.get("indicator", {}).get("values", [])
|
|
172
|
+
|
|
173
|
+
def _fetch_chunks(
|
|
174
|
+
self,
|
|
175
|
+
gaps: list,
|
|
176
|
+
base_params: dict[str, Any],
|
|
177
|
+
max_workers: int = DEFAULT_CHUNK_WORKERS,
|
|
178
|
+
) -> list[dict]:
|
|
179
|
+
"""Fetch all gap chunks concurrently, return values in order.
|
|
180
|
+
|
|
181
|
+
Builds a list of (start, end) chunks from the gaps, then fetches
|
|
182
|
+
them in parallel using a thread pool. Results are reassembled in
|
|
183
|
+
chronological order.
|
|
184
|
+
"""
|
|
185
|
+
chunk_delta = timedelta(days=self._chunk_days)
|
|
186
|
+
|
|
187
|
+
# Build chunk list
|
|
188
|
+
chunks: list[tuple[str, str]] = []
|
|
189
|
+
for gap in gaps:
|
|
190
|
+
current = gap.start
|
|
191
|
+
while current <= gap.end:
|
|
192
|
+
chunk_end = min(current + chunk_delta, gap.end)
|
|
193
|
+
chunks.append((
|
|
194
|
+
current.strftime("%Y-%m-%d"),
|
|
195
|
+
chunk_end.strftime("%Y-%m-%d"),
|
|
196
|
+
))
|
|
197
|
+
current = chunk_end + timedelta(days=1)
|
|
198
|
+
|
|
199
|
+
if not chunks:
|
|
200
|
+
return []
|
|
201
|
+
|
|
202
|
+
if len(chunks) == 1:
|
|
203
|
+
return self._fetch_one(chunks[0][0], chunks[0][1], base_params)
|
|
204
|
+
|
|
205
|
+
# Fetch concurrently, preserve order
|
|
206
|
+
results: list[list[dict] | None] = [None] * len(chunks)
|
|
207
|
+
with ThreadPoolExecutor(max_workers=max_workers) as pool:
|
|
208
|
+
futures = {
|
|
209
|
+
pool.submit(self._fetch_one, s, e, base_params): i
|
|
210
|
+
for i, (s, e) in enumerate(chunks)
|
|
211
|
+
}
|
|
212
|
+
for future in as_completed(futures):
|
|
213
|
+
idx = futures[future]
|
|
214
|
+
results[idx] = future.result()
|
|
215
|
+
|
|
216
|
+
# Flatten in chronological order
|
|
217
|
+
all_values: list[dict] = []
|
|
218
|
+
for chunk_values in results:
|
|
219
|
+
if chunk_values:
|
|
220
|
+
all_values.extend(chunk_values)
|
|
221
|
+
return all_values
|
|
222
|
+
|
|
134
223
|
def historical(
|
|
135
224
|
self,
|
|
136
225
|
start: str,
|
|
@@ -142,15 +231,28 @@ class IndicatorHandle:
|
|
|
142
231
|
geo_agg: str | None = None,
|
|
143
232
|
time_trunc: str | None = None,
|
|
144
233
|
geo_trunc: str | None = None,
|
|
234
|
+
column_name: str | None = None,
|
|
235
|
+
chunk_workers: int = DEFAULT_CHUNK_WORKERS,
|
|
145
236
|
) -> pd.DataFrame:
|
|
146
237
|
"""Fetch historical values as a DataFrame with DatetimeIndex.
|
|
147
238
|
|
|
148
239
|
Uses local parquet cache when enabled. Only fetches missing date ranges
|
|
149
|
-
from the API. Automatically chunks requests
|
|
240
|
+
from the API. Automatically chunks requests and fetches concurrently.
|
|
241
|
+
|
|
242
|
+
Chunk size adapts to the indicator's geo count: 180 days for low-geo
|
|
243
|
+
indicators, 21 days for high-geo (≥15 geos) to avoid ESIOS timeouts.
|
|
150
244
|
|
|
151
245
|
When multiple geo_ids are present (e.g. indicator 600 returns data for
|
|
152
246
|
several countries), the result is pivoted so each geo becomes a column
|
|
153
247
|
named by its geo_name. Use *geo_ids* to filter to specific geos.
|
|
248
|
+
|
|
249
|
+
Args:
|
|
250
|
+
column_name: If provided and the result has a single column, rename it to this name.
|
|
251
|
+
Useful for single-column results where a stable name like
|
|
252
|
+
``"value"`` is preferred over the default geo_name or
|
|
253
|
+
indicator ID.
|
|
254
|
+
chunk_workers: Number of concurrent threads for fetching chunks.
|
|
255
|
+
Defaults to 4. Set to 1 for sequential fetching.
|
|
154
256
|
"""
|
|
155
257
|
base_params: dict[str, Any] = {
|
|
156
258
|
"locale": locale,
|
|
@@ -204,24 +306,8 @@ class IndicatorHandle:
|
|
|
204
306
|
from esios.cache import DateRange
|
|
205
307
|
gaps = [DateRange(start_date, end_date)]
|
|
206
308
|
|
|
207
|
-
# -- Fetch missing ranges
|
|
208
|
-
all_values
|
|
209
|
-
chunk_delta = timedelta(days=CHUNK_SIZE_DAYS)
|
|
210
|
-
|
|
211
|
-
for gap in gaps:
|
|
212
|
-
current = gap.start
|
|
213
|
-
gap_end = gap.end
|
|
214
|
-
while current <= gap_end:
|
|
215
|
-
chunk_end = min(current + chunk_delta, gap_end)
|
|
216
|
-
params = {
|
|
217
|
-
**base_params,
|
|
218
|
-
"start_date": current.strftime("%Y-%m-%d"),
|
|
219
|
-
"end_date": chunk_end.strftime("%Y-%m-%d") + "T23:59:59",
|
|
220
|
-
}
|
|
221
|
-
logger.debug("Fetch %s → %s", params["start_date"], params["end_date"])
|
|
222
|
-
data = self._manager._get(f"indicators/{self.id}", params=params)
|
|
223
|
-
all_values.extend(data.get("indicator", {}).get("values", []))
|
|
224
|
-
current = chunk_end + timedelta(days=1)
|
|
309
|
+
# -- Fetch missing ranges (concurrent + adaptive chunk size) -----------
|
|
310
|
+
all_values = self._fetch_chunks(gaps, base_params, max_workers=chunk_workers)
|
|
225
311
|
|
|
226
312
|
# Learn any new geo mappings from the response
|
|
227
313
|
self._enrich_geo_map(all_values)
|
|
@@ -250,7 +336,7 @@ class IndicatorHandle:
|
|
|
250
336
|
if existing:
|
|
251
337
|
result = result[existing]
|
|
252
338
|
|
|
253
|
-
return self._finalize(result)
|
|
339
|
+
return self._finalize(result, column_name=column_name)
|
|
254
340
|
|
|
255
341
|
def _to_wide(self, values: list[dict]) -> pd.DataFrame:
|
|
256
342
|
"""Convert raw API value dicts to wide-format DataFrame.
|
|
@@ -283,16 +369,26 @@ class IndicatorHandle:
|
|
|
283
369
|
df = df.drop(columns=geo_drop, errors="ignore")
|
|
284
370
|
return df
|
|
285
371
|
|
|
286
|
-
def _finalize(
|
|
372
|
+
def _finalize(
|
|
373
|
+
self, df: pd.DataFrame, *, column_name: str | None = None,
|
|
374
|
+
) -> pd.DataFrame:
|
|
287
375
|
"""Prepare DataFrame for user-facing output.
|
|
288
376
|
|
|
289
377
|
Cache stores columns as str(geo_id). This method renames them to
|
|
290
378
|
human-readable geo_names at the very end, just before returning to
|
|
291
379
|
the caller. Single-value/single-geo indicators get the indicator ID.
|
|
380
|
+
|
|
381
|
+
If ``column_name`` is provided and the result has a single column,
|
|
382
|
+
that column is renamed to ``column_name`` (e.g. ``"value"``).
|
|
292
383
|
"""
|
|
293
384
|
if df.empty:
|
|
294
385
|
return df
|
|
295
386
|
|
|
387
|
+
# If caller wants a specific column name and there's a single column, use it
|
|
388
|
+
if column_name and len(df.columns) == 1:
|
|
389
|
+
df = df.rename(columns={df.columns[0]: column_name})
|
|
390
|
+
return df
|
|
391
|
+
|
|
296
392
|
if len(df.columns) == 1:
|
|
297
393
|
col = df.columns[0]
|
|
298
394
|
if col == "value":
|
|
@@ -305,11 +401,15 @@ class IndicatorHandle:
|
|
|
305
401
|
if rename:
|
|
306
402
|
df = df.rename(columns=rename)
|
|
307
403
|
|
|
404
|
+
# column_name is applied only to single-column results; for multi-column, skip
|
|
405
|
+
# (ambiguous which column to rename)
|
|
406
|
+
if column_name and len(df.columns) == 1:
|
|
407
|
+
df = df.rename(columns={df.columns[0]: column_name})
|
|
408
|
+
return df
|
|
409
|
+
|
|
308
410
|
# Single-geo after rename: use indicator ID as column name
|
|
309
411
|
if len(df.columns) == 1:
|
|
310
412
|
col = df.columns[0]
|
|
311
|
-
# If the single column is a geo_name, keep it (user filtered to one geo)
|
|
312
|
-
# If it's still a geo_id string, rename to indicator ID
|
|
313
413
|
if col not in geo_map.values():
|
|
314
414
|
df = df.rename(columns={col: str(self.id)})
|
|
315
415
|
|
esios/processing/i90.py
CHANGED
|
@@ -166,11 +166,15 @@ class I90Sheet:
|
|
|
166
166
|
def _normalize_datetime_columns(self, columns: np.ndarray) -> np.ndarray:
|
|
167
167
|
"""Normalize time column headers to integer period indices.
|
|
168
168
|
|
|
169
|
-
Handles
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
169
|
+
Handles four column formats found in I90 files:
|
|
170
|
+
|
|
171
|
+
1. Sequential integers: 1–24 (hourly) or 1–96 (quarterly)
|
|
172
|
+
2. H-Q format: "1-1", "1-2", "1-3", "1-4", "2-1", …
|
|
173
|
+
3. NaN-filler format: [1, NaN, NaN, NaN, 2, …]
|
|
174
|
+
4. Range format (DST days): "00-01", "01-02", "02-03a", "02-03b", …
|
|
175
|
+
where the first number is the start hour and a/b suffix marks
|
|
176
|
+
the repeated hour on fall-back days. Detected by the first
|
|
177
|
+
column starting with "0" (e.g. "00-01").
|
|
174
178
|
"""
|
|
175
179
|
if any(pd.isna(columns)):
|
|
176
180
|
self._n_columns_totals = 3
|
|
@@ -178,6 +182,17 @@ class I90Sheet:
|
|
|
178
182
|
self._n_columns_totals = 2
|
|
179
183
|
|
|
180
184
|
series = pd.Series(columns, dtype=str).ffill()
|
|
185
|
+
|
|
186
|
+
# Range format (DST): "00-01", "01-02", "02-03a", "02-03b", ...
|
|
187
|
+
# Detected by first column starting with "0" (sequential ints start at 1).
|
|
188
|
+
first_val = str(columns[0]).strip()
|
|
189
|
+
if first_val.startswith("0") and "-" in first_val:
|
|
190
|
+
# Simply assign sequential 1-based indices.
|
|
191
|
+
# The count of columns (23, 24, or 25 for hourly; 92, 96, or 100
|
|
192
|
+
# for QH) already encodes the DST information. The datetime builder
|
|
193
|
+
# in _preprocess uses these as offsets from Europe/Madrid midnight (held in UTC).
|
|
194
|
+
return np.arange(1, len(columns) + 1)
|
|
195
|
+
|
|
181
196
|
parts = series.str.split("-")
|
|
182
197
|
hours = parts.str[0].astype(float).astype(int)
|
|
183
198
|
|
|
@@ -251,12 +266,18 @@ class I90Sheet:
|
|
|
251
266
|
self.frequency = "hourly"
|
|
252
267
|
time_deltas = columns_date * 60 # minutes
|
|
253
268
|
|
|
254
|
-
# Build datetime index
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
269
|
+
# Build datetime index in UTC to avoid DST ambiguity.
|
|
270
|
+
# On fall-back days (Oct), I90 has 25 hourly periods (or 100 QH).
|
|
271
|
+
# Naïve offset arithmetic creates a single 02:00 that tz_localize
|
|
272
|
+
# cannot disambiguate. By anchoring midnight in Europe/Madrid,
|
|
273
|
+
# converting to UTC, then adding offsets, each period maps to a
|
|
274
|
+
# unique UTC instant — no ambiguity.
|
|
275
|
+
# On spring-forward days (Mar), I90 has 23 periods (or 92 QH)
|
|
276
|
+
# and this approach naturally skips the non-existent hour.
|
|
277
|
+
midnight_utc = pd.Timestamp(
|
|
278
|
+
self.metadata["date_data"], tz="Europe/Madrid"
|
|
279
|
+
).tz_convert("UTC")
|
|
280
|
+
columns_datetime = midnight_utc + pd.to_timedelta(time_deltas, unit="m")
|
|
260
281
|
|
|
261
282
|
data = pd.DataFrame(self.rows[idx + 1 :], columns=columns)
|
|
262
283
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: python-esios
|
|
3
|
-
Version: 2.2.0
|
|
3
|
+
Version: 2.4.0
|
|
4
4
|
Summary: A Python wrapper for the ESIOS API (Spanish electricity market)
|
|
5
5
|
Project-URL: Homepage, https://github.com/datons/python-esios
|
|
6
6
|
Project-URL: Repository, https://github.com/datons/python-esios
|
|
@@ -3,7 +3,7 @@ esios/async_client.py,sha256=OVNNZwFbvPyUnu7LVr7X5MdXlk_-AJ1lfkUE0OODlbQ,3452
|
|
|
3
3
|
esios/cache.py,sha256=GgbrL9Rc9aLrEWHvXtQOCGQRgq2T4m6VBJDvBJfWMTk,18920
|
|
4
4
|
esios/catalog.py,sha256=xWwMx5I32m34npjAXHh-Ua4e_0pfG89yxUC_Vy9VlAA,16811
|
|
5
5
|
esios/client.py,sha256=rLgdyPFII6CC_TJwgkHaScJ7nBUpt85N94mujKAn0d0,5825
|
|
6
|
-
esios/constants.py,sha256=
|
|
6
|
+
esios/constants.py,sha256=yfxSNG37i4dkpa7x0CBvXTroyddn5jhNTuWGDhAq3-0,1074
|
|
7
7
|
esios/exceptions.py,sha256=AiWLdRDWj50JEsld9CvVBsfLnZZKFmW62_bZmZ7Z_eA,899
|
|
8
8
|
esios/.agents/skills/esios/SKILL.md,sha256=_5wCzMMB8FHWcAPeMA5vGklZFEGBEvU5wBOryNIogzM,6252
|
|
9
9
|
esios/cli/__init__.py,sha256=9gd5ZDIH1-yNP_xcd60ethOFXm9w6un0CJ9CX0Qvb2A,256
|
|
@@ -24,7 +24,7 @@ esios/data/time_periods.yaml,sha256=oyisKYYyOGA57eEAqkFFx6B3x9rdSl0DokZe5gNZfMw,
|
|
|
24
24
|
esios/managers/__init__.py,sha256=-1AwL7arUf7WEZn1RSiK_DZhY3j6U4GE9_dqjbukCJc,268
|
|
25
25
|
esios/managers/archives.py,sha256=PG-1gQYEiJUVQQtTKIZeEoWIsS-gkWT3ZHy89c8tTW8,9293
|
|
26
26
|
esios/managers/base.py,sha256=7XcdrUtUOPuqfHYlz4w562TD8o9cNdBWOgs4CHHonoo,835
|
|
27
|
-
esios/managers/indicators.py,sha256=
|
|
27
|
+
esios/managers/indicators.py,sha256=4f1wLhT33Fc93ixHr51DIzIBqzznJSaoeLfWOT-2EQ0,20260
|
|
28
28
|
esios/managers/offer_indicators.py,sha256=0MjEKkj77YC2fRSHVTEc7FW6E8AuwwciAXK-bOVEL5Q,4187
|
|
29
29
|
esios/models/__init__.py,sha256=oppuTASpf0Dh2KbGMXInULT0F4sELjeo-9UhPiPOZiA,289
|
|
30
30
|
esios/models/archive.py,sha256=P2LaT7_ff4ujwqVn_ofgQP3dbpf7jqON0R22dKwSJ_w,1062
|
|
@@ -32,10 +32,10 @@ esios/models/indicator.py,sha256=u1AJyEA3YeOqQFjV08_lzyMaofuCiMoLPjvosls9gfE,111
|
|
|
32
32
|
esios/models/offer_indicator.py,sha256=nA80Y7Yp0utDaDOdZ-ObcWTsAdhvuXlfJjJBpdVQ7Lo,758
|
|
33
33
|
esios/processing/__init__.py,sha256=1kLt_gO_wDhXM1BbY0zTyfAYo-CjYKW1ljgRRDZ7USM,278
|
|
34
34
|
esios/processing/dataframes.py,sha256=OitzBvAerssGP2VXNC-sSO48XsHdIB2nKTUgByN5eYQ,2524
|
|
35
|
-
esios/processing/i90.py,sha256=
|
|
35
|
+
esios/processing/i90.py,sha256=fI8DfY8CD2kF1_ZrAzuEDxN0m7Vh3CV3dIn32lxKffA,11687
|
|
36
36
|
esios/processing/zip.py,sha256=12LbFHJTdX_h3JG-clEgQ4Haj-kw0UjfopGLlCRXfGM,1913
|
|
37
|
-
python_esios-2.
|
|
38
|
-
python_esios-2.
|
|
39
|
-
python_esios-2.
|
|
40
|
-
python_esios-2.
|
|
41
|
-
python_esios-2.
|
|
37
|
+
python_esios-2.4.0.dist-info/METADATA,sha256=STVMDUwpgk6ZOx79KXOMPwn-t1aIvhB8MdsBmQtfdkk,3169
|
|
38
|
+
python_esios-2.4.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
39
|
+
python_esios-2.4.0.dist-info/entry_points.txt,sha256=7ngseyIyvJ4buTHFL9htaZ4tTFHpG4zzJNkc8B5Jr8U,40
|
|
40
|
+
python_esios-2.4.0.dist-info/licenses/LICENSE,sha256=LorLs1-VeBW70Wo9fLAtLJN7nNd6Poy0xzvqdWVqFlE,35128
|
|
41
|
+
python_esios-2.4.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|