pydeflate 2.1.3__py3-none-any.whl → 2.2.0__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the registry.
- pydeflate/__init__.py +64 -20
- pydeflate/cache.py +139 -0
- pydeflate/constants.py +121 -0
- pydeflate/context.py +211 -0
- pydeflate/core/api.py +33 -11
- pydeflate/core/source.py +92 -11
- pydeflate/deflate/deflators.py +1 -1
- pydeflate/deflate/legacy_deflate.py +1 -1
- pydeflate/exceptions.py +166 -0
- pydeflate/exchange/exchangers.py +1 -1
- pydeflate/plugins.py +289 -0
- pydeflate/protocols.py +168 -0
- pydeflate/pydeflate_config.py +77 -6
- pydeflate/schemas.py +297 -0
- pydeflate/sources/common.py +59 -107
- pydeflate/sources/dac.py +39 -52
- pydeflate/sources/imf.py +23 -39
- pydeflate/sources/world_bank.py +44 -117
- pydeflate/utils.py +14 -9
- {pydeflate-2.1.3.dist-info → pydeflate-2.2.0.dist-info}/METADATA +119 -18
- pydeflate-2.2.0.dist-info/RECORD +32 -0
- pydeflate-2.2.0.dist-info/WHEEL +4 -0
- {pydeflate-2.1.3.dist-info → pydeflate-2.2.0.dist-info/licenses}/LICENSE +1 -1
- pydeflate-2.1.3.dist-info/RECORD +0 -25
- pydeflate-2.1.3.dist-info/WHEEL +0 -4
pydeflate/sources/common.py
CHANGED
@@ -1,41 +1,19 @@
-from
-
+from __future__ import annotations
+
 from typing import Any, Literal

 import pandas as pd
 from hdx.location.country import Country

-from pydeflate.pydeflate_config import
+from pydeflate.pydeflate_config import logger

 AvailableDeflators = Literal["NGDP_D", "NGDP_DL", "CPI", "PCPI", "PCPIE"]


-def check_file_age(file: Path) -> int:
-    """Check the age of a WEO file in days.
-
-    Args:
-        file (Path): The WEO parquet file to check.
-
-    Returns:
-        int: The number of days since the file was created.
-    """
-    current_date = datetime.today()
-    # Extract date from the filename (format: weo_YYYY-MM-DD.parquet)
-    file_date = datetime.strptime(file.stem.split("_")[-1], "%Y-%m-%d")
-
-    # Return the difference in days between today and the file's date
-    return (current_date - file_date).days
-
-
 def enforce_pyarrow_types(df: pd.DataFrame) -> pd.DataFrame:
-    """
-    return df.convert_dtypes(dtype_backend="pyarrow")
-
-
-def today() -> str:
-    from datetime import datetime
+    """Ensure that a DataFrame uses pyarrow-backed dtypes."""

-    return
+    return df.convert_dtypes(dtype_backend="pyarrow")


 def _match_regex_to_iso3(
@@ -52,20 +30,17 @@ def _match_regex_to_iso3(
     if additional_mapping is None:
         additional_mapping = {}

-    # Create a Country object
     country = Country()
-
-    # Match the regex strings to ISO3 country codes
-    matches = {}
+    matches: dict[str, str | None] = {}

     for match in to_match:
         try:
             match_ = country.get_iso3_country_code_fuzzy(match)[0]
-        except:
+        except Exception:  # pragma: no cover - defensive logging
             match_ = None
         matches[match] = match_
         if match_ is None and match not in additional_mapping:
-            logger.debug(
+            logger.debug("No ISO3 match found for %s", match)

     return matches | additional_mapping

@@ -76,7 +51,7 @@ def convert_id(
     to_type: str = "ISO3",
     not_found: Any = None,
     *,
-    additional_mapping: dict = None,
+    additional_mapping: dict | None = None,
 ) -> pd.Series:
     """Takes a Pandas' series with country IDs and converts them into the desired type.

@@ -93,7 +68,6 @@ def convert_id(
         the same datatype as the target type.
     """

-    # if from and to are the same, return without changing anything
     if from_type == to_type:
         return series

@@ -107,7 +81,6 @@ def convert_id(
     mapping = mapping_functions[from_type](
         to_match=s_unique, additional_mapping=additional_mapping
     )
-
     return series.map(mapping).fillna(series if not_found is None else not_found)


@@ -141,7 +114,6 @@ def add_pydeflate_iso3(
             "Sub-Sahara Africa": "SSA",
         },
     )
-
     return df


@@ -160,7 +132,6 @@ def prefix_pydeflate_to_columns(
     df.columns = [
         f"{prefix}{col}" if not col.startswith(prefix) else col for col in df.columns
     ]
-
     return df


@@ -187,7 +158,7 @@ def compute_exchange_deflator(
     base_year_measure: str | None = None,
     exchange: str = "EXCHANGE",
     year: str = "year",
-    grouper: list[str] = None,
+    grouper: list[str] | None = None,
 ) -> pd.DataFrame:
     """Compute the exchange rate deflator for each group of entities.

@@ -205,87 +176,68 @@ def compute_exchange_deflator(
         pd.DataFrame: DataFrame with an additional column for the exchange rate deflator.
     """

-    def
+    def _compute_deflator_for_group(
         group: pd.DataFrame,
-        measure: str | None
-
-
+        measure: str | None,
+        exchange_col: str,
+        year_col: str,
+        deflator_col: str,
     ) -> pd.DataFrame:
-
-        #
-        if exchange.endswith("_to") or exchange.endswith("_from"):
-            exchange_name = exchange.rsplit("_", 1)[0]
-        else:
-            exchange_name = exchange
-
-        # Identify the base year for the deflator
+        """Compute deflator for a single group and add it as a column."""
+        # Identify base year
         if measure is not None:
-            base_year = identify_base_year(group, measure=measure, year=
+            base_year = identify_base_year(group, measure=measure, year=year_col)
         else:
-
+            valid_rows = group.dropna(subset=[exchange_col])
+            base_year = valid_rows[year_col].max() if not valid_rows.empty else None

-        # If no base year
+        # If no base year found, return group without deflator column
         if base_year is None or pd.isna(base_year):
             return group

         # Extract the exchange rate value for the base year
-
+        base_value_rows = group.loc[group[year_col] == base_year, exchange_col]

-        # If base value
-        if
-            group
-
-
+        # If no valid base value, return group without deflator column
+        if base_value_rows.empty or pd.isna(base_value_rows.iloc[0]):
+            return group
+
+        # Calculate and add deflator column
+        base_value = base_value_rows.iloc[0]
+        group = group.copy()
+        group[deflator_col] = round(100 * group[exchange_col] / base_value, 6)

         return group

     if grouper is None:
         grouper = ["entity", "entity_code"]

-    #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    #
-
-
-
-
-    download_func()
-    files = file_finder_func(PYDEFLATE_PATHS.data)
-
-    # If files are found, sort them by age and load the most recent one
-    if len(files) > 0:
-        files = sorted(files, key=check_file_age)
-        latest_file = files[0]
-
-        # Check if the latest file is older than 120 days and log a warning
-        if check_file_age(latest_file) > 120:
-            logger.warn(
-                f"The latest {data_name} data is more than 120 days old.\n"
-                f"Consider updating by setting update=True in the function call."
-            )
-
-        # Read and return the latest parquet file as a DataFrame
-        logger.info(f"Reading {data_name} data from {latest_file}")
-        return pd.read_parquet(latest_file)
+    # Determine the exchange column name for the deflator
+    if exchange.endswith("_to") or exchange.endswith("_from"):
+        exchange_name = exchange.rsplit("_", 1)[0]
+    else:
+        exchange_name = exchange
+
+    deflator_col = f"{exchange_name}_D"
+
+    # Process each group and concatenate results
+    # This approach avoids the FutureWarning from groupby().apply() operating on grouping columns
+    processed_groups = []
+    for name, group in df.groupby(grouper, sort=False):
+        processed_group = _compute_deflator_for_group(
+            group=group,
+            measure=base_year_measure,
+            exchange_col=exchange,
+            year_col=year,
+            deflator_col=deflator_col,
+        )
+        processed_groups.append(processed_group)
+
+    # Concatenate all processed groups and restore original row order
+    result = pd.concat(processed_groups, ignore_index=False)
+
+    # Sort by index to restore original row order
+    # (groupby may have changed the order when grouping rows together)
+    result = result.sort_index()
+
+    return result
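For context, the refactored compute_exchange_deflator rebases each group's exchange rate series to 100 in its base year and reassembles the groups with pd.concat instead of groupby().apply(). A minimal standalone sketch of that rebasing pattern (toy data and a hypothetical rebase_group helper, not the pydeflate API itself):

    import pandas as pd

    # Toy frame with two entities and an EXCHANGE series (illustrative data only)
    df = pd.DataFrame(
        {
            "entity": ["FRA", "FRA", "USA", "USA"],
            "entity_code": [4, 4, 302, 302],
            "year": [2022, 2023, 2022, 2023],
            "EXCHANGE": [0.95, 0.92, 1.0, 1.0],
        }
    )

    def rebase_group(group: pd.DataFrame) -> pd.DataFrame:
        # Use the latest year with a valid exchange rate as the base year
        valid = group.dropna(subset=["EXCHANGE"])
        if valid.empty:
            return group
        base_value = group.loc[group["year"] == valid["year"].max(), "EXCHANGE"].iloc[0]
        group = group.copy()
        group["EXCHANGE_D"] = round(100 * group["EXCHANGE"] / base_value, 6)
        return group

    # Same pattern as the new code: explicit loop over groups, then concat + sort_index
    pieces = [rebase_group(g) for _, g in df.groupby(["entity", "entity_code"], sort=False)]
    result = pd.concat(pieces).sort_index()
    print(result)

The explicit loop keeps grouping columns untouched and sidesteps the pandas FutureWarning that the old groupby().apply() approach triggered.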
pydeflate/sources/dac.py
CHANGED
@@ -1,71 +1,58 @@
+from __future__ import annotations
+
 from pathlib import Path

 import pandas as pd
 from oda_reader import download_dac1

-from pydeflate.
+from pydeflate.cache import CacheEntry, cache_manager
+from pydeflate.pydeflate_config import logger
 from pydeflate.sources.common import (
-    today,
     add_pydeflate_iso3,
-    enforce_pyarrow_types,
     compute_exchange_deflator,
-
+    enforce_pyarrow_types,
     prefix_pydeflate_to_columns,
 )


-def _find_dac_files_in_path(path: Path) -> list:
-    """Find all DAC parquet files in the specified directory.
-
-    Args:
-        path (Path): The directory path to search for DAC parquet files.
-
-    Returns:
-        list: List of DAC parquet files found in the directory.
-    """
-    return list(path.glob("dac_*.parquet"))
-
-
 def _to_units(df: pd.DataFrame) -> pd.DataFrame:
-    """
-
-    Args:
-        df (pd.DataFrame): Dataframe with raw observation values.
+    """Scale reported DAC values (supplied in millions) into base units."""

-    Returns:
-        pd.DataFrame: Dataframe with scaled observation values.
-    """
     df = df.copy()
     df["value"] = df["value"] * df["unit_multiplier"]
     return df


 def _keep_official_definition_only(df: pd.DataFrame) -> pd.DataFrame:
+    """Retain rows matching the official DAC definition across regime changes."""
+
     query = (
         "(aidtype_code == 1010 & flows_code == 1140 & year <2018 ) | "
         "(aidtype_code == 11010 & flows_code == 1160 & year >=2018)"
     )
-
     return df.query(query)


 def _keep_useful_columns(df: pd.DataFrame) -> pd.DataFrame:
-    columns
+    """Select the key columns used downstream in pydeflate."""

-    return df.filter(
+    return df.filter(["year", "donor_code", "donor_name", "EXCHANGE", "DAC_DEFLATOR"])


 def _pivot_amount_type(df: pd.DataFrame) -> pd.DataFrame:
+    """Pivot amount-type codes into separate columns (A/N/D)."""
+
     df = df.filter(["year", "donor_code", "donor_name", "amounttype_code", "value"])
     return df.pivot(
-        index=[c for c in df.columns if c not in
+        index=[c for c in df.columns if c not in {"amounttype_code", "value"}],
         columns="amounttype_code",
         values="value",
     ).reset_index()


 def _compute_exchange(df: pd.DataFrame) -> pd.DataFrame:
-
+    """Derive exchange rates, forcing DAC aggregates to unity."""
+
     df.loc[lambda d: d.donor_code >= 20000, "N"] = df.loc[
         lambda d: d.donor_code >= 20000, "A"
     ]
@@ -74,32 +61,32 @@ def _compute_exchange(df: pd.DataFrame) -> pd.DataFrame:


 def _compute_dac_deflator(df: pd.DataFrame) -> pd.DataFrame:
+    """Calculate the published DAC price deflator from amounts A/D."""
+
     df["DAC_DEFLATOR"] = round(100 * df["A"] / df["D"], 6)
     return df


 def _compute_dac_gdp_deflator(df: pd.DataFrame) -> pd.DataFrame:
-
+    """Back out a GDP-style deflator using the exchange deflator."""

+    df["NGDP_D"] = round(df["EXCHANGE_D"] / 100 * df["DAC_DEFLATOR"], 5)
     return df


 def _rename_columns(df: pd.DataFrame) -> pd.DataFrame:
-
-        columns={
-            "donor_code": "entity_code",
-            "donor_name": "entity",
-        }
-    )
+    """Align donor metadata with pydeflate naming conventions."""

+    return df.rename(columns={"donor_code": "entity_code", "donor_name": "entity"})

-
-
+
+def _download_dac(output_path: Path) -> None:
+    """Download and cache the DAC statistics parquet file."""
+
+    logger.info("Downloading DAC statistics from ODA reader...")
     df = download_dac1(
         filters={"measure": ["1010", "11010"], "flow_type": ["1140", "1160"]}
     )
-
-    # Clean the data
     df = (
         df.pipe(_to_units)
         .pipe(_keep_official_definition_only)
@@ -115,23 +102,23 @@ def download_dac():
         .pipe(enforce_pyarrow_types)
         .reset_index(drop=True)
     )
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    df.to_parquet(output_path)
+    logger.info("Saved DAC dataset to %s", output_path)

-    # Get today's date to use as a file suffix
-    suffix = today()

-
-
+_DAC_ENTRY = CacheEntry(
+    key="dac_stats",
+    filename="dac.parquet",
+    fetcher=_download_dac,
+    ttl_days=30,
+)


 def read_dac(update: bool = False) -> pd.DataFrame:
-
-    return
-        file_finder_func=_find_dac_files_in_path,
-        download_func=download_dac,
-        data_name="DAC",
-        update=update,
-    )
+    path = cache_manager().ensure(_DAC_ENTRY, refresh=update)
+    return pd.read_parquet(path)


-if __name__ == "__main__":
-
+if __name__ == "__main__":  # pragma: no cover
+    read_dac(update=True)
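The new pydeflate/cache.py is not shown in this section, so the CacheEntry/cache_manager calls above have to be read from their usage. As an illustration only, a minimal cache manager with the same surface (the CacheEntry fields used here and an ensure() that re-fetches missing or stale files) might look roughly like this; the cache directory, class names, and staleness check are assumptions, not the actual implementation:

    from __future__ import annotations

    import time
    from dataclasses import dataclass
    from pathlib import Path
    from typing import Callable

    CACHE_DIR = Path.home() / ".pydeflate"  # assumed location, for illustration only


    @dataclass(frozen=True)
    class CacheEntry:
        key: str                          # logical name of the dataset
        filename: str                     # parquet file stored in the cache directory
        fetcher: Callable[[Path], None]   # writes the dataset to the given path
        ttl_days: int                     # age after which the file is re-fetched


    class CacheManager:
        def __init__(self, directory: Path = CACHE_DIR) -> None:
            self.directory = directory

        def ensure(self, entry: CacheEntry, refresh: bool = False) -> Path:
            """Return the cached file path, fetching it if missing, stale, or forced."""
            path = self.directory / entry.filename
            stale = (
                not path.exists()
                or (time.time() - path.stat().st_mtime) > entry.ttl_days * 86_400
            )
            if refresh or stale:
                entry.fetcher(path)
            return path


    def cache_manager() -> CacheManager:
        return CacheManager()

The real module (pydeflate/cache.py, +139 lines) may add locking, metadata, or configurable paths; this sketch is only meant to make the read_dac/read_weo call sites legible.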
pydeflate/sources/imf.py
CHANGED
@@ -1,15 +1,16 @@
+from __future__ import annotations
+
 from pathlib import Path

 import pandas as pd
 from imf_reader import weo

-from pydeflate.
+from pydeflate.cache import CacheEntry, cache_manager
+from pydeflate.pydeflate_config import logger
 from pydeflate.sources.common import (
-    today,
     add_pydeflate_iso3,
-    enforce_pyarrow_types,
     compute_exchange_deflator,
-
+    enforce_pyarrow_types,
     prefix_pydeflate_to_columns,
 )

@@ -93,7 +94,7 @@ def _keep_useful_columns(df: pd.DataFrame) -> pd.DataFrame:


 def _pivot_concept_code(df: pd.DataFrame) -> pd.DataFrame:
-    """Pivot the concept
+    """Pivot the concept dimension so each indicator becomes a column

     Args:
         df (pd.DataFrame): Dataframe with concept code column.
@@ -102,7 +103,7 @@ def _pivot_concept_code(df: pd.DataFrame) -> pd.DataFrame:
         pd.DataFrame: Dataframe with concept code pivoted to columns.
     """
     return df.pivot(
-        index=[c for c in df.columns if c not in
+        index=[c for c in df.columns if c not in {"concept_code", "value"}],
        columns="concept_code",
        values="value",
     ).reset_index()
@@ -171,15 +172,13 @@ def _create_eur_series(df: pd.DataFrame) -> pd.DataFrame:
     df.loc[df.entity_code == 998, "EXCHANGE"] = df.loc[
         df.entity_code == 998, "year"
     ].map(eur)
-
     return df


-def
-    """
-    logger.info("Downloading the latest WEO data...")
+def _download_weo(output_path: Path) -> None:
+    """Fetch, transform, and store the latest WEO dataset in Parquet format."""

-
+    logger.info("Downloading the latest IMF WEO dataset...")
     df = (
         weo.fetch_data()
         .pipe(_filter_indicators)
@@ -195,38 +194,23 @@ def download_weo() -> None:
         .pipe(enforce_pyarrow_types)
         .reset_index(drop=True)
     )
-
-
-
-
-    # Save the processed dataframe to parquet format
-    df.to_parquet(PYDEFLATE_PATHS.data / f"weo_{suffix}.parquet")
-
-    logger.info(f"Saved WEO data to weo_{suffix}.parquet")
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    df.to_parquet(output_path)
+    logger.info("Saved WEO data to %s", output_path)


-
-    ""
-
-
-
-
-    Returns:
-        list: List of WEO parquet files found in the directory.
-    """
-    return list(path.glob("weo_*.parquet"))
+_IMF_CACHE_ENTRY = CacheEntry(
+    key="imf_weo",
+    filename="imf_weo.parquet",
+    fetcher=_download_weo,
+    ttl_days=60,
+)


 def read_weo(update: bool = False) -> pd.DataFrame:
-
-    return
-        file_finder_func=_find_weo_files_in_path,
-        download_func=download_weo,
-        data_name="WEO",
-        update=update,
-    )
+    path = cache_manager().ensure(_IMF_CACHE_ENTRY, refresh=update)
+    return pd.read_parquet(path)


-if __name__ == "__main__":
-
-    dfi = read_weo(update=True)
+if __name__ == "__main__":  # pragma: no cover
+    read_weo(update=True)
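Taken together, both source modules now share the same read path: declare a CacheEntry, let the cache manager ensure a fresh parquet file, and read it back. A short usage sketch based on the functions shown in this diff (the printed columns depend on the transforms applied upstream, e.g. prefix_pydeflate_to_columns):

    from pydeflate.sources.dac import read_dac
    from pydeflate.sources.imf import read_weo

    # First call downloads and caches the parquet file; later calls reuse it
    dac = read_dac()              # served from cache unless older than its ttl_days
    weo = read_weo(update=True)   # force a refresh of the IMF WEO dataset

    print(dac.head())
    print(weo.dtypes)             # pyarrow-backed dtypes via enforce_pyarrow_types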