pydeflate 2.0.1__tar.gz → 2.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. {pydeflate-2.0.1 → pydeflate-2.1.0}/PKG-INFO +1 -1
  2. {pydeflate-2.0.1 → pydeflate-2.1.0}/pydeflate/__init__.py +7 -2
  3. {pydeflate-2.0.1 → pydeflate-2.1.0}/pydeflate/core/api.py +3 -1
  4. {pydeflate-2.0.1 → pydeflate-2.1.0}/pydeflate/core/exchange.py +0 -2
  5. {pydeflate-2.0.1 → pydeflate-2.1.0}/pydeflate/core/source.py +10 -1
  6. {pydeflate-2.0.1 → pydeflate-2.1.0}/pydeflate/exchange/exchangers.py +48 -17
  7. {pydeflate-2.0.1 → pydeflate-2.1.0}/pydeflate/sources/common.py +1 -1
  8. {pydeflate-2.0.1 → pydeflate-2.1.0}/pydeflate/sources/world_bank.py +111 -7
  9. {pydeflate-2.0.1 → pydeflate-2.1.0}/pydeflate/utils.py +12 -7
  10. {pydeflate-2.0.1 → pydeflate-2.1.0}/pyproject.toml +1 -1
  11. {pydeflate-2.0.1 → pydeflate-2.1.0}/LICENSE +0 -0
  12. {pydeflate-2.0.1 → pydeflate-2.1.0}/README.md +0 -0
  13. {pydeflate-2.0.1 → pydeflate-2.1.0}/pydeflate/.pydeflate_data/README.md +0 -0
  14. {pydeflate-2.0.1 → pydeflate-2.1.0}/pydeflate/core/__init__.py +0 -0
  15. {pydeflate-2.0.1 → pydeflate-2.1.0}/pydeflate/core/deflator.py +0 -0
  16. {pydeflate-2.0.1 → pydeflate-2.1.0}/pydeflate/deflate/__init__.py +0 -0
  17. {pydeflate-2.0.1 → pydeflate-2.1.0}/pydeflate/deflate/deflators.py +0 -0
  18. {pydeflate-2.0.1 → pydeflate-2.1.0}/pydeflate/deflate/legacy_deflate.py +0 -0
  19. {pydeflate-2.0.1 → pydeflate-2.1.0}/pydeflate/exchange/__init__.py +0 -0
  20. {pydeflate-2.0.1 → pydeflate-2.1.0}/pydeflate/pydeflate_config.py +0 -0
  21. {pydeflate-2.0.1 → pydeflate-2.1.0}/pydeflate/settings/emu.json +0 -0
  22. {pydeflate-2.0.1 → pydeflate-2.1.0}/pydeflate/settings/oecd_codes.json +0 -0
  23. {pydeflate-2.0.1 → pydeflate-2.1.0}/pydeflate/sources/__init__.py +0 -0
  24. {pydeflate-2.0.1 → pydeflate-2.1.0}/pydeflate/sources/dac.py +0 -0
  25. {pydeflate-2.0.1 → pydeflate-2.1.0}/pydeflate/sources/imf.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pydeflate
3
- Version: 2.0.1
3
+ Version: 2.1.0
4
4
  Summary: Package to convert current prices figures to constant prices and vice versa
5
5
  License: MIT
6
6
  Author: Jorge Rivera
@@ -1,5 +1,5 @@
1
1
  __author__ = """Jorge Rivera"""
2
- __version__ = "2.0.1"
2
+ __version__ = "2.1.0"
3
3
 
4
4
  from pydeflate.deflate.deflators import (
5
5
  oecd_dac_deflate,
@@ -12,7 +12,12 @@ from pydeflate.deflate.deflators import (
12
12
  )
13
13
 
14
14
  from pydeflate.deflate.legacy_deflate import deflate
15
- from pydeflate.exchange.exchangers import oecd_dac_exchange, wb_exchange, imf_exchange
15
+ from pydeflate.exchange.exchangers import (
16
+ oecd_dac_exchange,
17
+ wb_exchange,
18
+ wb_exchange_ppp,
19
+ imf_exchange,
20
+ )
16
21
  from pydeflate.pydeflate_config import setup_logger
17
22
 
18
23
 
@@ -77,7 +77,9 @@ def _base_operation(
77
77
  )
78
78
 
79
79
  # Flag missing data
80
- flag_missing_pydeflate_data(base_obj._unmatched_data)
80
+ flag_missing_pydeflate_data(
81
+ base_obj._unmatched_data, entity_column=entity_column, year_column=year_column
82
+ )
81
83
  x = base_obj._merged_data[value_column]
82
84
  y = base_obj._merged_data[
83
85
  "pydeflate_EXCHANGE" if exchange else "pydeflate_deflator"
@@ -37,8 +37,6 @@ class Exchange:
37
37
  self.exchange_data = self.exchange_rate(
38
38
  self.source_currency, self.target_currency
39
39
  )
40
- if self.source_currency == "USA":
41
- self.exchange_data["pydeflate_EXCHANGE_D"] = 1
42
40
 
43
41
  def _get_exchange_rate(self, currency):
44
42
  """Helper function to fetch exchange rates for a given currency."""
@@ -5,7 +5,7 @@ import pandas as pd
5
5
  from pydeflate.sources.common import AvailableDeflators
6
6
  from pydeflate.sources.dac import read_dac
7
7
  from pydeflate.sources.imf import read_weo
8
- from pydeflate.sources.world_bank import read_wb
8
+ from pydeflate.sources.world_bank import read_wb, read_wb_lcu_ppp, read_wb_usd_ppp
9
9
 
10
10
 
11
11
  @dataclass
@@ -49,6 +49,15 @@ class WorldBank(Source):
49
49
  super().__init__(name="World Bank", reader=read_wb, update=update)
50
50
 
51
51
 
52
class WorldBankPPP(Source):
    """``Source`` named "World Bank PPP", backed by one of the two PPP readers.

    Args:
        update: Passed through to ``Source`` as ``update``.
        from_lcu: Keyword-only switch for the reader. When true the source
            reads through ``read_wb_lcu_ppp``; otherwise it reads through
            ``read_wb_usd_ppp``.
    """

    def __init__(self, update: bool = False, *, from_lcu: bool = True):
        # Pick the reader first so the parent call stays a single flat line.
        if from_lcu:
            reader = read_wb_lcu_ppp
        else:
            reader = read_wb_usd_ppp
        super().__init__(name="World Bank PPP", reader=reader, update=update)
59
+
60
+
52
61
  class DAC(Source):
53
62
  def __init__(self, update: bool = False):
54
63
  super().__init__(name="DAC", reader=read_dac, update=update)
@@ -1,9 +1,10 @@
1
1
  from functools import wraps
2
2
 
3
3
  import pandas as pd
4
+ from frictionless.console.common import source
4
5
 
5
6
  from pydeflate.core.api import BaseExchange
6
- from pydeflate.core.source import DAC, WorldBank, IMF
7
+ from pydeflate.core.source import DAC, WorldBank, IMF, WorldBankPPP
7
8
 
8
9
 
9
10
  def _generate_docstring(source_name: str) -> str:
@@ -28,26 +29,33 @@ def _generate_docstring(source_name: str) -> str:
28
29
  )
29
30
 
30
31
 
31
- def _exchange(exchange_source_cls):
32
+ def _exchange(exchange_source_cls, **fixed_params):
32
33
  """Decorator to create exchange wrappers with specific source"""
33
34
 
34
35
  def decorator(func):
35
36
  @wraps(func)
36
- def wrapper(
37
- data: pd.DataFrame,
38
- *,
39
- source_currency: str = "USA",
40
- target_currency: str = "USA",
41
- id_column: str = "iso_code",
42
- year_column: str = "year",
43
- use_source_codes: bool = False,
44
- value_column: str = "value",
45
- target_value_column: str = "value",
46
- reversed_: bool = False,
47
- year_format: str | None = None,
48
- update_rates: bool = False,
49
- ):
37
+ def wrapper(data: pd.DataFrame, **kwargs):
50
38
  # Validate input parameters
39
+ for param in fixed_params:
40
+ if param in kwargs:
41
+ raise ValueError(
42
+ f"The parameter '{param}' cannot be passed to this function."
43
+ )
44
+ # set fixed parameters
45
+ kwargs.update(fixed_params)
46
+
47
+ # Unpack the parameters
48
+ source_currency = kwargs.get("source_currency", "USA")
49
+ target_currency = kwargs.get("target_currency", "USA")
50
+ id_column = kwargs.get("id_column", "iso_code")
51
+ year_column = kwargs.get("year_column", "year")
52
+ use_source_codes = kwargs.get("use_source_codes", False)
53
+ value_column = kwargs.get("value_column", "value")
54
+ target_value_column = kwargs.get("target_value_column", "value")
55
+ reversed_ = kwargs.get("reversed_", False)
56
+ year_format = kwargs.get("year_format", None)
57
+ update_rates = kwargs.get("update_rates", False)
58
+
51
59
  if not isinstance(data, pd.DataFrame):
52
60
  raise ValueError("The 'data' parameter must be a pandas DataFrame.")
53
61
 
@@ -68,7 +76,14 @@ def _exchange(exchange_source_cls):
68
76
  to_exchange = data.copy()
69
77
 
70
78
  # Initialize the deflator source
71
- source = exchange_source_cls(update=update_rates)
79
+ if exchange_source_cls.__name__ == "WorldBankPPP":
80
+ source = exchange_source_cls(
81
+ update=update_rates,
82
+ from_lcu=False if source_currency == "USA" else True,
83
+ )
84
+ source_currency = "LCU" if source_currency == "USA" else source_currency
85
+ else:
86
+ source = exchange_source_cls(update=update_rates)
72
87
 
73
88
  # Create a deflator object
74
89
  exchange = BaseExchange(
@@ -130,6 +145,22 @@ def wb_exchange(
130
145
  ) -> pd.DataFrame: ...
131
146
 
132
147
 
148
+ @_exchange(WorldBankPPP, target_currency="PPP")
149
+ def wb_exchange_ppp(
150
+ data: pd.DataFrame,
151
+ *,
152
+ source_currency: str = "USA",
153
+ id_column: str = "iso_code",
154
+ year_column: str = "year",
155
+ use_source_codes: bool = False,
156
+ value_column: str = "value",
157
+ target_value_column: str = "value",
158
+ reversed_: bool = False,
159
+ year_format: str | None = None,
160
+ update_rates: bool = False,
161
+ ) -> pd.DataFrame: ...
162
+
163
+
133
164
  @_exchange(IMF)
134
165
  def imf_exchange(
135
166
  data: pd.DataFrame,
@@ -21,7 +21,7 @@ def check_file_age(file: Path) -> int:
21
21
  """
22
22
  current_date = datetime.today()
23
23
  # Extract date from the filename (format: weo_YYYY-MM-DD.parquet)
24
- file_date = datetime.strptime(file.stem.split("_")[1], "%Y-%m-%d")
24
+ file_date = datetime.strptime(file.stem.split("_")[-1], "%Y-%m-%d")
25
25
 
26
26
  # Return the difference in days between today and the file's date
27
27
  return (current_date - file_date).days
@@ -21,6 +21,18 @@ _INDICATORS: dict = {
21
21
  "PA.NUS.FCRF": "EXCHANGE", # Official Exchange Rate
22
22
  }
23
23
 
24
+ _INDICATORS_LCU_PPP: dict = {
25
+ "NY.GDP.DEFL.ZS": "NGDP_D", # GDP Deflator (Index)
26
+ "NY.GDP.DEFL.ZS.AD": "NGDP_DL", # GDP Deflator linked series
27
+ "PA.NUS.PPP": "EXCHANGE", # PPP conversion factor
28
+ }
29
+
30
+ _INDICATORS_USD_PPP: dict = {
31
+ "NY.GDP.DEFL.ZS": "NGDP_D", # GDP Deflator (Index)
32
+ "NY.GDP.DEFL.ZS.AD": "NGDP_DL", # GDP Deflator linked series
33
+ "PA.NUS.PPPC.RF": "EXCHANGE", # PPP conversion factor to market exchange rate
34
+ }
35
+
24
36
 
25
37
  def get_wb_indicator(series: str, value_name: str | None = None) -> pd.DataFrame:
26
38
  """Fetch a World Bank indicator and transform it into a cleaned DataFrame.
@@ -126,7 +138,28 @@ def _parallel_download_indicators(indicators: dict) -> list[pd.DataFrame]:
126
138
  return dfs
127
139
 
128
140
 
129
- def download_wb() -> None:
141
+ def _add_ppp_ppp_exchange(df: pd.DataFrame) -> pd.DataFrame:
142
+ """
143
+ Add the PPP exchange rate to the DataFrame.
144
+
145
+ Args:
146
+ df: pd.DataFrame: The DataFrame containing the World Bank data.
147
+
148
+ Returns:
149
+ pd.DataFrame: The DataFrame with the PPP exchange rates
150
+
151
+ """
152
+ ppp = df.loc[lambda d: d["entity_code"] == "USA"].copy()
153
+ ppp[["entity_code", "entity", "pydeflate_iso3"]] = "PPP"
154
+
155
+ df = pd.concat([df, ppp], ignore_index=True)
156
+
157
+ return df
158
+
159
+
160
+ def _download_wb(
161
+ indicators: dict, prefix: str = "wb", add_ppp_exchange: bool = False
162
+ ) -> None:
130
163
  """Download multiple World Bank indicators in parallel and save as a parquet file.
131
164
 
132
165
  This function fetches all indicators defined in _INDICATORS in parallel, concatenates
@@ -134,7 +167,7 @@ def download_wb() -> None:
134
167
  """
135
168
  logger.info("Downloading the latest World Bank data...")
136
169
 
137
- indicators_data = _parallel_download_indicators(indicators=_INDICATORS)
170
+ indicators_data = _parallel_download_indicators(indicators=indicators)
138
171
 
139
172
  # Concatenate all DataFrames horizontally (by columns)
140
173
  df = pd.concat(indicators_data, axis=1).reset_index()
@@ -145,7 +178,13 @@ def download_wb() -> None:
145
178
  .pipe(compute_exchange_deflator, base_year_measure="NGDP_D")
146
179
  .assign(pydeflate_iso3=lambda d: d.entity_code)
147
180
  .sort_values(by=["year", "entity_code"])
148
- .pipe(prefix_pydeflate_to_columns)
181
+ )
182
+
183
+ if add_ppp_exchange:
184
+ df = df.pipe(_add_ppp_ppp_exchange)
185
+
186
+ df = (
187
+ df.pipe(prefix_pydeflate_to_columns)
149
188
  .pipe(enforce_pyarrow_types)
150
189
  .reset_index(drop=True)
151
190
  )
@@ -154,10 +193,29 @@ def download_wb() -> None:
154
193
  suffix = today()
155
194
 
156
195
  # Save the DataFrame as a parquet file
157
- output_path = PYDEFLATE_PATHS.data / f"wb_{suffix}.parquet"
196
+ output_path = PYDEFLATE_PATHS.data / f"{prefix}_{suffix}.parquet"
158
197
  df.to_parquet(output_path)
159
198
 
160
- logger.info(f"Saved World Bank data to wb_{suffix}.parquet")
199
+ logger.info(f"Saved World Bank data to {prefix}_{suffix}.parquet")
200
+
201
+
202
def download_wb() -> None:
    """Download the ``_INDICATORS`` series through ``_download_wb``.

    The file prefix is ``"wb"`` and no synthetic PPP rows are requested.
    """
    _download_wb(indicators=_INDICATORS, prefix="wb", add_ppp_exchange=False)
205
+
206
+
207
def download_wb_lcu_ppp() -> None:
    """Download the ``_INDICATORS_LCU_PPP`` series through ``_download_wb``.

    The file prefix is ``"wb_lcu_ppp"`` and the PPP rows are requested via
    ``add_ppp_exchange=True``.
    """
    _download_wb(
        _INDICATORS_LCU_PPP,
        prefix="wb_lcu_ppp",
        add_ppp_exchange=True,
    )
212
+
213
+
214
def download_wb_usd_ppp() -> None:
    """Download the ``_INDICATORS_USD_PPP`` series through ``_download_wb``.

    The file prefix is ``"wb_usd_ppp"`` and the PPP rows are requested via
    ``add_ppp_exchange=True``.
    """
    _download_wb(
        _INDICATORS_USD_PPP,
        prefix="wb_usd_ppp",
        add_ppp_exchange=True,
    )
161
219
 
162
220
 
163
221
  def _find_wb_files_in_path(path: Path) -> list:
@@ -169,7 +227,31 @@ def _find_wb_files_in_path(path: Path) -> list:
169
227
  Returns:
170
228
  list: List of WB parquet files found in the directory.
171
229
  """
172
- return list(path.glob("wb_*.parquet"))
230
+ return list(path.glob(f"wb_*.parquet"))
231
+
232
+
233
+ def _find_wb_lcu_ppp_files_in_path(path: Path) -> list:
234
+ """Find all WB PPP parquet files in the specified directory.
235
+
236
+ Args:
237
+ path (Path): The directory path to search for WB parquet files.
238
+
239
+ Returns:
240
+ list: List of WB parquet files found in the directory.
241
+ """
242
+ return list(path.glob(f"wb_lcu_ppp_*.parquet"))
243
+
244
+
245
+ def _find_wb_usd_ppp_files_in_path(path: Path) -> list:
246
+ """Find all WB PPP parquet files in the specified directory.
247
+
248
+ Args:
249
+ path (Path): The directory path to search for WB parquet files.
250
+
251
+ Returns:
252
+ list: List of WB parquet files found in the directory.
253
+ """
254
+ return list(path.glob(f"wb_usd_ppp_*.parquet"))
173
255
 
174
256
 
175
257
  def read_wb(update: bool = False) -> pd.DataFrame:
@@ -182,5 +264,27 @@ def read_wb(update: bool = False) -> pd.DataFrame:
182
264
  )
183
265
 
184
266
 
267
def read_wb_lcu_ppp(update: bool = False) -> pd.DataFrame:
    """Load the ``wb_lcu_ppp`` World Bank dataset through ``read_data``.

    Args:
        update: Passed through to ``read_data`` as ``update``.

    Returns:
        pd.DataFrame: The result of ``read_data`` for the ``wb_lcu_ppp``
        file finder and downloader.
    """
    loader_options = {
        "file_finder_func": _find_wb_lcu_ppp_files_in_path,
        "download_func": download_wb_lcu_ppp,
        "data_name": "World Bank",
        "update": update,
    }
    return read_data(**loader_options)
275
+
276
+
277
def read_wb_usd_ppp(update: bool = False) -> pd.DataFrame:
    """Load the ``wb_usd_ppp`` World Bank dataset through ``read_data``.

    Args:
        update: Passed through to ``read_data`` as ``update``.

    Returns:
        pd.DataFrame: The result of ``read_data`` for the ``wb_usd_ppp``
        file finder and downloader.
    """
    loader_options = {
        "file_finder_func": _find_wb_usd_ppp_files_in_path,
        "download_func": download_wb_usd_ppp,
        "data_name": "World Bank",
        "update": update,
    }
    return read_data(**loader_options)
285
+
286
+
185
287
  if __name__ == "__main__":
186
- df = read_wb(True)
288
+ df_wb = read_wb(False)
289
+ df_usd = read_wb_usd_ppp(False)
290
+ df_lcu = read_wb_lcu_ppp(False)
@@ -35,6 +35,7 @@ def clean_number(number):
35
35
 
36
36
  return float(number)
37
37
 
38
+
38
39
  def create_pydeflate_year(
39
40
  data: pd.DataFrame, year_column: str, year_format: str | None = None
40
41
  ) -> pd.DataFrame:
@@ -84,17 +85,21 @@ def get_matched_pydeflate_data(
84
85
  )
85
86
 
86
87
 
87
def flag_missing_pydeflate_data(
    unmatched_data: pd.DataFrame, entity_column: str, year_column: str
):
    """Log, per entity, the years present in the input but missing in pydeflate's data.

    Args:
        unmatched_data: Rows that found no match; nothing is logged when empty.
        entity_column: Column used to group the unmatched rows.
        year_column: Column whose values are listed for each entity.
    """
    if unmatched_data.empty:
        return

    def _years_as_text(years: pd.Series) -> str:
        # One comma-separated, ascending list of years per entity.
        return ", ".join(map(str, sorted(years)))

    # Only the entity/year pairs matter for the report; drop repeats first.
    pairs = unmatched_data.filter([entity_column, year_column]).drop_duplicates()
    years_by_entity = (
        pairs.groupby(entity_column)[year_column].apply(_years_as_text).to_dict()
    )

    report_lines = [f"{entity}: {years}" for entity, years in years_by_entity.items()]
    missing_str = "\n".join(report_lines)

    # log all missing data
    logger.info(f"Missing exchange data for:\n{missing_str}")
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "pydeflate"
3
- version = "2.0.1"
3
+ version = "2.1.0"
4
4
  description = "Package to convert current prices figures to constant prices and vice versa"
5
5
  authors = ["Jorge Rivera <jorge.rivera@one.org>"]
6
6
  license = "MIT"
File without changes
File without changes