pydeflate 1.4.2__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,146 +0,0 @@
1
- import warnings
2
- from dataclasses import dataclass
3
-
4
- import pandas as pd
5
- from oda_reader import download_dac1
6
-
7
- from pydeflate.get_data.deflate_data import Data
8
- from pydeflate.get_data.exchange_data import ExchangeOECD
9
- from pydeflate.pydeflate_config import PYDEFLATE_PATHS, logger
10
- from pydeflate.tools.update_data import update_update_date
11
- from pydeflate.utils import oecd_codes
12
-
13
- warnings.simplefilter("ignore", Warning, lineno=1013)
14
-
15
-
16
def _compute_deflators_and_exchange(data: pd.DataFrame) -> pd.DataFrame:
    """Add implied exchange-rate and deflator columns to pivoted DAC1 data.

    Args:
        data: pivoted DAC1 data with amount-type columns (N, A, D), a
            donor_code column and an integer year column.

    Returns:
        The data with `exchange`, `deflator` and `iso_code` columns added,
        and `year` converted to a datetime.
    """
    augmented = data.assign(
        exchange=lambda frame: round(frame.N / frame.A, 5),
        # Deflator implied by the ratio of the A and D amount columns.
        deflator=lambda frame: round(100 * frame.A / frame.D, 6),
        iso_code=lambda frame: frame.donor_code.map(oecd_codes()),
        year=lambda frame: pd.to_datetime(frame.year, format="%Y"),
    )

    # Donors without an implied exchange rate default to a factor of 1.
    return augmented.assign(exchange=lambda frame: frame.exchange.fillna(1))
23
-
24
-
25
def _clean_dac1(df: pd.DataFrame) -> pd.DataFrame:
    """Clean DAC1 to keep only relevant information for deflators and exchange.

    Args:
        df: the dataframe to clean

    Returns:
        A cleaned dataframe with iso_code, year, exchange and deflator columns.
    """
    # Shorter working names for the columns used in the query below.
    renames = {"amounttype_code": "type", "aidtype_code": "aid", "flows_code": "flow"}

    # Keep only the official definition of the data: different aid/flow
    # code combinations apply before and since 2018.
    official_definition = (
        "(aid == 1010 & flow == 1140 & year <2018 ) | "
        "(aid == 11010 & flow == 1160 & year >=2018)"
    )

    # Filter to the official definition, then pivot amount types to columns.
    wide = (
        df.rename(columns=renames)
        .query(official_definition)
        .filter(["donor_code", "type", "year", "value"], axis=1)
        .pivot(index=["donor_code", "year"], columns=["type"], values="value")
        .reset_index()
    )

    # Derive deflators/exchange rates and keep only mapped donors.
    return (
        wide.pipe(_compute_deflators_and_exchange)
        .dropna(subset=["iso_code"])
        .filter(["iso_code", "year", "exchange", "deflator"], axis=1)
        .reset_index(drop=True)
    )
61
-
62
-
63
def update_dac1() -> None:
    """Update dac1 data from OECD site and save as feather."""

    # Download the raw DAC1 data via oda_reader, restricted to the
    # measures/flow types needed for deflators and exchange rates.
    raw = download_dac1(
        filters={"measure": ["1010", "11010"], "flow_type": ["1140", "1160"]}
    )

    # Clean the data and persist it as feather.
    raw.pipe(_clean_dac1).to_feather(PYDEFLATE_PATHS.data / "pydeflate_dac1.feather")

    # Record the refresh date for this source.
    update_update_date("OECD DAC")
79
-
80
-
81
- def _identify_base_year(df: pd.DataFrame) -> int:
82
- return (
83
- df.query("iso_code in ['FRA','GBR','USA','CAN','DEU','EUI']")
84
- .groupby(["year"], as_index=False)
85
- .value.mean(numeric_only=True)
86
- .round(2)
87
- .loc[lambda d: d.value == 100.00]
88
- .year.dt.year.item()
89
- )
90
-
91
-
92
- def _calculate_price_deflator(deflators_df: pd.DataFrame) -> pd.DataFrame:
93
- return deflators_df.assign(
94
- value=lambda d: round(d.value_dac * d.value_exchange / 100, 6)
95
- ).filter(["iso_code", "year", "indicator", "value"], axis=1)
96
-
97
-
98
@dataclass
class OECD(Data):
    """Download and expose the latest OECD DAC deflators data."""

    def __post_init__(self):
        # Deflator methods this source exposes to pydeflate users.
        self._available_methods = {"dac_deflator": "oecd_dac"}

    def update(self, **kwargs) -> None:
        """Refresh the cached DAC1 data from the OECD."""
        update_dac1()

    def load_data(self, **kwargs) -> None:
        """Load the OECD DAC price deflators data.

        If the data is not found on disk, it is downloaded first.
        DAC deflators are combined with the implied exchange-rate
        information from the OECD DAC data, so what is loaded is *not*
        the DAC deflator itself but the price deflator used to produce
        the DAC deflators.
        """
        try:
            dac = pd.read_feather(PYDEFLATE_PATHS.data / "pydeflate_dac1.feather")
        except FileNotFoundError:
            # Missing cache: download, then retry the load once.
            logger.info("Data not found, downloading...")
            self.update()
            self.load_data()
            return

        dac = dac.assign(indicator="oecd_dac").rename(columns={"deflator": "value"})

        # The base year is implied by where the deflator equals 100.
        base_year = _identify_base_year(dac)

        # USA-to-USA exchange deflators rebased to the identified base year.
        exchange_deflator = ExchangeOECD().exchange_deflator(
            source_iso="USA", target_iso="USA", base_year=base_year
        )

        # Combine the DAC deflators with the exchange deflators.
        merged = dac.merge(
            exchange_deflator,
            on=["iso_code", "year"],
            how="left",
            suffixes=("_dac", "_exchange"),
        )

        # Store the resulting price deflators.
        self._data = _calculate_price_deflator(deflators_df=merged)
@@ -1,75 +0,0 @@
1
- from dataclasses import dataclass
2
-
3
- import pandas as pd
4
- from pydeflate.get_data.deflate_data import Data
5
- from pydeflate.pydeflate_config import PYDEFLATE_PATHS
6
-
7
- from bblocks import WorldBankData, set_bblocks_data_path
8
-
9
- from pydeflate.tools.update_data import update_update_date
10
-
11
# Store bblocks downloads in pydeflate's own data directory.
set_bblocks_data_path(PYDEFLATE_PATHS.data)

# World Bank indicator codes used by pydeflate, keyed by short name.
_INDICATORS: dict = {
    "gdp": "NY.GDP.DEFL.ZS",
    "gdp_linked": "NY.GDP.DEFL.ZS.AD",
    "cpi": "FP.CPI.TOTL",
    "exchange": "PA.NUS.FCRF",
    "effective_exchange": "PX.REX.REER",
}

# Year range requested from the World Bank.
START: int = 1950
END: int = 2025
23
-
24
-
25
def update_world_bank_data() -> None:
    """Update World Bank data."""
    wb_data = WorldBankData()

    # Fetch every pydeflate indicator for the configured year range.
    wb_data.load_data(
        indicator=list(_INDICATORS.values()), start_year=START, end_year=END
    )
    wb_data.update_data()

    # Record the refresh date for this source.
    update_update_date(source="World Bank")
31
-
32
-
33
@dataclass
class WorldBank(Data):
    """An object to download and return the latest WorldBank exchange and price data"""

    def __post_init__(self):
        # Deflator methods this source exposes to pydeflate users.
        self._available_methods = {
            "gdp": "GDP deflator",
            "gdp_linked": "GDP deflator linked",
            "cpi": "Consumer price index",
        }

    def update(self, **kwargs) -> None:
        """Update data for all WorldBank indicators"""
        update_world_bank_data()

    # NOTE: **kwargs added for consistency with update() and with the
    # OECD source's load_data(**kwargs); backward-compatible for callers.
    def load_data(self, **kwargs) -> None:
        """Load data for all WorldBank indicators.

        If any indicator file is missing on disk, all data is refreshed
        before loading. Files still missing after the refresh are tolerated:
        an empty DataFrame is concatenated in their place.
        """

        # Expected on-disk location of each indicator's CSV.
        paths = [
            PYDEFLATE_PATHS.data / f"{_INDICATORS[i_]}_{START}-{END}_.csv"
            for i_ in _INDICATORS
        ]

        # Refresh everything if any single file is missing.
        if not all(path.exists() for path in paths):
            update_world_bank_data()

        # Load each indicator, tolerating files that failed to download.
        files = []
        for path in paths:
            try:
                files.append(
                    pd.read_csv(path, parse_dates=["date"]).rename(
                        columns={"date": "year"}
                    )
                )
            except FileNotFoundError:
                files.append(pd.DataFrame())

        self._data = pd.concat(files, ignore_index=True)
@@ -1,2 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
@@ -1,171 +0,0 @@
1
- import pandas as pd
2
-
3
- from pydeflate.get_data.exchange_data import (
4
- ExchangeIMF,
5
- ExchangeOECD,
6
- ExchangeWorldBank,
7
- )
8
- from pydeflate.utils import check_year_as_number, to_iso3, oecd_codes
9
-
10
-
11
# Maps user-facing rates_source names to the class providing those rates.
# "wb" is accepted as a shorthand for "world_bank".
_exchange_source = {
    "world_bank": ExchangeWorldBank,
    "wb": ExchangeWorldBank,
    "oecd_dac": ExchangeOECD,
    "imf": ExchangeIMF,
}
17
-
18
-
19
def _check_key_errors(
    rates_source: str,
    columns: str | list | pd.Index,
    value_column: str,
    date_column: str,
) -> None:
    """Validate user-provided parameters, raising KeyError on any problem.

    Args:
        rates_source: name of the exchange-rates source to validate.
        columns: the columns available in the caller's DataFrame.
        value_column: name of the column holding values to convert.
        date_column: name of the column holding dates.

    Raises:
        KeyError: if the source is unknown or a required column is missing.
    """

    if rates_source not in _exchange_source:
        raise KeyError(
            f"{rates_source=} is not a valid exchange rates source. "
            f"Please choose from {_exchange_source.keys()}"
        )

    # Both columns share the same error message template.
    for required in (value_column, date_column):
        if required not in columns:
            raise KeyError(
                f"{required} is not a valid column in the provided DataFrame"
            )
40
-
41
-
42
def exchange(
    df: pd.DataFrame,
    source_currency: str,
    target_currency: str,
    rates_source: str = "world_bank",
    id_column: str = "iso_code",
    id_type: str = "ISO3",
    value_column: str = "value",
    target_column: str = "value",
    date_column: str = "date",
) -> pd.DataFrame:
    """Convert the values in a DataFrame from one currency to another.

    Parameters
    ----------
    df : pd.DataFrame
        A Pandas DataFrame, in long format, containing at least a date column,
        a column with iso-3 codes to identify the source currency, and a
        value column where the values to be converted are stored.
    source_currency : str
        The ISO-3 code of the country which owns the currency in which the data
        is expressed. "LCU" can be used to indicate that data is in Local
        Currency Unit. "emu" can be used for the EURO.
    target_currency : str
        The ISO-3 code of the country which owns the currency to which the data
        will be converted. "LCU" can be used to convert from a given currency
        (like the USD), back to each country's Local Currency.
    rates_source : str, optional
        The source of the exchange rate data. Options include "world_bank"
        (alias "wb") for the World Bank, "oecd_dac" for the exchange rates
        used for ODA statistics, and "imf". The default is "world_bank".
    id_column : str, optional
        The name of the column containing the codes or names used to identify countries.
        The default is "iso_code".
    id_type : str, optional
        The types of codes used to identify countries. Should match options in
        Country Converter or the DAC codes. The default is "ISO3".
    value_column : str, optional
        The name of the column containing the values to be converted.
        The default is "value".
    target_column : str, optional
        The name of the column where the converted values will be stored.
        The default is "value" (the value column is overwritten in place).
    date_column : str, optional
        The name of the column where the date/year is stored.
        The default is "date".

    Returns
    -------
    df : pd.DataFrame
        Returns a dataframe containing the converted data stored in the
        target column, keeping the original column order.

    Raises
    ------
    KeyError
        If rates_source, value_column or date_column are invalid.
    """

    # create a copy of the dataframe to avoid modifying the original
    df = df.copy(deep=True)

    # Check whether provided parameters are valid
    _check_key_errors(rates_source, df.columns, value_column, date_column)

    # If source currency matches target currency, do nothing
    if source_currency == target_currency:
        df[target_column] = df[value_column]
        return df

    # keep track of original columns. This is so that the same order and columns can be
    # preserved.
    if target_column not in df.columns:
        cols = [*df.columns, target_column]
    else:
        cols = df.columns

    # check whether date is provided as integer (converted back at the end)
    df, year_as_number = check_year_as_number(df, date_column)

    # If converting back to Local Currency, swap source and target so the
    # rates lookup is done against the (former) source currency.
    if target_currency == "LCU":
        target_currency = source_currency
        source_currency = "LCU"
        target_changed = True
    else:
        target_changed = False

    # get the selected rates source and align its columns with the data
    exchange_rates = (
        _exchange_source[rates_source]()
        .exchange_rate(target_currency)
        .rename(columns={"year": date_column, "value": value_column, "iso_code": "id_"})
    )

    # Create ID col. DAC codes are mapped directly; anything else goes
    # through the ISO3 conversion helper.
    if id_type == "DAC":
        df["id_"] = df[id_column].map(oecd_codes()).fillna("DAC")
    else:
        df = df.pipe(
            to_iso3, codes_col=id_column, target_col="id_", src_classification=id_type
        )

    # merge exchange rates with data: per-country rates when converting
    # from LCU, a single source-currency rate series otherwise
    if source_currency == "LCU":
        df = df.merge(
            exchange_rates,
            on=["id_", date_column],
            suffixes=("", "_xe"),
        )

    else:
        xe = exchange_rates.loc[exchange_rates.id_ == source_currency]
        df = df.merge(
            xe.drop("id_", axis=1),
            on=[date_column],
            suffixes=("", "_xe"),
        )

    # revert change to target_currency if target_changed
    if target_changed:
        target_currency = "LCU"

    # Multiply when converting to LCU, divide otherwise.
    if target_currency == "LCU":
        df[target_column] = df[value_column] * df[f"{value_column}_xe"]

    else:
        df[target_column] = df[value_column] / df[f"{value_column}_xe"]

    # Restore integer years if the input used them.
    if year_as_number:
        df[date_column] = df[date_column].dt.year

    return df.filter(cols, axis=1)
@@ -1,69 +0,0 @@
1
- import datetime
2
- import json
3
-
4
- from pydeflate.pydeflate_config import PYDEFLATE_PATHS, logger
5
-
6
-
7
- def _diff_from_today(date: datetime.datetime):
8
- """Compare to today"""
9
-
10
- today = datetime.datetime.today()
11
-
12
- return (today - date).days
13
-
14
-
15
def warn_updates():
    """Warn if any data source has not been refreshed in over 50 days."""
    updates_file = PYDEFLATE_PATHS.data / "data_updates.json"

    # Nothing to check if no update dates have ever been recorded.
    if not updates_file.exists():
        return

    with open(updates_file) as file:
        updates = json.load(file)

    for source, date in updates.items():
        last_update = datetime.datetime.strptime(date, "%Y-%m-%d")
        days_old = _diff_from_today(last_update)
        if days_old > 50:
            logger.warning(
                f'\n\nThe underlying data for "{source}" has not been updated'
                f" in over {days_old} days. \nIn order to use"
                " pydeflate with the most recent data, please run:\n"
                "`pydeflate.update_all_data()`"
            )
32
-
33
-
34
def update_update_date(source: str):
    """Record today's date as the latest update date for *source*.

    Dates are persisted in ``data_updates.json`` inside pydeflate's data
    directory; the file is created if it does not yet exist.

    Args:
        source: name of the data source being marked as updated.
    """

    today = datetime.datetime.today().strftime("%Y-%m-%d")

    updates_file = PYDEFLATE_PATHS.data / "data_updates.json"

    # Load existing records if present, otherwise start a fresh mapping.
    # (Avoids the previous write-then-reread round trip that first created
    # an empty file only to load it back immediately.)
    if updates_file.exists():
        with open(updates_file) as file:
            updates = json.load(file)
    else:
        updates = {}

    updates[source] = today

    # The final write creates the file when it did not exist.
    with open(updates_file, "w") as outfile:
        json.dump(updates, outfile)
52
-
53
-
54
def update_all_data() -> None:
    """Run to update all underlying data."""

    # Imported locally to avoid circular imports at module load time.
    from pydeflate.get_data.imf_data import IMF
    from pydeflate.get_data.oecd_data import OECD
    from pydeflate.get_data.wb_data import WorldBank

    updaters = {
        "IMF WEO Data": IMF().update,
        "OECD DAC data": OECD().update,
        "WorldBank data": WorldBank().update,
    }

    for source, update_func in updaters.items():
        update_func()
        logger.info(f"****Successfully updated {source}****\n")