pydeflate 2.1.3__py3-none-any.whl → 2.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pydeflate/core/source.py CHANGED
@@ -2,6 +2,7 @@ from dataclasses import dataclass, field
2
2
 
3
3
  import pandas as pd
4
4
 
5
+ from pydeflate.exceptions import ConfigurationError, DataSourceError
5
6
  from pydeflate.sources.common import AvailableDeflators
6
7
  from pydeflate.sources.dac import read_dac
7
8
  from pydeflate.sources.imf import read_weo
@@ -10,6 +11,12 @@ from pydeflate.sources.world_bank import read_wb, read_wb_lcu_ppp, read_wb_usd_p
10
11
 
11
12
  @dataclass
12
13
  class Source:
14
+ """Base class for data sources implementing SourceProtocol.
15
+
16
+ This class handles loading data from external sources, caching,
17
+ and validation. It implements the SourceProtocol interface.
18
+ """
19
+
13
20
  name: str
14
21
  reader: callable
15
22
  update: bool = False
@@ -17,26 +24,100 @@ class Source:
17
24
  _idx = ["pydeflate_year", "pydeflate_entity_code", "pydeflate_iso3"]
18
25
 
19
26
  def __post_init__(self):
20
- self.data = self.reader(self.update)
27
+ """Load and validate data after initialization."""
28
+ try:
29
+ self.data = self.reader(self.update)
30
+ except Exception as e:
31
+ raise DataSourceError(
32
+ f"Failed to load data: {e}",
33
+ source=self.name,
34
+ ) from e
35
+
21
36
  self.validate()
22
37
 
23
38
  def validate(self):
24
- if self.data.empty:
25
- raise ValueError(f"No data found for {self.name}")
39
+ """Validate that source data is properly formatted.
26
40
 
27
- # check all columns start with pydeflate_
28
- if not all(col.startswith("pydeflate_") for col in self.data.columns):
29
- raise ValueError(f"Invalid data format for {self.name}")
41
+ Raises:
42
+ DataSourceError: If data is empty or improperly formatted
43
+ SchemaValidationError: If data doesn't match expected schema
44
+ """
45
+ if self.data.empty:
46
+ raise DataSourceError(f"No data found", source=self.name)
47
+
48
+ # Check all columns start with pydeflate_
49
+ invalid_cols = [
50
+ col for col in self.data.columns if not col.startswith("pydeflate_")
51
+ ]
52
+ if invalid_cols:
53
+ raise DataSourceError(
54
+ f"Invalid column names (must start with 'pydeflate_'): {invalid_cols}",
55
+ source=self.name,
56
+ )
57
+
58
+ # Validate schema if available and enabled
59
+ # Note: Schema validation is currently experimental
60
+ # Set environment variable PYDEFLATE_ENABLE_VALIDATION=1 to enable
61
+ import os
62
+
63
+ if os.environ.get("PYDEFLATE_ENABLE_VALIDATION") == "1":
64
+ try:
65
+ from pydeflate.schemas import validate_source_data
66
+
67
+ validate_source_data(self.data, self.name)
68
+ except ImportError:
69
+ # Pandera not available, skip schema validation
70
+ pass
71
+ except Exception as e:
72
+ # Schema validation failed, but don't break for now
73
+ # This allows us to roll out schema validation gradually
74
+ import logging
75
+
76
+ logger = logging.getLogger("pydeflate")
77
+ logger.debug(f"Schema validation skipped for {self.name}: {e}")
30
78
 
31
79
  def lcu_usd_exchange(self) -> pd.DataFrame:
80
+ """Return local currency to USD exchange rates.
81
+
82
+ Returns:
83
+ DataFrame with exchange rate data
84
+
85
+ Raises:
86
+ DataSourceError: If exchange rate data is missing
87
+ """
88
+ if "pydeflate_EXCHANGE" not in self.data.columns:
89
+ raise DataSourceError(
90
+ "Exchange rate data (pydeflate_EXCHANGE) not available",
91
+ source=self.name,
92
+ )
32
93
  return self.data.filter(self._idx + ["pydeflate_EXCHANGE"])
33
94
 
34
95
  def price_deflator(self, kind: AvailableDeflators = "NGDP_D") -> pd.DataFrame:
35
-
36
- if f"pydeflate_{kind}" not in self.data.columns:
37
- raise ValueError(f"No deflator data found for {kind} in {self.name} data.")
38
-
39
- return self.data.filter(self._idx + [f"pydeflate_{kind}"])
96
+ """Return price deflator data for specified kind.
97
+
98
+ Args:
99
+ kind: Type of deflator (e.g., 'NGDP_D', 'CPI')
100
+
101
+ Returns:
102
+ DataFrame with deflator data
103
+
104
+ Raises:
105
+ ConfigurationError: If deflator kind not available for this source
106
+ """
107
+ column_name = f"pydeflate_{kind}"
108
+ if column_name not in self.data.columns:
109
+ available = [
110
+ col.replace("pydeflate_", "")
111
+ for col in self.data.columns
112
+ if col.startswith("pydeflate_") and col not in self._idx
113
+ ]
114
+ raise ConfigurationError(
115
+ f"Deflator '{kind}' not available for {self.name}. "
116
+ f"Available deflators: {', '.join(available)}",
117
+ parameter="kind",
118
+ )
119
+
120
+ return self.data.filter(self._idx + [column_name])
40
121
 
41
122
 
42
123
  class IMF(Source):
@@ -3,7 +3,7 @@ from functools import wraps
3
3
  import pandas as pd
4
4
 
5
5
  from pydeflate.core.api import BaseDeflate
6
- from pydeflate.core.source import DAC, WorldBank, IMF
6
+ from pydeflate.core.source import DAC, IMF, WorldBank
7
7
 
8
8
 
9
9
  def _generate_docstring(source_name: str, price_kind: str) -> str:
@@ -0,0 +1,233 @@
1
+ from functools import wraps
2
+
3
+ import pandas as pd
4
+
5
+ from pydeflate.core.api import BaseDeflate
6
+ from pydeflate.core.source import DAC, IMF, WorldBank
7
+
8
+
9
+ def _generate_get_deflator_docstring(source_name: str, price_kind: str) -> str:
10
+ """Generate docstring for each get deflator function."""
11
+ return (
12
+ f"Get deflator data from {source_name} ({price_kind}) without requiring user data.\n\n"
13
+ f"This function returns a DataFrame containing deflator values for the specified parameters.\n\n"
14
+ "Args:\n"
15
+ " base_year (int): The base year for calculating deflation adjustments.\n"
16
+ " source_currency (str, optional): The source currency code. Defaults to 'USA'.\n"
17
+ " target_currency (str, optional): The target currency code. Defaults to 'USA'.\n"
18
+ " countries (list[str] | None, optional): List of country codes to include. If None, returns all. Defaults to None.\n"
19
+ " years (list[int] | range | None, optional): List or range of years to include. If None, returns all. Defaults to None.\n"
20
+ " use_source_codes (bool, optional): Use source-specific entity codes. Defaults to False.\n"
21
+ " to_current (bool, optional): Get deflators for constant-to-current conversion. Defaults to False.\n"
22
+ " include_components (bool, optional): Include price_deflator, exchange_deflator, and exchange_rate columns. Defaults to False.\n"
23
+ " update_deflators (bool, optional): Update the deflator data before retrieval. Defaults to False.\n\n"
24
+ "Returns:\n"
25
+ " pd.DataFrame: DataFrame with columns:\n"
26
+ " - iso_code (or entity_code if use_source_codes=True): Country/entity identifier\n"
27
+ " - year: Year\n"
28
+ " - deflator: The combined deflator value\n"
29
+ " - price_deflator (if include_components=True): The price deflator component\n"
30
+ " - exchange_deflator (if include_components=True): The exchange rate deflator component\n"
31
+ " - exchange_rate (if include_components=True): The exchange rate\n"
32
+ )
33
+
34
+
35
def _get_deflator(deflator_source_cls, price_kind):
    """Build a decorator that turns a stub into a deflator-retrieval function.

    The decorated function is never called: the decorator returns its own
    ``wrapper`` and uses ``functools.wraps`` only to copy the stub's metadata.
    The wrapper's docstring is then replaced with generated text.

    Args:
        deflator_source_cls: Source class, instantiated as
            ``deflator_source_cls(update=...)`` on every call and used for
            both the deflator and the exchange data.
        price_kind: Price measure passed to ``BaseDeflate``; its data column
            is ``pydeflate_<price_kind>``.

    Returns:
        A decorator producing a keyword-only function that returns a
        ``pd.DataFrame`` of deflators.
    """
    price_col = f"pydeflate_{price_kind}"

    def decorator(func):
        @wraps(func)
        def wrapper(
            *,
            base_year: int,
            source_currency: str = "USA",
            target_currency: str = "USA",
            countries: list[str] | None = None,
            years: list[int] | range | None = None,
            use_source_codes: bool = False,
            to_current: bool = False,
            include_components: bool = False,
            update_deflators: bool = False,
        ):
            # ``bool`` is a subclass of ``int``; reject it explicitly so that
            # ``base_year=True`` is not silently treated as year 1.
            if isinstance(base_year, bool) or not isinstance(base_year, int):
                raise ValueError("The 'base_year' parameter must be an integer.")

            # One source instance serves both deflator and exchange data.
            source = deflator_source_cls(update=update_deflators)

            deflator = BaseDeflate(
                base_year=base_year,
                deflator_source=source,
                exchange_source=source,
                source_currency=source_currency,
                target_currency=target_currency,
                price_kind=price_kind,
                use_source_codes=use_source_codes,
                to_current=to_current,
            )

            # Work on a copy so filtering never touches the deflator's data.
            data = deflator.pydeflate_data.copy()

            # Entity identifier column depends on use_source_codes.
            entity_col = (
                "pydeflate_entity_code" if use_source_codes else "pydeflate_iso3"
            )

            if countries is not None:
                data = data[data[entity_col].isin(countries)]

            if years is not None:
                if isinstance(years, range):
                    years = list(years)
                data = data[data["pydeflate_year"].isin(years)]

            # Single ordered mapping (internal column -> public name) drives
            # both the column selection and the renaming, so the two cannot
            # drift apart.
            rename_map = {
                entity_col: "entity_code" if use_source_codes else "iso_code",
                "pydeflate_year": "year",
                "pydeflate_deflator": "deflator",
            }
            if include_components:
                rename_map.update(
                    {
                        price_col: "price_deflator",
                        "pydeflate_EXCHANGE_D": "exchange_deflator",
                        "pydeflate_EXCHANGE": "exchange_rate",
                    }
                )

            result = data[list(rename_map)].rename(columns=rename_map)

            # Drop the filtered index so rows are numbered from zero.
            return result.reset_index(drop=True)

        wrapper.__doc__ = _generate_get_deflator_docstring(
            deflator_source_cls.__name__, price_kind
        )
        return wrapper

    return decorator
129
+
130
+
131
# Deflators from the `DAC` source using the "NGDP_D" price measure.
# The `...` body is a placeholder: `_get_deflator` returns its own wrapper and
# never calls this function; `functools.wraps` copies this stub's metadata
# onto that wrapper, and the wrapper's docstring is generated by
# `_generate_get_deflator_docstring`.
@_get_deflator(DAC, "NGDP_D")
def get_oecd_dac_deflators(
    *,
    base_year: int,
    source_currency: str = "USA",
    target_currency: str = "USA",
    countries: list[str] | None = None,
    years: list[int] | range | None = None,
    use_source_codes: bool = False,
    to_current: bool = False,
    include_components: bool = False,
    update_deflators: bool = False,
) -> pd.DataFrame: ...
144
+
145
+
146
# Deflators from the `WorldBank` source using the "NGDP_D" price measure.
# The `...` body is a placeholder: `_get_deflator` returns its own wrapper and
# never calls this function; `functools.wraps` copies this stub's metadata
# onto that wrapper, and the wrapper's docstring is generated by
# `_generate_get_deflator_docstring`.
@_get_deflator(WorldBank, "NGDP_D")
def get_wb_gdp_deflators(
    *,
    base_year: int,
    source_currency: str = "USA",
    target_currency: str = "USA",
    countries: list[str] | None = None,
    years: list[int] | range | None = None,
    use_source_codes: bool = False,
    to_current: bool = False,
    include_components: bool = False,
    update_deflators: bool = False,
) -> pd.DataFrame: ...
159
+
160
+
161
# Deflators from the `WorldBank` source using the "NGDP_DL" price measure.
# The `...` body is a placeholder: `_get_deflator` returns its own wrapper and
# never calls this function; `functools.wraps` copies this stub's metadata
# onto that wrapper, and the wrapper's docstring is generated by
# `_generate_get_deflator_docstring`.
@_get_deflator(WorldBank, "NGDP_DL")
def get_wb_gdp_linked_deflators(
    *,
    base_year: int,
    source_currency: str = "USA",
    target_currency: str = "USA",
    countries: list[str] | None = None,
    years: list[int] | range | None = None,
    use_source_codes: bool = False,
    to_current: bool = False,
    include_components: bool = False,
    update_deflators: bool = False,
) -> pd.DataFrame: ...
174
+
175
+
176
# Deflators from the `WorldBank` source using the "CPI" price measure.
# The `...` body is a placeholder: `_get_deflator` returns its own wrapper and
# never calls this function; `functools.wraps` copies this stub's metadata
# onto that wrapper, and the wrapper's docstring is generated by
# `_generate_get_deflator_docstring`.
@_get_deflator(WorldBank, "CPI")
def get_wb_cpi_deflators(
    *,
    base_year: int,
    source_currency: str = "USA",
    target_currency: str = "USA",
    countries: list[str] | None = None,
    years: list[int] | range | None = None,
    use_source_codes: bool = False,
    to_current: bool = False,
    include_components: bool = False,
    update_deflators: bool = False,
) -> pd.DataFrame: ...
189
+
190
+
191
# Deflators from the `IMF` source using the "NGDP_D" price measure.
# The `...` body is a placeholder: `_get_deflator` returns its own wrapper and
# never calls this function; `functools.wraps` copies this stub's metadata
# onto that wrapper, and the wrapper's docstring is generated by
# `_generate_get_deflator_docstring`.
@_get_deflator(IMF, "NGDP_D")
def get_imf_gdp_deflators(
    *,
    base_year: int,
    source_currency: str = "USA",
    target_currency: str = "USA",
    countries: list[str] | None = None,
    years: list[int] | range | None = None,
    use_source_codes: bool = False,
    to_current: bool = False,
    include_components: bool = False,
    update_deflators: bool = False,
) -> pd.DataFrame: ...
204
+
205
+
206
# Deflators from the `IMF` source using the "PCPI" price measure.
# The `...` body is a placeholder: `_get_deflator` returns its own wrapper and
# never calls this function; `functools.wraps` copies this stub's metadata
# onto that wrapper, and the wrapper's docstring is generated by
# `_generate_get_deflator_docstring`.
@_get_deflator(IMF, "PCPI")
def get_imf_cpi_deflators(
    *,
    base_year: int,
    source_currency: str = "USA",
    target_currency: str = "USA",
    countries: list[str] | None = None,
    years: list[int] | range | None = None,
    use_source_codes: bool = False,
    to_current: bool = False,
    include_components: bool = False,
    update_deflators: bool = False,
) -> pd.DataFrame: ...
219
+
220
+
221
# Deflators from the `IMF` source using the "PCPIE" price measure.
# The `...` body is a placeholder: `_get_deflator` returns its own wrapper and
# never calls this function; `functools.wraps` copies this stub's metadata
# onto that wrapper, and the wrapper's docstring is generated by
# `_generate_get_deflator_docstring`.
@_get_deflator(IMF, "PCPIE")
def get_imf_cpi_e_deflators(
    *,
    base_year: int,
    source_currency: str = "USA",
    target_currency: str = "USA",
    countries: list[str] | None = None,
    years: list[int] | range | None = None,
    use_source_codes: bool = False,
    to_current: bool = False,
    include_components: bool = False,
    update_deflators: bool = False,
) -> pd.DataFrame: ...
@@ -4,7 +4,7 @@ import pandas as pd
4
4
  from pandas.util._decorators import deprecate_kwarg
5
5
 
6
6
  from pydeflate.core.api import BaseDeflate
7
- from pydeflate.core.source import DAC, WorldBank, IMF
7
+ from pydeflate.core.source import DAC, IMF, WorldBank
8
8
 
9
9
 
10
10
  @deprecate_kwarg(old_arg_name="method", new_arg_name="deflator_method")
@@ -0,0 +1,166 @@
1
+ """Custom exception hierarchy for pydeflate.
2
+
3
+ This module defines specific exception types that allow users to handle
4
+ different failure modes appropriately (e.g., retry on network errors,
5
+ fail fast on validation errors).
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+
11
class PydeflateError(Exception):
    """Root of the pydeflate exception hierarchy.

    The pydeflate-specific exception types derive from this class, so a
    single ``except PydeflateError`` clause catches any of them.
    """
19
+
20
+
21
class DataSourceError(PydeflateError):
    """Problem with a data source.

    Serves as the common base for the more specific data-source errors.
    """

    def __init__(self, message: str, source: str | None = None):
        """Create the error, prefixing the message with the source if given.

        Args:
            message: Description of the error.
            source: Name of the data source (e.g., 'IMF', 'World Bank').
                When truthy, the final message is ``"[<source>] <message>"``.
        """
        self.source = source

        text = message
        if source:
            text = f"[{source}] {message}"
        super().__init__(text)
36
+
37
+
38
class NetworkError(DataSourceError):
    """A network operation failed.

    Usually signals a transient condition, so retrying may succeed.
    """
45
+
46
+
47
class SchemaValidationError(DataSourceError):
    """Data does not match the expected schema.

    Points at a structural problem with the data, for example:
    - External API changes
    - Corrupted downloaded data
    - User input with wrong columns/types
    """

    def __init__(
        self,
        message: str,
        source: str | None = None,
        expected_schema: dict | None = None,
        actual_schema: dict | None = None,
    ):
        """Create the error and keep both schemas for later inspection.

        Args:
            message: Description of the validation failure.
            source: Name of the data source.
            expected_schema: Expected column types/names.
            actual_schema: Actual column types/names found.
        """
        # Message formatting (including the source prefix) is delegated to
        # DataSourceError; the schemas are stored as plain attributes.
        super().__init__(message, source)
        self.expected_schema = expected_schema
        self.actual_schema = actual_schema
74
+
75
+
76
class CacheError(PydeflateError):
    """A cache operation failed.

    Examples:
    - Unable to write to cache directory
    - Corrupted cache files
    - Lock file acquisition timeout
    """

    def __init__(self, message: str, cache_path: str | None = None):
        """Create the error, mentioning the cache path if given.

        Args:
            message: Description of the cache error.
            cache_path: Path to the cache file/directory involved. When
                truthy, the final message is
                ``"Cache error at <cache_path>: <message>"``.
        """
        self.cache_path = cache_path

        text = message
        if cache_path:
            text = f"Cache error at {cache_path}: {message}"
        super().__init__(text)
96
+
97
+
98
class ConfigurationError(PydeflateError):
    """A configuration parameter is invalid.

    Examples:
    - Invalid currency code
    - Base year out of range
    - Missing required columns in user data
    - Conflicting parameter combinations
    """

    def __init__(self, message: str, parameter: str | None = None):
        """Create the error, naming the offending parameter if given.

        Args:
            message: Description of the configuration issue.
            parameter: Name of the problematic parameter. When truthy, the
                final message is
                ``"Invalid configuration for '<parameter>': <message>"``.
        """
        self.parameter = parameter

        text = message
        if parameter:
            text = f"Invalid configuration for '{parameter}': {message}"
        super().__init__(text)
121
+
122
+
123
class MissingDataError(PydeflateError):
    """Required deflator or exchange data is unavailable.

    Typical causes:
    - Requested country/year combination has no data in the source
    - Data gaps in historical records
    - Future years beyond available estimates
    """

    def __init__(
        self,
        message: str,
        missing_entities: dict[str, list[int]] | None = None,
    ):
        """Create the error and keep the missing-entity details.

        Args:
            message: Description of the missing data; used unchanged as the
                exception message.
            missing_entities: Dict mapping entity codes to missing years.
        """
        super().__init__(message)
        self.missing_entities = missing_entities
145
+
146
+
147
class PluginError(PydeflateError):
    """Plugin registration or loading failed.

    Examples:
    - Plugin doesn't implement required protocol
    - Plugin name conflicts with existing source
    - Plugin initialization fails
    """

    def __init__(self, message: str, plugin_name: str | None = None):
        """Create the error, naming the plugin if given.

        Args:
            message: Description of the plugin error.
            plugin_name: Name of the plugin that failed. When truthy, the
                final message is ``"Plugin '<plugin_name>' error: <message>"``.
        """
        self.plugin_name = plugin_name

        text = message
        if plugin_name:
            text = f"Plugin '{plugin_name}' error: {message}"
        super().__init__(text)
@@ -3,7 +3,7 @@ from functools import wraps
3
3
  import pandas as pd
4
4
 
5
5
  from pydeflate.core.api import BaseExchange
6
- from pydeflate.core.source import DAC, WorldBank, IMF, WorldBankPPP
6
+ from pydeflate.core.source import DAC, IMF, WorldBank, WorldBankPPP
7
7
 
8
8
 
9
9
  def _generate_docstring(source_name: str) -> str: