pydeflate 2.1.2__py3-none-any.whl → 2.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pydeflate/__init__.py +64 -20
- pydeflate/cache.py +139 -0
- pydeflate/constants.py +121 -0
- pydeflate/context.py +211 -0
- pydeflate/core/api.py +34 -12
- pydeflate/core/source.py +92 -11
- pydeflate/deflate/deflators.py +1 -1
- pydeflate/deflate/legacy_deflate.py +1 -1
- pydeflate/exceptions.py +166 -0
- pydeflate/exchange/exchangers.py +1 -2
- pydeflate/plugins.py +289 -0
- pydeflate/protocols.py +168 -0
- pydeflate/pydeflate_config.py +77 -6
- pydeflate/schemas.py +297 -0
- pydeflate/sources/common.py +60 -107
- pydeflate/sources/dac.py +39 -52
- pydeflate/sources/imf.py +51 -38
- pydeflate/sources/world_bank.py +44 -117
- pydeflate/utils.py +14 -9
- {pydeflate-2.1.2.dist-info → pydeflate-2.2.0.dist-info}/METADATA +119 -18
- pydeflate-2.2.0.dist-info/RECORD +32 -0
- pydeflate-2.2.0.dist-info/WHEEL +4 -0
- {pydeflate-2.1.2.dist-info → pydeflate-2.2.0.dist-info/licenses}/LICENSE +1 -1
- pydeflate-2.1.2.dist-info/RECORD +0 -25
- pydeflate-2.1.2.dist-info/WHEEL +0 -4
pydeflate/__init__.py
CHANGED
|
@@ -1,47 +1,91 @@
|
|
|
1
1
|
__author__ = """Jorge Rivera"""
|
|
2
|
-
__version__ = "2.
|
|
2
|
+
__version__ = "2.2.0"
|
|
3
3
|
|
|
4
4
|
from pydeflate.deflate.deflators import (
|
|
5
|
+
imf_cpi_deflate,
|
|
6
|
+
imf_cpi_e_deflate,
|
|
7
|
+
imf_gdp_deflate,
|
|
5
8
|
oecd_dac_deflate,
|
|
6
9
|
wb_cpi_deflate,
|
|
7
10
|
wb_gdp_deflate,
|
|
8
11
|
wb_gdp_linked_deflate,
|
|
9
|
-
imf_cpi_deflate,
|
|
10
|
-
imf_gdp_deflate,
|
|
11
|
-
imf_cpi_e_deflate,
|
|
12
12
|
)
|
|
13
|
-
|
|
14
13
|
from pydeflate.deflate.legacy_deflate import deflate
|
|
15
14
|
from pydeflate.exchange.exchangers import (
|
|
15
|
+
imf_exchange,
|
|
16
16
|
oecd_dac_exchange,
|
|
17
17
|
wb_exchange,
|
|
18
18
|
wb_exchange_ppp,
|
|
19
|
-
imf_exchange,
|
|
20
19
|
)
|
|
21
|
-
from pydeflate.pydeflate_config import setup_logger
|
|
20
|
+
from pydeflate.pydeflate_config import set_data_dir, setup_logger
|
|
22
21
|
|
|
22
|
+
from pydeflate.context import (
|
|
23
|
+
PydeflateContext,
|
|
24
|
+
get_default_context,
|
|
25
|
+
pydeflate_session,
|
|
26
|
+
set_default_context,
|
|
27
|
+
temporary_context,
|
|
28
|
+
)
|
|
29
|
+
from pydeflate.exceptions import (
|
|
30
|
+
CacheError,
|
|
31
|
+
ConfigurationError,
|
|
32
|
+
DataSourceError,
|
|
33
|
+
MissingDataError,
|
|
34
|
+
NetworkError,
|
|
35
|
+
PluginError,
|
|
36
|
+
PydeflateError,
|
|
37
|
+
SchemaValidationError,
|
|
38
|
+
)
|
|
39
|
+
from pydeflate.plugins import (
|
|
40
|
+
get_source,
|
|
41
|
+
is_source_registered,
|
|
42
|
+
list_sources,
|
|
43
|
+
register_source,
|
|
44
|
+
)
|
|
23
45
|
|
|
24
|
-
def set_pydeflate_path(path):
|
|
25
|
-
from pathlib import Path
|
|
26
|
-
from pydeflate.pydeflate_config import PYDEFLATE_PATHS
|
|
27
46
|
|
|
28
|
-
|
|
29
|
-
|
|
47
|
+
def set_pydeflate_path(path):
|
|
48
|
+
"""Set the path to the pydeflate data cache directory."""
|
|
30
49
|
|
|
31
|
-
|
|
50
|
+
return set_data_dir(path)
|
|
32
51
|
|
|
33
52
|
|
|
34
53
|
__all__ = [
|
|
35
|
-
|
|
54
|
+
# Deflation functions
|
|
55
|
+
"deflate",
|
|
56
|
+
"imf_cpi_deflate",
|
|
57
|
+
"imf_cpi_e_deflate",
|
|
58
|
+
"imf_gdp_deflate",
|
|
36
59
|
"oecd_dac_deflate",
|
|
37
|
-
"oecd_dac_exchange",
|
|
38
60
|
"wb_cpi_deflate",
|
|
39
61
|
"wb_gdp_deflate",
|
|
40
62
|
"wb_gdp_linked_deflate",
|
|
41
|
-
|
|
42
|
-
"imf_cpi_deflate",
|
|
43
|
-
"imf_gdp_deflate",
|
|
44
|
-
"imf_cpi_e_deflate",
|
|
63
|
+
# Exchange functions
|
|
45
64
|
"imf_exchange",
|
|
46
|
-
"
|
|
65
|
+
"oecd_dac_exchange",
|
|
66
|
+
"wb_exchange",
|
|
67
|
+
"wb_exchange_ppp",
|
|
68
|
+
# Configuration
|
|
69
|
+
"set_pydeflate_path",
|
|
70
|
+
"setup_logger",
|
|
71
|
+
# Context management
|
|
72
|
+
"PydeflateContext",
|
|
73
|
+
"get_default_context",
|
|
74
|
+
"pydeflate_session",
|
|
75
|
+
"set_default_context",
|
|
76
|
+
"temporary_context",
|
|
77
|
+
# Exceptions
|
|
78
|
+
"CacheError",
|
|
79
|
+
"ConfigurationError",
|
|
80
|
+
"DataSourceError",
|
|
81
|
+
"MissingDataError",
|
|
82
|
+
"NetworkError",
|
|
83
|
+
"PluginError",
|
|
84
|
+
"PydeflateError",
|
|
85
|
+
"SchemaValidationError",
|
|
86
|
+
# Plugin system
|
|
87
|
+
"get_source",
|
|
88
|
+
"is_source_registered",
|
|
89
|
+
"list_sources",
|
|
90
|
+
"register_source",
|
|
47
91
|
]
|
pydeflate/cache.py
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from datetime import datetime, timedelta, timezone
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Callable, Dict, Iterable, Optional
|
|
9
|
+
|
|
10
|
+
from filelock import FileLock
|
|
11
|
+
|
|
12
|
+
from pydeflate.pydeflate_config import get_data_dir
|
|
13
|
+
|
|
14
|
+
ISO_FORMAT = "%Y-%m-%dT%H:%M:%S.%f%z"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass(frozen=True)
class CacheEntry:
    """Describe a cacheable dataset.

    Attributes:
        key: Unique identifier for the dataset in the cache manifest.
        filename: File name of the dataset under the cache directory.
        fetcher: Callable that downloads the dataset to the given path.
        ttl_days: Days before a cached copy is considered stale.
        version: Optional version tag; a mismatch forces a re-download.
    """

    key: str
    filename: str
    fetcher: Callable[[Path], None]
    ttl_days: int = 30
    version: str | None = None
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass(frozen=True)
class CacheRecord:
    """Point-in-time view of one manifest entry: where the cached file
    lives, when it was downloaded, and its TTL/version metadata."""

    key: str
    path: Path
    downloaded_at: datetime
    ttl_days: int
    version: str | None
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class CacheError(RuntimeError):
    """Raised when a cached dataset cannot be stored or retrieved."""

    pass
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class CacheManager:
    """Handle cached datasets stored under the pydeflate data directory.

    A JSON manifest (``manifest.json``) records, per dataset key, the file
    name, download timestamp, TTL, and optional version tag. Mutating
    operations are serialised through a ``FileLock`` so multiple processes
    can share the same cache directory.
    """

    def __init__(self, base_dir: Path | None = None) -> None:
        # Default to the configured pydeflate data directory.
        self.base_dir = (base_dir or get_data_dir()).resolve()
        self.base_dir.mkdir(parents=True, exist_ok=True)
        self.manifest_path = self.base_dir / "manifest.json"
        # Inter-process lock guarding the manifest and dataset files.
        self._lock = FileLock(str(self.base_dir / ".cache.lock"))
        self._manifest: Dict[str, dict] = self._load_manifest()

    # ------------------------------------------------------------------
    def ensure(self, entry: CacheEntry, *, refresh: bool = False) -> Path:
        """Return a local path for the given entry, downloading when needed."""

        with self._lock:
            record = self._manifest.get(entry.key)
            path = self.base_dir / entry.filename

            # Reuse the cached file unless a refresh is forced or the
            # record is missing, stale, or version-mismatched.
            if not refresh and record and path.exists():
                if not self._is_stale(record, entry):
                    return path

            path.parent.mkdir(parents=True, exist_ok=True)
            # Download to a pid-suffixed temp file, then atomically swap
            # it into place so readers never see a partial file.
            tmp_path = Path(f"{path}.tmp-{os.getpid()}")
            try:
                entry.fetcher(tmp_path)
                tmp_path.replace(path)
            finally:
                # If the fetch or replace failed, the temp file may remain.
                if tmp_path.exists():
                    tmp_path.unlink(missing_ok=True)

            self._manifest[entry.key] = {
                "filename": entry.filename,
                "downloaded_at": datetime.now(timezone.utc).strftime(ISO_FORMAT),
                "ttl_days": entry.ttl_days,
                "version": entry.version,
            }
            self._save_manifest()
            return path

    # ------------------------------------------------------------------
    def list_records(self) -> Iterable[CacheRecord]:
        # Yields one CacheRecord per manifest entry. NOTE(review): not
        # taken under the lock, so callers get a point-in-time snapshot.
        for key, payload in self._manifest.items():
            path = self.base_dir / payload["filename"]
            yield CacheRecord(
                key=key,
                path=path,
                downloaded_at=datetime.strptime(payload["downloaded_at"], ISO_FORMAT),
                ttl_days=payload["ttl_days"],
                version=payload.get("version"),
            )

    # ------------------------------------------------------------------
    def clear(self, key: str | None = None) -> None:
        """Delete one cached dataset (by key) or, with ``key=None``, all of them."""
        with self._lock:
            if key is None:
                for payload in self._manifest.values():
                    (self.base_dir / payload["filename"]).unlink(missing_ok=True)
                self._manifest = {}
            else:
                payload = self._manifest.pop(key, None)
                if payload:
                    (self.base_dir / payload["filename"]).unlink(missing_ok=True)
            self._save_manifest()

    # ------------------------------------------------------------------
    def _is_stale(self, record: dict, entry: CacheEntry) -> bool:
        # Stale when the requested version differs from the stored one,
        # or the download is older than the entry's TTL.
        version_changed = entry.version is not None and entry.version != record.get(
            "version"
        )
        downloaded = datetime.strptime(record["downloaded_at"], ISO_FORMAT)
        age = datetime.now(timezone.utc) - downloaded
        ttl = timedelta(days=entry.ttl_days)
        return version_changed or age > ttl

    # ------------------------------------------------------------------
    def _load_manifest(self) -> Dict[str, dict]:
        # A missing or corrupt manifest is treated as an empty cache.
        if not self.manifest_path.exists():
            return {}
        try:
            return json.loads(self.manifest_path.read_text())
        except json.JSONDecodeError:
            return {}

    # ------------------------------------------------------------------
    def _save_manifest(self) -> None:
        # Persist the in-memory manifest as pretty-printed JSON.
        payload = json.dumps(self._manifest, indent=2)
        self.manifest_path.write_text(payload)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
_CACHE_MANAGER: Optional[CacheManager] = None
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def cache_manager() -> CacheManager:
|
|
135
|
+
global _CACHE_MANAGER
|
|
136
|
+
base_dir = get_data_dir().resolve()
|
|
137
|
+
if _CACHE_MANAGER is None or _CACHE_MANAGER.base_dir != base_dir:
|
|
138
|
+
_CACHE_MANAGER = CacheManager(base_dir)
|
|
139
|
+
return _CACHE_MANAGER
|
pydeflate/constants.py
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
"""Constants used throughout pydeflate.
|
|
2
|
+
|
|
3
|
+
Centralizing constants eliminates magic strings and makes refactoring safer.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class PydeflateColumns:
    """Canonical column names used in pydeflate DataFrames."""

    # Index columns
    YEAR = "pydeflate_year"
    ENTITY_CODE = "pydeflate_entity_code"
    ISO3 = "pydeflate_iso3"

    # Data columns
    EXCHANGE = "pydeflate_EXCHANGE"
    EXCHANGE_D = "pydeflate_EXCHANGE_D"

    # Deflator columns
    NGDP_D = "pydeflate_NGDP_D"
    NGDP_DL = "pydeflate_NGDP_DL"
    CPI = "pydeflate_CPI"
    PCPI = "pydeflate_PCPI"
    PCPIE = "pydeflate_PCPIE"
    DAC_DEFLATOR = "pydeflate_DAC_DEFLATOR"

    # Standard index
    STANDARD_INDEX = [YEAR, ENTITY_CODE, ISO3]

    @classmethod
    def deflator_column(cls, kind: str) -> str:
        """Return the prefixed column name for a deflator *kind*.

        Args:
            kind: Deflator type (e.g. 'NGDP_D', 'CPI'), with or without
                the ``pydeflate_`` prefix.

        Returns:
            Column name carrying the ``pydeflate_`` prefix.
        """
        # Idempotent: already-prefixed names pass through unchanged.
        return kind if kind.startswith("pydeflate_") else f"pydeflate_{kind}"
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class CurrencyCodes:
    """Common currency code mappings.

    Maps user-friendly currency codes (USD, EUR, ...) to the ISO3-style
    entity codes pydeflate uses internally, plus a few special codes.
    """

    # ISO3 to common codes
    USD = "USA"
    EUR = "EUR"  # For most sources
    EUR_DAC = "EUI"  # For DAC source
    GBP = "GBR"
    JPY = "JPN"
    CAD = "CAN"

    # Special codes
    LCU = "LCU"  # Local Currency Unit
    PPP = "PPP"  # Purchasing Power Parity
    DAC = "DAC"  # DAC members

    # Mapping for user convenience.
    # BUGFIX: values must reference the class attributes defined above
    # (USD, GBP, ...). The previous version used their string *contents*
    # (USA, GBR, JPN, CAN) as bare names, which are undefined and raised
    # NameError while the class body executed.
    COMMON_ALIASES = {
        "USD": USD,
        "EUR": EUR,
        "GBP": GBP,
        "JPY": JPY,
        "CAD": CAD,
    }

    @classmethod
    def resolve(cls, code: str, source: str | None = None) -> str:
        """Resolve a currency code to ISO3.

        Args:
            code: Currency code (USD, EUR, etc.) or ISO3
            source: Data source name (affects EUR mapping for DAC)

        Returns:
            ISO3 country code or special code (LCU, PPP); unknown codes
            are returned unchanged.
        """
        # Handle EUR special case for DAC
        if code == "EUR" and source == "DAC":
            return cls.EUR_DAC

        # Try aliases; fall back to the code itself
        return cls.COMMON_ALIASES.get(code, code)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class DataSources:
    """Names of built-in data sources."""

    IMF = "IMF"
    WORLD_BANK = "World Bank"
    WORLD_BANK_PPP = "World Bank PPP"
    DAC = "DAC"

    # Aliases (same values as the canonical names above)
    WB = WORLD_BANK
    OECD = DAC

    ALL_SOURCES = [IMF, WORLD_BANK, WORLD_BANK_PPP, DAC]
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class CacheDefaults:
    """Default cache TTLs, in days, per data source."""

    TTL_DAYS_IMF = 60  # IMF data updates less frequently
    TTL_DAYS_WB = 30  # World Bank monthly updates
    TTL_DAYS_DAC = 30  # DAC data
    DEFAULT_TTL = 30
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class ValidationConfig:
    """Bounds applied when validating year and exchange-rate values."""

    MIN_YEAR = 1960  # No data before 1960
    MAX_YEAR = 2100  # No projections beyond 2100
    MIN_EXCHANGE_RATE = 1e-6  # Extremely low but non-zero
    MAX_EXCHANGE_RATE = 1e6  # Extremely high but finite
|
pydeflate/context.py
ADDED
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
"""Context management for dependency injection.
|
|
2
|
+
|
|
3
|
+
This module provides a context-based approach to managing pydeflate's
|
|
4
|
+
configuration, cache, and logging. This eliminates global state and
|
|
5
|
+
enables better testability and parallel execution.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
from contextlib import contextmanager
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any, Generator
|
|
15
|
+
|
|
16
|
+
from pydeflate.cache import CacheManager
|
|
17
|
+
from pydeflate.pydeflate_config import get_data_dir
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass
class PydeflateContext:
    """Encapsulates all runtime configuration for pydeflate operations.

    This class holds the cache manager, data directory, logger, and other
    runtime settings. Using a context object instead of global variables
    enables:
    - Multiple independent configurations in the same process
    - Better testability (mock the context instead of globals)
    - Thread-safe parallel operations
    - Clear dependency tracking

    Attributes:
        data_dir: Directory where deflator/exchange data is cached
        cache_manager: Manages cached datasets
        logger: Logger instance for this context
        log_level: Logging level (DEBUG, INFO, WARNING, ERROR)
        enable_validation: Whether to validate data schemas (recommended: True)
        config: Additional configuration options
    """

    data_dir: Path
    cache_manager: CacheManager | None = None
    logger: logging.Logger | None = None
    log_level: int = logging.INFO
    enable_validation: bool = True
    config: dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        """Initialize cache manager and logger if not provided."""
        if self.cache_manager is None:
            self.cache_manager = CacheManager(self.data_dir)

        if self.logger is None:
            self.logger = self._create_logger()

    def _create_logger(self) -> logging.Logger:
        """Create a logger for this context."""
        # NOTE(review): imported here rather than at module level,
        # presumably to avoid a circular import with pydeflate_config.
        from pydeflate.pydeflate_config import setup_logger

        # id(self) gives each context instance a unique logger name.
        logger = setup_logger(f"pydeflate.{id(self)}")
        logger.setLevel(self.log_level)
        return logger

    @classmethod
    def create(
        cls,
        data_dir: str | Path | None = None,
        log_level: int = logging.INFO,
        enable_validation: bool = True,
        **config,
    ) -> PydeflateContext:
        """Factory method to create a context with sensible defaults.

        Args:
            data_dir: Path to cache directory. If None, uses default from config.
            log_level: Logging level for this context
            enable_validation: Enable schema validation
            **config: Additional configuration options

        Returns:
            New PydeflateContext instance
        """
        if data_dir is None:
            data_dir = get_data_dir()
        else:
            # Normalise user-supplied paths (expand ~, make absolute).
            data_dir = Path(data_dir).expanduser().resolve()

        return cls(
            data_dir=data_dir,
            log_level=log_level,
            enable_validation=enable_validation,
            config=config,
        )
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
# Thread-local storage for default context
|
|
97
|
+
import threading
|
|
98
|
+
|
|
99
|
+
_thread_local = threading.local()
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def get_default_context() -> PydeflateContext:
|
|
103
|
+
"""Get the default context for the current thread.
|
|
104
|
+
|
|
105
|
+
If no context has been set, creates one with default settings.
|
|
106
|
+
|
|
107
|
+
Returns:
|
|
108
|
+
PydeflateContext for current thread
|
|
109
|
+
"""
|
|
110
|
+
if not hasattr(_thread_local, "context"):
|
|
111
|
+
_thread_local.context = PydeflateContext.create()
|
|
112
|
+
return _thread_local.context
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def set_default_context(context: PydeflateContext) -> None:
|
|
116
|
+
"""Set the default context for the current thread.
|
|
117
|
+
|
|
118
|
+
Args:
|
|
119
|
+
context: Context to use as default
|
|
120
|
+
"""
|
|
121
|
+
_thread_local.context = context
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
@contextmanager
def pydeflate_session(
    data_dir: str | Path | None = None,
    log_level: int = logging.INFO,
    enable_validation: bool = True,
    **config,
) -> Generator[PydeflateContext, None, None]:
    """Run a block of pydeflate operations under a custom configuration.

    Builds a fresh :class:`PydeflateContext`, installs it as the thread's
    default for the duration of the ``with`` block, then restores the
    previous default (or removes ours if none existed).

    Example:
        >>> from pydeflate.context import pydeflate_session
        >>> with pydeflate_session(data_dir="/tmp/my_cache") as ctx:
        ...     # Use ctx for deflation operations
        ...     result = imf_gdp_deflate(df, context=ctx, ...)

    Args:
        data_dir: Path to cache directory
        log_level: Logging level
        enable_validation: Enable schema validation
        **config: Additional configuration

    Yields:
        PydeflateContext configured with the given parameters
    """
    session_ctx = PydeflateContext.create(
        data_dir=data_dir,
        log_level=log_level,
        enable_validation=enable_validation,
        **config,
    )

    # Remember whatever context (if any) was active before this session.
    saved = getattr(_thread_local, "context", None)

    try:
        set_default_context(session_ctx)
        yield session_ctx
    finally:
        if saved is not None:
            # Put the prior default back in place.
            set_default_context(saved)
        elif hasattr(_thread_local, "context"):
            # No prior default existed, so remove ours entirely.
            delattr(_thread_local, "context")
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
@contextmanager
def temporary_context(
    **overrides,
) -> Generator[PydeflateContext, None, None]:
    """Temporarily install a context derived from the current default.

    Useful for testing or for running a single block under different
    settings.

    Example:
        >>> from pydeflate.context import temporary_context
        >>> with temporary_context(enable_validation=False) as ctx:
        ...     # Validation disabled for this block
        ...     result = process_data(ctx=ctx)

    Args:
        **overrides: Configuration overrides (log_level, enable_validation, etc.)

    Yields:
        Temporary PydeflateContext with overrides applied
    """
    base = get_default_context()

    # Layer any extra config on top of the current context's config.
    merged_config = base.config.copy()
    merged_config.update(overrides.get("config", {}))

    override_ctx = PydeflateContext.create(
        data_dir=overrides.get("data_dir", base.data_dir),
        log_level=overrides.get("log_level", base.log_level),
        enable_validation=overrides.get("enable_validation", base.enable_validation),
        **merged_config,
    )

    saved = get_default_context()
    try:
        set_default_context(override_ctx)
        yield override_ctx
    finally:
        set_default_context(saved)
|
pydeflate/core/api.py
CHANGED
|
@@ -6,17 +6,17 @@ from pydeflate.core.source import Source
|
|
|
6
6
|
from pydeflate.sources.common import AvailableDeflators
|
|
7
7
|
from pydeflate.utils import (
|
|
8
8
|
create_pydeflate_year,
|
|
9
|
-
merge_user_and_pydeflate_data,
|
|
10
|
-
get_unmatched_pydeflate_data,
|
|
11
|
-
get_matched_pydeflate_data,
|
|
12
9
|
flag_missing_pydeflate_data,
|
|
10
|
+
get_matched_pydeflate_data,
|
|
11
|
+
get_unmatched_pydeflate_data,
|
|
12
|
+
merge_user_and_pydeflate_data,
|
|
13
13
|
)
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
def resolve_common_currencies(currency: str, source: str) -> str:
|
|
17
17
|
mapping = {
|
|
18
18
|
"USD": "USA",
|
|
19
|
-
"EUR": "
|
|
19
|
+
"EUR": "EUR",
|
|
20
20
|
"GBP": "GBR",
|
|
21
21
|
"JPY": "JPN",
|
|
22
22
|
"CAD": "CAN",
|
|
@@ -85,7 +85,6 @@ def _base_operation(
|
|
|
85
85
|
"pydeflate_EXCHANGE" if exchange else "pydeflate_deflator"
|
|
86
86
|
]
|
|
87
87
|
|
|
88
|
-
# Apply the correct operation based on `exchange` and `reversed`
|
|
89
88
|
if (exchange and not reversed_) or (not exchange and reversed_):
|
|
90
89
|
base_obj._merged_data[target_value_column] = (x * y).round(6)
|
|
91
90
|
else:
|
|
@@ -299,33 +298,56 @@ class BaseDeflate:
|
|
|
299
298
|
# drop where necessary data is missing
|
|
300
299
|
data = data.set_index(self._idx).dropna(how="any").reset_index()
|
|
301
300
|
|
|
301
|
+
# For to_current=True, we need the base year exchange rate
|
|
302
|
+
# Extract base year exchange rates and merge them
|
|
303
|
+
if self.to_current:
|
|
304
|
+
entity_col = self._idx[1] # pydeflate_iso3 or pydeflate_entity_code
|
|
305
|
+
base_year_rates = (
|
|
306
|
+
data[data["pydeflate_year"] == self.price_deflator.base_year]
|
|
307
|
+
.filter([entity_col, "pydeflate_EXCHANGE"])
|
|
308
|
+
.rename(columns={"pydeflate_EXCHANGE": "pydeflate_EXCHANGE_BASE"})
|
|
309
|
+
)
|
|
310
|
+
data = data.merge(base_year_rates, on=entity_col, how="left")
|
|
311
|
+
|
|
302
312
|
# Calculate price-exchange deflator
|
|
303
313
|
data["pydeflate_deflator"] = self._calculate_deflator_value(
|
|
304
314
|
data[f"pydeflate_{self.price_deflator.price_kind}"],
|
|
305
315
|
data["pydeflate_EXCHANGE_D"],
|
|
306
316
|
data[f"pydeflate_EXCHANGE"],
|
|
317
|
+
data.get("pydeflate_EXCHANGE_BASE", data[f"pydeflate_EXCHANGE"]),
|
|
307
318
|
)
|
|
308
319
|
|
|
309
320
|
self.pydeflate_data = data
|
|
310
321
|
|
|
311
322
|
def _calculate_deflator_value(
|
|
312
|
-
self,
|
|
323
|
+
self,
|
|
324
|
+
price_def: pd.Series,
|
|
325
|
+
exchange_def: pd.Series,
|
|
326
|
+
exchange_rate: pd.Series,
|
|
327
|
+
exchange_rate_base: pd.Series,
|
|
313
328
|
):
|
|
314
329
|
"""Compute the combined deflator value using price deflator, exchange deflator, and rates.
|
|
315
330
|
|
|
316
331
|
Args:
|
|
317
332
|
price_def (pd.Series): Series of price deflator values.
|
|
318
333
|
exchange_def (pd.Series): Series of exchange deflator values.
|
|
319
|
-
exchange_rate (pd.Series): Series of exchange rates.
|
|
334
|
+
exchange_rate (pd.Series): Series of exchange rates for each year.
|
|
335
|
+
exchange_rate_base (pd.Series): Series of exchange rates at base year.
|
|
320
336
|
|
|
321
337
|
Returns:
|
|
322
338
|
pd.Series: Series with combined deflator values.
|
|
323
339
|
"""
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
340
|
+
# Calculate deflator based on direction of conversion
|
|
341
|
+
if self.to_current:
|
|
342
|
+
# For constant -> current: deflator = 100 / (exchange_rate_base * price_def)
|
|
343
|
+
# Use BASE YEAR exchange rate (not year's rate) because:
|
|
344
|
+
# - We start with constant values at base year prices
|
|
345
|
+
# - The base year exchange rate converts between currencies at base year
|
|
346
|
+
# - The price_def adjusts for price level changes from base year to target year
|
|
347
|
+
return 100 / (exchange_rate_base * price_def)
|
|
348
|
+
else:
|
|
349
|
+
# For current -> constant: standard formula
|
|
350
|
+
return price_def / (exchange_def * exchange_rate)
|
|
329
351
|
|
|
330
352
|
def _merge_components(self, df: pd.DataFrame, other: pd.DataFrame):
|
|
331
353
|
"""Combine data components, merging deflator and exchange rate information.
|