pyconvexity 0.4.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyconvexity might be problematic.
- pyconvexity/__init__.py +241 -0
- pyconvexity/_version.py +1 -0
- pyconvexity/core/__init__.py +60 -0
- pyconvexity/core/database.py +485 -0
- pyconvexity/core/errors.py +106 -0
- pyconvexity/core/types.py +400 -0
- pyconvexity/dashboard.py +265 -0
- pyconvexity/data/README.md +101 -0
- pyconvexity/data/__init__.py +17 -0
- pyconvexity/data/loaders/__init__.py +3 -0
- pyconvexity/data/loaders/cache.py +213 -0
- pyconvexity/data/schema/01_core_schema.sql +420 -0
- pyconvexity/data/schema/02_data_metadata.sql +120 -0
- pyconvexity/data/schema/03_validation_data.sql +507 -0
- pyconvexity/data/sources/__init__.py +5 -0
- pyconvexity/data/sources/gem.py +442 -0
- pyconvexity/io/__init__.py +26 -0
- pyconvexity/io/excel_exporter.py +1226 -0
- pyconvexity/io/excel_importer.py +1381 -0
- pyconvexity/io/netcdf_exporter.py +191 -0
- pyconvexity/io/netcdf_importer.py +1802 -0
- pyconvexity/models/__init__.py +195 -0
- pyconvexity/models/attributes.py +730 -0
- pyconvexity/models/carriers.py +159 -0
- pyconvexity/models/components.py +611 -0
- pyconvexity/models/network.py +503 -0
- pyconvexity/models/results.py +148 -0
- pyconvexity/models/scenarios.py +234 -0
- pyconvexity/solvers/__init__.py +29 -0
- pyconvexity/solvers/pypsa/__init__.py +30 -0
- pyconvexity/solvers/pypsa/api.py +446 -0
- pyconvexity/solvers/pypsa/batch_loader.py +296 -0
- pyconvexity/solvers/pypsa/builder.py +655 -0
- pyconvexity/solvers/pypsa/clearing_price.py +678 -0
- pyconvexity/solvers/pypsa/constraints.py +405 -0
- pyconvexity/solvers/pypsa/solver.py +1442 -0
- pyconvexity/solvers/pypsa/storage.py +2096 -0
- pyconvexity/timeseries.py +330 -0
- pyconvexity/validation/__init__.py +25 -0
- pyconvexity/validation/rules.py +312 -0
- pyconvexity-0.4.8.dist-info/METADATA +148 -0
- pyconvexity-0.4.8.dist-info/RECORD +44 -0
- pyconvexity-0.4.8.dist-info/WHEEL +5 -0
- pyconvexity-0.4.8.dist-info/top_level.txt +1 -0
pyconvexity/data/README.md
@@ -0,0 +1,101 @@
# PyConvexity Data Module

The `pyconvexity.data` module provides functions for loading external energy data and integrating it with PyConvexity models. It is a simple, expert-friendly toolbox for working with real-world energy data.

## Installation

Install PyConvexity with the data dependencies:

```bash
pip install pyconvexity[data]
```

## Current Data Sources

### Global Energy Monitor (GEM)

Load power plant data from GEM's Global Integrated Power dataset.

**Setup:**

1. Download the GEM Excel file: `Global-Integrated-Power-August-2025.xlsx`
2. Place it in a `data/raw/global-energy-monitor/` directory, or set the path manually

**Usage:**

```python
import pyconvexity as px

# Load generators for a specific country
generators = px.data.get_generators_from_gem(
    country="USA",                                  # ISO 3-letter country code
    technology_types=["solar", "wind", "nuclear"],  # Optional filter
    min_capacity_mw=100.0,                          # Optional minimum capacity
)

# Create a network and add generators
px.create_database_with_schema("my_model.db")

with px.database_context("my_model.db") as conn:
    # network_req: a previously constructed network creation request
    network_id = px.create_network(conn, network_req)

    # Create carriers
    carriers = {}
    for carrier_name in generators['carrier'].unique():
        carriers[carrier_name] = px.create_carrier(conn, network_id, carrier_name)

    # Add generators to network
    generator_ids = px.data.add_gem_generators_to_network(
        conn, network_id, generators, carrier_mapping=carriers
    )
```

## Data Output Format

The `get_generators_from_gem()` function returns a pandas DataFrame with these columns:

- `plant_name`: Name of the power plant
- `country_iso_3`: ISO 3-letter country code
- `category`: Energy category (nuclear, thermal, renewables, storage, etc.)
- `carrier`: Energy carrier (coal, gas, solar, wind, nuclear, etc.)
- `type`: Technology type (subcritical, combined-cycle, photovoltaic, etc.)
- `capacity_mw`: Capacity in megawatts
- `start_year`: Year the plant started operation
- `latitude`: Latitude coordinate
- `longitude`: Longitude coordinate
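For orientation, a quick inspection of the returned frame might look like the sketch below. The column names come from the list above; the filter values are arbitrary examples.

```python
import pyconvexity as px

generators = px.data.get_generators_from_gem(country="USA")

# Columns documented above: plant_name, country_iso_3, category, carrier,
# type, capacity_mw, start_year, latitude, longitude
print(generators.columns.tolist())

# Example filter on the documented columns
large_solar = generators[
    (generators["carrier"] == "solar") & (generators["capacity_mw"] >= 50)
]
print(large_solar[["plant_name", "capacity_mw", "start_year"]].head())

# Total installed capacity by carrier
print(generators.groupby("carrier")["capacity_mw"].sum().sort_values(ascending=False))
```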
## Technology Mapping

GEM technologies are automatically mapped to a standardized schema:

- **Nuclear**: pressurized-water-reactor, boiling-water-reactor, small-modular-reactor
- **Thermal**: subcritical, supercritical, combined-cycle, gas-turbine
- **Renewables**: photovoltaic, thermal (solar), onshore/offshore (wind), run-of-river (hydro)
- **Storage**: lithium-ion (battery), pumped-hydro
- **Bioenergy**: biomass, biogas
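Conceptually, the mapping behaves like a lookup from GEM's technology labels to the standardized category/type pairs listed above. The dictionary below is purely illustrative; the real table lives in `pyconvexity/data/sources/gem.py` and may differ in names and coverage.

```python
# Illustrative only: lookup from GEM technology labels to (category, type) pairs.
TECHNOLOGY_MAP = {
    "pressurized-water-reactor": ("nuclear", "pressurized-water-reactor"),
    "combined-cycle": ("thermal", "combined-cycle"),
    "photovoltaic": ("renewables", "photovoltaic"),
    "lithium-ion": ("storage", "lithium-ion"),
    "biomass": ("bioenergy", "biomass"),
}

def map_technology(gem_type: str) -> tuple[str, str]:
    """Return the standardized (category, type) pair for a GEM technology label."""
    return TECHNOLOGY_MAP.get(gem_type, ("unknown", gem_type))
```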
## Caching

Data is automatically cached for 7 days to improve performance. You can disable or clear the cache:

```python
# Disable caching
generators = px.data.get_generators_from_gem(country="USA", use_cache=False)

# Clear cache
cache = px.data.DataCache()
cache.clear_cache('gem_generators')
```
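The `DataCache` class (defined in `pyconvexity/data/loaders/cache.py`, reproduced further below) also exposes helpers for inspecting and pruning the cache:

```python
cache = px.data.DataCache()

# Summary of cached datasets: entry count, total size in MB, per-dataset counts, cache directory
info = cache.get_cache_info()
print(info["total_entries"], info["total_size_mb"], info["dataset_counts"])

# Remove only the entries whose max_age_days has elapsed (7 days by default)
cache.cleanup_expired_cache()
```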
## Examples

See `examples/gem_data_example.py` for a complete working example.

## Future Data Sources

The framework is designed to be extensible. Planned additions include:

- IRENA Global Energy Atlas (renewable resource data)
- World Bank energy statistics
- IEA World Energy Outlook data
- OpenStreetMap transmission infrastructure
- NASA weather data for renewable profiles
pyconvexity/data/__init__.py
@@ -0,0 +1,17 @@
"""
PyConvexity Data Module

Provides functions for loading external energy data and integrating it with PyConvexity models.
This module offers a simple, expert-friendly toolbox for working with real-world energy data.
"""

from .sources.gem import get_generators_from_gem, add_gem_generators_to_network
from .loaders.cache import DataCache

__all__ = [
    # GEM (Global Energy Monitor) functions
    "get_generators_from_gem",
    "add_gem_generators_to_network",
    # Caching utilities
    "DataCache",
]
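Because `__all__` re-exports these names at the subpackage level, they can be imported from `pyconvexity.data` directly or reached through the `px.data` namespace used in the README above:

```python
# Both access styles resolve to the same objects exported by pyconvexity.data
from pyconvexity.data import get_generators_from_gem, add_gem_generators_to_network, DataCache

import pyconvexity as px
assert px.data.DataCache is DataCache
```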
pyconvexity/data/loaders/cache.py
@@ -0,0 +1,213 @@
"""
Caching functionality for PyConvexity data operations.

This module handles caching of processed datasets to improve performance.
"""

import pandas as pd
import hashlib
import json
from pathlib import Path
from typing import Dict, Any, Optional
import logging
from datetime import datetime, timedelta

logger = logging.getLogger(__name__)


class DataCache:
    """Manages caching of processed datasets."""

    def __init__(self, cache_dir: Optional[str] = None):
        """
        Initialize the cache manager.

        Args:
            cache_dir: Directory to store cache files. Defaults to 'data/cache'
        """
        if cache_dir is None:
            cache_dir = "data/cache"

        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)

        # Cache metadata file
        self.metadata_file = self.cache_dir / "cache_metadata.json"
        self._load_metadata()

    def _load_metadata(self):
        """Load cache metadata from file."""
        if self.metadata_file.exists():
            try:
                with open(self.metadata_file, "r") as f:
                    self.metadata = json.load(f)
            except (json.JSONDecodeError, FileNotFoundError):
                self.metadata = {}
        else:
            self.metadata = {}

    def _save_metadata(self):
        """Save cache metadata to file."""
        with open(self.metadata_file, "w") as f:
            json.dump(self.metadata, f, indent=2)

    def _get_cache_key(self, dataset_name: str, filters: Dict[str, Any]) -> str:
        """Generate a unique cache key for a dataset and filters combination."""
        # Create a hash of the filters
        filters_str = json.dumps(filters, sort_keys=True)
        filters_hash = hashlib.md5(filters_str.encode()).hexdigest()

        return f"{dataset_name}_{filters_hash}"

    def _get_cache_file_path(self, cache_key: str) -> Path:
        """Get the file path for a cache key."""
        return self.cache_dir / f"{cache_key}.parquet"

    def get_cached_data(
        self, dataset_name: str, filters: Dict[str, Any]
    ) -> Optional[pd.DataFrame]:
        """
        Retrieve cached data if available and not expired.

        Args:
            dataset_name: Name of the dataset
            filters: Filters applied to the dataset

        Returns:
            pandas.DataFrame or None: Cached data if available and valid
        """
        cache_key = self._get_cache_key(dataset_name, filters)
        cache_file = self._get_cache_file_path(cache_key)

        # Check if cache file exists
        if not cache_file.exists():
            return None

        # Check if cache entry exists in metadata
        if cache_key not in self.metadata:
            # Clean up orphaned cache file
            cache_file.unlink(missing_ok=True)
            return None

        # Check if cache is expired (default: 7 days)
        cache_info = self.metadata[cache_key]
        created_time = datetime.fromisoformat(cache_info["created"])
        max_age = timedelta(days=cache_info.get("max_age_days", 7))

        if datetime.now() - created_time > max_age:
            logger.info(f"Cache expired for '{dataset_name}', removing...")
            self._remove_cache_entry(cache_key)
            return None

        # Load cached data
        try:
            cached_data = pd.read_parquet(cache_file)
            logger.info(
                f"Loaded cached data for '{dataset_name}' ({len(cached_data)} rows)"
            )
            return cached_data
        except Exception as e:
            logger.warning(f"Failed to load cached data for '{dataset_name}': {e}")
            self._remove_cache_entry(cache_key)
            return None

    def cache_data(
        self,
        dataset_name: str,
        data: pd.DataFrame,
        filters: Dict[str, Any],
        max_age_days: int = 7,
    ):
        """
        Cache processed data.

        Args:
            dataset_name: Name of the dataset
            data: Processed pandas DataFrame
            filters: Filters applied to the dataset
            max_age_days: Maximum age of cache in days
        """
        cache_key = self._get_cache_key(dataset_name, filters)
        cache_file = self._get_cache_file_path(cache_key)

        # Save data to parquet file
        data.to_parquet(cache_file, index=False)

        # Update metadata
        self.metadata[cache_key] = {
            "dataset_name": dataset_name,
            "filters": filters,
            "created": datetime.now().isoformat(),
            "max_age_days": max_age_days,
            "rows": len(data),
            "columns": list(data.columns),
        }

        self._save_metadata()
        logger.info(f"Cached data for '{dataset_name}' ({len(data)} rows)")

    def _remove_cache_entry(self, cache_key: str):
        """Remove a cache entry and its file."""
        cache_file = self._get_cache_file_path(cache_key)
        cache_file.unlink(missing_ok=True)

        if cache_key in self.metadata:
            del self.metadata[cache_key]
            self._save_metadata()

    def clear_cache(self, dataset_name: Optional[str] = None):
        """
        Clear cache entries.

        Args:
            dataset_name: If provided, only clear cache for this dataset
        """
        keys_to_remove = []

        for cache_key, info in self.metadata.items():
            if dataset_name is None or info["dataset_name"] == dataset_name:
                keys_to_remove.append(cache_key)

        for key in keys_to_remove:
            self._remove_cache_entry(key)

        logger.info(f"Cleared {len(keys_to_remove)} cache entries")

    def get_cache_info(self) -> Dict[str, Any]:
        """Get information about the cache."""
        total_size = 0
        dataset_counts = {}

        for cache_key, info in self.metadata.items():
            dataset_name = info["dataset_name"]
            dataset_counts[dataset_name] = dataset_counts.get(dataset_name, 0) + 1

            cache_file = self._get_cache_file_path(cache_key)
            if cache_file.exists():
                total_size += cache_file.stat().st_size

        return {
            "total_entries": len(self.metadata),
            "total_size_mb": round(total_size / (1024 * 1024), 2),
            "dataset_counts": dataset_counts,
            "cache_dir": str(self.cache_dir),
        }

    def cleanup_expired_cache(self):
        """Remove expired cache entries."""
        expired_keys = []

        for cache_key, info in self.metadata.items():
            created_time = datetime.fromisoformat(info["created"])
            max_age = timedelta(days=info.get("max_age_days", 7))

            if datetime.now() - created_time > max_age:
                expired_keys.append(cache_key)

        for key in expired_keys:
            self._remove_cache_entry(key)

        if expired_keys:
            logger.info(f"Cleaned up {len(expired_keys)} expired cache entries")
        else:
            logger.info("No expired cache entries found")
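A minimal round-trip with this class, assuming pandas plus a parquet engine (such as pyarrow) are installed, might look like this; the dataset name and filters are example values:

```python
import pandas as pd
from pyconvexity.data import DataCache

cache = DataCache(cache_dir="data/cache")
filters = {"country": "USA", "min_capacity_mw": 100.0}

# Cold cache: nothing stored yet for this dataset/filters combination
print(cache.get_cached_data("gem_generators", filters))  # None

# Store a processed DataFrame, then read it back through the cache
df = pd.DataFrame({"plant_name": ["Example Plant"], "capacity_mw": [120.0]})
cache.cache_data("gem_generators", df, filters, max_age_days=7)
print(cache.get_cached_data("gem_generators", filters))

# Inspect and clear
print(cache.get_cache_info())
cache.clear_cache("gem_generators")
```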