pyconvexity 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyconvexity might be problematic. Click here for more details.

Files changed (35) hide show
  1. pyconvexity/__init__.py +30 -6
  2. pyconvexity/_version.py +1 -1
  3. pyconvexity/data/README.md +101 -0
  4. pyconvexity/data/__init__.py +18 -0
  5. pyconvexity/data/__pycache__/__init__.cpython-313.pyc +0 -0
  6. pyconvexity/data/loaders/__init__.py +3 -0
  7. pyconvexity/data/loaders/__pycache__/__init__.cpython-313.pyc +0 -0
  8. pyconvexity/data/loaders/__pycache__/cache.cpython-313.pyc +0 -0
  9. pyconvexity/data/loaders/cache.py +212 -0
  10. pyconvexity/data/sources/__init__.py +5 -0
  11. pyconvexity/data/sources/__pycache__/__init__.cpython-313.pyc +0 -0
  12. pyconvexity/data/sources/__pycache__/gem.cpython-313.pyc +0 -0
  13. pyconvexity/data/sources/gem.py +412 -0
  14. pyconvexity/io/__init__.py +32 -0
  15. pyconvexity/io/excel_exporter.py +991 -0
  16. pyconvexity/io/excel_importer.py +1112 -0
  17. pyconvexity/io/netcdf_exporter.py +192 -0
  18. pyconvexity/io/netcdf_importer.py +599 -0
  19. pyconvexity/models/__init__.py +7 -0
  20. pyconvexity/models/attributes.py +3 -1
  21. pyconvexity/models/components.py +3 -0
  22. pyconvexity/models/scenarios.py +177 -0
  23. pyconvexity/solvers/__init__.py +29 -0
  24. pyconvexity/solvers/pypsa/__init__.py +24 -0
  25. pyconvexity/solvers/pypsa/api.py +398 -0
  26. pyconvexity/solvers/pypsa/batch_loader.py +311 -0
  27. pyconvexity/solvers/pypsa/builder.py +656 -0
  28. pyconvexity/solvers/pypsa/constraints.py +321 -0
  29. pyconvexity/solvers/pypsa/solver.py +1255 -0
  30. pyconvexity/solvers/pypsa/storage.py +2207 -0
  31. {pyconvexity-0.1.1.dist-info → pyconvexity-0.1.3.dist-info}/METADATA +5 -2
  32. pyconvexity-0.1.3.dist-info/RECORD +45 -0
  33. pyconvexity-0.1.1.dist-info/RECORD +0 -20
  34. {pyconvexity-0.1.1.dist-info → pyconvexity-0.1.3.dist-info}/WHEEL +0 -0
  35. {pyconvexity-0.1.1.dist-info → pyconvexity-0.1.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,311 @@
1
+ """
2
+ PyPSA Batch Data Loader
3
+ Optimized batch loading functions for PyPSA network construction.
4
+ Eliminates N+1 query patterns for improved performance.
5
+ """
6
+
7
+ import logging
8
+ import pandas as pd
9
+ import json
10
+ from typing import Dict, Any, List, Optional
11
+
12
+ # Import functions directly from pyconvexity
13
+ from pyconvexity.models.attributes import deserialize_timeseries_from_parquet
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class PyPSABatchLoader:
    """
    Optimized batch data loader for PyPSA network construction.

    Eliminates N+1 query patterns by loading static attributes, timeseries,
    and bus/carrier lookup tables in a small, fixed number of SQL queries
    per batch instead of one query per component.

    The loader is stateless: every method takes an open DB connection
    explicitly, so one instance can be shared freely between callers.
    (The original no-op ``__init__(self): pass`` was removed — the implicit
    default constructor is identical.)
    """
27
+ def batch_load_component_attributes(self, conn, component_ids: List[int], scenario_id: Optional[int]) -> Dict[int, Dict[str, Any]]:
28
+ """Batch load all static attributes for multiple components to avoid N+1 queries"""
29
+ if not component_ids:
30
+ return {}
31
+
32
+ # Build a single query to get all attributes for all components
33
+ placeholders = ','.join(['?' for _ in component_ids])
34
+
35
+ # Get all attribute names for all components in one query
36
+ cursor = conn.execute(f"""
37
+ SELECT DISTINCT attribute_name
38
+ FROM component_attributes
39
+ WHERE component_id IN ({placeholders}) AND storage_type = 'static'
40
+ """, component_ids)
41
+ all_attribute_names = [row[0] for row in cursor.fetchall()]
42
+
43
+ if not all_attribute_names:
44
+ return {comp_id: {} for comp_id in component_ids}
45
+
46
+ # Build query to get all attributes for all components
47
+ attr_placeholders = ','.join(['?' for _ in all_attribute_names])
48
+
49
+ # Resolve scenario IDs for fallback logic
50
+ scenario_filter_values = []
51
+ master_id = None
52
+ if scenario_id is not None:
53
+ # Get master scenario ID for fallback
54
+ cursor = conn.execute("SELECT id FROM scenarios WHERE network_id = (SELECT network_id FROM components WHERE id = ?) AND is_master = 1", (component_ids[0],))
55
+ result = cursor.fetchone()
56
+ if result:
57
+ master_id = result[0]
58
+ scenario_filter_values = [scenario_id, master_id]
59
+ else:
60
+ scenario_filter_values = [scenario_id]
61
+ else:
62
+ # Get master scenario ID
63
+ cursor = conn.execute("SELECT id FROM scenarios WHERE network_id = (SELECT network_id FROM components WHERE id = ?) AND is_master = 1", (component_ids[0],))
64
+ result = cursor.fetchone()
65
+ if result:
66
+ master_id = result[0]
67
+ scenario_filter_values = [master_id]
68
+ else:
69
+ return {comp_id: {} for comp_id in component_ids}
70
+
71
+ scen_placeholders = ','.join(['?' for _ in scenario_filter_values])
72
+
73
+ # Single query to get all attributes
74
+ # CRITICAL: Order by scenario_id DESC to prioritize current scenario over master
75
+ query = f"""
76
+ SELECT component_id, attribute_name, static_value, data_type, scenario_id
77
+ FROM component_attributes
78
+ WHERE component_id IN ({placeholders})
79
+ AND attribute_name IN ({attr_placeholders})
80
+ AND scenario_id IN ({scen_placeholders})
81
+ AND storage_type = 'static'
82
+ ORDER BY component_id, attribute_name,
83
+ CASE WHEN scenario_id = ? THEN 0 ELSE 1 END
84
+ """
85
+
86
+ # Parameters must match the order of placeholders in the query
87
+ query_params = component_ids + all_attribute_names + scenario_filter_values + [scenario_id if scenario_id is not None else master_id]
88
+
89
+ cursor = conn.execute(query, query_params)
90
+
91
+ # Group by component_id, preferring current scenario over master
92
+ component_attributes = {}
93
+ for comp_id in component_ids:
94
+ component_attributes[comp_id] = {}
95
+
96
+ # Process results, preferring current scenario over master
97
+ rows = cursor.fetchall()
98
+
99
+ for row in rows:
100
+ comp_id, attr_name, static_value_json, data_type, row_scenario_id = row
101
+
102
+ # Ensure component exists in our dictionary (safety check)
103
+ if comp_id not in component_attributes:
104
+ continue
105
+
106
+ # Skip if we already have this attribute from a preferred scenario
107
+ if attr_name in component_attributes[comp_id]:
108
+ continue
109
+
110
+ # Parse JSON value
111
+ json_value = json.loads(static_value_json)
112
+
113
+ # Convert based on data type
114
+ if data_type == "float":
115
+ value = float(json_value) if isinstance(json_value, (int, float)) else 0.0
116
+ elif data_type == "int":
117
+ value = int(json_value) if isinstance(json_value, (int, float)) else 0
118
+ elif data_type == "boolean":
119
+ value = bool(json_value) if isinstance(json_value, bool) else False
120
+ elif data_type == "string":
121
+ value = str(json_value) if isinstance(json_value, str) else ""
122
+ else:
123
+ value = json_value
124
+
125
+ component_attributes[comp_id][attr_name] = value
126
+
127
+ return component_attributes
128
+
129
+ def batch_load_component_connections(self, conn, network_id: int) -> Dict[str, Dict[str, str]]:
130
+ """Batch load bus and carrier connections to avoid individual lookups"""
131
+ # Get all bus names in one query
132
+ cursor = conn.execute("""
133
+ SELECT id, name FROM components
134
+ WHERE network_id = ? AND component_type = 'BUS'
135
+ """, (network_id,))
136
+ bus_id_to_name = {row[0]: row[1] for row in cursor.fetchall()}
137
+
138
+ # Get all carrier names in one query
139
+ cursor = conn.execute("""
140
+ SELECT id, name FROM carriers
141
+ WHERE network_id = ?
142
+ """, (network_id,))
143
+ carrier_id_to_name = {row[0]: row[1] for row in cursor.fetchall()}
144
+
145
+ return {
146
+ 'bus_id_to_name': bus_id_to_name,
147
+ 'carrier_id_to_name': carrier_id_to_name
148
+ }
149
+
150
+ def batch_load_component_timeseries(self, conn, component_ids: List[int], scenario_id: Optional[int]) -> Dict[int, Dict[str, pd.Series]]:
151
+ """Batch load all timeseries attributes for multiple components to avoid N+1 queries"""
152
+ if not component_ids:
153
+ return {}
154
+
155
+ # Get network time periods for proper timestamp alignment
156
+ network_time_periods = None
157
+ if component_ids:
158
+ cursor = conn.execute("SELECT network_id FROM components WHERE id = ? LIMIT 1", (component_ids[0],))
159
+ result = cursor.fetchone()
160
+ if result:
161
+ network_id = result[0]
162
+ try:
163
+ from pyconvexity.models.network import get_network_time_periods
164
+ network_time_periods = get_network_time_periods(conn, network_id)
165
+ logger.debug(f"Loaded {len(network_time_periods)} time periods for timeseries alignment")
166
+ except Exception as e:
167
+ logger.warning(f"Failed to load network time periods: {e}")
168
+
169
+ # Build a single query to get all timeseries attributes for all components
170
+ placeholders = ','.join(['?' for _ in component_ids])
171
+
172
+ # Get all attribute names for all components in one query
173
+ cursor = conn.execute(f"""
174
+ SELECT DISTINCT attribute_name
175
+ FROM component_attributes
176
+ WHERE component_id IN ({placeholders}) AND storage_type = 'timeseries'
177
+ """, component_ids)
178
+ all_attribute_names = [row[0] for row in cursor.fetchall()]
179
+
180
+ if not all_attribute_names:
181
+ return {comp_id: {} for comp_id in component_ids}
182
+
183
+ # Build query to get all timeseries for all components
184
+ attr_placeholders = ','.join(['?' for _ in all_attribute_names])
185
+
186
+ # Resolve scenario IDs for fallback logic
187
+ scenario_filter_values = []
188
+ master_id = None
189
+ if scenario_id is not None:
190
+ # Get master scenario ID for fallback
191
+ cursor = conn.execute("SELECT id FROM scenarios WHERE network_id = (SELECT network_id FROM components WHERE id = ?) AND is_master = 1", (component_ids[0],))
192
+ result = cursor.fetchone()
193
+ if result:
194
+ master_id = result[0]
195
+ scenario_filter_values = [scenario_id, master_id]
196
+ else:
197
+ scenario_filter_values = [scenario_id]
198
+ else:
199
+ # Get master scenario ID
200
+ cursor = conn.execute("SELECT id FROM scenarios WHERE network_id = (SELECT network_id FROM components WHERE id = ?) AND is_master = 1", (component_ids[0],))
201
+ result = cursor.fetchone()
202
+ if result:
203
+ master_id = result[0]
204
+ scenario_filter_values = [master_id]
205
+ else:
206
+ return {comp_id: {} for comp_id in component_ids}
207
+
208
+ scen_placeholders = ','.join(['?' for _ in scenario_filter_values])
209
+
210
+ # Single query to get all timeseries
211
+ # CRITICAL: Order by scenario_id to prioritize current scenario over master
212
+ query = f"""
213
+ SELECT component_id, attribute_name, timeseries_data, scenario_id
214
+ FROM component_attributes
215
+ WHERE component_id IN ({placeholders})
216
+ AND attribute_name IN ({attr_placeholders})
217
+ AND scenario_id IN ({scen_placeholders})
218
+ AND storage_type = 'timeseries'
219
+ ORDER BY component_id, attribute_name,
220
+ CASE WHEN scenario_id = ? THEN 0 ELSE 1 END
221
+ """
222
+
223
+ # Parameters must match the order of placeholders in the query
224
+ query_params = component_ids + all_attribute_names + scenario_filter_values + [scenario_id if scenario_id is not None else master_id]
225
+
226
+ cursor = conn.execute(query, query_params)
227
+
228
+ # Group by component_id, preferring current scenario over master
229
+ component_timeseries = {}
230
+ for comp_id in component_ids:
231
+ component_timeseries[comp_id] = {}
232
+
233
+ # Process results, preferring current scenario over master
234
+ rows = cursor.fetchall()
235
+
236
+ for row in rows:
237
+ comp_id, attr_name, timeseries_data, row_scenario_id = row
238
+
239
+ # Ensure component exists in our dictionary (safety check)
240
+ if comp_id not in component_timeseries:
241
+ continue
242
+
243
+ # Skip if we already have this attribute from a preferred scenario
244
+ if attr_name in component_timeseries[comp_id]:
245
+ continue
246
+
247
+ # Deserialize timeseries data
248
+ try:
249
+ timeseries_points = deserialize_timeseries_from_parquet(timeseries_data, network_time_periods)
250
+ if timeseries_points:
251
+ # Create pandas Series from timeseries points
252
+ # Sort by period_index to ensure correct order
253
+ timeseries_points.sort(key=lambda x: x.period_index)
254
+ values = [point.value for point in timeseries_points]
255
+
256
+ # Create proper timestamps for PyPSA alignment
257
+ if network_time_periods:
258
+ # Use formatted timestamps from network time periods
259
+ timestamps = []
260
+ for point in timeseries_points:
261
+ if point.period_index < len(network_time_periods):
262
+ tp = network_time_periods[point.period_index]
263
+ timestamps.append(pd.Timestamp(tp.formatted_time))
264
+ else:
265
+ logger.warning(f"Period index {point.period_index} out of range for network time periods")
266
+ timestamps.append(pd.Timestamp.now()) # Fallback
267
+ component_timeseries[comp_id][attr_name] = pd.Series(values, index=timestamps)
268
+ else:
269
+ # Fallback: use period_index as index
270
+ period_indices = [point.period_index for point in timeseries_points]
271
+ component_timeseries[comp_id][attr_name] = pd.Series(values, index=period_indices)
272
+ except Exception as e:
273
+ logger.warning(f"Failed to deserialize timeseries {attr_name} for component {comp_id}: {e}")
274
+ continue
275
+
276
+ return component_timeseries
277
+
278
+ def batch_load_all_component_timeseries_by_type(self, conn, network_id: int, component_type: str, scenario_id: Optional[int]) -> Dict[str, pd.DataFrame]:
279
+ """
280
+ Load all timeseries attributes for a component type and organize by attribute name.
281
+ This is a compatibility method for the existing _load_all_component_timeseries interface.
282
+ """
283
+ from pyconvexity.models import list_components_by_type
284
+
285
+ components = list_components_by_type(conn, network_id, component_type)
286
+ component_ids = [comp.id for comp in components]
287
+
288
+ # Use batch loading
289
+ component_timeseries = self.batch_load_component_timeseries(conn, component_ids, scenario_id)
290
+
291
+ # Reorganize by attribute name (matching original interface)
292
+ timeseries_by_attr = {}
293
+
294
+ for component in components:
295
+ comp_timeseries = component_timeseries.get(component.id, {})
296
+
297
+ for attr_name, series in comp_timeseries.items():
298
+ if attr_name not in timeseries_by_attr:
299
+ timeseries_by_attr[attr_name] = {}
300
+
301
+ # Store series in dict first
302
+ timeseries_by_attr[attr_name][component.name] = series
303
+
304
+ # Convert to DataFrames all at once to avoid fragmentation
305
+ for attr_name in timeseries_by_attr:
306
+ if timeseries_by_attr[attr_name]:
307
+ timeseries_by_attr[attr_name] = pd.DataFrame(timeseries_by_attr[attr_name])
308
+ else:
309
+ timeseries_by_attr[attr_name] = pd.DataFrame()
310
+
311
+ return timeseries_by_attr