pyconvexity 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyconvexity might be problematic; consult the registry's advisory page for details.

Files changed (43)
  1. pyconvexity/__init__.py +57 -8
  2. pyconvexity/_version.py +1 -2
  3. pyconvexity/core/__init__.py +0 -2
  4. pyconvexity/core/database.py +158 -0
  5. pyconvexity/core/types.py +105 -18
  6. pyconvexity/data/README.md +101 -0
  7. pyconvexity/data/__init__.py +18 -0
  8. pyconvexity/data/__pycache__/__init__.cpython-313.pyc +0 -0
  9. pyconvexity/data/loaders/__init__.py +3 -0
  10. pyconvexity/data/loaders/__pycache__/__init__.cpython-313.pyc +0 -0
  11. pyconvexity/data/loaders/__pycache__/cache.cpython-313.pyc +0 -0
  12. pyconvexity/data/loaders/cache.py +212 -0
  13. pyconvexity/data/schema/01_core_schema.sql +12 -12
  14. pyconvexity/data/schema/02_data_metadata.sql +17 -321
  15. pyconvexity/data/sources/__init__.py +5 -0
  16. pyconvexity/data/sources/__pycache__/__init__.cpython-313.pyc +0 -0
  17. pyconvexity/data/sources/__pycache__/gem.cpython-313.pyc +0 -0
  18. pyconvexity/data/sources/gem.py +412 -0
  19. pyconvexity/io/__init__.py +32 -0
  20. pyconvexity/io/excel_exporter.py +1012 -0
  21. pyconvexity/io/excel_importer.py +1109 -0
  22. pyconvexity/io/netcdf_exporter.py +192 -0
  23. pyconvexity/io/netcdf_importer.py +1602 -0
  24. pyconvexity/models/__init__.py +7 -0
  25. pyconvexity/models/attributes.py +209 -72
  26. pyconvexity/models/components.py +3 -0
  27. pyconvexity/models/network.py +17 -15
  28. pyconvexity/models/scenarios.py +177 -0
  29. pyconvexity/solvers/__init__.py +29 -0
  30. pyconvexity/solvers/pypsa/__init__.py +24 -0
  31. pyconvexity/solvers/pypsa/api.py +421 -0
  32. pyconvexity/solvers/pypsa/batch_loader.py +304 -0
  33. pyconvexity/solvers/pypsa/builder.py +566 -0
  34. pyconvexity/solvers/pypsa/constraints.py +321 -0
  35. pyconvexity/solvers/pypsa/solver.py +1106 -0
  36. pyconvexity/solvers/pypsa/storage.py +1574 -0
  37. pyconvexity/timeseries.py +327 -0
  38. pyconvexity/validation/rules.py +2 -2
  39. {pyconvexity-0.1.2.dist-info → pyconvexity-0.1.4.dist-info}/METADATA +5 -2
  40. pyconvexity-0.1.4.dist-info/RECORD +46 -0
  41. pyconvexity-0.1.2.dist-info/RECORD +0 -20
  42. {pyconvexity-0.1.2.dist-info → pyconvexity-0.1.4.dist-info}/WHEEL +0 -0
  43. {pyconvexity-0.1.2.dist-info → pyconvexity-0.1.4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,304 @@
1
+ """
2
+ PyPSA Batch Data Loader
3
+ Simplified to always create MultiIndex timeseries for consistent multi-period optimization.
4
+ """
5
+
6
+ import logging
7
+ import pandas as pd
8
+ import json
9
+ from typing import Dict, Any, List, Optional
10
+
11
+ from pyconvexity.models.attributes import get_timeseries
12
+ from pyconvexity.models import get_network_time_periods
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
class PyPSABatchLoader:
    """
    Simplified batch data loader for PyPSA network construction.

    Always creates MultiIndex timeseries (period, timestep) for consistent
    multi-period optimization.  Each public loader issues a small, fixed
    number of SQL queries per call rather than one query per component,
    avoiding N+1 lookup patterns during network construction.
    """

    @staticmethod
    def _resolve_scenario_filter(conn, network_id: int, scenario_id: Optional[int]):
        """
        Resolve which scenario ids an attribute query should match.

        Returns ``(scenario_filter_values, preferred_scenario_id)``: the list
        of scenario ids for the SQL ``IN`` clause, and the id that should win
        when the same attribute exists in several of them (the requested
        scenario, falling back to the network's master scenario).  An empty
        filter list means no scenario could be resolved and the caller should
        return empty results.
        """
        cursor = conn.execute(
            "SELECT id FROM scenarios WHERE network_id = ? AND is_master = 1",
            (network_id,),
        )
        row = cursor.fetchone()
        master_id = row[0] if row else None

        if scenario_id is not None:
            if master_id is not None:
                # Requested scenario first, master as fallback.
                return [scenario_id, master_id], scenario_id
            return [scenario_id], scenario_id
        if master_id is not None:
            return [master_id], master_id
        return [], None

    @staticmethod
    def _coerce_static_value(json_value: Any, data_type: str) -> Any:
        """
        Coerce a JSON-decoded value to its declared data_type.

        Mismatched types fall back to a neutral default (0.0 / 0 / False / "")
        instead of raising.  NOTE(review): for ``boolean``, only JSON
        true/false survive — a database that stores 0/1 would coerce to
        ``False`` here; confirm against the schema before relying on it.
        """
        if data_type == "float":
            return float(json_value) if isinstance(json_value, (int, float)) else 0.0
        if data_type == "int":
            return int(json_value) if isinstance(json_value, (int, float)) else 0
        if data_type == "boolean":
            return bool(json_value) if isinstance(json_value, bool) else False
        if data_type == "string":
            return str(json_value) if isinstance(json_value, str) else ""
        return json_value

    def batch_load_component_attributes(self, conn, component_ids: List[int], scenario_id: Optional[int]) -> Dict[int, Dict[str, Any]]:
        """
        Batch load all static attributes for multiple components.

        Issues a fixed number of queries regardless of ``len(component_ids)``
        to avoid N+1 lookups.  When ``scenario_id`` is given, values from that
        scenario take precedence over the network's master scenario; otherwise
        only master-scenario values are returned.

        Returns component id -> {attribute name -> coerced value}; every
        requested component id is present, possibly mapped to an empty dict.
        """
        if not component_ids:
            return {}

        comp_placeholders = ','.join('?' for _ in component_ids)

        # One query for the union of static attribute names across components.
        cursor = conn.execute(f"""
            SELECT DISTINCT attribute_name
            FROM component_attributes
            WHERE component_id IN ({comp_placeholders}) AND storage_type = 'static'
        """, component_ids)
        attribute_names = [row[0] for row in cursor.fetchall()]
        if not attribute_names:
            return {comp_id: {} for comp_id in component_ids}

        # All requested components are assumed to belong to the same network;
        # resolve the scenario fallback chain from the first one.
        cursor = conn.execute("SELECT network_id FROM components WHERE id = ? LIMIT 1", (component_ids[0],))
        row = cursor.fetchone()
        if not row:
            return {comp_id: {} for comp_id in component_ids}
        scenario_filter, preferred_scenario = self._resolve_scenario_filter(conn, row[0], scenario_id)
        if not scenario_filter:
            return {comp_id: {} for comp_id in component_ids}

        attr_placeholders = ','.join('?' for _ in attribute_names)
        scen_placeholders = ','.join('?' for _ in scenario_filter)

        # Single query for every (component, attribute) pair.  Rows for the
        # preferred scenario sort first, so the first row seen per pair wins.
        query = f"""
            SELECT component_id, attribute_name, static_value, data_type, scenario_id
            FROM component_attributes
            WHERE component_id IN ({comp_placeholders})
            AND attribute_name IN ({attr_placeholders})
            AND scenario_id IN ({scen_placeholders})
            AND storage_type = 'static'
            ORDER BY component_id, attribute_name,
                CASE WHEN scenario_id = ? THEN 0 ELSE 1 END
        """
        params = list(component_ids) + attribute_names + scenario_filter + [preferred_scenario]
        cursor = conn.execute(query, params)

        component_attributes: Dict[int, Dict[str, Any]] = {comp_id: {} for comp_id in component_ids}
        for comp_id, attr_name, static_value_json, data_type, _row_scenario in cursor.fetchall():
            bucket = component_attributes.get(comp_id)
            # Skip unknown components (safety) and attributes already filled
            # from the preferred scenario.
            if bucket is None or attr_name in bucket:
                continue
            try:
                json_value = json.loads(static_value_json)
            except (TypeError, ValueError) as exc:
                logger.warning("Failed to parse static value %s for component %s: %s", attr_name, comp_id, exc)
                continue
            bucket[attr_name] = self._coerce_static_value(json_value, data_type)

        return component_attributes

    def batch_load_component_connections(self, conn, network_id: int) -> Dict[str, Dict[str, str]]:
        """
        Batch load bus and carrier id->name maps for a network.

        Returns ``{'bus_id_to_name': {...}, 'carrier_id_to_name': {...}}`` so
        callers can resolve component connections without per-row lookups.
        """
        cursor = conn.execute("""
            SELECT id, name FROM components
            WHERE network_id = ? AND component_type = 'BUS'
        """, (network_id,))
        bus_id_to_name = {row[0]: row[1] for row in cursor.fetchall()}

        cursor = conn.execute("""
            SELECT id, name FROM carriers
            WHERE network_id = ?
        """, (network_id,))
        carrier_id_to_name = {row[0]: row[1] for row in cursor.fetchall()}

        return {
            'bus_id_to_name': bus_id_to_name,
            'carrier_id_to_name': carrier_id_to_name
        }

    def batch_load_component_timeseries(self, conn, component_ids: List[int], scenario_id: Optional[int]) -> Dict[int, Dict[str, pd.Series]]:
        """
        Batch load all timeseries attributes for multiple components.

        Every series is indexed by a (period, timestep) MultiIndex following
        the PyPSA multi-investment-period convention, where the period level
        is the snapshot's year.  Scenario precedence matches
        batch_load_component_attributes.

        Returns component id -> {attribute name -> pd.Series}; every requested
        component id is present, possibly mapped to an empty dict.
        """
        if not component_ids:
            return {}

        # Resolve the owning network so snapshots can be aligned to its
        # configured time periods.
        cursor = conn.execute("SELECT network_id FROM components WHERE id = ? LIMIT 1", (component_ids[0],))
        result = cursor.fetchone()
        if not result:
            return {comp_id: {} for comp_id in component_ids}
        network_id = result[0]

        network_time_periods = get_network_time_periods(conn, network_id)
        if not network_time_periods:
            logger.warning("No time periods found for network")
            return {comp_id: {} for comp_id in component_ids}
        timestamps = [pd.Timestamp(tp.formatted_time) for tp in network_time_periods]

        comp_placeholders = ','.join('?' for _ in component_ids)

        # One query for the union of timeseries attribute names.
        cursor = conn.execute(f"""
            SELECT DISTINCT attribute_name
            FROM component_attributes
            WHERE component_id IN ({comp_placeholders}) AND storage_type = 'timeseries'
        """, component_ids)
        attribute_names = [row[0] for row in cursor.fetchall()]
        if not attribute_names:
            return {comp_id: {} for comp_id in component_ids}

        scenario_filter, preferred_scenario = self._resolve_scenario_filter(conn, network_id, scenario_id)
        if not scenario_filter:
            return {comp_id: {} for comp_id in component_ids}

        attr_placeholders = ','.join('?' for _ in attribute_names)
        scen_placeholders = ','.join('?' for _ in scenario_filter)

        # Single query enumerating which (component, attribute, scenario)
        # series exist; preferred-scenario rows sort first so the first row
        # per (component, attribute) pair wins.
        query = f"""
            SELECT component_id, attribute_name, timeseries_data, scenario_id
            FROM component_attributes
            WHERE component_id IN ({comp_placeholders})
            AND attribute_name IN ({attr_placeholders})
            AND scenario_id IN ({scen_placeholders})
            AND storage_type = 'timeseries'
            ORDER BY component_id, attribute_name,
                CASE WHEN scenario_id = ? THEN 0 ELSE 1 END
        """
        params = list(component_ids) + attribute_names + scenario_filter + [preferred_scenario]
        cursor = conn.execute(query, params)

        component_timeseries: Dict[int, Dict[str, pd.Series]] = {comp_id: {} for comp_id in component_ids}
        for comp_id, attr_name, _timeseries_data, row_scenario_id in cursor.fetchall():
            bucket = component_timeseries.get(comp_id)
            # Skip unknown components (safety) and attributes already filled
            # from the preferred scenario.
            if bucket is None or attr_name in bucket:
                continue
            try:
                # NOTE(review): deserialization is delegated to get_timeseries,
                # which re-reads the row — the timeseries_data column fetched
                # above goes unused, leaving a residual per-attribute query.
                # Decoding timeseries_data directly would remove it; confirm
                # the storage format before doing so.
                timeseries = get_timeseries(conn, comp_id, attr_name, row_scenario_id)
                if timeseries and timeseries.values:
                    values = timeseries.values
                    # (year, timestamp) tuples per the PyPSA multi-investment
                    # tutorial; truncate to the stored series length.
                    multi_snapshots = [(ts.year, ts) for ts in timestamps[:len(values)]]
                    if multi_snapshots:
                        multi_index = pd.MultiIndex.from_tuples(multi_snapshots, names=['period', 'timestep'])
                        bucket[attr_name] = pd.Series(values, index=multi_index)
                    else:
                        logger.warning(f"No valid timestamps for timeseries {attr_name}")
            except Exception as e:
                logger.warning(f"Failed to load timeseries {attr_name} for component {comp_id}: {e}")
                continue

        return component_timeseries

    def batch_load_all_component_timeseries_by_type(self, conn, network_id: int, component_type: str, scenario_id: Optional[int]) -> Dict[str, pd.DataFrame]:
        """
        Load all timeseries for one component type, organized by attribute.

        Compatibility wrapper around batch_load_component_timeseries that
        matches the legacy _load_all_component_timeseries interface: returns
        attribute name -> DataFrame with one column per component name.
        """
        from pyconvexity.models import list_components_by_type

        components = list_components_by_type(conn, network_id, component_type)
        component_ids = [comp.id for comp in components]

        component_timeseries = self.batch_load_component_timeseries(conn, component_ids, scenario_id)

        # Collect columns per attribute first, then build each DataFrame in a
        # single construction to avoid DataFrame fragmentation from repeated
        # column inserts.
        columns_by_attr: Dict[str, Dict[str, pd.Series]] = {}
        for component in components:
            for attr_name, series in component_timeseries.get(component.id, {}).items():
                columns_by_attr.setdefault(attr_name, {})[component.name] = series

        return {
            attr_name: pd.DataFrame(columns) if columns else pd.DataFrame()
            for attr_name, columns in columns_by_attr.items()
        }