pyconvexity 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyconvexity might be problematic. Click here for more details.

Files changed (42) hide show
  1. pyconvexity/__init__.py +226 -0
  2. pyconvexity/_version.py +1 -0
  3. pyconvexity/core/__init__.py +60 -0
  4. pyconvexity/core/database.py +485 -0
  5. pyconvexity/core/errors.py +106 -0
  6. pyconvexity/core/types.py +400 -0
  7. pyconvexity/data/README.md +101 -0
  8. pyconvexity/data/__init__.py +17 -0
  9. pyconvexity/data/loaders/__init__.py +3 -0
  10. pyconvexity/data/loaders/cache.py +213 -0
  11. pyconvexity/data/schema/01_core_schema.sql +420 -0
  12. pyconvexity/data/schema/02_data_metadata.sql +120 -0
  13. pyconvexity/data/schema/03_validation_data.sql +506 -0
  14. pyconvexity/data/sources/__init__.py +5 -0
  15. pyconvexity/data/sources/gem.py +442 -0
  16. pyconvexity/io/__init__.py +26 -0
  17. pyconvexity/io/excel_exporter.py +1226 -0
  18. pyconvexity/io/excel_importer.py +1381 -0
  19. pyconvexity/io/netcdf_exporter.py +197 -0
  20. pyconvexity/io/netcdf_importer.py +1833 -0
  21. pyconvexity/models/__init__.py +195 -0
  22. pyconvexity/models/attributes.py +730 -0
  23. pyconvexity/models/carriers.py +159 -0
  24. pyconvexity/models/components.py +611 -0
  25. pyconvexity/models/network.py +503 -0
  26. pyconvexity/models/results.py +148 -0
  27. pyconvexity/models/scenarios.py +234 -0
  28. pyconvexity/solvers/__init__.py +29 -0
  29. pyconvexity/solvers/pypsa/__init__.py +24 -0
  30. pyconvexity/solvers/pypsa/api.py +460 -0
  31. pyconvexity/solvers/pypsa/batch_loader.py +307 -0
  32. pyconvexity/solvers/pypsa/builder.py +675 -0
  33. pyconvexity/solvers/pypsa/constraints.py +405 -0
  34. pyconvexity/solvers/pypsa/solver.py +1509 -0
  35. pyconvexity/solvers/pypsa/storage.py +2048 -0
  36. pyconvexity/timeseries.py +330 -0
  37. pyconvexity/validation/__init__.py +25 -0
  38. pyconvexity/validation/rules.py +312 -0
  39. pyconvexity-0.4.3.dist-info/METADATA +47 -0
  40. pyconvexity-0.4.3.dist-info/RECORD +42 -0
  41. pyconvexity-0.4.3.dist-info/WHEEL +5 -0
  42. pyconvexity-0.4.3.dist-info/top_level.txt +1 -0
@@ -0,0 +1,307 @@
1
+ """
2
+ PyPSA Batch Data Loader
3
+ Simplified to always create MultiIndex timeseries for consistent multi-period optimization.
4
+ """
5
+
6
+ import logging
7
+ import pandas as pd
8
+ import json
9
+ from typing import Dict, Any, List, Optional
10
+
11
+ from pyconvexity.models.attributes import get_timeseries
12
+ from pyconvexity.models import get_network_time_periods
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
class PyPSABatchLoader:
    """
    Batch data loader for PyPSA network construction.

    Reads static attributes, timeseries attributes and bus/carrier name
    lookups from the ``component_attributes``, ``components`` and ``carriers``
    tables using a small, bounded number of queries. Timeseries are always
    returned with a ``(period, timestep)`` MultiIndex so that single- and
    multi-period optimizations share one code path.
    """

    # Maximum number of component ids bound into a single ``IN (...)`` list.
    # Every id consumes one "?" placeholder, and SQLite caps the number of
    # bound parameters per statement (999 by default on older builds), so
    # large networks are queried in chunks.
    # NOTE(review): assumes ``conn`` is an sqlite3-style connection - confirm.
    _MAX_IDS_PER_QUERY = 500

    def __init__(self):
        """Create a loader; it keeps no state between calls."""

    @classmethod
    def _id_chunks(cls, component_ids):
        """Yield ``component_ids`` as lists of at most ``_MAX_IDS_PER_QUERY`` ids."""
        ids = list(component_ids)
        for start in range(0, len(ids), cls._MAX_IDS_PER_QUERY):
            yield ids[start : start + cls._MAX_IDS_PER_QUERY]

    def _fetch_attribute_rows(
        self, conn, component_ids, scenario_id, storage_type, columns
    ):
        """
        Fetch ``component_attributes`` rows for the given components.

        Args:
            conn: Connection exposing ``execute(sql, params)`` with "?" placeholders.
            component_ids: Component ids to load (any iterable).
            scenario_id: Scenario to load. When not ``None`` both the scenario
                rows and the base rows (``scenario_id IS NULL``) are returned,
                scenario rows first for each (component, attribute) pair.
            storage_type: Value matched against the ``storage_type`` column.
            columns: Comma-separated column list to select. Internal constant
                only - it is interpolated into the SQL text, never pass user input.

        Returns:
            List of row tuples, ordered by component and attribute within each
            chunk of ids.
        """
        rows = []
        for chunk in self._id_chunks(component_ids):
            placeholders = ",".join("?" * len(chunk))
            if scenario_id is not None:
                # Scenario rows sort ahead of base rows so callers can keep
                # the first row seen per (component, attribute).
                query = f"""
                    SELECT {columns}
                    FROM component_attributes
                    WHERE component_id IN ({placeholders})
                      AND attribute_name IS NOT NULL
                      AND (scenario_id = ? OR scenario_id IS NULL)
                      AND storage_type = ?
                    ORDER BY component_id, attribute_name,
                             CASE WHEN scenario_id = ? THEN 0 ELSE 1 END
                """
                params = chunk + [scenario_id, storage_type, scenario_id]
            else:
                # Base network only.
                query = f"""
                    SELECT {columns}
                    FROM component_attributes
                    WHERE component_id IN ({placeholders})
                      AND attribute_name IS NOT NULL
                      AND scenario_id IS NULL
                      AND storage_type = ?
                    ORDER BY component_id, attribute_name
                """
                params = chunk + [storage_type]
            rows.extend(conn.execute(query, params).fetchall())
        return rows

    @staticmethod
    def _coerce_static_value(json_value, data_type):
        """
        Convert a decoded JSON value according to the row's ``data_type``.

        Values whose JSON type does not match the declared type fall back to
        a neutral default (``0.0``, ``0``, ``False`` or ``""``). Unknown data
        types are returned as decoded. Note that ``bool`` is a subclass of
        ``int``, so a JSON boolean stored under "float"/"int" becomes 1/0.
        """
        if data_type == "float":
            return float(json_value) if isinstance(json_value, (int, float)) else 0.0
        if data_type == "int":
            return int(json_value) if isinstance(json_value, (int, float)) else 0
        if data_type == "boolean":
            return json_value if isinstance(json_value, bool) else False
        if data_type == "string":
            return json_value if isinstance(json_value, str) else ""
        return json_value

    def batch_load_component_attributes(
        self, conn, component_ids: List[int], scenario_id: Optional[int]
    ) -> Dict[int, Dict[str, Any]]:
        """
        Batch load all static attributes for multiple components.

        Avoids N+1 queries by reading the attributes of many components per
        statement (single network per database).

        Args:
            conn: Connection exposing ``execute(sql, params)`` with "?" placeholders.
            component_ids: Ids of the components to load.
            scenario_id: Scenario whose values take precedence over the base
                network values; ``None`` loads base network values only.

        Returns:
            Mapping ``component_id -> {attribute_name: value}``. Every
            requested id is present, with an empty dict when it has no
            static attributes. An empty ``component_ids`` yields ``{}``.

        Raises:
            TypeError: If a stored ``static_value`` is NULL.
            json.JSONDecodeError: If a stored ``static_value`` is not valid JSON.
        """
        ids = list(component_ids)
        if not ids:
            return {}

        component_attributes = {comp_id: {} for comp_id in ids}

        rows = self._fetch_attribute_rows(
            conn,
            ids,
            scenario_id,
            "static",
            "component_id, attribute_name, static_value, data_type, scenario_id",
        )
        for comp_id, attr_name, static_value_json, data_type, _row_scenario in rows:
            attributes = component_attributes.get(comp_id)
            # Unknown component (safety check) or a value already taken from
            # the preferred scenario row, which sorts first.
            if attributes is None or attr_name in attributes:
                continue

            attributes[attr_name] = self._coerce_static_value(
                json.loads(static_value_json), data_type
            )

        return component_attributes

    def batch_load_component_connections(self, conn) -> Dict[str, Dict[Any, str]]:
        """
        Batch load bus and carrier names to avoid individual lookups.

        Returns:
            ``{"bus_id_to_name": {...}, "carrier_id_to_name": {...}}`` where
            each inner dict maps a database id to the row's ``name``. Buses
            are the ``components`` rows with ``component_type = 'BUS'``;
            carriers are all rows of ``carriers``.
        """
        bus_rows = conn.execute(
            """
            SELECT id, name FROM components
            WHERE component_type = 'BUS'
            """
        ).fetchall()
        carrier_rows = conn.execute(
            """
            SELECT id, name FROM carriers
            """
        ).fetchall()

        return {
            "bus_id_to_name": {row[0]: row[1] for row in bus_rows},
            "carrier_id_to_name": {row[0]: row[1] for row in carrier_rows},
        }

    def batch_load_component_timeseries(
        self, conn, component_ids: List[int], scenario_id: Optional[int]
    ) -> Dict[int, Dict[str, pd.Series]]:
        """
        Batch load all timeseries attributes, always with a MultiIndex.

        The attribute rows are listed in bulk; the values of each series are
        then read through ``get_timeseries``. Each series is indexed by
        ``(period, timestep)`` where ``period`` is the year of the timestep,
        following the PyPSA multi-investment format.

        Args:
            conn: Connection exposing ``execute(sql, params)`` with "?" placeholders.
            component_ids: Ids of the components to load.
            scenario_id: Scenario whose series take precedence over the base
                network series; ``None`` loads base network series only.

        Returns:
            Mapping ``component_id -> {attribute_name: pd.Series}``. Every
            requested id is present. Series that are empty or fail to load
            are omitted (a warning is logged on failure); when the scenario
            series is omitted, the base series is tried next. Series shorter
            than the network's time periods cover only the leading periods;
            longer series are truncated to the available periods.
        """
        ids = list(component_ids)
        if not ids:
            return {}

        # Network time periods give the timestamps the values are aligned to.
        network_time_periods = get_network_time_periods(conn)
        if not network_time_periods:
            logger.warning("No time periods found for network")
            return {comp_id: {} for comp_id in ids}

        # (period, timestep) keys: first level is the investment period
        # (year), second level is the timestep itself.
        snapshot_keys = []
        for tp in network_time_periods:
            ts = pd.Timestamp(tp.formatted_time)
            snapshot_keys.append((ts.year, ts))

        # Most series share the same length, so each distinct length builds
        # its MultiIndex once and the series share that index object.
        index_by_length = {}

        component_timeseries = {comp_id: {} for comp_id in ids}

        # The value blob is not selected here: get_timeseries reads and
        # decodes it, so fetching it in the listing query would only load
        # every series twice.
        rows = self._fetch_attribute_rows(
            conn,
            ids,
            scenario_id,
            "timeseries",
            "component_id, attribute_name, scenario_id",
        )
        for comp_id, attr_name, row_scenario_id in rows:
            loaded = component_timeseries.get(comp_id)
            # Unknown component (safety check) or a series already taken from
            # the preferred scenario row, which sorts first.
            if loaded is None or attr_name in loaded:
                continue

            try:
                timeseries = get_timeseries(conn, comp_id, attr_name, row_scenario_id)
                if not timeseries:
                    continue
                values = timeseries.values
                # Explicit length check: truthiness is ambiguous for
                # array-like containers.
                if values is None or len(values) == 0:
                    continue

                n_steps = min(len(values), len(snapshot_keys))
                if n_steps == 0:
                    logger.warning(f"No valid timestamps for timeseries {attr_name}")
                    continue
                if len(values) > n_steps:
                    # More values than network time periods: keep the part
                    # that can be aligned instead of failing on a length
                    # mismatch and losing the whole series.
                    logger.warning(
                        f"Timeseries {attr_name} for component {comp_id} has "
                        f"{len(values)} values but only {n_steps} time periods; "
                        f"truncating"
                    )
                    values = values[:n_steps]

                multi_index = index_by_length.get(n_steps)
                if multi_index is None:
                    multi_index = pd.MultiIndex.from_tuples(
                        snapshot_keys[:n_steps], names=["period", "timestep"]
                    )
                    index_by_length[n_steps] = multi_index

                loaded[attr_name] = pd.Series(values, index=multi_index)

            except Exception as e:
                # Best effort: one unreadable series must not abort the load.
                logger.warning(
                    f"Failed to load timeseries {attr_name} for component {comp_id}: {e}"
                )
                continue

        return component_timeseries

    def batch_load_all_component_timeseries_by_type(
        self, conn, component_type: str, scenario_id: Optional[int]
    ) -> Dict[str, pd.DataFrame]:
        """
        Load all timeseries attributes for a component type, grouped by attribute.

        This is a compatibility method for the existing
        _load_all_component_timeseries interface (single network per database).

        Args:
            conn: Database connection passed through to the loaders.
            component_type: Component type passed to ``list_components_by_type``.
            scenario_id: Scenario to load, or ``None`` for the base network.

        Returns:
            Mapping ``attribute_name -> DataFrame`` with one column per
            component name. Attributes with no series are absent.
        """
        # Imported here as in the original code (presumably to avoid an
        # import cycle - confirm before moving to module level).
        from pyconvexity.models import list_components_by_type

        components = list_components_by_type(conn, component_type)
        component_timeseries = self.batch_load_component_timeseries(
            conn, [comp.id for comp in components], scenario_id
        )

        # Collect the series per attribute first and build each DataFrame in
        # one go, which avoids fragmentation from column-by-column inserts.
        columns_by_attr = {}
        for component in components:
            for attr_name, series in component_timeseries.get(component.id, {}).items():
                columns_by_attr.setdefault(attr_name, {})[component.name] = series

        return {
            attr_name: pd.DataFrame(columns)
            for attr_name, columns in columns_by_attr.items()
        }