pyconvexity-0.4.0-py3-none-any.whl → pyconvexity-0.4.1-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported public registry. It is provided for informational purposes only.
Files changed (43)
  1. pyconvexity/__init__.py +87 -46
  2. pyconvexity/_version.py +1 -1
  3. pyconvexity/core/__init__.py +3 -5
  4. pyconvexity/core/database.py +111 -103
  5. pyconvexity/core/errors.py +16 -10
  6. pyconvexity/core/types.py +61 -54
  7. pyconvexity/data/__init__.py +0 -1
  8. pyconvexity/data/loaders/cache.py +65 -64
  9. pyconvexity/data/schema/01_core_schema.sql +134 -234
  10. pyconvexity/data/schema/02_data_metadata.sql +38 -168
  11. pyconvexity/data/schema/03_validation_data.sql +327 -264
  12. pyconvexity/data/sources/gem.py +169 -139
  13. pyconvexity/io/__init__.py +4 -10
  14. pyconvexity/io/excel_exporter.py +694 -480
  15. pyconvexity/io/excel_importer.py +817 -545
  16. pyconvexity/io/netcdf_exporter.py +66 -61
  17. pyconvexity/io/netcdf_importer.py +850 -619
  18. pyconvexity/models/__init__.py +109 -59
  19. pyconvexity/models/attributes.py +197 -178
  20. pyconvexity/models/carriers.py +70 -67
  21. pyconvexity/models/components.py +260 -236
  22. pyconvexity/models/network.py +202 -284
  23. pyconvexity/models/results.py +65 -55
  24. pyconvexity/models/scenarios.py +58 -88
  25. pyconvexity/solvers/__init__.py +5 -5
  26. pyconvexity/solvers/pypsa/__init__.py +3 -3
  27. pyconvexity/solvers/pypsa/api.py +150 -134
  28. pyconvexity/solvers/pypsa/batch_loader.py +165 -162
  29. pyconvexity/solvers/pypsa/builder.py +390 -291
  30. pyconvexity/solvers/pypsa/constraints.py +184 -162
  31. pyconvexity/solvers/pypsa/solver.py +968 -663
  32. pyconvexity/solvers/pypsa/storage.py +1377 -671
  33. pyconvexity/timeseries.py +63 -60
  34. pyconvexity/validation/__init__.py +14 -6
  35. pyconvexity/validation/rules.py +95 -84
  36. pyconvexity-0.4.1.dist-info/METADATA +46 -0
  37. pyconvexity-0.4.1.dist-info/RECORD +42 -0
  38. pyconvexity/data/schema/04_scenario_schema.sql +0 -122
  39. pyconvexity/data/schema/migrate_add_geometries.sql +0 -73
  40. pyconvexity-0.4.0.dist-info/METADATA +0 -138
  41. pyconvexity-0.4.0.dist-info/RECORD +0 -44
  42. {pyconvexity-0.4.0.dist-info → pyconvexity-0.4.1.dist-info}/WHEEL +0 -0
  43. {pyconvexity-0.4.0.dist-info → pyconvexity-0.4.1.dist-info}/top_level.txt +0 -0
pyconvexity/solvers/pypsa/batch_loader.py

@@ -19,99 +19,96 @@ class PyPSABatchLoader:
     Simplified batch data loader for PyPSA network construction.
     Always creates MultiIndex timeseries for consistent multi-period optimization.
     """
-
+
     def __init__(self):
         pass
-
-    def batch_load_component_attributes(self, conn, component_ids: List[int], scenario_id: Optional[int]) -> Dict[int, Dict[str, Any]]:
-        """Batch load all static attributes for multiple components to avoid N+1 queries"""
+
+    def batch_load_component_attributes(
+        self, conn, component_ids: List[int], scenario_id: Optional[int]
+    ) -> Dict[int, Dict[str, Any]]:
+        """Batch load all static attributes for multiple components to avoid N+1 queries (single network per database)"""
         if not component_ids:
             return {}
-
+
         # Build a single query to get all attributes for all components
-        placeholders = ','.join(['?' for _ in component_ids])
-
+        placeholders = ",".join(["?" for _ in component_ids])
+
         # Get all attribute names for all components in one query
-        cursor = conn.execute(f"""
+        cursor = conn.execute(
+            f"""
             SELECT DISTINCT attribute_name
             FROM component_attributes
             WHERE component_id IN ({placeholders}) AND storage_type = 'static'
-        """, component_ids)
+        """,
+            component_ids,
+        )
         all_attribute_names = [row[0] for row in cursor.fetchall()]
-
+
         if not all_attribute_names:
             return {comp_id: {} for comp_id in component_ids}
-
+
         # Build query to get all attributes for all components
-        attr_placeholders = ','.join(['?' for _ in all_attribute_names])
-
-        # Resolve scenario IDs for fallback logic
-        scenario_filter_values = []
-        master_id = None
+        attr_placeholders = ",".join(["?" for _ in all_attribute_names])
+
+        # Scenario fallback: scenario_id -> NULL (base network)
+        # Query for both scenario-specific and base network attributes
         if scenario_id is not None:
-            # Get master scenario ID for fallback
-            cursor = conn.execute("SELECT id FROM scenarios WHERE network_id = (SELECT network_id FROM components WHERE id = ?) AND is_master = 1", (component_ids[0],))
-            result = cursor.fetchone()
-            if result:
-                master_id = result[0]
-                scenario_filter_values = [scenario_id, master_id]
-            else:
-                scenario_filter_values = [scenario_id]
+            # Get both scenario and base network values (scenario takes precedence)
+            query = f"""
+                SELECT component_id, attribute_name, static_value, data_type, scenario_id
+                FROM component_attributes
+                WHERE component_id IN ({placeholders})
+                AND attribute_name IN ({attr_placeholders})
+                AND (scenario_id = ? OR scenario_id IS NULL)
+                AND storage_type = 'static'
+                ORDER BY component_id, attribute_name,
+                    CASE WHEN scenario_id = ? THEN 0 ELSE 1 END
+            """
+            query_params = (
+                component_ids + all_attribute_names + [scenario_id, scenario_id]
+            )
         else:
-            # Get master scenario ID
-            cursor = conn.execute("SELECT id FROM scenarios WHERE network_id = (SELECT network_id FROM components WHERE id = ?) AND is_master = 1", (component_ids[0],))
-            result = cursor.fetchone()
-            if result:
-                master_id = result[0]
-                scenario_filter_values = [master_id]
-            else:
-                return {comp_id: {} for comp_id in component_ids}
-
-        scen_placeholders = ','.join(['?' for _ in scenario_filter_values])
-
-        # Single query to get all attributes
-        # CRITICAL: Order by scenario_id DESC to prioritize current scenario over master
-        query = f"""
-            SELECT component_id, attribute_name, static_value, data_type, scenario_id
-            FROM component_attributes
-            WHERE component_id IN ({placeholders})
-            AND attribute_name IN ({attr_placeholders})
-            AND scenario_id IN ({scen_placeholders})
-            AND storage_type = 'static'
-            ORDER BY component_id, attribute_name,
-                CASE WHEN scenario_id = ? THEN 0 ELSE 1 END
-        """
-
-        # Parameters must match the order of placeholders in the query
-        query_params = component_ids + all_attribute_names + scenario_filter_values + [scenario_id if scenario_id is not None else master_id]
-
+            # Get only base network attributes (scenario_id IS NULL)
+            query = f"""
+                SELECT component_id, attribute_name, static_value, data_type, scenario_id
+                FROM component_attributes
+                WHERE component_id IN ({placeholders})
+                AND attribute_name IN ({attr_placeholders})
+                AND scenario_id IS NULL
+                AND storage_type = 'static'
+                ORDER BY component_id, attribute_name
+            """
+            query_params = component_ids + all_attribute_names
+
         cursor = conn.execute(query, query_params)
-
+
         # Group by component_id, preferring current scenario over master
         component_attributes = {}
         for comp_id in component_ids:
             component_attributes[comp_id] = {}
-
+
         # Process results, preferring current scenario over master
         rows = cursor.fetchall()
-
+
         for row in rows:
             comp_id, attr_name, static_value_json, data_type, row_scenario_id = row
-
+
             # Ensure component exists in our dictionary (safety check)
             if comp_id not in component_attributes:
                 continue
-
+
             # Skip if we already have this attribute from a preferred scenario
             if attr_name in component_attributes[comp_id]:
                 continue
-
+
             # Parse JSON value
             json_value = json.loads(static_value_json)
-
+
             # Convert based on data type
             if data_type == "float":
-                value = float(json_value) if isinstance(json_value, (int, float)) else 0.0
+                value = (
+                    float(json_value) if isinstance(json_value, (int, float)) else 0.0
+                )
             elif data_type == "int":
                 value = int(json_value) if isinstance(json_value, (int, float)) else 0
             elif data_type == "boolean":
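
The substantive change in this hunk is the scenario-fallback rewrite: 0.4.0 resolved a "master" scenario ID with extra lookups, while 0.4.1 treats rows whose scenario_id IS NULL as the base network and lets scenario-specific rows win via the ORDER BY ... CASE ranking combined with first-row-wins grouping. Below is a minimal, self-contained sketch of that precedence pattern; the attrs table and its values are illustrative stand-ins, not the actual component_attributes schema.

import sqlite3

# Simplified stand-in for component_attributes: one value per
# (component, attribute), optionally overridden by a scenario.
conn = sqlite3.connect(":memory:")
conn.execute(
    "CREATE TABLE attrs (component_id INT, attribute_name TEXT,"
    " static_value TEXT, scenario_id INT)"
)
conn.executemany(
    "INSERT INTO attrs VALUES (?, ?, ?, ?)",
    [
        (1, "p_nom", "100.0", None),         # base network value
        (1, "p_nom", "250.0", 7),            # scenario 7 override
        (1, "marginal_cost", "12.5", None),  # base only, no override
    ],
)

scenario_id = 7
cursor = conn.execute(
    """
    SELECT component_id, attribute_name, static_value, scenario_id
    FROM attrs
    WHERE scenario_id = ? OR scenario_id IS NULL
    ORDER BY component_id, attribute_name,
        CASE WHEN scenario_id = ? THEN 0 ELSE 1 END
    """,
    (scenario_id, scenario_id),
)

# First-row-wins grouping, mirroring the "skip if we already have this
# attribute" check in batch_load_component_attributes.
resolved = {}
for comp_id, attr_name, value, _ in cursor:
    resolved.setdefault(comp_id, {}).setdefault(attr_name, value)

print(resolved)  # {1: {'marginal_cost': '12.5', 'p_nom': '250.0'}}

The override row sorts ahead of the base row for the same (component, attribute) pair, so keeping only the first occurrence yields scenario values where they exist and base values everywhere else, all in a single query.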
@@ -120,185 +117,191 @@ class PyPSABatchLoader:
                 value = str(json_value) if isinstance(json_value, str) else ""
             else:
                 value = json_value
-
+
             component_attributes[comp_id][attr_name] = value
-
+
         return component_attributes
-
-    def batch_load_component_connections(self, conn, network_id: int) -> Dict[str, Dict[str, str]]:
-        """Batch load bus and carrier connections to avoid individual lookups"""
+
+    def batch_load_component_connections(self, conn) -> Dict[str, Dict[str, str]]:
+        """Batch load bus and carrier connections to avoid individual lookups (single network per database)"""
         # Get all bus names in one query
-        cursor = conn.execute("""
+        cursor = conn.execute(
+            """
             SELECT id, name FROM components
-            WHERE network_id = ? AND component_type = 'BUS'
-        """, (network_id,))
+            WHERE component_type = 'BUS'
+        """
+        )
         bus_id_to_name = {row[0]: row[1] for row in cursor.fetchall()}
-
+
         # Get all carrier names in one query
-        cursor = conn.execute("""
-            SELECT id, name FROM carriers
-            WHERE network_id = ?
-        """, (network_id,))
+        cursor = conn.execute(
+            """
+            SELECT id, name FROM carriers
+            """
+        )
         carrier_id_to_name = {row[0]: row[1] for row in cursor.fetchall()}
-
+
         return {
-            'bus_id_to_name': bus_id_to_name,
-            'carrier_id_to_name': carrier_id_to_name
+            "bus_id_to_name": bus_id_to_name,
+            "carrier_id_to_name": carrier_id_to_name,
         }
-
-    def batch_load_component_timeseries(self, conn, component_ids: List[int], scenario_id: Optional[int]) -> Dict[int, Dict[str, pd.Series]]:
-        """Batch load all timeseries attributes - always create MultiIndex for consistency"""
+
+    def batch_load_component_timeseries(
+        self, conn, component_ids: List[int], scenario_id: Optional[int]
+    ) -> Dict[int, Dict[str, pd.Series]]:
+        """Batch load all timeseries attributes - always create MultiIndex for consistency (single network per database)"""
         if not component_ids:
             return {}
-
+
         # Get network time periods for proper timestamp alignment
-        cursor = conn.execute("SELECT network_id FROM components WHERE id = ? LIMIT 1", (component_ids[0],))
-        result = cursor.fetchone()
-        if not result:
-            return {comp_id: {} for comp_id in component_ids}
-
-        network_id = result[0]
-        network_time_periods = get_network_time_periods(conn, network_id)
+        network_time_periods = get_network_time_periods(conn)
         if not network_time_periods:
             logger.warning("No time periods found for network")
             return {comp_id: {} for comp_id in component_ids}
-
+
         # Convert to timestamps and extract years
         timestamps = [pd.Timestamp(tp.formatted_time) for tp in network_time_periods]
         years = sorted(list(set([ts.year for ts in timestamps])))
-
+
         # Build a single query to get all timeseries attributes for all components
-        placeholders = ','.join(['?' for _ in component_ids])
-
+        placeholders = ",".join(["?" for _ in component_ids])
+
         # Get all attribute names for all components in one query
-        cursor = conn.execute(f"""
+        cursor = conn.execute(
+            f"""
             SELECT DISTINCT attribute_name
             FROM component_attributes
            WHERE component_id IN ({placeholders}) AND storage_type = 'timeseries'
-        """, component_ids)
+        """,
+            component_ids,
+        )
         all_attribute_names = [row[0] for row in cursor.fetchall()]
-
+
         if not all_attribute_names:
             return {comp_id: {} for comp_id in component_ids}
-
+
         # Build query to get all timeseries for all components
-        attr_placeholders = ','.join(['?' for _ in all_attribute_names])
-
-        # Resolve scenario IDs for fallback logic
-        scenario_filter_values = []
-        master_id = None
+        attr_placeholders = ",".join(["?" for _ in all_attribute_names])
+
+        # Scenario fallback: scenario_id -> NULL (base network)
         if scenario_id is not None:
-            # Get master scenario ID for fallback
-            cursor = conn.execute("SELECT id FROM scenarios WHERE network_id = ? AND is_master = 1", (network_id,))
-            result = cursor.fetchone()
-            if result:
-                master_id = result[0]
-                scenario_filter_values = [scenario_id, master_id]
-            else:
-                scenario_filter_values = [scenario_id]
+            # Get both scenario and base network timeseries (scenario takes precedence)
+            query = f"""
+                SELECT component_id, attribute_name, timeseries_data, scenario_id
+                FROM component_attributes
+                WHERE component_id IN ({placeholders})
+                AND attribute_name IN ({attr_placeholders})
+                AND (scenario_id = ? OR scenario_id IS NULL)
+                AND storage_type = 'timeseries'
+                ORDER BY component_id, attribute_name,
+                    CASE WHEN scenario_id = ? THEN 0 ELSE 1 END
+            """
+            query_params = (
+                component_ids + all_attribute_names + [scenario_id, scenario_id]
+            )
         else:
-            # Get master scenario ID
-            cursor = conn.execute("SELECT id FROM scenarios WHERE network_id = ? AND is_master = 1", (network_id,))
-            result = cursor.fetchone()
-            if result:
-                master_id = result[0]
-                scenario_filter_values = [master_id]
-            else:
-                return {comp_id: {} for comp_id in component_ids}
-
-        scen_placeholders = ','.join(['?' for _ in scenario_filter_values])
-
-        # Single query to get all timeseries
-        query = f"""
-            SELECT component_id, attribute_name, timeseries_data, scenario_id
-            FROM component_attributes
-            WHERE component_id IN ({placeholders})
-            AND attribute_name IN ({attr_placeholders})
-            AND scenario_id IN ({scen_placeholders})
-            AND storage_type = 'timeseries'
-            ORDER BY component_id, attribute_name,
-                CASE WHEN scenario_id = ? THEN 0 ELSE 1 END
-        """
-
-        # Parameters must match the order of placeholders in the query
-        query_params = component_ids + all_attribute_names + scenario_filter_values + [scenario_id if scenario_id is not None else master_id]
-
+            # Get only base network timeseries (scenario_id IS NULL)
+            query = f"""
+                SELECT component_id, attribute_name, timeseries_data, scenario_id
+                FROM component_attributes
+                WHERE component_id IN ({placeholders})
+                AND attribute_name IN ({attr_placeholders})
+                AND scenario_id IS NULL
+                AND storage_type = 'timeseries'
+                ORDER BY component_id, attribute_name
+            """
+            query_params = component_ids + all_attribute_names
+
         cursor = conn.execute(query, query_params)
-
+
         # Group by component_id, preferring current scenario over master
         component_timeseries = {}
         for comp_id in component_ids:
             component_timeseries[comp_id] = {}
-
+
         # Process results, preferring current scenario over master
         rows = cursor.fetchall()
-
+
         for row in rows:
             comp_id, attr_name, timeseries_data, row_scenario_id = row
-
+
             # Ensure component exists in our dictionary (safety check)
             if comp_id not in component_timeseries:
                 continue
-
+
             # Skip if we already have this attribute from a preferred scenario
             if attr_name in component_timeseries[comp_id]:
                 continue
-
+
             # Deserialize timeseries data
            try:
                 timeseries = get_timeseries(conn, comp_id, attr_name, row_scenario_id)
                 if timeseries and timeseries.values:
                     values = timeseries.values
-
+
                     # Always create MultiIndex following PyPSA multi-investment tutorial format
                     # First level: investment periods (years), Second level: timesteps
                     multi_snapshots = []
-                    for i, ts in enumerate(timestamps[:len(values)]):
+                    for i, ts in enumerate(timestamps[: len(values)]):
                         multi_snapshots.append((ts.year, ts))
-
+
                     if multi_snapshots:
-                        multi_index = pd.MultiIndex.from_tuples(multi_snapshots, names=['period', 'timestep'])
-                        component_timeseries[comp_id][attr_name] = pd.Series(values, index=multi_index)
+                        multi_index = pd.MultiIndex.from_tuples(
+                            multi_snapshots, names=["period", "timestep"]
+                        )
+                        component_timeseries[comp_id][attr_name] = pd.Series(
+                            values, index=multi_index
+                        )
                     else:
-                        logger.warning(f"No valid timestamps for timeseries {attr_name}")
-
+                        logger.warning(
+                            f"No valid timestamps for timeseries {attr_name}"
+                        )
+
             except Exception as e:
-                logger.warning(f"Failed to load timeseries {attr_name} for component {comp_id}: {e}")
+                logger.warning(
+                    f"Failed to load timeseries {attr_name} for component {comp_id}: {e}"
+                )
                 continue
-
+
         return component_timeseries
-
-    def batch_load_all_component_timeseries_by_type(self, conn, network_id: int, component_type: str, scenario_id: Optional[int]) -> Dict[str, pd.DataFrame]:
+
+    def batch_load_all_component_timeseries_by_type(
+        self, conn, component_type: str, scenario_id: Optional[int]
+    ) -> Dict[str, pd.DataFrame]:
         """
-        Load all timeseries attributes for a component type and organize by attribute name.
+        Load all timeseries attributes for a component type and organize by attribute name (single network per database).
         This is a compatibility method for the existing _load_all_component_timeseries interface.
         """
         from pyconvexity.models import list_components_by_type
-
-        components = list_components_by_type(conn, network_id, component_type)
+
+        components = list_components_by_type(conn, component_type)
         component_ids = [comp.id for comp in components]
-
+
         # Use batch loading
-        component_timeseries = self.batch_load_component_timeseries(conn, component_ids, scenario_id)
-
+        component_timeseries = self.batch_load_component_timeseries(
+            conn, component_ids, scenario_id
+        )
+
         # Reorganize by attribute name (matching original interface)
         timeseries_by_attr = {}
-
+
         for component in components:
             comp_timeseries = component_timeseries.get(component.id, {})
-
+
             for attr_name, series in comp_timeseries.items():
                 if attr_name not in timeseries_by_attr:
                     timeseries_by_attr[attr_name] = {}
-
+
                 # Store series in dict first
                 timeseries_by_attr[attr_name][component.name] = series
-
+
         # Convert to DataFrames all at once to avoid fragmentation
         for attr_name in timeseries_by_attr:
             if timeseries_by_attr[attr_name]:
-                timeseries_by_attr[attr_name] = pd.DataFrame(timeseries_by_attr[attr_name])
+                timeseries_by_attr[attr_name] = pd.DataFrame(
+                    timeseries_by_attr[attr_name]
+                )
             else:
                 timeseries_by_attr[attr_name] = pd.DataFrame()
-
+
         return timeseries_by_attr
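
Throughout both versions, the timeseries loaders build a two-level (period, timestep) index, the snapshot layout PyPSA expects for multi-investment-period optimization. A short sketch of that construction with made-up values follows; the PyPSA calls in the trailing comment are indicative of where such an index is consumed, not part of this package.

import pandas as pd

# Illustrative only: build a (period, timestep) MultiIndex series the way
# batch_load_component_timeseries does, using the year as the investment period.
timestamps = pd.date_range("2030-01-01", periods=3, freq="h").append(
    pd.date_range("2040-01-01", periods=3, freq="h")
)
values = [0.1, 0.4, 0.8, 0.2, 0.5, 0.9]  # made-up per-unit availability

multi_snapshots = [(ts.year, ts) for ts in timestamps[: len(values)]]
multi_index = pd.MultiIndex.from_tuples(multi_snapshots, names=["period", "timestep"])
series = pd.Series(values, index=multi_index)

# In PyPSA, an index like this typically feeds multi-period setup, e.g.
#   n.set_snapshots(multi_index); n.investment_periods = [2030, 2040]
print(series.loc[2040])  # timestep-indexed slice for the 2040 period

Because every series carries the same index shape regardless of how many periods the network has, single-year and multi-year models flow through one code path, which is what the "always creates MultiIndex timeseries" note in the class docstring refers to.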