pyconvexity 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyconvexity might be problematic.

Files changed (43)
  1. pyconvexity/__init__.py +57 -8
  2. pyconvexity/_version.py +1 -2
  3. pyconvexity/core/__init__.py +0 -2
  4. pyconvexity/core/database.py +158 -0
  5. pyconvexity/core/types.py +105 -18
  6. pyconvexity/data/README.md +101 -0
  7. pyconvexity/data/__init__.py +18 -0
  8. pyconvexity/data/__pycache__/__init__.cpython-313.pyc +0 -0
  9. pyconvexity/data/loaders/__init__.py +3 -0
  10. pyconvexity/data/loaders/__pycache__/__init__.cpython-313.pyc +0 -0
  11. pyconvexity/data/loaders/__pycache__/cache.cpython-313.pyc +0 -0
  12. pyconvexity/data/loaders/cache.py +212 -0
  13. pyconvexity/data/schema/01_core_schema.sql +12 -12
  14. pyconvexity/data/schema/02_data_metadata.sql +17 -321
  15. pyconvexity/data/sources/__init__.py +5 -0
  16. pyconvexity/data/sources/__pycache__/__init__.cpython-313.pyc +0 -0
  17. pyconvexity/data/sources/__pycache__/gem.cpython-313.pyc +0 -0
  18. pyconvexity/data/sources/gem.py +412 -0
  19. pyconvexity/io/__init__.py +32 -0
  20. pyconvexity/io/excel_exporter.py +1012 -0
  21. pyconvexity/io/excel_importer.py +1109 -0
  22. pyconvexity/io/netcdf_exporter.py +192 -0
  23. pyconvexity/io/netcdf_importer.py +1602 -0
  24. pyconvexity/models/__init__.py +7 -0
  25. pyconvexity/models/attributes.py +209 -72
  26. pyconvexity/models/components.py +3 -0
  27. pyconvexity/models/network.py +17 -15
  28. pyconvexity/models/scenarios.py +177 -0
  29. pyconvexity/solvers/__init__.py +29 -0
  30. pyconvexity/solvers/pypsa/__init__.py +24 -0
  31. pyconvexity/solvers/pypsa/api.py +421 -0
  32. pyconvexity/solvers/pypsa/batch_loader.py +304 -0
  33. pyconvexity/solvers/pypsa/builder.py +566 -0
  34. pyconvexity/solvers/pypsa/constraints.py +321 -0
  35. pyconvexity/solvers/pypsa/solver.py +1106 -0
  36. pyconvexity/solvers/pypsa/storage.py +1574 -0
  37. pyconvexity/timeseries.py +327 -0
  38. pyconvexity/validation/rules.py +2 -2
  39. {pyconvexity-0.1.2.dist-info → pyconvexity-0.1.4.dist-info}/METADATA +5 -2
  40. pyconvexity-0.1.4.dist-info/RECORD +46 -0
  41. pyconvexity-0.1.2.dist-info/RECORD +0 -20
  42. {pyconvexity-0.1.2.dist-info → pyconvexity-0.1.4.dist-info}/WHEEL +0 -0
  43. {pyconvexity-0.1.2.dist-info → pyconvexity-0.1.4.dist-info}/top_level.txt +0 -0
pyconvexity/io/netcdf_importer.py
@@ -0,0 +1,1602 @@
1
+ """
2
+ NetCDF importer for PyConvexity energy system models.
3
+ Imports PyPSA NetCDF files into PyConvexity database format.
4
+ """
5
+
6
+ import logging
7
+ import pandas as pd
8
+ import numpy as np
9
+ from typing import Dict, Any, Optional, Callable, Tuple, List
10
+ from pathlib import Path
11
+ import random
12
+ import math
13
+
14
+ # Import functions directly from pyconvexity
15
+ from pyconvexity.core.database import open_connection, create_database_with_schema
16
+ from pyconvexity.core.types import (
17
+ StaticValue, CreateNetworkRequest, CreateComponentRequest
18
+ )
19
+ from pyconvexity.core.errors import PyConvexityError as DbError, ValidationError
20
+ from pyconvexity.models import (
21
+ create_network, create_carrier, insert_component, set_static_attribute,
22
+ get_bus_name_to_id_map, set_timeseries_attribute, get_component_type, get_attribute,
23
+ get_network_time_periods
24
+ )
25
+ from pyconvexity.validation import get_validation_rule
26
+ from pyconvexity.timeseries import set_timeseries
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+ class NetCDFModelImporter:
31
+ """Import PyPSA NetCDF files into PyConvexity database format"""
32
+
33
+ def __init__(self):
34
+ self.logger = logging.getLogger(__name__)
35
+ # Set random seed for reproducible coordinate generation
36
+ random.seed(42)
37
+ np.random.seed(42)
38
+ self._used_names = set() # Global registry of all used names
39
+
40
+ def import_netcdf_to_database(
41
+ self,
42
+ netcdf_path: str,
43
+ db_path: str,
44
+ network_name: str,
45
+ network_description: Optional[str] = None,
46
+ progress_callback: Optional[Callable[[int, str], None]] = None,
47
+ strict_validation: bool = False
48
+ ) -> Dict[str, Any]:
49
+ """
50
+ Import a PyPSA NetCDF file into a new database.
51
+
52
+ Args:
53
+ netcdf_path: Path to the PyPSA NetCDF file
54
+ db_path: Path where to create the database
55
+ network_name: Name for the imported network
56
+ network_description: Optional description
57
+ progress_callback: Optional callback for progress updates (progress: int, message: str)
58
+                strict_validation: Whether to fail on attributes not defined in the database schema rather than skipping them.
59
+ If True, will fail on any attribute not defined in the database schema.
60
+ If False (default), will skip undefined attributes with warnings.
61
+
62
+ Returns:
63
+ Dictionary with import results and statistics
64
+ """
65
+ try:
66
+ if progress_callback:
67
+ progress_callback(0, "Starting NetCDF import...")
68
+
69
+ # Import PyPSA
70
+ pypsa = self._import_pypsa()
71
+
72
+ if progress_callback:
73
+ progress_callback(5, "Loading PyPSA network from NetCDF...")
74
+
75
+ # Load the PyPSA network
76
+ network = pypsa.Network(netcdf_path)
77
+
78
+ if progress_callback:
79
+ progress_callback(15, f"Loaded network: {len(network.buses)} buses, {len(network.generators)} generators")
80
+
81
+ # Use the shared import logic
82
+ return self._import_network_to_database(
83
+ network=network,
84
+ db_path=db_path,
85
+ network_name=network_name,
86
+ network_description=network_description,
87
+ progress_callback=progress_callback,
88
+ strict_validation=strict_validation,
89
+ import_source="NetCDF",
90
+ netcdf_path=netcdf_path
91
+ )
92
+
93
+ except Exception as e:
94
+ self.logger.error(f"Error importing NetCDF: {e}", exc_info=True)
95
+ if progress_callback:
96
+ progress_callback(None, f"Error: {str(e)}")
97
+ raise
98
+
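For orientation, a minimal usage sketch of the method above; the file paths, network name, and print-based callback are illustrative and not part of the package:

# Illustrative only: run the NetCDF import with a simple progress callback.
from pyconvexity.io.netcdf_importer import NetCDFModelImporter

importer = NetCDFModelImporter()
result = importer.import_netcdf_to_database(
    netcdf_path="model.nc",        # hypothetical input file
    db_path="model.db",            # hypothetical output database
    network_name="Imported network",
    progress_callback=lambda pct, msg: print(pct, msg),
    strict_validation=False,       # skip undefined attributes with warnings
)
print(result["stats"]["total_components"])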
99
+ def import_csv_to_database(
100
+ self,
101
+ csv_directory: str,
102
+ db_path: str,
103
+ network_name: str,
104
+ network_description: Optional[str] = None,
105
+ progress_callback: Optional[Callable[[int, str], None]] = None,
106
+ strict_validation: bool = False
107
+ ) -> Dict[str, Any]:
108
+ """
109
+ Import a PyPSA network from CSV files into a new database.
110
+
111
+ Args:
112
+ csv_directory: Path to the directory containing PyPSA CSV files
113
+ db_path: Path where to create the database
114
+ network_name: Name for the imported network
115
+ network_description: Optional description
116
+ progress_callback: Optional callback for progress updates (progress: int, message: str)
117
+                strict_validation: Whether to fail on undefined attributes rather than skipping them
118
+
119
+ Returns:
120
+ Dictionary with import results and statistics
121
+ """
122
+ try:
123
+ if progress_callback:
124
+ progress_callback(0, "Starting PyPSA CSV import...")
125
+
126
+ # Import PyPSA
127
+ pypsa = self._import_pypsa()
128
+
129
+ if progress_callback:
130
+ progress_callback(5, "Validating CSV files...")
131
+
132
+ # Validate CSV directory and files before attempting import
133
+ self._validate_csv_directory(csv_directory)
134
+
135
+ if progress_callback:
136
+ progress_callback(10, "Loading PyPSA network from CSV files...")
137
+
138
+ # Load the PyPSA network from CSV directory
139
+ network = pypsa.Network()
140
+
141
+ try:
142
+ network.import_from_csv_folder(csv_directory)
143
+ except Exception as e:
144
+ # Provide more helpful error message
145
+ error_msg = f"PyPSA CSV import failed: {str(e)}"
146
+ if "'name'" in str(e):
147
+ error_msg += "\n\nThis usually means one of your CSV files is missing a 'name' column. PyPSA CSV files require:\n"
148
+ error_msg += "- All component CSV files (buses.csv, generators.csv, etc.) must have a 'name' column as the first column\n"
149
+ error_msg += "- The 'name' column should contain unique identifiers for each component\n"
150
+ error_msg += "- Check that your CSV files follow the PyPSA CSV format specification"
151
+ elif "KeyError" in str(e):
152
+ error_msg += f"\n\nThis indicates a required column is missing from one of your CSV files. "
153
+ error_msg += "Please ensure your CSV files follow the PyPSA format specification."
154
+
155
+ self.logger.error(error_msg)
156
+ raise ValueError(error_msg)
157
+
158
+ if progress_callback:
159
+ progress_callback(20, f"Loaded network: {len(network.buses)} buses, {len(network.generators)} generators")
160
+
161
+ # Use the shared import logic
162
+ return self._import_network_to_database(
163
+ network=network,
164
+ db_path=db_path,
165
+ network_name=network_name,
166
+ network_description=network_description,
167
+ progress_callback=progress_callback,
168
+ strict_validation=strict_validation,
169
+ import_source="CSV"
170
+ )
171
+
172
+ except Exception as e:
173
+ self.logger.error(f"Error importing PyPSA CSV: {e}", exc_info=True)
174
+ if progress_callback:
175
+ progress_callback(None, f"Error: {str(e)}")
176
+ raise
177
+
178
+ def _import_pypsa(self):
179
+ """Import PyPSA with standard error handling."""
180
+ try:
181
+ import pypsa
182
+ return pypsa
183
+ except ImportError as e:
184
+ self.logger.error(f"Failed to import PyPSA: {e}", exc_info=True)
185
+ raise ImportError(
186
+ "PyPSA is not installed or could not be imported. "
187
+ "Please ensure it is installed correctly in the environment."
188
+ ) from e
189
+ except Exception as e:
190
+ self.logger.error(f"An unexpected error occurred during PyPSA import: {e}", exc_info=True)
191
+ raise
192
+
193
+ def _validate_csv_directory(self, csv_directory: str) -> None:
194
+ """Validate that the CSV directory contains valid PyPSA CSV files"""
195
+ import os
196
+ import pandas as pd
197
+
198
+ csv_path = Path(csv_directory)
199
+ if not csv_path.exists():
200
+ raise ValueError(f"CSV directory does not exist: {csv_directory}")
201
+
202
+ if not csv_path.is_dir():
203
+ raise ValueError(f"Path is not a directory: {csv_directory}")
204
+
205
+ # Find CSV files
206
+ csv_files = list(csv_path.glob("*.csv"))
207
+ if not csv_files:
208
+ raise ValueError(f"No CSV files found in directory: {csv_directory}")
209
+
210
+ # Check each CSV file for basic validity
211
+ component_files = ['buses.csv', 'generators.csv', 'loads.csv', 'lines.csv', 'links.csv', 'storage_units.csv', 'stores.csv']
212
+ required_files = ['buses.csv'] # At minimum, we need buses
213
+
214
+ # Check for required files
215
+ existing_files = [f.name for f in csv_files]
216
+ missing_required = [f for f in required_files if f not in existing_files]
217
+ if missing_required:
218
+ raise ValueError(f"Missing required CSV files: {missing_required}")
219
+
220
+ # Validate each component CSV file that exists
221
+ for csv_file in csv_files:
222
+ if csv_file.name in component_files:
223
+ try:
224
+ df = pd.read_csv(csv_file, nrows=0) # Just read headers
225
+ if 'name' not in df.columns:
226
+ raise ValueError(f"CSV file '{csv_file.name}' is missing required 'name' column. Found columns: {list(df.columns)}")
227
+ except Exception as e:
228
+ raise ValueError(f"Error reading CSV file '{csv_file.name}': {str(e)}")
229
+
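As a sketch of what this validation accepts, a minimal PyPSA CSV folder could look like the following; the file names beyond buses.csv are optional and all values are illustrative:

csv_folder/
    buses.csv        # required, must contain a 'name' column
    generators.csv   # optional component files follow the same rule

# buses.csv (illustrative)
name,v_nom,carrier,x,y
bus_north,380,AC,10.0,54.0
bus_south,380,AC,11.0,48.0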
230
+ def _import_network_to_database(
231
+ self,
232
+ network,
233
+ db_path: str,
234
+ network_name: str,
235
+ network_description: Optional[str] = None,
236
+ progress_callback: Optional[Callable[[int, str], None]] = None,
237
+ strict_validation: bool = False,
238
+ import_source: str = "PyPSA",
239
+ netcdf_path: Optional[str] = None
240
+ ) -> Dict[str, Any]:
241
+ """
242
+ Shared logic to import a PyPSA network object into a database.
243
+ This method is used by both NetCDF and CSV import functions.
244
+ """
245
+ try:
246
+ if progress_callback:
247
+ progress_callback(0, "Starting network import...")
248
+
249
+ # Create the database with schema using atomic utility
250
+ create_database_with_schema(db_path)
251
+
252
+ if progress_callback:
253
+ progress_callback(5, "Database schema created")
254
+
255
+ # Connect to database
256
+ conn = open_connection(db_path)
257
+
258
+ try:
259
+ # Load companion location CSV if available (for NetCDF imports only)
260
+ location_map = None
261
+ if import_source == "NetCDF" and netcdf_path:
262
+ location_map = self._detect_and_load_location_csv(netcdf_path)
263
+
264
+ # Create the network record
265
+ network_id = self._create_network_record(
266
+ conn, network, network_name, network_description
267
+ )
268
+
269
+ if progress_callback:
270
+ progress_callback(10, f"Created network record (ID: {network_id})")
271
+
272
+ # Verify that the "Main" scenario was created by the database trigger
273
+ cursor = conn.execute("SELECT id, name, is_master FROM scenarios WHERE network_id = ?", (network_id,))
274
+ scenarios = cursor.fetchall()
275
+ if scenarios:
276
+ main_scenario = next((s for s in scenarios if s[2] == True), None) # is_master = True
277
+ if not main_scenario:
278
+ self.logger.warning(f"No master scenario found in scenarios: {scenarios}")
279
+ else:
280
+ self.logger.error(f"No scenarios found after network creation - database trigger may have failed")
281
+
282
+ # Create network time periods from PyPSA snapshots
283
+ self._create_network_time_periods(conn, network, network_id)
284
+
285
+ if progress_callback:
286
+ progress_callback(15, f"Created network time periods")
287
+
288
+ # Import carriers
289
+ carriers_count = self._import_carriers(conn, network, network_id)
290
+
291
+ if progress_callback:
292
+ progress_callback(20, f"Imported {carriers_count} carriers")
293
+
294
+ # Import buses
295
+ buses_count = self._import_buses(conn, network, network_id, strict_validation)
296
+
297
+ if progress_callback:
298
+ progress_callback(25, f"Imported {buses_count} buses")
299
+
300
+ # Calculate scatter radius for non-bus components based on bus separation
301
+ bus_coordinates = self._get_bus_coordinates(conn, network_id)
302
+ scatter_radius = self._calculate_bus_separation_radius(bus_coordinates)
303
+
304
+ # Import generators
305
+ generators_count = self._import_generators(conn, network, network_id, strict_validation, scatter_radius, location_map)
306
+
307
+ if progress_callback:
308
+ progress_callback(30, f"Imported {generators_count} generators")
309
+
310
+ # Import loads
311
+ loads_count = self._import_loads(conn, network, network_id, strict_validation, scatter_radius, location_map)
312
+
313
+ if progress_callback:
314
+ progress_callback(35, f"Imported {loads_count} loads")
315
+
316
+ # Import lines
317
+ lines_count = self._import_lines(conn, network, network_id, strict_validation, location_map)
318
+
319
+ if progress_callback:
320
+ progress_callback(40, f"Imported {lines_count} lines")
321
+
322
+ # Import links
323
+ links_count = self._import_links(conn, network, network_id, strict_validation, location_map)
324
+
325
+ if progress_callback:
326
+ progress_callback(45, f"Imported {links_count} links")
327
+
328
+ # Import storage units
329
+ storage_units_count = self._import_storage_units(conn, network, network_id, strict_validation, scatter_radius, location_map)
330
+
331
+ if progress_callback:
332
+ progress_callback(50, f"Imported {storage_units_count} storage units")
333
+
334
+ # Import stores
335
+ stores_count = self._import_stores(conn, network, network_id, strict_validation, scatter_radius, location_map)
336
+
337
+ if progress_callback:
338
+ progress_callback(55, f"Imported {stores_count} stores")
339
+
340
+ conn.commit()
341
+
342
+ if progress_callback:
343
+ progress_callback(100, "Import completed successfully")
344
+
345
+ # Collect final statistics
346
+ stats = {
347
+ "network_id": network_id,
348
+ "network_name": network_name,
349
+ "carriers": carriers_count,
350
+ "buses": buses_count,
351
+ "generators": generators_count,
352
+ "loads": loads_count,
353
+ "lines": lines_count,
354
+ "links": links_count,
355
+ "storage_units": storage_units_count,
356
+ "stores": stores_count,
357
+ "total_components": (buses_count + generators_count + loads_count +
358
+ lines_count + links_count + storage_units_count + stores_count),
359
+ "snapshots": len(network.snapshots) if hasattr(network, 'snapshots') else 0,
360
+ }
361
+
362
+ return {
363
+ "success": True,
364
+ "message": f"Network imported successfully from {import_source}",
365
+ "db_path": db_path,
366
+ "stats": stats
367
+ }
368
+
369
+ finally:
370
+ conn.close()
371
+
372
+ except Exception as e:
373
+ self.logger.error(f"Error importing network: {e}", exc_info=True)
374
+ if progress_callback:
375
+ progress_callback(None, f"Error: {str(e)}")
376
+ raise
377
+
378
+ # Helper methods for the import process
379
+ # Note: These are simplified versions of the methods from the original netcdf_importer.py
380
+ # The full implementation would include all the detailed import logic for each component type
381
+
382
+ def _extract_datetime_snapshots(self, network) -> pd.DatetimeIndex:
383
+ """Extract datetime snapshots from a PyPSA network"""
384
+ if not hasattr(network, 'snapshots') or len(network.snapshots) == 0:
385
+ self.logger.warning("No snapshots found in PyPSA network")
386
+ return pd.DatetimeIndex([])
387
+
388
+ snapshots = network.snapshots
389
+
390
+ try:
391
+ # Try direct conversion first (works for simple DatetimeIndex)
392
+ return pd.to_datetime(snapshots)
393
+ except (TypeError, ValueError) as e:
394
+ # Handle MultiIndex case
395
+ if hasattr(snapshots, 'nlevels') and snapshots.nlevels > 1:
396
+ # Try to use the timesteps attribute if available (common in multi-period networks)
397
+ if hasattr(network, 'timesteps') and isinstance(network.timesteps, pd.DatetimeIndex):
398
+ return network.timesteps
399
+
400
+ # Try to extract datetime from the last level of the MultiIndex
401
+ try:
402
+ # Get the last level (usually the timestep level)
403
+ last_level = snapshots.get_level_values(snapshots.nlevels - 1)
404
+ datetime_snapshots = pd.to_datetime(last_level)
405
+ return datetime_snapshots
406
+ except Exception as multi_e:
407
+ self.logger.warning(f"Failed to extract datetime from MultiIndex: {multi_e}")
408
+
409
+ # Final fallback: create a default hourly range
410
+ self.logger.warning("Could not extract datetime snapshots, creating default hourly range")
411
+ default_start = pd.Timestamp('2024-01-01 00:00:00')
412
+ default_end = pd.Timestamp('2024-01-01 23:59:59')
413
+ return pd.date_range(start=default_start, end=default_end, freq='H')
414
+
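For context, a multi-period PyPSA network exposes snapshots as a two-level MultiIndex (period, timestep); a short sketch of the fallback path above, with illustrative values:

import pandas as pd

# Illustrative multi-period snapshots: (investment period, timestep)
timesteps = pd.date_range("2030-01-01", periods=3, freq="h")
snapshots = pd.MultiIndex.from_product([[2030], timesteps], names=["period", "timestep"])

# The importer extracts the last level, which carries the datetimes
datetimes = pd.to_datetime(snapshots.get_level_values(snapshots.nlevels - 1))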
415
+ def _create_network_record(
416
+ self,
417
+ conn,
418
+ network,
419
+ network_name: str,
420
+ network_description: Optional[str] = None
421
+ ) -> int:
422
+ """Create the network record and return network ID"""
423
+
424
+ # Extract time information from PyPSA network using our robust helper
425
+ snapshots = self._extract_datetime_snapshots(network)
426
+
427
+ if len(snapshots) > 0:
428
+ time_start = snapshots.min().strftime('%Y-%m-%d %H:%M:%S')
429
+ time_end = snapshots.max().strftime('%Y-%m-%d %H:%M:%S')
430
+
431
+ # Try to infer time interval
432
+ if len(snapshots) > 1:
433
+ freq = pd.infer_freq(snapshots)
434
+ time_interval = freq or 'H' # Default to hourly if can't infer
435
+ else:
436
+ time_interval = 'H'
437
+ else:
438
+ # Default time range if no snapshots
439
+ time_start = '2024-01-01 00:00:00'
440
+ time_end = '2024-01-01 23:59:59'
441
+ time_interval = 'H'
442
+
443
+ description = network_description or f"Imported from PyPSA NetCDF on {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')}"
444
+
445
+ request = CreateNetworkRequest(
446
+ name=network_name,
447
+ description=description,
448
+ time_resolution=time_interval,
449
+ start_time=time_start,
450
+ end_time=time_end
451
+ )
452
+ return create_network(conn, request)
453
+
454
+ def _create_network_time_periods(self, conn, network, network_id: int) -> None:
455
+ """Create network time periods from PyPSA snapshots using optimized approach"""
456
+ # Use our robust helper to extract datetime snapshots
457
+ snapshots = self._extract_datetime_snapshots(network)
458
+
459
+ if len(snapshots) == 0:
460
+ self.logger.warning("No valid snapshots found in PyPSA network, skipping time periods creation")
461
+ return
462
+
463
+ # Insert optimized time periods metadata
464
+ period_count = len(snapshots)
465
+ start_timestamp = int(snapshots[0].timestamp())
466
+
467
+ # Calculate interval in seconds
468
+ if len(snapshots) > 1:
469
+ interval_seconds = int((snapshots[1] - snapshots[0]).total_seconds())
470
+ else:
471
+ interval_seconds = 3600 # Default to hourly
472
+
473
+ conn.execute("""
474
+ INSERT INTO network_time_periods (network_id, period_count, start_timestamp, interval_seconds)
475
+ VALUES (?, ?, ?, ?)
476
+ """, (network_id, period_count, start_timestamp, interval_seconds))
477
+
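The INSERT above stores the snapshot axis in compact form (count, start, interval); a hedged sketch of how those fields could be expanded back into timestamps (the helper name is illustrative, not part of the package API):

import pandas as pd

def expand_time_periods(period_count, start_timestamp, interval_seconds):
    # Rebuild the snapshot index from the compact metadata row.
    start = pd.to_datetime(start_timestamp, unit="s")
    return pd.date_range(start=start, periods=period_count,
                         freq=pd.Timedelta(seconds=interval_seconds))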
478
+ # Placeholder methods - in a full implementation, these would contain
479
+ # the detailed import logic from the original netcdf_importer.py
480
+
481
+ def _import_carriers(self, conn, network, network_id: int) -> int:
482
+ """Import carriers from PyPSA network, discovering from both network and component levels"""
483
+ count = 0
484
+ created_carriers = set()
485
+
486
+ # Discover all carriers from components (not just n.carriers table)
487
+ all_carriers = set()
488
+
489
+ # Get carriers from network.carriers table if it exists
490
+ if hasattr(network, 'carriers') and not network.carriers.empty:
491
+ all_carriers.update(network.carriers.index)
492
+
493
+ # Get carriers from generators
494
+ if hasattr(network, 'generators') and not network.generators.empty and 'carrier' in network.generators.columns:
495
+ component_carriers = set(network.generators.carrier.dropna().unique())
496
+ all_carriers.update(component_carriers)
497
+
498
+ # Get carriers from storage units
499
+ if hasattr(network, 'storage_units') and not network.storage_units.empty and 'carrier' in network.storage_units.columns:
500
+ component_carriers = set(network.storage_units.carrier.dropna().unique())
501
+ all_carriers.update(component_carriers)
502
+
503
+ # Get carriers from stores
504
+ if hasattr(network, 'stores') and not network.stores.empty and 'carrier' in network.stores.columns:
505
+ component_carriers = set(network.stores.carrier.dropna().unique())
506
+ all_carriers.update(component_carriers)
507
+
508
+ # Get carriers from loads (if they have carriers)
509
+ if hasattr(network, 'loads') and not network.loads.empty and 'carrier' in network.loads.columns:
510
+ component_carriers = set(network.loads.carrier.dropna().unique())
511
+ all_carriers.update(component_carriers)
512
+
513
+ # Get carriers from buses (if they have carriers)
514
+ if hasattr(network, 'buses') and not network.buses.empty and 'carrier' in network.buses.columns:
515
+ component_carriers = set(network.buses.carrier.dropna().unique())
516
+ all_carriers.update(component_carriers)
517
+
518
+ # Convert to sorted list for consistent ordering
519
+ all_carriers = sorted(list(all_carriers))
520
+
521
+ # Define a color palette similar to the Python code
522
+ color_palette = [
523
+ '#1f77b4', # C0 - blue
524
+ '#ff7f0e', # C1 - orange
525
+ '#2ca02c', # C2 - green
526
+ '#d62728', # C3 - red
527
+ '#9467bd', # C4 - purple
528
+ '#8c564b', # C5 - brown
529
+ '#e377c2', # C6 - pink
530
+ '#7f7f7f', # C7 - gray
531
+ '#bcbd22', # C8 - olive
532
+ '#17becf', # C9 - cyan
533
+ '#aec7e8', # light blue
534
+ '#ffbb78', # light orange
535
+ '#98df8a', # light green
536
+ '#ff9896', # light red
537
+ '#c5b0d5', # light purple
538
+ ]
539
+
540
+ # Create carriers from discovered list
541
+ for i, carrier_name in enumerate(all_carriers):
542
+ # Get carrier data from network.carriers if available
543
+ carrier_data = {}
544
+ if hasattr(network, 'carriers') and not network.carriers.empty and carrier_name in network.carriers.index:
545
+ # Use .iloc with index position to avoid fragmentation
546
+ carrier_idx = network.carriers.index.get_loc(carrier_name)
547
+ carrier_data = network.carriers.iloc[carrier_idx]
548
+
549
+ # Extract attributes with defaults
550
+ co2_emissions = carrier_data.get('co2_emissions', 0.0)
551
+
552
+ # Use color from network.carriers if available, otherwise assign from palette
553
+ if 'color' in carrier_data and pd.notna(carrier_data['color']):
554
+ color = carrier_data['color']
555
+ else:
556
+ color = color_palette[i % len(color_palette)]
557
+
558
+ nice_name = carrier_data.get('nice_name', None)
559
+
560
+ # Create the carrier
561
+ create_carrier(conn, network_id, carrier_name, co2_emissions, color, nice_name)
562
+ created_carriers.add(carrier_name)
563
+ count += 1
564
+
565
+ # Ensure we have essential carriers for bus validation
566
+ # Buses can only use AC, DC, heat, or gas carriers according to database constraints
567
+ essential_carriers = {
568
+ 'AC': {'co2_emissions': 0.0, 'color': '#3498db', 'nice_name': 'AC Electricity'},
569
+ 'electricity': {'co2_emissions': 0.0, 'color': '#2ecc71', 'nice_name': 'Electricity'}
570
+ }
571
+
572
+ for carrier_name, carrier_props in essential_carriers.items():
573
+ if carrier_name not in created_carriers:
574
+ create_carrier(
575
+ conn,
576
+ network_id,
577
+ carrier_name,
578
+ carrier_props['co2_emissions'],
579
+ carrier_props['color'],
580
+ carrier_props['nice_name']
581
+ )
582
+ created_carriers.add(carrier_name)
583
+ count += 1
584
+
585
+ return count
586
+
587
+ def _import_buses(self, conn, network, network_id: int, strict_validation: bool) -> int:
588
+ """Import buses from PyPSA network"""
589
+ count = 0
590
+
591
+ if not hasattr(network, 'buses') or network.buses.empty:
592
+ return count
593
+
594
+ for bus_name, bus_data in network.buses.iterrows():
595
+ try:
596
+ # Generate a unique name for this bus
597
+ unique_name = self._generate_unique_name(str(bus_name), 'BUS')
598
+
599
+ # Extract and log coordinate data for debugging
600
+ x_value = bus_data.get('x', None)
601
+ y_value = bus_data.get('y', None)
602
+ self.logger.debug(f"Bus '{bus_name}' -> '{unique_name}': x={x_value} (type: {type(x_value)}), y={y_value} (type: {type(y_value)})")
603
+
604
+ # Handle NaN/None values properly
605
+ longitude = None if x_value is None or (hasattr(x_value, '__iter__') and len(str(x_value)) == 0) else float(x_value) if x_value != '' else None
606
+ latitude = None if y_value is None or (hasattr(y_value, '__iter__') and len(str(y_value)) == 0) else float(y_value) if y_value != '' else None
607
+
608
+ # Additional check for pandas NaN values
609
+ if longitude is not None and pd.isna(longitude):
610
+ longitude = None
611
+ if latitude is not None and pd.isna(latitude):
612
+ latitude = None
613
+
614
+ # Get or create carrier
615
+ carrier_name = bus_data.get('carrier', 'AC')
616
+ carrier_id = self._get_or_create_carrier(conn, network_id, carrier_name)
617
+
618
+ # Create component record using atomic function
619
+ # Note: PyPSA 'x'/'y' coordinates are mapped to 'longitude'/'latitude' columns here
620
+ request = CreateComponentRequest(
621
+ network_id=network_id,
622
+ component_type='BUS',
623
+ name=unique_name, # Use globally unique name
624
+ latitude=latitude, # PyPSA y -> latitude
625
+ longitude=longitude, # PyPSA x -> longitude
626
+ carrier_id=carrier_id
627
+ )
628
+ component_id = insert_component(conn, request)
629
+
630
+ # Import bus attributes (location/coordinate data is handled above, not as attributes)
631
+ self._import_component_attributes(conn, component_id, bus_data, 'BUS', strict_validation)
632
+
633
+ # Import timeseries attributes for buses
634
+ self._import_component_timeseries(conn, network, component_id, bus_name, 'BUS', strict_validation)
635
+
636
+ count += 1
637
+
638
+ except Exception as e:
639
+ if strict_validation:
640
+ raise
641
+ self.logger.warning(f"Failed to import bus {bus_name}: {e}")
642
+ continue
643
+
644
+ return count
645
+
646
+ # Additional placeholder methods for other component types
647
+ def _import_generators(self, conn, network, network_id: int, strict_validation: bool, scatter_radius: float, location_map) -> int:
648
+ """Import generators from PyPSA network"""
649
+ count = 0
650
+
651
+ if not hasattr(network, 'generators') or network.generators.empty:
652
+ return count
653
+
654
+ # Get bus name to ID mapping
655
+ bus_name_to_id = get_bus_name_to_id_map(conn, network_id)
656
+
657
+ # Get master scenario ID
658
+ master_scenario_id = self._get_master_scenario_id(conn, network_id)
659
+
660
+ for gen_name, gen_data in network.generators.iterrows():
661
+ try:
662
+ # Get bus connection
663
+ bus_name = gen_data.get('bus')
664
+ bus_id = bus_name_to_id.get(bus_name) if bus_name else None
665
+
666
+ if not bus_id:
667
+ self.logger.warning(f"Generator {gen_name}: bus '{bus_name}' not found, skipping")
668
+ continue
669
+
670
+ # Get or create carrier
671
+ carrier_name = gen_data.get('carrier', 'AC')
672
+ carrier_id = self._get_or_create_carrier(conn, network_id, carrier_name)
673
+
674
+ # Generate coordinates near the bus
675
+ latitude, longitude = self._generate_component_coordinates(
676
+ conn, bus_id, scatter_radius, location_map, gen_name
677
+ )
678
+
679
+ # Create component record
680
+ request = CreateComponentRequest(
681
+ network_id=network_id,
682
+ component_type='GENERATOR',
683
+ name=str(gen_name),
684
+ latitude=latitude,
685
+ longitude=longitude,
686
+ carrier_id=carrier_id,
687
+ bus_id=bus_id
688
+ )
689
+ component_id = insert_component(conn, request)
690
+
691
+ # Import generator attributes
692
+ self._import_component_attributes(conn, component_id, gen_data, 'GENERATOR', strict_validation)
693
+
694
+ # Import timeseries attributes for generators
695
+ self._import_component_timeseries(conn, network, component_id, gen_name, 'GENERATOR', strict_validation)
696
+
697
+ count += 1
698
+
699
+ except Exception as e:
700
+ if strict_validation:
701
+ raise
702
+ self.logger.warning(f"Failed to import generator {gen_name}: {e}")
703
+ continue
704
+
705
+ return count
706
+
707
+ def _import_loads(self, conn, network, network_id: int, strict_validation: bool, scatter_radius: float, location_map) -> int:
708
+ """Import loads from PyPSA network"""
709
+ count = 0
710
+
711
+ if not hasattr(network, 'loads') or network.loads.empty:
712
+ return count
713
+
714
+ bus_map = get_bus_name_to_id_map(conn, network_id)
715
+ bus_coords = self._get_bus_coordinates_map(conn, network_id)
716
+
717
+ # Count components per bus for better distribution
718
+ components_per_bus = {}
719
+ for load_name, load_data in network.loads.iterrows():
720
+ bus_name = load_data['bus']
721
+ components_per_bus[bus_name] = components_per_bus.get(bus_name, 0) + 1
722
+
723
+ bus_component_counters = {}
724
+
725
+ for load_name, load_data in network.loads.iterrows():
726
+ try:
727
+ bus_id = bus_map.get(load_data['bus'])
728
+ if bus_id is None:
729
+ self.logger.warning(f"Bus '{load_data['bus']}' not found for load '{load_name}'")
730
+ continue
731
+
732
+ # Generate a unique name for this load
733
+ unique_name = self._generate_unique_name(str(load_name), 'LOAD')
734
+
735
+ # Try to get coordinates from CSV first, then fall back to scattered coordinates
736
+ latitude, longitude = None, None
737
+
738
+ # Check CSV coordinates first
739
+ csv_coords = self._get_csv_coordinates(unique_name, location_map)
740
+ if csv_coords:
741
+ latitude, longitude = csv_coords
742
+ elif bus_id in bus_coords:
743
+ # Fall back to scattered coordinates around the connected bus
744
+ bus_lat, bus_lon = bus_coords[bus_id]
745
+ bus_name = load_data['bus']
746
+
747
+ # Get component index for this bus
748
+ component_index = bus_component_counters.get(bus_name, 0)
749
+ bus_component_counters[bus_name] = component_index + 1
750
+
751
+ latitude, longitude = self._generate_scattered_coordinates(
752
+ bus_lat, bus_lon, scatter_radius,
753
+ components_per_bus[bus_name], component_index
754
+ )
755
+
756
+ # Get carrier ID if carrier is specified
757
+ carrier_id = None
758
+ if 'carrier' in load_data and pd.notna(load_data['carrier']):
759
+ carrier_id = self._get_or_create_carrier(conn, network_id, load_data['carrier'])
760
+
761
+ # Create component record using atomic function
762
+ request = CreateComponentRequest(
763
+ network_id=network_id,
764
+ component_type='LOAD',
765
+ name=unique_name, # Use globally unique name
766
+ bus_id=bus_id,
767
+ carrier_id=carrier_id,
768
+ latitude=latitude,
769
+ longitude=longitude
770
+ )
771
+ component_id = insert_component(conn, request)
772
+
773
+ # Import load attributes
774
+ self._import_component_attributes(conn, component_id, load_data, 'LOAD', strict_validation)
775
+
776
+ # Import timeseries attributes for loads
777
+ self._import_component_timeseries(conn, network, component_id, load_name, 'LOAD', strict_validation)
778
+
779
+ count += 1
780
+
781
+ except Exception as e:
782
+ if strict_validation:
783
+ raise
784
+ self.logger.warning(f"Failed to import load {load_name}: {e}")
785
+ continue
786
+
787
+ return count
788
+
789
+ def _import_lines(self, conn, network, network_id: int, strict_validation: bool, location_map) -> int:
790
+ """Import lines from PyPSA network"""
791
+ count = 0
792
+ name_counter = {} # Track duplicate names
793
+
794
+ if not hasattr(network, 'lines') or network.lines.empty:
795
+ return count
796
+
797
+ bus_map = get_bus_name_to_id_map(conn, network_id)
798
+
799
+ for line_name, line_data in network.lines.iterrows():
800
+ try:
801
+ bus0_id = bus_map.get(line_data['bus0'])
802
+ bus1_id = bus_map.get(line_data['bus1'])
803
+
804
+ if bus0_id is None or bus1_id is None:
805
+ self.logger.warning(f"Bus not found for line '{line_name}': bus0='{line_data['bus0']}', bus1='{line_data['bus1']}'")
806
+ continue
807
+
808
+ # Handle duplicate names by appending counter
809
+ unique_name = line_name
810
+ if line_name in name_counter:
811
+ name_counter[line_name] += 1
812
+ unique_name = f"{line_name}_{name_counter[line_name]}"
813
+ self.logger.warning(f"Duplicate line name '{line_name}' renamed to '{unique_name}'")
814
+ else:
815
+ name_counter[line_name] = 0
816
+
817
+ # Check for CSV coordinates
818
+ latitude, longitude = None, None
819
+ csv_coords = self._get_csv_coordinates(unique_name, location_map)
820
+ if csv_coords:
821
+ latitude, longitude = csv_coords
822
+
823
+ # Lines always use AC carrier
824
+ carrier_id = self._get_or_create_carrier(conn, network_id, 'AC')
825
+
826
+ # Create component record using atomic function
827
+ request = CreateComponentRequest(
828
+ network_id=network_id,
829
+ component_type='LINE',
830
+ name=unique_name, # Use deduplicated name
831
+ bus0_id=bus0_id,
832
+ bus1_id=bus1_id,
833
+ carrier_id=carrier_id,
834
+ latitude=latitude,
835
+ longitude=longitude
836
+ )
837
+ component_id = insert_component(conn, request)
838
+
839
+ # Import line attributes
840
+ self._import_component_attributes(conn, component_id, line_data, 'LINE', strict_validation)
841
+
842
+ # Import timeseries attributes for lines
843
+ self._import_component_timeseries(conn, network, component_id, line_name, 'LINE', strict_validation)
844
+
845
+ count += 1
846
+
847
+ except Exception as e:
848
+ if strict_validation:
849
+ raise
850
+ self.logger.warning(f"Failed to import line {line_name}: {e}")
851
+ continue
852
+
853
+ return count
854
+
855
+ def _import_links(self, conn, network, network_id: int, strict_validation: bool, location_map) -> int:
856
+ """Import links from PyPSA network"""
857
+ count = 0
858
+
859
+ if not hasattr(network, 'links') or network.links.empty:
860
+ return count
861
+
862
+ bus_map = get_bus_name_to_id_map(conn, network_id)
863
+
864
+ for link_name, link_data in network.links.iterrows():
865
+ try:
866
+ bus0_id = bus_map.get(link_data['bus0'])
867
+ bus1_id = bus_map.get(link_data['bus1'])
868
+
869
+ if bus0_id is None or bus1_id is None:
870
+ self.logger.warning(f"Bus not found for link '{link_name}': bus0='{link_data['bus0']}', bus1='{link_data['bus1']}'")
871
+ continue
872
+
873
+ # Generate a unique name for this link
874
+ unique_name = self._generate_unique_name(str(link_name), 'LINK')
875
+
876
+ # Check for CSV coordinates
877
+ latitude, longitude = None, None
878
+ csv_coords = self._get_csv_coordinates(unique_name, location_map)
879
+ if csv_coords:
880
+ latitude, longitude = csv_coords
881
+
882
+ # Get carrier ID if carrier is specified
883
+ carrier_id = None
884
+ if 'carrier' in link_data and pd.notna(link_data['carrier']):
885
+ carrier_id = self._get_or_create_carrier(conn, network_id, link_data['carrier'])
886
+ else:
887
+ # Default to DC for links
888
+ carrier_id = self._get_or_create_carrier(conn, network_id, 'DC')
889
+
890
+ # Create component record using atomic function
891
+ request = CreateComponentRequest(
892
+ network_id=network_id,
893
+ component_type='LINK',
894
+ name=unique_name, # Use globally unique name
895
+ bus0_id=bus0_id,
896
+ bus1_id=bus1_id,
897
+ carrier_id=carrier_id,
898
+ latitude=latitude,
899
+ longitude=longitude
900
+ )
901
+ component_id = insert_component(conn, request)
902
+
903
+ # Import link attributes
904
+ self._import_component_attributes(conn, component_id, link_data, 'LINK', strict_validation)
905
+
906
+ # Import timeseries attributes for links
907
+ self._import_component_timeseries(conn, network, component_id, link_name, 'LINK', strict_validation)
908
+
909
+ count += 1
910
+
911
+ except Exception as e:
912
+ if strict_validation:
913
+ raise
914
+ self.logger.warning(f"Failed to import link {link_name}: {e}")
915
+ continue
916
+
917
+ return count
918
+
919
+ def _import_storage_units(self, conn, network, network_id: int, strict_validation: bool, scatter_radius: float, location_map) -> int:
920
+ """Import storage units from PyPSA network"""
921
+ count = 0
922
+
923
+ if not hasattr(network, 'storage_units') or network.storage_units.empty:
924
+ return count
925
+
926
+ bus_map = get_bus_name_to_id_map(conn, network_id)
927
+ bus_coords = self._get_bus_coordinates_map(conn, network_id)
928
+
929
+ # Count components per bus for better distribution
930
+ components_per_bus = {}
931
+ for su_name, su_data in network.storage_units.iterrows():
932
+ bus_name = su_data['bus']
933
+ components_per_bus[bus_name] = components_per_bus.get(bus_name, 0) + 1
934
+
935
+ bus_component_counters = {}
936
+
937
+ for su_name, su_data in network.storage_units.iterrows():
938
+ try:
939
+ bus_id = bus_map.get(su_data['bus'])
940
+ if bus_id is None:
941
+ self.logger.warning(f"Bus '{su_data['bus']}' not found for storage unit '{su_name}'")
942
+ continue
943
+
944
+ # Generate a unique name for this storage unit
945
+ unique_name = self._generate_unique_name(str(su_name), 'STORAGE_UNIT')
946
+
947
+ # Try to get coordinates from CSV first, then fall back to scattered coordinates
948
+ latitude, longitude = None, None
949
+
950
+ # Check CSV coordinates first
951
+ csv_coords = self._get_csv_coordinates(unique_name, location_map)
952
+ if csv_coords:
953
+ latitude, longitude = csv_coords
954
+ elif bus_id in bus_coords:
955
+ # Fall back to scattered coordinates around the connected bus
956
+ bus_lat, bus_lon = bus_coords[bus_id]
957
+ bus_name = su_data['bus']
958
+
959
+ # Get component index for this bus
960
+ component_index = bus_component_counters.get(bus_name, 0)
961
+ bus_component_counters[bus_name] = component_index + 1
962
+
963
+ latitude, longitude = self._generate_scattered_coordinates(
964
+ bus_lat, bus_lon, scatter_radius,
965
+ components_per_bus[bus_name], component_index
966
+ )
967
+
968
+ # Get carrier ID if carrier is specified
969
+ carrier_id = None
970
+ if 'carrier' in su_data and pd.notna(su_data['carrier']):
971
+ carrier_id = self._get_or_create_carrier(conn, network_id, su_data['carrier'])
972
+
973
+ # Create component record using atomic function
974
+ request = CreateComponentRequest(
975
+ network_id=network_id,
976
+ component_type='STORAGE_UNIT',
977
+ name=unique_name, # Use globally unique name
978
+ bus_id=bus_id,
979
+ carrier_id=carrier_id,
980
+ latitude=latitude,
981
+ longitude=longitude
982
+ )
983
+ component_id = insert_component(conn, request)
984
+
985
+ # Import storage unit attributes
986
+ self._import_component_attributes(conn, component_id, su_data, 'STORAGE_UNIT', strict_validation)
987
+
988
+ # Import timeseries attributes for storage units
989
+ self._import_component_timeseries(conn, network, component_id, su_name, 'STORAGE_UNIT', strict_validation)
990
+
991
+ count += 1
992
+
993
+ except Exception as e:
994
+ if strict_validation:
995
+ raise
996
+ self.logger.warning(f"Failed to import storage unit {su_name}: {e}")
997
+ continue
998
+
999
+ return count
1000
+
1001
+ def _import_stores(self, conn, network, network_id: int, strict_validation: bool, scatter_radius: float, location_map) -> int:
1002
+ """Import stores from PyPSA network"""
1003
+ count = 0
1004
+ name_counter = {} # Track duplicate names
1005
+
1006
+ if not hasattr(network, 'stores') or network.stores.empty:
1007
+ return count
1008
+
1009
+ bus_map = get_bus_name_to_id_map(conn, network_id)
1010
+ bus_coords = self._get_bus_coordinates_map(conn, network_id)
1011
+
1012
+ # Count components per bus for better distribution
1013
+ components_per_bus = {}
1014
+ for store_name, store_data in network.stores.iterrows():
1015
+ bus_name = store_data['bus']
1016
+ components_per_bus[bus_name] = components_per_bus.get(bus_name, 0) + 1
1017
+
1018
+ bus_component_counters = {} # Track how many components we've placed at each bus
1019
+
1020
+ for store_name, store_data in network.stores.iterrows():
1021
+ try:
1022
+ bus_id = bus_map.get(store_data['bus'])
1023
+ if bus_id is None:
1024
+ self.logger.warning(f"Bus '{store_data['bus']}' not found for store '{store_name}'")
1025
+ continue
1026
+
1027
+ # Handle duplicate names by appending counter
1028
+ unique_name = store_name
1029
+ if store_name in name_counter:
1030
+ name_counter[store_name] += 1
1031
+ unique_name = f"{store_name}_{name_counter[store_name]}"
1032
+ self.logger.warning(f"Duplicate store name '{store_name}' renamed to '{unique_name}'")
1033
+ else:
1034
+ name_counter[store_name] = 0
1035
+
1036
+ # Try to get coordinates from CSV first, then fall back to scattered coordinates
1037
+ latitude, longitude = None, None
1038
+
1039
+ # Check CSV coordinates first
1040
+ csv_coords = self._get_csv_coordinates(unique_name, location_map)
1041
+ if csv_coords:
1042
+ latitude, longitude = csv_coords
1043
+ elif bus_id in bus_coords:
1044
+ # Fall back to scattered coordinates around the connected bus
1045
+ bus_lat, bus_lon = bus_coords[bus_id]
1046
+ bus_name = store_data['bus']
1047
+
1048
+ # Get component index for this bus
1049
+ component_index = bus_component_counters.get(bus_name, 0)
1050
+ bus_component_counters[bus_name] = component_index + 1
1051
+
1052
+ latitude, longitude = self._generate_scattered_coordinates(
1053
+ bus_lat, bus_lon, scatter_radius,
1054
+ components_per_bus[bus_name], component_index
1055
+ )
1056
+
1057
+ # Get carrier ID if carrier is specified
1058
+ carrier_id = None
1059
+ if 'carrier' in store_data and pd.notna(store_data['carrier']):
1060
+ carrier_id = self._get_or_create_carrier(conn, network_id, store_data['carrier'])
1061
+
1062
+ # Create component record using atomic function
1063
+ request = CreateComponentRequest(
1064
+ network_id=network_id,
1065
+ component_type='STORE',
1066
+ name=unique_name, # Use deduplicated name
1067
+ bus_id=bus_id,
1068
+ carrier_id=carrier_id,
1069
+ latitude=latitude,
1070
+ longitude=longitude
1071
+ )
1072
+ component_id = insert_component(conn, request)
1073
+
1074
+ # Import store attributes
1075
+ self._import_component_attributes(conn, component_id, store_data, 'STORE', strict_validation)
1076
+
1077
+ # Import timeseries attributes for stores
1078
+ self._import_component_timeseries(conn, network, component_id, store_name, 'STORE', strict_validation)
1079
+
1080
+ count += 1
1081
+
1082
+ except Exception as e:
1083
+ if strict_validation:
1084
+ raise
1085
+ self.logger.warning(f"Failed to import store {store_name}: {e}")
1086
+ continue
1087
+
1088
+ return count
1089
+
1090
+ def _get_bus_coordinates(self, conn, network_id: int) -> List[Tuple[float, float]]:
1091
+ """Get coordinates of all buses in the network that have valid coordinates"""
1092
+ cursor = conn.execute("""
1093
+ SELECT latitude, longitude FROM components
1094
+ WHERE network_id = ? AND component_type = 'BUS'
1095
+ AND latitude IS NOT NULL AND longitude IS NOT NULL
1096
+ AND NOT (latitude = 0 AND longitude = 0)
1097
+ """, (network_id,))
1098
+
1099
+ coordinates = [(row[0], row[1]) for row in cursor.fetchall()]
1100
+ return coordinates
1101
+
1102
+ def _calculate_bus_separation_radius(self, bus_coordinates: List[Tuple[float, float]]) -> float:
1103
+ """Calculate the minimum separation between buses and return a radius for scattering"""
1104
+ if len(bus_coordinates) < 2:
1105
+ return 0.01 # ~1km at equator
1106
+
1107
+ min_distance_degrees = float('inf')
1108
+ min_separation_threshold = 0.001 # ~100m threshold to exclude co-located buses
1109
+
1110
+ for i, (lat1, lon1) in enumerate(bus_coordinates):
1111
+ for j, (lat2, lon2) in enumerate(bus_coordinates[i+1:], i+1):
1112
+ # Simple Euclidean distance in degrees
1113
+ distance_degrees = math.sqrt((lat2 - lat1)**2 + (lon2 - lon1)**2)
1114
+
1115
+ if distance_degrees > min_separation_threshold:
1116
+ min_distance_degrees = min(min_distance_degrees, distance_degrees)
1117
+
1118
+ if min_distance_degrees == float('inf'):
1119
+ scatter_radius_degrees = 0.05 # ~5km default
1120
+ else:
1121
+ scatter_radius_degrees = min_distance_degrees * 0.25
1122
+
1123
+ # Ensure reasonable bounds: between 1km and 100km equivalent in degrees
1124
+ min_radius = 0.01 # ~1km
1125
+ max_radius = 1.0 # ~100km
1126
+ scatter_radius_degrees = max(min_radius, min(max_radius, scatter_radius_degrees))
1127
+
1128
+ return scatter_radius_degrees
1129
+
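A small worked example of the heuristic above, with illustrative coordinates: for buses at (50.0, 10.0) and (50.2, 10.0) the closest distinct pair is 0.2 degrees apart, so the scatter radius becomes 0.2 * 0.25 = 0.05 degrees, which already sits inside the [0.01, 1.0] clamp.

# Illustrative check (coordinates chosen for the example, not from any real network)
radius = NetCDFModelImporter()._calculate_bus_separation_radius([(50.0, 10.0), (50.2, 10.0)])
assert abs(radius - 0.05) < 1e-9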
1130
+ def _detect_and_load_location_csv(self, netcdf_path: str) -> Optional[Dict[str, Tuple[float, float]]]:
1131
+ """
1132
+ Detect and load companion CSV file with component locations.
1133
+
1134
+ Args:
1135
+ netcdf_path: Path to the NetCDF file (e.g., /path/to/fileX.nc)
1136
+
1137
+ Returns:
1138
+ Dictionary mapping component names to (latitude, longitude) tuples, or None if no CSV found
1139
+ """
1140
+ try:
1141
+ # Construct expected CSV path: replace .nc with _locations.csv
1142
+ netcdf_file = Path(netcdf_path)
1143
+ csv_path = netcdf_file.parent / f"{netcdf_file.stem}_locations.csv"
1144
+
1145
+ if not csv_path.exists():
1146
+ return None
1147
+
1148
+ # Parse the CSV file
1149
+ try:
1150
+ location_df = pd.read_csv(csv_path)
1151
+
1152
+ # Validate required columns
1153
+ required_columns = {'name', 'longitude', 'latitude'}
1154
+ if not required_columns.issubset(location_df.columns):
1155
+ missing_cols = required_columns - set(location_df.columns)
1156
+ self.logger.warning(f"Location CSV missing required columns: {missing_cols}. Found columns: {list(location_df.columns)}")
1157
+ return None
1158
+
1159
+ # Create lookup dictionary
1160
+ location_map = {}
1161
+ skipped_count = 0
1162
+
1163
+ for _, row in location_df.iterrows():
1164
+ name = row['name']
1165
+ longitude = row['longitude']
1166
+ latitude = row['latitude']
1167
+
1168
+ # Skip rows with missing data
1169
+ if pd.isna(name) or pd.isna(longitude) or pd.isna(latitude):
1170
+ skipped_count += 1
1171
+ continue
1172
+
1173
+ # Validate coordinate ranges
1174
+ if not (-180 <= longitude <= 180) or not (-90 <= latitude <= 90):
1175
+ self.logger.warning(f"Invalid coordinates for '{name}': longitude={longitude}, latitude={latitude}")
1176
+ skipped_count += 1
1177
+ continue
1178
+
1179
+ location_map[str(name).strip()] = (float(latitude), float(longitude))
1180
+
1181
+ self.logger.info(f"Loaded {len(location_map)} component locations from CSV (skipped {skipped_count} invalid entries)")
1182
+ return location_map
1183
+
1184
+ except Exception as e:
1185
+ self.logger.error(f"Failed to parse location CSV {csv_path}: {e}")
1186
+ return None
1187
+
1188
+ except Exception as e:
1189
+ self.logger.warning(f"Error detecting location CSV: {e}")
1190
+ return None
1191
+
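A sketch of the companion file this method looks for: for an input named model.nc (the name is illustrative) it expects model_locations.csv in the same directory, with at least these columns:

# model_locations.csv (illustrative values)
name,longitude,latitude
wind_farm_north,8.5,55.1
battery_site_a,10.2,53.6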
1192
+ def _get_master_scenario_id(self, conn, network_id: int) -> int:
1193
+ """Get the master scenario ID for a network"""
1194
+ cursor = conn.execute(
1195
+ "SELECT id FROM scenarios WHERE network_id = ? AND is_master = 1",
1196
+ (network_id,)
1197
+ )
1198
+ result = cursor.fetchone()
1199
+ if not result:
1200
+ raise ValueError(f"No master scenario found for network {network_id}")
1201
+ return result[0]
1202
+
1203
+ def _get_or_create_carrier(self, conn, network_id: int, carrier_name: str) -> int:
1204
+ """Get existing carrier ID or create new carrier"""
1205
+ # Try to find existing carrier
1206
+ cursor = conn.execute(
1207
+ "SELECT id FROM carriers WHERE network_id = ? AND name = ?",
1208
+ (network_id, carrier_name)
1209
+ )
1210
+ result = cursor.fetchone()
1211
+ if result:
1212
+ return result[0]
1213
+
1214
+ # Create new carrier
1215
+ carrier_id = create_carrier(conn, network_id, carrier_name, 0.0, '#3498db', carrier_name)
1216
+ return carrier_id
1217
+
1218
+ def _generate_component_coordinates(
1219
+ self,
1220
+ conn,
1221
+ bus_id: int,
1222
+ scatter_radius: float,
1223
+ location_map: Optional[Dict],
1224
+ component_name: str
1225
+ ) -> Tuple[Optional[float], Optional[float]]:
1226
+ """Generate coordinates for a component near its connected bus"""
1227
+ # Check location map first
1228
+ if location_map and component_name in location_map:
1229
+ return location_map[component_name]
1230
+
1231
+ # Get bus coordinates
1232
+ cursor = conn.execute(
1233
+ "SELECT latitude, longitude FROM components WHERE id = ?",
1234
+ (bus_id,)
1235
+ )
1236
+ result = cursor.fetchone()
1237
+ if not result or result[0] is None or result[1] is None:
1238
+ return None, None
1239
+
1240
+ bus_lat, bus_lon = result[0], result[1]
1241
+
1242
+ # Generate unique name-based offset
1243
+ name_hash = hash(component_name) % 1000
1244
+ angle = (name_hash / 1000.0) * 2 * math.pi
1245
+
1246
+ # Apply scatter radius
1247
+ lat_offset = scatter_radius * math.cos(angle)
1248
+ lon_offset = scatter_radius * math.sin(angle)
1249
+
1250
+ return bus_lat + lat_offset, bus_lon + lon_offset
1251
+
1252
+ def _import_component_attributes(
1253
+ self,
1254
+ conn,
1255
+ component_id: int,
1256
+ component_data: pd.Series,
1257
+ component_type: str,
1258
+ strict_validation: bool
1259
+ ):
1260
+ """Import component attributes, excluding bus connection columns"""
1261
+
1262
+ # Get master scenario ID
1263
+ network_id_result = conn.execute("SELECT network_id FROM components WHERE id = ?", (component_id,)).fetchone()
1264
+ if not network_id_result:
1265
+ self.logger.error(f"Could not find network_id for component {component_id}")
1266
+ return
1267
+
1268
+ network_id = network_id_result[0]
1269
+ scenario_id = self._get_master_scenario_id(conn, network_id)
1270
+
1271
+ # Skip these columns as they're handled in the components table
1272
+ skip_columns = {
1273
+ 'bus', 'bus0', 'bus1', 'name', # Bus connections and name
1274
+ 'x', 'y', 'location', # Coordinate/location data (stored as latitude/longitude columns)
1275
+ 'carrier' # Carrier reference (stored as carrier_id column)
1276
+ }
1277
+
1278
+ attribute_count = 0
1279
+ skipped_count = 0
1280
+
1281
+ for attr_name, value in component_data.items():
1282
+ if attr_name in skip_columns:
1283
+ skipped_count += 1
1284
+ continue
1285
+
1286
+ if pd.isna(value):
1287
+ skipped_count += 1
1288
+ continue
1289
+
1290
+ # Convert value to appropriate format for our database and use smart attribute setting
1291
+ try:
1292
+ # Get validation rule to check expected data type
1293
+ try:
1294
+ rule = get_validation_rule(conn, component_type, attr_name)
1295
+ expected_type = rule.data_type
1296
+ except:
1297
+ expected_type = None
1298
+
1299
+ # Convert based on expected type or infer from value
1300
+ if expected_type == 'boolean':
1301
+ # Handle boolean attributes that might come as int/float from PyPSA
1302
+ if isinstance(value, (bool, np.bool_)):
1303
+ static_value = StaticValue(bool(value))
1304
+ elif isinstance(value, (int, np.integer)):
1305
+ static_value = StaticValue(bool(value)) # 0 -> False, 1 -> True
1306
+ elif isinstance(value, (float, np.floating)):
1307
+ static_value = StaticValue(bool(int(value))) # 0.0 -> False, 1.0 -> True
1308
+ else:
1309
+ static_value = StaticValue(str(value).lower() == 'true')
1310
+ elif expected_type == 'int':
1311
+ # Handle integer attributes
1312
+ if isinstance(value, (int, np.integer)):
1313
+ static_value = StaticValue(int(value))
1314
+ elif isinstance(value, (float, np.floating)):
1315
+ if np.isfinite(value):
1316
+ static_value = StaticValue(int(value))
1317
+ else:
1318
+ skipped_count += 1
1319
+ continue
1320
+ elif isinstance(value, bool):
1321
+ static_value = StaticValue(int(value))
1322
+ else:
1323
+ static_value = StaticValue(int(float(str(value))))
1324
+ elif expected_type == 'float':
1325
+ # Handle float attributes
1326
+ if isinstance(value, (float, np.floating)):
1327
+ if np.isfinite(value):
1328
+ static_value = StaticValue(float(value))
1329
+ else:
1330
+ skipped_count += 1
1331
+ continue
1332
+ elif isinstance(value, (int, np.integer)):
1333
+ static_value = StaticValue(float(value))
1334
+ elif isinstance(value, bool):
1335
+ static_value = StaticValue(float(value))
1336
+ else:
1337
+ static_value = StaticValue(float(str(value)))
1338
+ else:
1339
+ # Fallback to type inference for unknown or string types
1340
+ if isinstance(value, bool):
1341
+ static_value = StaticValue(bool(value))
1342
+ elif isinstance(value, (int, np.integer)):
1343
+ static_value = StaticValue(int(value))
1344
+ elif isinstance(value, (float, np.floating)):
1345
+ if np.isfinite(value):
1346
+ static_value = StaticValue(float(value))
1347
+ else:
1348
+ skipped_count += 1
1349
+ continue # Skip infinite/NaN values
1350
+ else:
1351
+ static_value = StaticValue(str(value))
1352
+
1353
+ # Use direct static attribute setting
1354
+ set_static_attribute(conn, component_id, attr_name, static_value, scenario_id)
1355
+ attribute_count += 1
1356
+
1357
+ except Exception as e:
1358
+ # Handle validation errors from db_utils functions
1359
+ if ("No validation rule found" in str(e) or
1360
+ "does not allow" in str(e) or
1361
+ "ValidationError" in str(type(e).__name__)):
1362
+ if strict_validation:
1363
+ raise
1364
+ else:
1365
+ self.logger.warning(f"Skipping undefined/invalid attribute '{attr_name}' for {component_type} component {component_id}: {e}")
1366
+ skipped_count += 1
1367
+ continue
1368
+ else:
1369
+ # Log but don't fail on other attribute import errors (like type conversion issues)
1370
+ self.logger.warning(f"Skipping attribute {attr_name} for component {component_id}: {e}")
1371
+ skipped_count += 1
1372
+
1373
+ def _import_component_timeseries(
+ self,
+ conn,
+ network,
+ component_id: int,
+ component_name: str,
+ component_type: str,
+ strict_validation: bool
+ ):
+ """Import timeseries attributes from PyPSA network"""
+
+ # Get master scenario ID
+ network_id_result = conn.execute("SELECT network_id FROM components WHERE id = ?", (component_id,)).fetchone()
+ if not network_id_result:
+ self.logger.error(f"Could not find network_id for component {component_id}")
+ return
+
+ network_id = network_id_result[0]
+ scenario_id = self._get_master_scenario_id(conn, network_id)
+
+ # Map component types to their PyPSA timeseries DataFrames
+ timeseries_map = {
+ 'BUS': getattr(network, 'buses_t', {}),
+ 'GENERATOR': getattr(network, 'generators_t', {}),
+ 'LOAD': getattr(network, 'loads_t', {}),
+ 'LINE': getattr(network, 'lines_t', {}),
+ 'LINK': getattr(network, 'links_t', {}),
+ 'STORAGE_UNIT': getattr(network, 'storage_units_t', {}),
+ 'STORE': getattr(network, 'stores_t', {})
+ }
+
+ component_timeseries = timeseries_map.get(component_type, {})
+
+ if not component_timeseries:
+ return
+
+ timeseries_count = 0
+
+ # Iterate through each timeseries attribute (e.g., 'p', 'q', 'p_set', 'p_max_pu', etc.)
+ for attr_name, timeseries_df in component_timeseries.items():
+ if component_name not in timeseries_df.columns:
+ continue
+
+ # Get the timeseries data for this component
+ component_series = timeseries_df[component_name]
+
+ # Skip if all values are NaN
+ if component_series.isna().all():
+ continue
+
+ try:
+ # Convert pandas Series to list of values (using optimized approach)
+ values = []
+
+ for value in component_series:
+ # Skip NaN values by using 0.0 as default (PyPSA convention)
+ if pd.isna(value):
+ values.append(0.0)
+ else:
+ values.append(float(value))
+
+ if not values:
+ self.logger.warning(f"No valid timeseries points for '{attr_name}' on {component_type} '{component_name}'")
+ continue
+
+ # Use optimized timeseries attribute setting
+ set_timeseries_attribute(conn, component_id, attr_name, values, scenario_id)
+ timeseries_count += 1
+
+ except Exception as e:
+ if strict_validation:
+ raise
+ else:
+ self.logger.warning(f"Skipping timeseries attribute '{attr_name}' for {component_type} component '{component_name}': {e}")
+ continue
+
+ if timeseries_count > 0:
+ self.logger.debug(f"Imported {timeseries_count} timeseries attributes for {component_type} '{component_name}'")
+
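A minimal sketch of the per-column NaN handling applied above, assuming a pandas Series of per-snapshot values such as a generator's p_max_pu; the numbers are illustrative:

    import pandas as pd

    series = pd.Series([0.2, float("nan"), 0.8])
    values = [0.0 if pd.isna(v) else float(v) for v in series]
    print(values)   # [0.2, 0.0, 0.8]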
+ def _generate_unique_name(self, base_name: str, component_type: str) -> str:
+ """
+ Generate a unique name for a component, ensuring no duplicates across all component types.
+
+ Args:
+ base_name: The original name to start with
+ component_type: The type of component (used in the suffix if needed)
+
+ Returns:
+ A unique name that hasn't been used yet
+ """
+ # First try the base name
+ if base_name not in self._used_names:
+ self._used_names.add(base_name)
+ return base_name
+
+ # If base name is taken, try appending the component type
+ typed_name = f"{base_name}_{component_type.lower()}"
+ if typed_name not in self._used_names:
+ self._used_names.add(typed_name)
+ return typed_name
+
+ # If that's taken too, start adding numbers
+ counter = 1
+ while True:
+ unique_name = f"{base_name}_{counter}"
+ if unique_name not in self._used_names:
+ self._used_names.add(unique_name)
+ return unique_name
+ counter += 1
+
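A worked example of the naming cascade above, assuming the hypothetical names "demand" and "demand_load" are already registered in self._used_names:

    _generate_unique_name("demand", "LOAD") tries, in order:
        "demand"        -> taken
        "demand_load"   -> taken
        "demand_1"      -> free, recorded and returned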
+ def _generate_scattered_coordinates(
+ self,
+ bus_lat: float,
+ bus_lon: float,
+ scatter_radius: float,
+ component_count_at_bus: int,
+ component_index: int
+ ) -> Tuple[float, float]:
+ """
+ Generate scattered coordinates around a bus location.
+
+ Args:
+ bus_lat: Bus latitude
+ bus_lon: Bus longitude
+ scatter_radius: Radius in degrees to scatter within
+ component_count_at_bus: Total number of components at this bus
+ component_index: Index of this component (0-based)
+
+ Returns:
+ Tuple of (latitude, longitude) for the scattered position
+ """
+ if component_count_at_bus == 1:
+ # Single component - place it at a moderate distance from the bus
+ angle = random.uniform(0, 2 * math.pi)
+ distance = scatter_radius * random.uniform(0.5, 0.8) # 50-80% of scatter radius
+ else:
+ # Multiple components - arrange in a rough circle with some randomness
+ base_angle = (2 * math.pi * component_index) / component_count_at_bus
+ angle_jitter = random.uniform(-math.pi/8, math.pi/8) # ±22.5 degrees jitter
+ angle = base_angle + angle_jitter
+
+ # Vary distance randomly within the radius (use more of the available radius)
+ distance = scatter_radius * random.uniform(0.6, 1.0) # 60-100% of scatter radius
+
+ # Calculate new coordinates
+ new_lat = bus_lat + distance * math.cos(angle)
+ new_lon = bus_lon + distance * math.sin(angle)
+
+ return new_lat, new_lon
+
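A minimal sketch of the placement math above for a hypothetical bus at (52.0, 4.0) with a scatter_radius of 0.1 degrees and four components, shown for component index 1 (base angle pi/2, i.e. roughly east of the bus):

    import math, random

    bus_lat, bus_lon, scatter_radius = 52.0, 4.0, 0.1
    base_angle = (2 * math.pi * 1) / 4
    angle = base_angle + random.uniform(-math.pi / 8, math.pi / 8)
    distance = scatter_radius * random.uniform(0.6, 1.0)
    print(bus_lat + distance * math.cos(angle),   # latitude offset stays small near pi/2
          bus_lon + distance * math.sin(angle))   # longitude grows by roughly 0.06-0.1 degrees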
+ def _get_bus_coordinates_map(self, conn, network_id: int) -> Dict[int, Tuple[float, float]]:
+ """
+ Get a mapping from bus component ID to coordinates.
+
+ Returns:
+ Dictionary mapping bus component ID to (latitude, longitude) tuple
+ """
+ cursor = conn.execute("""
+ SELECT id, latitude, longitude FROM components
+ WHERE network_id = ? AND component_type = 'BUS'
+ AND latitude IS NOT NULL AND longitude IS NOT NULL
+ AND NOT (latitude = 0 AND longitude = 0)
+ """, (network_id,))
+
+ bus_coords = {row[0]: (row[1], row[2]) for row in cursor.fetchall()}
+ return bus_coords
+
+ def _resolve_original_component_name(self, unique_name: str) -> str:
+ """
+ Resolve a potentially modified unique name back to its original name for CSV lookup.
+
+ Args:
+ unique_name: The unique name that may have been modified (e.g., "component_1", "component_generator")
+
+ Returns:
+ The original name for CSV lookup
+ """
+ # Remove common suffixes added by _generate_unique_name
+ # Pattern 1: Remove "_NUMBER" suffix (e.g., "component_1" -> "component")
+ import re
+
+ # First try removing "_NUMBER" pattern
+ no_number_suffix = re.sub(r'_\d+$', '', unique_name)
+ if no_number_suffix != unique_name:
+ return no_number_suffix
+
+ # Then try removing "_COMPONENT_TYPE" pattern (e.g., "component_generator" -> "component")
+ component_types = ['bus', 'generator', 'load', 'line', 'link', 'storage_unit', 'store']
+ for comp_type in component_types:
+ suffix = f"_{comp_type.lower()}"
+ if unique_name.endswith(suffix):
+ return unique_name[:-len(suffix)]
+
+ # If no patterns match, return the original name
+ return unique_name
+
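A worked example of the suffix stripping above, using hypothetical component names (exact CSV matches are tried first in _get_csv_coordinates below, so genuine names ending in digits are still found):

    "turbine_3"          -> "turbine"     (numeric suffix removed first)
    "turbine_generator"  -> "turbine"     (component-type suffix removed)
    "turbine"            -> "turbine"     (returned unchanged)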
+ def _get_csv_coordinates(
+ self,
+ component_name: str,
+ location_map: Optional[Dict[str, Tuple[float, float]]]
+ ) -> Optional[Tuple[float, float]]:
+ """
+ Get coordinates for a component from the CSV location map.
+
+ Args:
+ component_name: The component name (potentially modified for uniqueness)
+ location_map: Dictionary mapping original names to coordinates
+
+ Returns:
+ (latitude, longitude) tuple if found, None otherwise
+ """
+ if not location_map:
+ return None
+
+ # Try exact match first
+ if component_name in location_map:
+ coordinates = location_map[component_name]
+ self.logger.debug(f"CSV location exact match for '{component_name}': {coordinates}")
+ return coordinates
+
+ # Try resolving back to original name
+ original_name = self._resolve_original_component_name(component_name)
+ if original_name != component_name and original_name in location_map:
+ coordinates = location_map[original_name]
+ self.logger.debug(f"CSV location resolved match for '{component_name}' -> '{original_name}': {coordinates}")
+ return coordinates
+
+ # No match found
+ self.logger.debug(f"No CSV location found for component '{component_name}' (original: '{original_name}')")
+ return None