pyconvexity-0.4.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (42)
  1. pyconvexity/__init__.py +226 -0
  2. pyconvexity/_version.py +1 -0
  3. pyconvexity/core/__init__.py +60 -0
  4. pyconvexity/core/database.py +485 -0
  5. pyconvexity/core/errors.py +106 -0
  6. pyconvexity/core/types.py +400 -0
  7. pyconvexity/data/README.md +101 -0
  8. pyconvexity/data/__init__.py +17 -0
  9. pyconvexity/data/loaders/__init__.py +3 -0
  10. pyconvexity/data/loaders/cache.py +213 -0
  11. pyconvexity/data/schema/01_core_schema.sql +420 -0
  12. pyconvexity/data/schema/02_data_metadata.sql +120 -0
  13. pyconvexity/data/schema/03_validation_data.sql +506 -0
  14. pyconvexity/data/sources/__init__.py +5 -0
  15. pyconvexity/data/sources/gem.py +442 -0
  16. pyconvexity/io/__init__.py +26 -0
  17. pyconvexity/io/excel_exporter.py +1226 -0
  18. pyconvexity/io/excel_importer.py +1381 -0
  19. pyconvexity/io/netcdf_exporter.py +197 -0
  20. pyconvexity/io/netcdf_importer.py +1833 -0
  21. pyconvexity/models/__init__.py +195 -0
  22. pyconvexity/models/attributes.py +730 -0
  23. pyconvexity/models/carriers.py +159 -0
  24. pyconvexity/models/components.py +611 -0
  25. pyconvexity/models/network.py +503 -0
  26. pyconvexity/models/results.py +148 -0
  27. pyconvexity/models/scenarios.py +234 -0
  28. pyconvexity/solvers/__init__.py +29 -0
  29. pyconvexity/solvers/pypsa/__init__.py +24 -0
  30. pyconvexity/solvers/pypsa/api.py +460 -0
  31. pyconvexity/solvers/pypsa/batch_loader.py +307 -0
  32. pyconvexity/solvers/pypsa/builder.py +675 -0
  33. pyconvexity/solvers/pypsa/constraints.py +405 -0
  34. pyconvexity/solvers/pypsa/solver.py +1509 -0
  35. pyconvexity/solvers/pypsa/storage.py +2048 -0
  36. pyconvexity/timeseries.py +330 -0
  37. pyconvexity/validation/__init__.py +25 -0
  38. pyconvexity/validation/rules.py +312 -0
  39. pyconvexity-0.4.3.dist-info/METADATA +47 -0
  40. pyconvexity-0.4.3.dist-info/RECORD +42 -0
  41. pyconvexity-0.4.3.dist-info/WHEEL +5 -0
  42. pyconvexity-0.4.3.dist-info/top_level.txt +1 -0
pyconvexity/io/netcdf_importer.py
@@ -0,0 +1,1833 @@
1
+ """
2
+ NetCDF importer for PyConvexity energy system models.
3
+ Imports PyPSA NetCDF files into PyConvexity database format.
4
+ """
5
+
6
+ import logging
7
+ import pandas as pd
8
+ import numpy as np
9
+ from typing import Dict, Any, Optional, Callable, Tuple, List
10
+ from pathlib import Path
11
+ import random
12
+ import math
13
+
14
+ # Import functions directly from pyconvexity
15
+ from pyconvexity.core.database import open_connection, create_database_with_schema
16
+ from pyconvexity.core.types import (
17
+ StaticValue,
18
+ CreateNetworkRequest,
19
+ CreateComponentRequest,
20
+ )
21
+ from pyconvexity.core.errors import PyConvexityError as DbError, ValidationError
22
+ from pyconvexity.models import (
23
+ create_network,
24
+ create_carrier,
25
+ insert_component,
26
+ set_static_attribute,
27
+ get_bus_name_to_id_map,
28
+ set_timeseries_attribute,
29
+ get_component_type,
30
+ get_attribute,
31
+ get_network_time_periods,
32
+ )
33
+ from pyconvexity.validation import get_validation_rule
34
+ from pyconvexity.timeseries import set_timeseries
35
+
36
+ logger = logging.getLogger(__name__)
37
+
38
+
39
+ class NetCDFModelImporter:
40
+ """Import PyPSA NetCDF files into PyConvexity database format"""
41
+
42
+ def __init__(self):
43
+ self.logger = logging.getLogger(__name__)
44
+ # Set random seed for reproducible coordinate generation
45
+ random.seed(42)
46
+ np.random.seed(42)
47
+ self._used_names = set() # Global registry of all used names
48
+
49
+ def import_netcdf_to_database(
50
+ self,
51
+ netcdf_path: str,
52
+ db_path: str,
53
+ network_name: str,
54
+ network_description: Optional[str] = None,
55
+ progress_callback: Optional[Callable[[int, str], None]] = None,
56
+ strict_validation: bool = False,
57
+ ) -> Dict[str, Any]:
58
+ """
59
+ Import a PyPSA NetCDF file into a new database.
60
+
61
+ Args:
62
+ netcdf_path: Path to the PyPSA NetCDF file
63
+ db_path: Path where to create the database
64
+ network_name: Name for the imported network
65
+ network_description: Optional description
66
+ progress_callback: Optional callback for progress updates (progress: int, message: str)
67
+ strict_validation: Whether to fail on undefined attributes rather than skipping them.
68
+ If True, will fail on any attribute not defined in the database schema.
69
+ If False (default), will skip undefined attributes with warnings.
70
+
71
+ Returns:
72
+ Dictionary with import results and statistics
73
+ """
74
+ try:
75
+ if progress_callback:
76
+ progress_callback(0, "Starting NetCDF import...")
77
+
78
+ # Import PyPSA
79
+ pypsa = self._import_pypsa()
80
+
81
+ if progress_callback:
82
+ progress_callback(5, "Loading PyPSA network from NetCDF...")
83
+
84
+ # Load the PyPSA network
85
+ network = pypsa.Network(netcdf_path)
86
+
87
+ if progress_callback:
88
+ progress_callback(
89
+ 15,
90
+ f"Loaded network: {len(network.buses)} buses, {len(network.generators)} generators",
91
+ )
92
+
93
+ # Use the shared import logic
94
+ return self._import_network_to_database(
95
+ network=network,
96
+ db_path=db_path,
97
+ network_name=network_name,
98
+ network_description=network_description,
99
+ progress_callback=progress_callback,
100
+ strict_validation=strict_validation,
101
+ import_source="NetCDF",
102
+ netcdf_path=netcdf_path,
103
+ )
104
+
105
+ except Exception as e:
106
+ self.logger.error(f"Error importing NetCDF: {e}", exc_info=True)
107
+ if progress_callback:
108
+ progress_callback(None, f"Error: {str(e)}")
109
+ raise
110
+
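For reference, a minimal usage sketch of this entry point. The file paths, network name, and callback below are illustrative placeholders, not part of the package:

```python
# Hypothetical usage of NetCDFModelImporter.import_netcdf_to_database.
from pyconvexity.io.netcdf_importer import NetCDFModelImporter

def report(progress, message):
    # progress is an int percentage, or None when an error is reported
    print(f"[{progress if progress is not None else '--'}%] {message}")

importer = NetCDFModelImporter()
result = importer.import_netcdf_to_database(
    netcdf_path="example_network.nc",      # placeholder path
    db_path="example_network.sqlite",      # placeholder path
    network_name="Example Network",
    network_description="Imported for testing",
    progress_callback=report,
    strict_validation=False,               # skip undefined attributes with warnings
)
print(result["stats"]["total_components"])
```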
111
+ def import_csv_to_database(
112
+ self,
113
+ csv_directory: str,
114
+ db_path: str,
115
+ network_name: str,
116
+ network_description: Optional[str] = None,
117
+ progress_callback: Optional[Callable[[int, str], None]] = None,
118
+ strict_validation: bool = False,
119
+ ) -> Dict[str, Any]:
120
+ """
121
+ Import a PyPSA network from CSV files into a new database.
122
+
123
+ Args:
124
+ csv_directory: Path to the directory containing PyPSA CSV files
125
+ db_path: Path where to create the database
126
+ network_name: Name for the imported network
127
+ network_description: Optional description
128
+ progress_callback: Optional callback for progress updates (progress: int, message: str)
129
+ strict_validation: Whether to fail on undefined attributes rather than skipping them
130
+
131
+ Returns:
132
+ Dictionary with import results and statistics
133
+ """
134
+ try:
135
+ if progress_callback:
136
+ progress_callback(0, "Starting PyPSA CSV import...")
137
+
138
+ # Import PyPSA
139
+ pypsa = self._import_pypsa()
140
+
141
+ if progress_callback:
142
+ progress_callback(5, "Validating CSV files...")
143
+
144
+ # Validate CSV directory and files before attempting import
145
+ self._validate_csv_directory(csv_directory)
146
+
147
+ if progress_callback:
148
+ progress_callback(10, "Loading PyPSA network from CSV files...")
149
+
150
+ # Load the PyPSA network from CSV directory
151
+ network = pypsa.Network()
152
+
153
+ try:
154
+ network.import_from_csv_folder(csv_directory)
155
+ except Exception as e:
156
+ # Provide more helpful error message
157
+ error_msg = f"PyPSA CSV import failed: {str(e)}"
158
+ if "'name'" in str(e):
159
+ error_msg += "\n\nThis usually means one of your CSV files is missing a 'name' column. PyPSA CSV files require:\n"
160
+ error_msg += "- All component CSV files (buses.csv, generators.csv, etc.) must have a 'name' column as the first column\n"
161
+ error_msg += "- The 'name' column should contain unique identifiers for each component\n"
162
+ error_msg += "- Check that your CSV files follow the PyPSA CSV format specification"
163
+ elif "KeyError" in str(e):
164
+ error_msg += f"\n\nThis indicates a required column is missing from one of your CSV files. "
165
+ error_msg += "Please ensure your CSV files follow the PyPSA format specification."
166
+
167
+ self.logger.error(error_msg)
168
+ raise ValueError(error_msg)
169
+
170
+ if progress_callback:
171
+ progress_callback(
172
+ 20,
173
+ f"Loaded network: {len(network.buses)} buses, {len(network.generators)} generators",
174
+ )
175
+
176
+ # Use the shared import logic
177
+ return self._import_network_to_database(
178
+ network=network,
179
+ db_path=db_path,
180
+ network_name=network_name,
181
+ network_description=network_description,
182
+ progress_callback=progress_callback,
183
+ strict_validation=strict_validation,
184
+ import_source="CSV",
185
+ )
186
+
187
+ except Exception as e:
188
+ self.logger.error(f"Error importing PyPSA CSV: {e}", exc_info=True)
189
+ if progress_callback:
190
+ progress_callback(None, f"Error: {str(e)}")
191
+ raise
192
+
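As the validation below enforces, a PyPSA CSV folder needs at least a buses.csv whose first column is 'name'; other component files are optional but must also carry a 'name' column. An illustrative layout (directory and values are made up):

```python
# Sketch of a minimal CSV folder this importer accepts.
from pathlib import Path
import pandas as pd

csv_dir = Path("example_csv_network")  # placeholder directory
csv_dir.mkdir(exist_ok=True)

# buses.csv is required and must have a 'name' column.
pd.DataFrame(
    {"name": ["bus_north", "bus_south"], "carrier": ["AC", "AC"],
     "x": [10.0, 10.5], "y": [59.0, 58.5]}
).to_csv(csv_dir / "buses.csv", index=False)

# Other component files (generators.csv, loads.csv, ...) are optional,
# but each also needs a 'name' column when present.
pd.DataFrame(
    {"name": ["gen_1"], "bus": ["bus_north"], "carrier": ["wind"], "p_nom": [100.0]}
).to_csv(csv_dir / "generators.csv", index=False)

# importer.import_csv_to_database(str(csv_dir), "example.sqlite", "Example CSV Network")
```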
193
+ def _import_pypsa(self):
194
+ """Import PyPSA with standard error handling."""
195
+ try:
196
+ import pypsa
197
+
198
+ return pypsa
199
+ except ImportError as e:
200
+ self.logger.error(f"Failed to import PyPSA: {e}", exc_info=True)
201
+ raise ImportError(
202
+ "PyPSA is not installed or could not be imported. "
203
+ "Please ensure it is installed correctly in the environment."
204
+ ) from e
205
+ except Exception as e:
206
+ self.logger.error(
207
+ f"An unexpected error occurred during PyPSA import: {e}", exc_info=True
208
+ )
209
+ raise
210
+
211
+ def _validate_csv_directory(self, csv_directory: str) -> None:
212
+ """Validate that the CSV directory contains valid PyPSA CSV files"""
213
+ import os
214
+ import pandas as pd
215
+
216
+ csv_path = Path(csv_directory)
217
+ if not csv_path.exists():
218
+ raise ValueError(f"CSV directory does not exist: {csv_directory}")
219
+
220
+ if not csv_path.is_dir():
221
+ raise ValueError(f"Path is not a directory: {csv_directory}")
222
+
223
+ # Find CSV files
224
+ csv_files = list(csv_path.glob("*.csv"))
225
+ if not csv_files:
226
+ raise ValueError(f"No CSV files found in directory: {csv_directory}")
227
+
228
+ # Check each CSV file for basic validity
229
+ component_files = [
230
+ "buses.csv",
231
+ "generators.csv",
232
+ "loads.csv",
233
+ "lines.csv",
234
+ "links.csv",
235
+ "storage_units.csv",
236
+ "stores.csv",
237
+ ]
238
+ required_files = ["buses.csv"] # At minimum, we need buses
239
+
240
+ # Check for required files
241
+ existing_files = [f.name for f in csv_files]
242
+ missing_required = [f for f in required_files if f not in existing_files]
243
+ if missing_required:
244
+ raise ValueError(f"Missing required CSV files: {missing_required}")
245
+
246
+ # Validate each component CSV file that exists
247
+ for csv_file in csv_files:
248
+ if csv_file.name in component_files:
249
+ try:
250
+ df = pd.read_csv(csv_file, nrows=0) # Just read headers
251
+ if "name" not in df.columns:
252
+ raise ValueError(
253
+ f"CSV file '{csv_file.name}' is missing required 'name' column. Found columns: {list(df.columns)}"
254
+ )
255
+ except Exception as e:
256
+ raise ValueError(
257
+ f"Error reading CSV file '{csv_file.name}': {str(e)}"
258
+ )
259
+
260
+ def _import_network_to_database(
261
+ self,
262
+ network,
263
+ db_path: str,
264
+ network_name: str,
265
+ network_description: Optional[str] = None,
266
+ progress_callback: Optional[Callable[[int, str], None]] = None,
267
+ strict_validation: bool = False,
268
+ import_source: str = "PyPSA",
269
+ netcdf_path: Optional[str] = None,
270
+ ) -> Dict[str, Any]:
271
+ """
272
+ Shared logic to import a PyPSA network object into a database.
273
+ This method is used by both NetCDF and CSV import functions.
274
+ """
275
+ try:
276
+ if progress_callback:
277
+ progress_callback(0, "Starting network import...")
278
+
279
+ # Create the database with schema using atomic utility
280
+ create_database_with_schema(db_path)
281
+
282
+ if progress_callback:
283
+ progress_callback(5, "Database schema created")
284
+
285
+ # Connect to database
286
+ conn = open_connection(db_path)
287
+
288
+ try:
289
+ # Load companion location CSV if available (for NetCDF imports only)
290
+ location_map = None
291
+ if import_source == "NetCDF" and netcdf_path:
292
+ location_map = self._detect_and_load_location_csv(netcdf_path)
293
+
294
+ # Create the network record
295
+ self._create_network_record(
296
+ conn, network, network_name, network_description
297
+ )
298
+
299
+ if progress_callback:
300
+ progress_callback(10, "Created network record")
301
+
302
+ # Note: In the new schema, the base network uses scenario_id = NULL
303
+ # No master scenario record is needed in the scenarios table
304
+
305
+ # Create network time periods from PyPSA snapshots
306
+ self._create_network_time_periods(conn, network)
307
+
308
+ if progress_callback:
309
+ progress_callback(15, f"Created network time periods")
310
+
311
+ # Import carriers
312
+ carriers_count = self._import_carriers(conn, network)
313
+
314
+ if progress_callback:
315
+ progress_callback(20, f"Imported {carriers_count} carriers")
316
+
317
+ # Import buses
318
+ buses_count = self._import_buses(conn, network, strict_validation)
319
+
320
+ if progress_callback:
321
+ progress_callback(25, f"Imported {buses_count} buses")
322
+
323
+ # Calculate scatter radius for non-bus components based on bus separation
324
+ bus_coordinates = self._get_bus_coordinates(conn)
325
+ scatter_radius = self._calculate_bus_separation_radius(bus_coordinates)
326
+
327
+ # Import generators
328
+ generators_count = self._import_generators(
329
+ conn, network, strict_validation, scatter_radius, location_map
330
+ )
331
+
332
+ if progress_callback:
333
+ progress_callback(30, f"Imported {generators_count} generators")
334
+
335
+ # Import loads
336
+ loads_count = self._import_loads(
337
+ conn, network, strict_validation, scatter_radius, location_map
338
+ )
339
+
340
+ if progress_callback:
341
+ progress_callback(35, f"Imported {loads_count} loads")
342
+
343
+ # Import lines
344
+ lines_count = self._import_lines(
345
+ conn, network, strict_validation, location_map
346
+ )
347
+
348
+ if progress_callback:
349
+ progress_callback(40, f"Imported {lines_count} lines")
350
+
351
+ # Import links
352
+ links_count = self._import_links(
353
+ conn, network, strict_validation, location_map
354
+ )
355
+
356
+ if progress_callback:
357
+ progress_callback(45, f"Imported {links_count} links")
358
+
359
+ # Import storage units
360
+ storage_units_count = self._import_storage_units(
361
+ conn, network, strict_validation, scatter_radius, location_map
362
+ )
363
+
364
+ if progress_callback:
365
+ progress_callback(
366
+ 50, f"Imported {storage_units_count} storage units"
367
+ )
368
+
369
+ # Import stores
370
+ stores_count = self._import_stores(
371
+ conn, network, strict_validation, scatter_radius, location_map
372
+ )
373
+
374
+ if progress_callback:
375
+ progress_callback(55, f"Imported {stores_count} stores")
376
+
377
+ conn.commit()
378
+
379
+ if progress_callback:
380
+ progress_callback(100, "Import completed successfully")
381
+
382
+ # Collect final statistics
383
+ stats = {
384
+ "network_name": network_name,
385
+ "carriers": carriers_count,
386
+ "buses": buses_count,
387
+ "generators": generators_count,
388
+ "loads": loads_count,
389
+ "lines": lines_count,
390
+ "links": links_count,
391
+ "storage_units": storage_units_count,
392
+ "stores": stores_count,
393
+ "total_components": (
394
+ buses_count
395
+ + generators_count
396
+ + loads_count
397
+ + lines_count
398
+ + links_count
399
+ + storage_units_count
400
+ + stores_count
401
+ ),
402
+ "snapshots": (
403
+ len(network.snapshots) if hasattr(network, "snapshots") else 0
404
+ ),
405
+ }
406
+
407
+ return {
408
+ "success": True,
409
+ "message": f"Network imported successfully from {import_source}",
410
+ "db_path": db_path,
411
+ "stats": stats,
412
+ }
413
+
414
+ finally:
415
+ conn.close()
416
+
417
+ except Exception as e:
418
+ self.logger.error(f"Error importing network: {e}", exc_info=True)
419
+ if progress_callback:
420
+ progress_callback(None, f"Error: {str(e)}")
421
+ raise
422
+
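The shared import path above returns a plain dictionary; a sketch of how a caller might consume it (keys taken from the code above, values illustrative):

```python
# Shape of the result returned by _import_network_to_database and, in turn,
# by the public import_* methods. All values shown here are made up.
result = {
    "success": True,
    "message": "Network imported successfully from NetCDF",
    "db_path": "example_network.sqlite",   # placeholder
    "stats": {
        "network_name": "Example Network",
        "carriers": 4, "buses": 3, "generators": 5, "loads": 3,
        "lines": 2, "links": 1, "storage_units": 1, "stores": 0,
        "total_components": 15, "snapshots": 8760,
    },
}

if result["success"]:
    stats = result["stats"]
    print(f"Imported {stats['total_components']} components "
          f"across {stats['snapshots']} snapshots into {result['db_path']}")
```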
423
+ # Helper methods for the import process
424
+ # Note: These are simplified versions of the methods from the original netcdf_importer.py
425
+ # The full implementation would include all the detailed import logic for each component type
426
+
427
+ def _extract_datetime_snapshots(self, network) -> pd.DatetimeIndex:
428
+ """Extract datetime snapshots from a PyPSA network"""
429
+ if not hasattr(network, "snapshots") or len(network.snapshots) == 0:
430
+ self.logger.warning("No snapshots found in PyPSA network")
431
+ return pd.DatetimeIndex([])
432
+
433
+ snapshots = network.snapshots
434
+
435
+ try:
436
+ # Try direct conversion first (works for simple DatetimeIndex)
437
+ return pd.to_datetime(snapshots)
438
+ except (TypeError, ValueError) as e:
439
+ # Handle MultiIndex case
440
+ if hasattr(snapshots, "nlevels") and snapshots.nlevels > 1:
441
+ # Try to use the timesteps attribute if available (common in multi-period networks)
442
+ if hasattr(network, "timesteps") and isinstance(
443
+ network.timesteps, pd.DatetimeIndex
444
+ ):
445
+ return network.timesteps
446
+
447
+ # Try to extract datetime from the last level of the MultiIndex
448
+ try:
449
+ # Get the last level (usually the timestep level)
450
+ last_level = snapshots.get_level_values(snapshots.nlevels - 1)
451
+ datetime_snapshots = pd.to_datetime(last_level)
452
+ return datetime_snapshots
453
+ except Exception as multi_e:
454
+ self.logger.warning(
455
+ f"Failed to extract datetime from MultiIndex: {multi_e}"
456
+ )
457
+
458
+ # Final fallback: create a default hourly range
459
+ self.logger.warning(
460
+ "Could not extract datetime snapshots, creating default hourly range"
461
+ )
462
+ default_start = pd.Timestamp("2024-01-01 00:00:00")
463
+ default_end = pd.Timestamp("2024-01-01 23:59:59")
464
+ return pd.date_range(start=default_start, end=default_end, freq="H")
465
+
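A small sketch of the MultiIndex case this helper handles; the multi-period snapshots below are constructed by hand for illustration:

```python
# Extracting datetimes from multi-period style snapshots, mirroring the
# MultiIndex fallback above. Data is made up.
import pandas as pd

timesteps = pd.date_range("2030-01-01", periods=4, freq="h")
snapshots = pd.MultiIndex.from_product([[2030], timesteps],
                                       names=["period", "timestep"])

# The last level of the MultiIndex is assumed to hold the timestep.
last_level = snapshots.get_level_values(snapshots.nlevels - 1)
datetime_snapshots = pd.to_datetime(last_level)
print(datetime_snapshots.min(), datetime_snapshots.max())
```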
466
+ def _create_network_record(
467
+ self,
468
+ conn,
469
+ network,
470
+ network_name: str,
471
+ network_description: Optional[str] = None,
472
+ ) -> None:
473
+ """Create the network record and return network ID"""
474
+
475
+ # Extract time information from PyPSA network using our robust helper
476
+ snapshots = self._extract_datetime_snapshots(network)
477
+
478
+ if len(snapshots) > 0:
479
+ time_start = snapshots.min().strftime("%Y-%m-%d %H:%M:%S")
480
+ time_end = snapshots.max().strftime("%Y-%m-%d %H:%M:%S")
481
+
482
+ # Try to infer time interval
483
+ if len(snapshots) > 1:
484
+ freq = pd.infer_freq(snapshots)
485
+ time_interval = freq or "H" # Default to hourly if can't infer
486
+ else:
487
+ time_interval = "H"
488
+ else:
489
+ # Default time range if no snapshots
490
+ time_start = "2024-01-01 00:00:00"
491
+ time_end = "2024-01-01 23:59:59"
492
+ time_interval = "H"
493
+
494
+ description = (
495
+ network_description
496
+ or f"Imported from PyPSA NetCDF on {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')}"
497
+ )
498
+
499
+ request = CreateNetworkRequest(
500
+ name=network_name,
501
+ description=description,
502
+ time_resolution=time_interval,
503
+ start_time=time_start,
504
+ end_time=time_end,
505
+ )
506
+ create_network(conn, request) # Single network per database
507
+
508
+ def _create_network_time_periods(self, conn, network) -> None:
509
+ """Create network time periods from PyPSA snapshots using optimized approach (single network per database)"""
510
+ # Use our robust helper to extract datetime snapshots
511
+ snapshots = self._extract_datetime_snapshots(network)
512
+
513
+ if len(snapshots) == 0:
514
+ self.logger.warning(
515
+ "No valid snapshots found in PyPSA network, skipping time periods creation"
516
+ )
517
+ return
518
+
519
+ # Insert optimized time periods metadata
520
+ period_count = len(snapshots)
521
+ start_timestamp = int(snapshots[0].timestamp())
522
+
523
+ # Calculate interval in seconds
524
+ if len(snapshots) > 1:
525
+ interval_seconds = int((snapshots[1] - snapshots[0]).total_seconds())
526
+ else:
527
+ interval_seconds = 3600 # Default to hourly
528
+
529
+ conn.execute(
530
+ """
531
+ INSERT INTO network_time_periods (period_count, start_timestamp, interval_seconds)
532
+ VALUES (?, ?, ?)
533
+ """,
534
+ (period_count, start_timestamp, interval_seconds),
535
+ )
536
+
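Given the single metadata row written above, the full snapshot index can be reconstructed on read; a sketch using the same three columns (values illustrative):

```python
# Rebuilding the snapshot index from the optimized network_time_periods row.
import pandas as pd

period_count = 8760            # e.g. one year of hourly steps
start_timestamp = 1704067200   # 2024-01-01 00:00:00 UTC
interval_seconds = 3600

snapshots = pd.date_range(
    start=pd.Timestamp(start_timestamp, unit="s"),
    periods=period_count,
    freq=pd.Timedelta(seconds=interval_seconds),
)
print(snapshots[0], snapshots[-1], len(snapshots))
```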
537
+ # Placeholder methods - in a full implementation, these would contain
538
+ # the detailed import logic from the original netcdf_importer.py
539
+
540
+ def _import_carriers(self, conn, network) -> int:
541
+ """Import carriers from PyPSA network, discovering from both network and component levels (single network per database)"""
542
+ count = 0
543
+ created_carriers = set()
544
+
545
+ # Discover all carriers from components (not just n.carriers table)
546
+ all_carriers = set()
547
+
548
+ # Get carriers from network.carriers table if it exists
549
+ if hasattr(network, "carriers") and not network.carriers.empty:
550
+ all_carriers.update(network.carriers.index)
551
+
552
+ # Get carriers from generators
553
+ if (
554
+ hasattr(network, "generators")
555
+ and not network.generators.empty
556
+ and "carrier" in network.generators.columns
557
+ ):
558
+ component_carriers = set(network.generators.carrier.dropna().unique())
559
+ all_carriers.update(component_carriers)
560
+
561
+ # Get carriers from storage units
562
+ if (
563
+ hasattr(network, "storage_units")
564
+ and not network.storage_units.empty
565
+ and "carrier" in network.storage_units.columns
566
+ ):
567
+ component_carriers = set(network.storage_units.carrier.dropna().unique())
568
+ all_carriers.update(component_carriers)
569
+
570
+ # Get carriers from stores
571
+ if (
572
+ hasattr(network, "stores")
573
+ and not network.stores.empty
574
+ and "carrier" in network.stores.columns
575
+ ):
576
+ component_carriers = set(network.stores.carrier.dropna().unique())
577
+ all_carriers.update(component_carriers)
578
+
579
+ # Get carriers from loads (if they have carriers)
580
+ if (
581
+ hasattr(network, "loads")
582
+ and not network.loads.empty
583
+ and "carrier" in network.loads.columns
584
+ ):
585
+ component_carriers = set(network.loads.carrier.dropna().unique())
586
+ all_carriers.update(component_carriers)
587
+
588
+ # Get carriers from buses (if they have carriers)
589
+ if (
590
+ hasattr(network, "buses")
591
+ and not network.buses.empty
592
+ and "carrier" in network.buses.columns
593
+ ):
594
+ component_carriers = set(network.buses.carrier.dropna().unique())
595
+ all_carriers.update(component_carriers)
596
+
597
+ # Convert to sorted list for consistent ordering
598
+ all_carriers = sorted(list(all_carriers))
599
+
600
+ # Define a color palette similar to the Python code
601
+ color_palette = [
602
+ "#1f77b4", # C0 - blue
603
+ "#ff7f0e", # C1 - orange
604
+ "#2ca02c", # C2 - green
605
+ "#d62728", # C3 - red
606
+ "#9467bd", # C4 - purple
607
+ "#8c564b", # C5 - brown
608
+ "#e377c2", # C6 - pink
609
+ "#7f7f7f", # C7 - gray
610
+ "#bcbd22", # C8 - olive
611
+ "#17becf", # C9 - cyan
612
+ "#aec7e8", # light blue
613
+ "#ffbb78", # light orange
614
+ "#98df8a", # light green
615
+ "#ff9896", # light red
616
+ "#c5b0d5", # light purple
617
+ ]
618
+
619
+ # Create carriers from discovered list
620
+ for i, carrier_name in enumerate(all_carriers):
621
+ # Get carrier data from network.carriers if available
622
+ carrier_data = {}
623
+ if (
624
+ hasattr(network, "carriers")
625
+ and not network.carriers.empty
626
+ and carrier_name in network.carriers.index
627
+ ):
628
+ # Use .iloc with index position to avoid fragmentation
629
+ carrier_idx = network.carriers.index.get_loc(carrier_name)
630
+ carrier_data = network.carriers.iloc[carrier_idx]
631
+
632
+ # Extract attributes with defaults
633
+ co2_emissions = carrier_data.get("co2_emissions", 0.0)
634
+
635
+ # Use color from network.carriers if available, otherwise assign from palette
636
+ if "color" in carrier_data and pd.notna(carrier_data["color"]):
637
+ color = carrier_data["color"]
638
+ else:
639
+ color = color_palette[i % len(color_palette)]
640
+
641
+ nice_name = carrier_data.get("nice_name", None)
642
+
643
+ # Create the carrier
644
+ create_carrier(conn, carrier_name, co2_emissions, color, nice_name)
645
+ created_carriers.add(carrier_name)
646
+ count += 1
647
+
648
+ # Ensure we have essential carriers for bus validation
649
+ # Buses can only use AC, DC, heat, or gas carriers according to database constraints
650
+ essential_carriers = {
651
+ "AC": {
652
+ "co2_emissions": 0.0,
653
+ "color": "#3498db",
654
+ "nice_name": "AC Electricity",
655
+ },
656
+ "electricity": {
657
+ "co2_emissions": 0.0,
658
+ "color": "#2ecc71",
659
+ "nice_name": "Electricity",
660
+ },
661
+ }
662
+
663
+ for carrier_name, carrier_props in essential_carriers.items():
664
+ if carrier_name not in created_carriers:
665
+ create_carrier(
666
+ conn,
667
+ carrier_name,
668
+ carrier_props["co2_emissions"],
669
+ carrier_props["color"],
670
+ carrier_props["nice_name"],
671
+ )
672
+ created_carriers.add(carrier_name)
673
+ count += 1
674
+
675
+ return count
676
+
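The discovery logic above repeats the same pattern for each component table; condensed, it amounts to the following sketch (the network object is assumed to be a loaded PyPSA network):

```python
# Condensed sketch of the carrier-discovery pattern used above: collect carrier
# names from network.carriers plus every component table with a 'carrier' column.
def discover_carriers(network):
    all_carriers = set()
    if hasattr(network, "carriers") and not network.carriers.empty:
        all_carriers.update(network.carriers.index)
    for table in ("generators", "storage_units", "stores", "loads", "buses"):
        df = getattr(network, table, None)
        if df is not None and not df.empty and "carrier" in df.columns:
            all_carriers.update(df["carrier"].dropna().unique())
    return sorted(all_carriers)
```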
677
+ def _import_buses(self, conn, network, strict_validation: bool) -> int:
678
+ """Import buses from PyPSA network (single network per database)"""
679
+ count = 0
680
+
681
+ if not hasattr(network, "buses") or network.buses.empty:
682
+ return count
683
+
684
+ for bus_name, bus_data in network.buses.iterrows():
685
+ try:
686
+ # Generate a unique name for this bus
687
+ unique_name = self._generate_unique_name(str(bus_name), "BUS")
688
+
689
+ # Extract and log coordinate data for debugging
690
+ x_value = bus_data.get("x", None)
691
+ y_value = bus_data.get("y", None)
692
+ self.logger.debug(
693
+ f"Bus '{bus_name}' -> '{unique_name}': x={x_value} (type: {type(x_value)}), y={y_value} (type: {type(y_value)})"
694
+ )
695
+
696
+ # Handle NaN/None values properly
697
+ longitude = (
698
+ None
699
+ if x_value is None
700
+ or (hasattr(x_value, "__iter__") and len(str(x_value)) == 0)
701
+ else float(x_value) if x_value != "" else None
702
+ )
703
+ latitude = (
704
+ None
705
+ if y_value is None
706
+ or (hasattr(y_value, "__iter__") and len(str(y_value)) == 0)
707
+ else float(y_value) if y_value != "" else None
708
+ )
709
+
710
+ # Additional check for pandas NaN values
711
+ if longitude is not None and pd.isna(longitude):
712
+ longitude = None
713
+ if latitude is not None and pd.isna(latitude):
714
+ latitude = None
715
+
716
+ # Get or create carrier
717
+ carrier_name = bus_data.get("carrier", "AC")
718
+ carrier_id = self._get_or_create_carrier(conn, carrier_name)
719
+
720
+ # Create component record using atomic function
721
+ # Note: PyPSA 'x'/'y' coordinates are mapped to 'longitude'/'latitude' columns here
722
+ request = CreateComponentRequest(
723
+ component_type="BUS",
724
+ name=unique_name, # Use globally unique name
725
+ latitude=latitude, # PyPSA y -> latitude
726
+ longitude=longitude, # PyPSA x -> longitude
727
+ carrier_id=carrier_id,
728
+ )
729
+ component_id = insert_component(conn, request)
730
+
731
+ # Import bus attributes (location/coordinate data is handled above, not as attributes)
732
+ self._import_component_attributes(
733
+ conn, component_id, bus_data, "BUS", strict_validation
734
+ )
735
+
736
+ # Import timeseries attributes for buses
737
+ self._import_component_timeseries(
738
+ conn, network, component_id, bus_name, "BUS", strict_validation
739
+ )
740
+
741
+ count += 1
742
+
743
+ except Exception as e:
744
+ if strict_validation:
745
+ raise
746
+ self.logger.warning(f"Failed to import bus {bus_name}: {e}")
747
+ continue
748
+
749
+ return count
750
+
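The inline longitude/latitude handling above is dense; its intent can be summarized by a small helper like this (a sketch, not part of the package): PyPSA 'x' maps to longitude, 'y' to latitude, and None, NaN, or empty strings become None.

```python
# Sketch of the coordinate-cleaning intent in _import_buses.
import pandas as pd

def clean_coordinate(value):
    if value is None or value == "":
        return None
    try:
        value = float(value)
    except (TypeError, ValueError):
        return None
    return None if pd.isna(value) else value

assert clean_coordinate("") is None
assert clean_coordinate(float("nan")) is None
assert clean_coordinate("10.5") == 10.5
```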
751
+ # Additional placeholder methods for other component types
752
+ def _import_generators(
753
+ self,
754
+ conn,
755
+ network,
756
+ strict_validation: bool,
757
+ scatter_radius: float,
758
+ location_map,
759
+ ) -> int:
760
+ """Import generators from PyPSA network (single network per database)"""
761
+ count = 0
762
+
763
+ if not hasattr(network, "generators") or network.generators.empty:
764
+ return count
765
+
766
+ # Get bus name to ID mapping
767
+ bus_name_to_id = get_bus_name_to_id_map(conn)
768
+
769
+ # Get master scenario ID
770
+ master_scenario_id = None
771
+
772
+ for gen_name, gen_data in network.generators.iterrows():
773
+ try:
774
+ # Get bus connection
775
+ bus_name = gen_data.get("bus")
776
+ bus_id = bus_name_to_id.get(bus_name) if bus_name else None
777
+
778
+ if not bus_id:
779
+ self.logger.warning(
780
+ f"Generator {gen_name}: bus '{bus_name}' not found, skipping"
781
+ )
782
+ continue
783
+
784
+ # Get or create carrier
785
+ carrier_name = gen_data.get("carrier", "AC")
786
+ carrier_id = self._get_or_create_carrier(conn, carrier_name)
787
+
788
+ # Generate coordinates near the bus
789
+ latitude, longitude = self._generate_component_coordinates(
790
+ conn, bus_id, scatter_radius, location_map, gen_name
791
+ )
792
+
793
+ # Create component record
794
+ request = CreateComponentRequest(
795
+ component_type="GENERATOR",
796
+ name=str(gen_name),
797
+ latitude=latitude,
798
+ longitude=longitude,
799
+ carrier_id=carrier_id,
800
+ bus_id=bus_id,
801
+ )
802
+ component_id = insert_component(conn, request)
803
+
804
+ # Import generator attributes
805
+ self._import_component_attributes(
806
+ conn, component_id, gen_data, "GENERATOR", strict_validation
807
+ )
808
+
809
+ # Import timeseries attributes for generators
810
+ self._import_component_timeseries(
811
+ conn,
812
+ network,
813
+ component_id,
814
+ gen_name,
815
+ "GENERATOR",
816
+ strict_validation,
817
+ )
818
+
819
+ count += 1
820
+
821
+ except Exception as e:
822
+ if strict_validation:
823
+ raise
824
+ self.logger.warning(f"Failed to import generator {gen_name}: {e}")
825
+ continue
826
+
827
+ return count
828
+
829
+ def _import_loads(
830
+ self,
831
+ conn,
832
+ network,
833
+ strict_validation: bool,
834
+ scatter_radius: float,
835
+ location_map,
836
+ ) -> int:
837
+ """Import loads from PyPSA network (single network per database)"""
838
+ count = 0
839
+
840
+ if not hasattr(network, "loads") or network.loads.empty:
841
+ return count
842
+
843
+ bus_map = get_bus_name_to_id_map(conn)
844
+ bus_coords = self._get_bus_coordinates_map(conn)
845
+
846
+ # Count components per bus for better distribution
847
+ components_per_bus = {}
848
+ for load_name, load_data in network.loads.iterrows():
849
+ bus_name = load_data["bus"]
850
+ components_per_bus[bus_name] = components_per_bus.get(bus_name, 0) + 1
851
+
852
+ bus_component_counters = {}
853
+
854
+ for load_name, load_data in network.loads.iterrows():
855
+ try:
856
+ bus_id = bus_map.get(load_data["bus"])
857
+ if bus_id is None:
858
+ self.logger.warning(
859
+ f"Bus '{load_data['bus']}' not found for load '{load_name}'"
860
+ )
861
+ continue
862
+
863
+ # Generate a unique name for this load
864
+ unique_name = self._generate_unique_name(str(load_name), "LOAD")
865
+
866
+ # Try to get coordinates from CSV first, then fall back to scattered coordinates
867
+ latitude, longitude = None, None
868
+
869
+ # Check CSV coordinates first
870
+ csv_coords = self._get_csv_coordinates(unique_name, location_map)
871
+ if csv_coords:
872
+ latitude, longitude = csv_coords
873
+ elif bus_id in bus_coords:
874
+ # Fall back to scattered coordinates around the connected bus
875
+ bus_lat, bus_lon = bus_coords[bus_id]
876
+ bus_name = load_data["bus"]
877
+
878
+ # Get component index for this bus
879
+ component_index = bus_component_counters.get(bus_name, 0)
880
+ bus_component_counters[bus_name] = component_index + 1
881
+
882
+ latitude, longitude = self._generate_scattered_coordinates(
883
+ bus_lat,
884
+ bus_lon,
885
+ scatter_radius,
886
+ components_per_bus[bus_name],
887
+ component_index,
888
+ )
889
+
890
+ # Get carrier ID if carrier is specified
891
+ carrier_id = None
892
+ if "carrier" in load_data and pd.notna(load_data["carrier"]):
893
+ carrier_id = self._get_or_create_carrier(conn, load_data["carrier"])
894
+
895
+ # Create component record using atomic function
896
+ request = CreateComponentRequest(
897
+ component_type="LOAD",
898
+ name=unique_name, # Use globally unique name
899
+ bus_id=bus_id,
900
+ carrier_id=carrier_id,
901
+ latitude=latitude,
902
+ longitude=longitude,
903
+ )
904
+ component_id = insert_component(conn, request)
905
+
906
+ # Import load attributes
907
+ self._import_component_attributes(
908
+ conn, component_id, load_data, "LOAD", strict_validation
909
+ )
910
+
911
+ # Import timeseries attributes for loads
912
+ self._import_component_timeseries(
913
+ conn, network, component_id, load_name, "LOAD", strict_validation
914
+ )
915
+
916
+ count += 1
917
+
918
+ except Exception as e:
919
+ if strict_validation:
920
+ raise
921
+ self.logger.warning(f"Failed to import load {load_name}: {e}")
922
+ continue
923
+
924
+ return count
925
+
926
+ def _import_lines(
927
+ self, conn, network, strict_validation: bool, location_map
928
+ ) -> int:
929
+ """Import lines from PyPSA network (single network per database)"""
930
+ count = 0
931
+ name_counter = {} # Track duplicate names
932
+
933
+ if not hasattr(network, "lines") or network.lines.empty:
934
+ return count
935
+
936
+ bus_map = get_bus_name_to_id_map(conn)
937
+
938
+ for line_name, line_data in network.lines.iterrows():
939
+ try:
940
+ bus0_id = bus_map.get(line_data["bus0"])
941
+ bus1_id = bus_map.get(line_data["bus1"])
942
+
943
+ if bus0_id is None or bus1_id is None:
944
+ self.logger.warning(
945
+ f"Bus not found for line '{line_name}': bus0='{line_data['bus0']}', bus1='{line_data['bus1']}'"
946
+ )
947
+ continue
948
+
949
+ # Handle duplicate names by appending counter
950
+ unique_name = line_name
951
+ if line_name in name_counter:
952
+ name_counter[line_name] += 1
953
+ unique_name = f"{line_name}_{name_counter[line_name]}"
954
+ self.logger.warning(
955
+ f"Duplicate line name '{line_name}' renamed to '{unique_name}'"
956
+ )
957
+ else:
958
+ name_counter[line_name] = 0
959
+
960
+ # Check for CSV coordinates
961
+ latitude, longitude = None, None
962
+ csv_coords = self._get_csv_coordinates(unique_name, location_map)
963
+ if csv_coords:
964
+ latitude, longitude = csv_coords
965
+
966
+ # Lines always use AC carrier
967
+ carrier_id = self._get_or_create_carrier(conn, "AC")
968
+
969
+ # Create component record using atomic function
970
+ request = CreateComponentRequest(
971
+ component_type="LINE",
972
+ name=unique_name, # Use deduplicated name
973
+ bus0_id=bus0_id,
974
+ bus1_id=bus1_id,
975
+ carrier_id=carrier_id,
976
+ latitude=latitude,
977
+ longitude=longitude,
978
+ )
979
+ component_id = insert_component(conn, request)
980
+
981
+ # Import line attributes
982
+ self._import_component_attributes(
983
+ conn, component_id, line_data, "LINE", strict_validation
984
+ )
985
+
986
+ # Import timeseries attributes for lines
987
+ self._import_component_timeseries(
988
+ conn, network, component_id, line_name, "LINE", strict_validation
989
+ )
990
+
991
+ count += 1
992
+
993
+ except Exception as e:
994
+ if strict_validation:
995
+ raise
996
+ self.logger.warning(f"Failed to import line {line_name}: {e}")
997
+ continue
998
+
999
+ return count
1000
+
1001
+ def _import_links(
1002
+ self, conn, network, strict_validation: bool, location_map
1003
+ ) -> int:
1004
+ """Import links from PyPSA network (single network per database)"""
1005
+ count = 0
1006
+
1007
+ if not hasattr(network, "links") or network.links.empty:
1008
+ return count
1009
+
1010
+ bus_map = get_bus_name_to_id_map(conn)
1011
+
1012
+ for link_name, link_data in network.links.iterrows():
1013
+ try:
1014
+ bus0_id = bus_map.get(link_data["bus0"])
1015
+ bus1_id = bus_map.get(link_data["bus1"])
1016
+
1017
+ if bus0_id is None or bus1_id is None:
1018
+ self.logger.warning(
1019
+ f"Bus not found for link '{link_name}': bus0='{link_data['bus0']}', bus1='{link_data['bus1']}'"
1020
+ )
1021
+ continue
1022
+
1023
+ # Generate a unique name for this link
1024
+ unique_name = self._generate_unique_name(str(link_name), "LINK")
1025
+
1026
+ # Check for CSV coordinates
1027
+ latitude, longitude = None, None
1028
+ csv_coords = self._get_csv_coordinates(unique_name, location_map)
1029
+ if csv_coords:
1030
+ latitude, longitude = csv_coords
1031
+
1032
+ # Get carrier ID if carrier is specified
1033
+ carrier_id = None
1034
+ if "carrier" in link_data and pd.notna(link_data["carrier"]):
1035
+ carrier_id = self._get_or_create_carrier(conn, link_data["carrier"])
1036
+ else:
1037
+ # Default to DC for links
1038
+ carrier_id = self._get_or_create_carrier(conn, "DC")
1039
+
1040
+ # Create component record using atomic function
1041
+ request = CreateComponentRequest(
1042
+ component_type="LINK",
1043
+ name=unique_name, # Use globally unique name
1044
+ bus0_id=bus0_id,
1045
+ bus1_id=bus1_id,
1046
+ carrier_id=carrier_id,
1047
+ latitude=latitude,
1048
+ longitude=longitude,
1049
+ )
1050
+ component_id = insert_component(conn, request)
1051
+
1052
+ # Import link attributes
1053
+ self._import_component_attributes(
1054
+ conn, component_id, link_data, "LINK", strict_validation
1055
+ )
1056
+
1057
+ # Import timeseries attributes for links
1058
+ self._import_component_timeseries(
1059
+ conn, network, component_id, link_name, "LINK", strict_validation
1060
+ )
1061
+
1062
+ count += 1
1063
+
1064
+ except Exception as e:
1065
+ if strict_validation:
1066
+ raise
1067
+ self.logger.warning(f"Failed to import link {link_name}: {e}")
1068
+ continue
1069
+
1070
+ return count
1071
+
1072
+ def _import_storage_units(
1073
+ self,
1074
+ conn,
1075
+ network,
1076
+ strict_validation: bool,
1077
+ scatter_radius: float,
1078
+ location_map,
1079
+ ) -> int:
1080
+ """Import storage units from PyPSA network"""
1081
+ count = 0
1082
+
1083
+ if not hasattr(network, "storage_units") or network.storage_units.empty:
1084
+ return count
1085
+
1086
+ bus_map = get_bus_name_to_id_map(conn)
1087
+ bus_coords = self._get_bus_coordinates_map(conn)
1088
+
1089
+ # Count components per bus for better distribution
1090
+ components_per_bus = {}
1091
+ for su_name, su_data in network.storage_units.iterrows():
1092
+ bus_name = su_data["bus"]
1093
+ components_per_bus[bus_name] = components_per_bus.get(bus_name, 0) + 1
1094
+
1095
+ bus_component_counters = {}
1096
+
1097
+ for su_name, su_data in network.storage_units.iterrows():
1098
+ try:
1099
+ bus_id = bus_map.get(su_data["bus"])
1100
+ if bus_id is None:
1101
+ self.logger.warning(
1102
+ f"Bus '{su_data['bus']}' not found for storage unit '{su_name}'"
1103
+ )
1104
+ continue
1105
+
1106
+ # Generate a unique name for this storage unit
1107
+ unique_name = self._generate_unique_name(str(su_name), "STORAGE_UNIT")
1108
+
1109
+ # Try to get coordinates from CSV first, then fall back to scattered coordinates
1110
+ latitude, longitude = None, None
1111
+
1112
+ # Check CSV coordinates first
1113
+ csv_coords = self._get_csv_coordinates(unique_name, location_map)
1114
+ if csv_coords:
1115
+ latitude, longitude = csv_coords
1116
+ elif bus_id in bus_coords:
1117
+ # Fall back to scattered coordinates around the connected bus
1118
+ bus_lat, bus_lon = bus_coords[bus_id]
1119
+ bus_name = su_data["bus"]
1120
+
1121
+ # Get component index for this bus
1122
+ component_index = bus_component_counters.get(bus_name, 0)
1123
+ bus_component_counters[bus_name] = component_index + 1
1124
+
1125
+ latitude, longitude = self._generate_scattered_coordinates(
1126
+ bus_lat,
1127
+ bus_lon,
1128
+ scatter_radius,
1129
+ components_per_bus[bus_name],
1130
+ component_index,
1131
+ )
1132
+
1133
+ # Get carrier ID if carrier is specified
1134
+ carrier_id = None
1135
+ if "carrier" in su_data and pd.notna(su_data["carrier"]):
1136
+ carrier_id = self._get_or_create_carrier(conn, su_data["carrier"])
1137
+
1138
+ # Create component record using atomic function
1139
+ request = CreateComponentRequest(
1140
+ component_type="STORAGE_UNIT",
1141
+ name=unique_name, # Use globally unique name
1142
+ bus_id=bus_id,
1143
+ carrier_id=carrier_id,
1144
+ latitude=latitude,
1145
+ longitude=longitude,
1146
+ )
1147
+ component_id = insert_component(conn, request)
1148
+
1149
+ # Import storage unit attributes
1150
+ self._import_component_attributes(
1151
+ conn, component_id, su_data, "STORAGE_UNIT", strict_validation
1152
+ )
1153
+
1154
+ # Import timeseries attributes for storage units
1155
+ self._import_component_timeseries(
1156
+ conn,
1157
+ network,
1158
+ component_id,
1159
+ su_name,
1160
+ "STORAGE_UNIT",
1161
+ strict_validation,
1162
+ )
1163
+
1164
+ count += 1
1165
+
1166
+ except Exception as e:
1167
+ if strict_validation:
1168
+ raise
1169
+ self.logger.warning(f"Failed to import storage unit {su_name}: {e}")
1170
+ continue
1171
+
1172
+ return count
1173
+
1174
+ def _import_stores(
1175
+ self,
1176
+ conn,
1177
+ network,
1178
+ strict_validation: bool,
1179
+ scatter_radius: float,
1180
+ location_map,
1181
+ ) -> int:
1182
+ """Import stores from PyPSA network (single network per database)"""
1183
+ count = 0
1184
+ name_counter = {} # Track duplicate names
1185
+
1186
+ if not hasattr(network, "stores") or network.stores.empty:
1187
+ return count
1188
+
1189
+ bus_map = get_bus_name_to_id_map(conn)
1190
+ bus_coords = self._get_bus_coordinates_map(conn)
1191
+
1192
+ # Count components per bus for better distribution
1193
+ components_per_bus = {}
1194
+ for store_name, store_data in network.stores.iterrows():
1195
+ bus_name = store_data["bus"]
1196
+ components_per_bus[bus_name] = components_per_bus.get(bus_name, 0) + 1
1197
+
1198
+ bus_component_counters = (
1199
+ {}
1200
+ ) # Track how many components we've placed at each bus
1201
+
1202
+ for store_name, store_data in network.stores.iterrows():
1203
+ try:
1204
+ bus_id = bus_map.get(store_data["bus"])
1205
+ if bus_id is None:
1206
+ self.logger.warning(
1207
+ f"Bus '{store_data['bus']}' not found for store '{store_name}'"
1208
+ )
1209
+ continue
1210
+
1211
+ # Handle duplicate names by appending counter
1212
+ unique_name = store_name
1213
+ if store_name in name_counter:
1214
+ name_counter[store_name] += 1
1215
+ unique_name = f"{store_name}_{name_counter[store_name]}"
1216
+ self.logger.warning(
1217
+ f"Duplicate store name '{store_name}' renamed to '{unique_name}'"
1218
+ )
1219
+ else:
1220
+ name_counter[store_name] = 0
1221
+
1222
+ # Try to get coordinates from CSV first, then fall back to scattered coordinates
1223
+ latitude, longitude = None, None
1224
+
1225
+ # Check CSV coordinates first
1226
+ csv_coords = self._get_csv_coordinates(unique_name, location_map)
1227
+ if csv_coords:
1228
+ latitude, longitude = csv_coords
1229
+ elif bus_id in bus_coords:
1230
+ # Fall back to scattered coordinates around the connected bus
1231
+ bus_lat, bus_lon = bus_coords[bus_id]
1232
+ bus_name = store_data["bus"]
1233
+
1234
+ # Get component index for this bus
1235
+ component_index = bus_component_counters.get(bus_name, 0)
1236
+ bus_component_counters[bus_name] = component_index + 1
1237
+
1238
+ latitude, longitude = self._generate_scattered_coordinates(
1239
+ bus_lat,
1240
+ bus_lon,
1241
+ scatter_radius,
1242
+ components_per_bus[bus_name],
1243
+ component_index,
1244
+ )
1245
+
1246
+ # Get carrier ID if carrier is specified
1247
+ carrier_id = None
1248
+ if "carrier" in store_data and pd.notna(store_data["carrier"]):
1249
+ carrier_id = self._get_or_create_carrier(
1250
+ conn, store_data["carrier"]
1251
+ )
1252
+
1253
+ # Create component record using atomic function
1254
+ request = CreateComponentRequest(
1255
+ component_type="STORE",
1256
+ name=unique_name, # Use deduplicated name
1257
+ bus_id=bus_id,
1258
+ carrier_id=carrier_id,
1259
+ latitude=latitude,
1260
+ longitude=longitude,
1261
+ )
1262
+ component_id = insert_component(conn, request)
1263
+
1264
+ # Import store attributes
1265
+ self._import_component_attributes(
1266
+ conn, component_id, store_data, "STORE", strict_validation
1267
+ )
1268
+
1269
+ # Import timeseries attributes for stores
1270
+ self._import_component_timeseries(
1271
+ conn, network, component_id, store_name, "STORE", strict_validation
1272
+ )
1273
+
1274
+ count += 1
1275
+
1276
+ except Exception as e:
1277
+ if strict_validation:
1278
+ raise
1279
+ self.logger.warning(f"Failed to import store {store_name}: {e}")
1280
+ continue
1281
+
1282
+ return count
1283
+
1284
+ def _get_bus_coordinates(self, conn) -> List[Tuple[float, float]]:
1285
+ """Get coordinates of all buses in the network that have valid coordinates (single network per database)"""
1286
+ cursor = conn.execute(
1287
+ """
1288
+ SELECT latitude, longitude FROM components
1289
+ WHERE component_type = 'BUS'
1290
+ AND latitude IS NOT NULL AND longitude IS NOT NULL
1291
+ AND NOT (latitude = 0 AND longitude = 0)
1292
+ """,
1293
+ (),
1294
+ )
1295
+
1296
+ coordinates = [(row[0], row[1]) for row in cursor.fetchall()]
1297
+ return coordinates
1298
+
1299
+ def _calculate_bus_separation_radius(
1300
+ self, bus_coordinates: List[Tuple[float, float]]
1301
+ ) -> float:
1302
+ """Calculate the minimum separation between buses and return a radius for scattering"""
1303
+ if len(bus_coordinates) < 2:
1304
+ return 0.01 # ~1km at equator
1305
+
1306
+ min_distance_degrees = float("inf")
1307
+ min_separation_threshold = 0.001 # ~100m threshold to exclude co-located buses
1308
+
1309
+ for i, (lat1, lon1) in enumerate(bus_coordinates):
1310
+ for j, (lat2, lon2) in enumerate(bus_coordinates[i + 1 :], i + 1):
1311
+ # Simple Euclidean distance in degrees
1312
+ distance_degrees = math.sqrt((lat2 - lat1) ** 2 + (lon2 - lon1) ** 2)
1313
+
1314
+ if distance_degrees > min_separation_threshold:
1315
+ min_distance_degrees = min(min_distance_degrees, distance_degrees)
1316
+
1317
+ if min_distance_degrees == float("inf"):
1318
+ scatter_radius_degrees = 0.05 # ~5km default
1319
+ else:
1320
+ scatter_radius_degrees = min_distance_degrees * 0.25
1321
+
1322
+ # Ensure reasonable bounds: between 1km and 100km equivalent in degrees
1323
+ min_radius = 0.01 # ~1km
1324
+ max_radius = 1.0 # ~100km
1325
+ scatter_radius_degrees = max(
1326
+ min_radius, min(max_radius, scatter_radius_degrees)
1327
+ )
1328
+
1329
+ return scatter_radius_degrees
1330
+
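A worked sketch of the radius heuristic above, with made-up coordinates: the smallest bus separation above the ~100 m threshold is scaled by 0.25 and clamped to the [0.01, 1.0] degree range.

```python
# Worked example of _calculate_bus_separation_radius.
import math

bus_coordinates = [(59.0, 10.0), (59.0, 10.4), (58.5, 10.0)]

min_distance = float("inf")
for i, (lat1, lon1) in enumerate(bus_coordinates):
    for lat2, lon2 in bus_coordinates[i + 1:]:
        d = math.sqrt((lat2 - lat1) ** 2 + (lon2 - lon1) ** 2)
        if d > 0.001:                       # ignore effectively co-located buses
            min_distance = min(min_distance, d)

scatter_radius = max(0.01, min(1.0, min_distance * 0.25))
print(round(scatter_radius, 3))             # 0.1 -> scatter within ~0.1 degrees
```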
1331
+ def _detect_and_load_location_csv(
1332
+ self, netcdf_path: str
1333
+ ) -> Optional[Dict[str, Tuple[float, float]]]:
1334
+ """
1335
+ Detect and load companion CSV file with component locations.
1336
+
1337
+ Args:
1338
+ netcdf_path: Path to the NetCDF file (e.g., /path/to/fileX.nc)
1339
+
1340
+ Returns:
1341
+ Dictionary mapping component names to (latitude, longitude) tuples, or None if no CSV found
1342
+ """
1343
+ try:
1344
+ # Construct expected CSV path: replace .nc with _locations.csv
1345
+ netcdf_file = Path(netcdf_path)
1346
+ csv_path = netcdf_file.parent / f"{netcdf_file.stem}_locations.csv"
1347
+
1348
+ if not csv_path.exists():
1349
+ return None
1350
+
1351
+ # Parse the CSV file
1352
+ try:
1353
+ location_df = pd.read_csv(csv_path)
1354
+
1355
+ # Validate required columns
1356
+ required_columns = {"name", "longitude", "latitude"}
1357
+ if not required_columns.issubset(location_df.columns):
1358
+ missing_cols = required_columns - set(location_df.columns)
1359
+ self.logger.warning(
1360
+ f"Location CSV missing required columns: {missing_cols}. Found columns: {list(location_df.columns)}"
1361
+ )
1362
+ return None
1363
+
1364
+ # Create lookup dictionary
1365
+ location_map = {}
1366
+ skipped_count = 0
1367
+
1368
+ for _, row in location_df.iterrows():
1369
+ name = row["name"]
1370
+ longitude = row["longitude"]
1371
+ latitude = row["latitude"]
1372
+
1373
+ # Skip rows with missing data
1374
+ if pd.isna(name) or pd.isna(longitude) or pd.isna(latitude):
1375
+ skipped_count += 1
1376
+ continue
1377
+
1378
+ # Validate coordinate ranges
1379
+ if not (-180 <= longitude <= 180) or not (-90 <= latitude <= 90):
1380
+ self.logger.warning(
1381
+ f"Invalid coordinates for '{name}': longitude={longitude}, latitude={latitude}"
1382
+ )
1383
+ skipped_count += 1
1384
+ continue
1385
+
1386
+ location_map[str(name).strip()] = (
1387
+ float(latitude),
1388
+ float(longitude),
1389
+ )
1390
+
1391
+ self.logger.info(
1392
+ f"Loaded {len(location_map)} component locations from CSV (skipped {skipped_count} invalid entries)"
1393
+ )
1394
+ return location_map
1395
+
1396
+ except Exception as e:
1397
+ self.logger.error(f"Failed to parse location CSV {csv_path}: {e}")
1398
+ return None
1399
+
1400
+ except Exception as e:
1401
+ self.logger.warning(f"Error detecting location CSV: {e}")
1402
+ return None
1403
+
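For NetCDF imports, the companion file is looked up next to the .nc file as <stem>_locations.csv. An illustrative example of the expected shape (names and coordinates are made up):

```python
# Companion CSV for a NetCDF import: for example_network.nc the importer looks
# for example_network_locations.csv in the same directory. Rows with missing or
# out-of-range coordinates are skipped.
import pandas as pd

pd.DataFrame(
    {
        "name": ["gen_wind_1", "load_city", "storage_hydro"],
        "longitude": [10.12, 10.75, 9.98],
        "latitude": [59.91, 59.44, 60.02],
    }
).to_csv("example_network_locations.csv", index=False)
```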
1404
+ def _get_or_create_carrier(self, conn, carrier_name: str) -> int:
1405
+ """Get existing carrier ID or create new carrier (single network per database)"""
1406
+ # Try to find existing carrier
1407
+ cursor = conn.execute("SELECT id FROM carriers WHERE name = ?", (carrier_name,))
1408
+ result = cursor.fetchone()
1409
+ if result:
1410
+ return result[0]
1411
+
1412
+ # Create new carrier
1413
+ carrier_id = create_carrier(conn, carrier_name, 0.0, "#3498db", carrier_name)
1414
+ return carrier_id
1415
+
1416
+ def _generate_component_coordinates(
1417
+ self,
1418
+ conn,
1419
+ bus_id: int,
1420
+ scatter_radius: float,
1421
+ location_map: Optional[Dict],
1422
+ component_name: str,
1423
+ ) -> Tuple[Optional[float], Optional[float]]:
1424
+ """Generate coordinates for a component near its connected bus"""
1425
+ # Check location map first
1426
+ if location_map and component_name in location_map:
1427
+ return location_map[component_name]
1428
+
1429
+ # Get bus coordinates
1430
+ cursor = conn.execute(
1431
+ "SELECT latitude, longitude FROM components WHERE id = ?", (bus_id,)
1432
+ )
1433
+ result = cursor.fetchone()
1434
+ if not result or result[0] is None or result[1] is None:
1435
+ return None, None
1436
+
1437
+ bus_lat, bus_lon = result[0], result[1]
1438
+
1439
+ # Generate unique name-based offset
1440
+ name_hash = hash(component_name) % 1000
1441
+ angle = (name_hash / 1000.0) * 2 * math.pi
1442
+
1443
+ # Apply scatter radius
1444
+ lat_offset = scatter_radius * math.cos(angle)
1445
+ lon_offset = scatter_radius * math.sin(angle)
1446
+
1447
+ return bus_lat + lat_offset, bus_lon + lon_offset
1448
+
1449
+ def _import_component_attributes(
1450
+ self,
1451
+ conn,
1452
+ component_id: int,
1453
+ component_data: pd.Series,
1454
+ component_type: str,
1455
+ strict_validation: bool,
1456
+ ):
1457
+ """Import component attributes, excluding bus connection columns"""
1458
+
1459
+ # Get master scenario ID
1460
+ scenario_id = None
1461
+
1462
+ # Skip these columns as they're handled in the components table
1463
+ skip_columns = {
1464
+ "bus",
1465
+ "bus0",
1466
+ "bus1",
1467
+ "name", # Bus connections and name
1468
+ "x",
1469
+ "y",
1470
+ "location", # Coordinate/location data (stored as latitude/longitude columns)
1471
+ "carrier", # Carrier reference (stored as carrier_id column)
1472
+ }
1473
+
1474
+ attribute_count = 0
1475
+ skipped_count = 0
1476
+
1477
+ for attr_name, value in component_data.items():
1478
+ if attr_name in skip_columns:
1479
+ skipped_count += 1
1480
+ continue
1481
+
1482
+ if pd.isna(value):
1483
+ skipped_count += 1
1484
+ continue
1485
+
1486
+ # Convert value to appropriate format for our database and use smart attribute setting
1487
+ try:
1488
+ # Get validation rule to check expected data type
1489
+ try:
1490
+ rule = get_validation_rule(conn, component_type, attr_name)
1491
+ expected_type = rule.data_type
1492
+ except:
1493
+ expected_type = None
1494
+
1495
+ # Convert based on expected type or infer from value
1496
+ if expected_type == "boolean":
1497
+ # Handle boolean attributes that might come as int/float from PyPSA
1498
+ if isinstance(value, (bool, np.bool_)):
1499
+ static_value = StaticValue(bool(value))
1500
+ elif isinstance(value, (int, np.integer)):
1501
+ static_value = StaticValue(bool(value)) # 0 -> False, 1 -> True
1502
+ elif isinstance(value, (float, np.floating)):
1503
+ static_value = StaticValue(
1504
+ bool(int(value))
1505
+ ) # 0.0 -> False, 1.0 -> True
1506
+ else:
1507
+ static_value = StaticValue(str(value).lower() == "true")
1508
+ elif expected_type == "int":
1509
+ # Handle integer attributes
1510
+ if isinstance(value, (int, np.integer)):
1511
+ static_value = StaticValue(int(value))
1512
+ elif isinstance(value, (float, np.floating)):
1513
+ if np.isfinite(value):
1514
+ static_value = StaticValue(int(value))
1515
+ else:
1516
+ skipped_count += 1
1517
+ continue
1518
+ elif isinstance(value, bool):
1519
+ static_value = StaticValue(int(value))
1520
+ else:
1521
+ static_value = StaticValue(int(float(str(value))))
1522
+ elif expected_type == "float":
1523
+ # Handle float attributes
1524
+ if isinstance(value, (float, np.floating)):
1525
+ if np.isfinite(value):
1526
+ static_value = StaticValue(float(value))
1527
+ else:
1528
+ skipped_count += 1
1529
+ continue
1530
+ elif isinstance(value, (int, np.integer)):
1531
+ static_value = StaticValue(float(value))
1532
+ elif isinstance(value, bool):
1533
+ static_value = StaticValue(float(value))
1534
+ else:
1535
+ static_value = StaticValue(float(str(value)))
1536
+ else:
1537
+ # Fallback to type inference for unknown or string types
1538
+ if isinstance(value, bool):
1539
+ static_value = StaticValue(bool(value))
1540
+ elif isinstance(value, (int, np.integer)):
1541
+ static_value = StaticValue(int(value))
1542
+ elif isinstance(value, (float, np.floating)):
1543
+ if np.isfinite(value):
1544
+ static_value = StaticValue(float(value))
1545
+ else:
1546
+ skipped_count += 1
1547
+ continue # Skip infinite/NaN values
1548
+ else:
1549
+ static_value = StaticValue(str(value))
1550
+
1551
+ # Use direct static attribute setting
1552
+ set_static_attribute(
1553
+ conn, component_id, attr_name, static_value, scenario_id
1554
+ )
1555
+ attribute_count += 1
1556
+
1557
+ except Exception as e:
1558
+ # Handle validation errors from db_utils functions
1559
+ if (
1560
+ "No validation rule found" in str(e)
1561
+ or "does not allow" in str(e)
1562
+ or "ValidationError" in str(type(e).__name__)
1563
+ ):
1564
+ if strict_validation:
1565
+ raise
1566
+ else:
1567
+ self.logger.warning(
1568
+ f"Skipping undefined/invalid attribute '{attr_name}' for {component_type} component {component_id}: {e}"
1569
+ )
1570
+ skipped_count += 1
1571
+ continue
1572
+ else:
1573
+ # Log but don't fail on other attribute import errors (like type conversion issues)
1574
+ self.logger.warning(
1575
+ f"Skipping attribute {attr_name} for component {component_id}: {e}"
1576
+ )
1577
+ skipped_count += 1
1578
+
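The conversion branches above reduce to a simple rule per expected type before the value is wrapped in StaticValue; a standalone sketch of that coercion (not the package's own helper, and with non-finite numbers returned as None rather than skipped):

```python
# Standalone sketch of the value coercion applied above.
import numpy as np

def coerce(value, expected_type):
    if expected_type == "boolean":
        if isinstance(value, (bool, np.bool_)):
            return bool(value)
        if isinstance(value, (int, np.integer, float, np.floating)):
            return bool(int(value))
        return str(value).lower() == "true"
    if expected_type == "int":
        return int(float(value)) if np.isfinite(float(value)) else None
    if expected_type == "float":
        return float(value) if np.isfinite(float(value)) else None
    return str(value)

assert coerce(1.0, "boolean") is True
assert coerce("2.0", "int") == 2
assert coerce(float("inf"), "float") is None
```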
1579
+ def _import_component_timeseries(
1580
+ self,
1581
+ conn,
1582
+ network,
1583
+ component_id: int,
1584
+ component_name: str,
1585
+ component_type: str,
1586
+ strict_validation: bool,
1587
+ ):
1588
+ """Import timeseries attributes from PyPSA network"""
1589
+
1590
+ # Get master scenario ID
1591
+ scenario_id = None
1592
+
1593
+ # Map component types to their PyPSA timeseries DataFrames
1594
+ timeseries_map = {
1595
+ "BUS": getattr(network, "buses_t", {}),
1596
+ "GENERATOR": getattr(network, "generators_t", {}),
1597
+ "LOAD": getattr(network, "loads_t", {}),
1598
+ "LINE": getattr(network, "lines_t", {}),
1599
+ "LINK": getattr(network, "links_t", {}),
1600
+ "STORAGE_UNIT": getattr(network, "storage_units_t", {}),
1601
+ "STORE": getattr(network, "stores_t", {}),
1602
+ }
1603
+
1604
+ component_timeseries = timeseries_map.get(component_type, {})
1605
+
1606
+ if not component_timeseries:
1607
+ return
1608
+
1609
+ timeseries_count = 0
1610
+
1611
+ # Iterate through each timeseries attribute (e.g., 'p', 'q', 'p_set', 'p_max_pu', etc.)
1612
+ for attr_name, timeseries_df in component_timeseries.items():
1613
+ if component_name not in timeseries_df.columns:
1614
+ continue
1615
+
1616
+ # Get the timeseries data for this component
1617
+ component_series = timeseries_df[component_name]
1618
+
1619
+ # Skip if all values are NaN
1620
+ if component_series.isna().all():
1621
+ continue
1622
+
1623
+ try:
1624
+ # Convert pandas Series to list of values (using optimized approach)
1625
+ values = []
1626
+
1627
+ for value in component_series:
1628
+ # Skip NaN values by using 0.0 as default (PyPSA convention)
1629
+ if pd.isna(value):
1630
+ values.append(0.0)
1631
+ else:
1632
+ values.append(float(value))
1633
+
1634
+ if not values:
1635
+ self.logger.warning(
1636
+ f"No valid timeseries points for '{attr_name}' on {component_type} '{component_name}'"
1637
+ )
1638
+ continue
1639
+
1640
+ # Use optimized timeseries attribute setting
1641
+ set_timeseries_attribute(
1642
+ conn, component_id, attr_name, values, scenario_id
1643
+ )
1644
+ timeseries_count += 1
1645
+
1646
+ except Exception as e:
1647
+ if strict_validation:
1648
+ raise
1649
+ else:
1650
+ self.logger.warning(
1651
+ f"Skipping timeseries attribute '{attr_name}' for {component_type} component '{component_name}': {e}"
1652
+ )
1653
+ continue
1654
+
1655
+ if timeseries_count > 0:
1656
+ self.logger.debug(
1657
+ f"Imported {timeseries_count} timeseries attributes for {component_type} '{component_name}'"
1658
+ )
1659
+
1660
+ def _generate_unique_name(self, base_name: str, component_type: str) -> str:
1661
+ """
1662
+ Generate a unique name for a component, ensuring no duplicates across all component types.
1663
+
1664
+ Args:
1665
+ base_name: The original name to start with
1666
+ component_type: The type of component (used in the suffix if needed)
1667
+
1668
+ Returns:
1669
+ A unique name that hasn't been used yet
1670
+ """
1671
+ # First try the base name
1672
+ if base_name not in self._used_names:
1673
+ self._used_names.add(base_name)
1674
+ return base_name
1675
+
1676
+ # If base name is taken, try appending the component type
1677
+ typed_name = f"{base_name}_{component_type.lower()}"
1678
+ if typed_name not in self._used_names:
1679
+ self._used_names.add(typed_name)
1680
+ return typed_name
1681
+
1682
+ # If that's taken too, start adding numbers
1683
+ counter = 1
1684
+ while True:
1685
+ unique_name = f"{base_name}_{counter}"
1686
+ if unique_name not in self._used_names:
1687
+ self._used_names.add(unique_name)
1688
+ return unique_name
1689
+ counter += 1
1690
+
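A short sketch of the resulting naming sequence for a repeated component name (the name "battery" is made up; this calls the private helper directly for illustration):

```python
from pyconvexity.io.netcdf_importer import NetCDFModelImporter

importer = NetCDFModelImporter()
print(importer._generate_unique_name("battery", "STORE"))         # battery
print(importer._generate_unique_name("battery", "STORAGE_UNIT"))  # battery_storage_unit
print(importer._generate_unique_name("battery", "STORAGE_UNIT"))  # battery_1
```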
1691
+ def _generate_scattered_coordinates(
1692
+ self,
1693
+ bus_lat: float,
1694
+ bus_lon: float,
1695
+ scatter_radius: float,
1696
+ component_count_at_bus: int,
1697
+ component_index: int,
1698
+ ) -> Tuple[float, float]:
1699
+ """
1700
+ Generate scattered coordinates around a bus location.
1701
+
1702
+ Args:
1703
+ bus_lat: Bus latitude
1704
+ bus_lon: Bus longitude
1705
+ scatter_radius: Radius in degrees to scatter within
1706
+ component_count_at_bus: Total number of components at this bus
1707
+ component_index: Index of this component (0-based)
1708
+
1709
+ Returns:
1710
+ Tuple of (latitude, longitude) for the scattered position
1711
+ """
1712
+ if component_count_at_bus == 1:
1713
+ # Single component - place it at a moderate distance from the bus
1714
+ angle = random.uniform(0, 2 * math.pi)
1715
+ distance = scatter_radius * random.uniform(
1716
+ 0.5, 0.8
1717
+ ) # 50-80% of scatter radius
1718
+ else:
1719
+ # Multiple components - arrange in a rough circle with some randomness
1720
+ base_angle = (2 * math.pi * component_index) / component_count_at_bus
1721
+ angle_jitter = random.uniform(
1722
+ -math.pi / 8, math.pi / 8
1723
+ ) # ±22.5 degrees jitter
1724
+ angle = base_angle + angle_jitter
1725
+
1726
+ # Vary distance randomly within the radius (use more of the available radius)
1727
+ distance = scatter_radius * random.uniform(
1728
+ 0.6, 1.0
1729
+ ) # 60-100% of scatter radius
1730
+
1731
+ # Calculate new coordinates
1732
+ new_lat = bus_lat + distance * math.cos(angle)
1733
+ new_lon = bus_lon + distance * math.sin(angle)
1734
+
1735
+ return new_lat, new_lon
1736
+
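A sketch of the circular placement for several components sharing one bus (bus location and radius are made up): three components end up at roughly 0, 120, and 240 degrees around the bus, each at 60-100% of the scatter radius plus random jitter.

```python
# Mirror of the multi-component branch of _generate_scattered_coordinates.
import math
import random

random.seed(42)
bus_lat, bus_lon, scatter_radius = 59.0, 10.0, 0.1

for index in range(3):
    base_angle = (2 * math.pi * index) / 3
    angle = base_angle + random.uniform(-math.pi / 8, math.pi / 8)
    distance = scatter_radius * random.uniform(0.6, 1.0)
    lat = bus_lat + distance * math.cos(angle)
    lon = bus_lon + distance * math.sin(angle)
    print(round(lat, 4), round(lon, 4))
```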
1737
+ def _get_bus_coordinates_map(self, conn) -> Dict[int, Tuple[float, float]]:
1738
+ """
1739
+ Get a mapping from bus component ID to coordinates.
1740
+
1741
+ Returns:
1742
+ Dictionary mapping bus component ID to (latitude, longitude) tuple
1743
+ """
1744
+ cursor = conn.execute(
1745
+ """
1746
+ SELECT id, latitude, longitude FROM components
1747
+ WHERE component_type = 'BUS'
1748
+ AND latitude IS NOT NULL AND longitude IS NOT NULL
1749
+ AND NOT (latitude = 0 AND longitude = 0)
1750
+ """,
1751
+ (),
1752
+ )
1753
+
1754
+ bus_coords = {row[0]: (row[1], row[2]) for row in cursor.fetchall()}
1755
+ return bus_coords
1756
+
1757
+ def _resolve_original_component_name(self, unique_name: str) -> str:
1758
+ """
1759
+ Resolve a potentially modified unique name back to its original name for CSV lookup.
1760
+
1761
+ Args:
1762
+ unique_name: The unique name that may have been modified (e.g., "component_1", "component_generator")
1763
+
1764
+ Returns:
1765
+ The original name for CSV lookup
1766
+ """
1767
+ # Remove common suffixes added by _generate_unique_name
1768
+ # Pattern 1: Remove "_NUMBER" suffix (e.g., "component_1" -> "component")
1769
+ import re
1770
+
1771
+ # First try removing "_NUMBER" pattern
1772
+ no_number_suffix = re.sub(r"_\d+$", "", unique_name)
1773
+ if no_number_suffix != unique_name:
1774
+ return no_number_suffix
1775
+
1776
+ # Then try removing "_COMPONENT_TYPE" pattern (e.g., "component_generator" -> "component")
1777
+ component_types = [
1778
+ "bus",
1779
+ "generator",
1780
+ "load",
1781
+ "line",
1782
+ "link",
1783
+ "storage_unit",
1784
+ "store",
1785
+ ]
1786
+ for comp_type in component_types:
1787
+ suffix = f"_{comp_type.lower()}"
1788
+ if unique_name.endswith(suffix):
1789
+ return unique_name[: -len(suffix)]
1790
+
1791
+ # If no patterns match, return the original name
1792
+ return unique_name
1793
+
1794
+ def _get_csv_coordinates(
1795
+ self,
1796
+ component_name: str,
1797
+ location_map: Optional[Dict[str, Tuple[float, float]]],
1798
+ ) -> Optional[Tuple[float, float]]:
1799
+ """
1800
+ Get coordinates for a component from the CSV location map.
1801
+
1802
+ Args:
1803
+ component_name: The component name (potentially modified for uniqueness)
1804
+ location_map: Dictionary mapping original names to coordinates
1805
+
1806
+ Returns:
1807
+ (latitude, longitude) tuple if found, None otherwise
1808
+ """
1809
+ if not location_map:
1810
+ return None
1811
+
1812
+ # Try exact match first
1813
+ if component_name in location_map:
1814
+ coordinates = location_map[component_name]
1815
+ self.logger.debug(
1816
+ f"CSV location exact match for '{component_name}': {coordinates}"
1817
+ )
1818
+ return coordinates
1819
+
1820
+ # Try resolving back to original name
1821
+ original_name = self._resolve_original_component_name(component_name)
1822
+ if original_name != component_name and original_name in location_map:
1823
+ coordinates = location_map[original_name]
1824
+ self.logger.debug(
1825
+ f"CSV location resolved match for '{component_name}' -> '{original_name}': {coordinates}"
1826
+ )
1827
+ return coordinates
1828
+
1829
+ # No match found
1830
+ self.logger.debug(
1831
+ f"No CSV location found for component '{component_name}' (original: '{original_name}')"
1832
+ )
1833
+ return None