pyconvexity-0.4.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pyconvexity might be problematic.

Files changed (44)
  1. pyconvexity/__init__.py +241 -0
  2. pyconvexity/_version.py +1 -0
  3. pyconvexity/core/__init__.py +60 -0
  4. pyconvexity/core/database.py +485 -0
  5. pyconvexity/core/errors.py +106 -0
  6. pyconvexity/core/types.py +400 -0
  7. pyconvexity/dashboard.py +265 -0
  8. pyconvexity/data/README.md +101 -0
  9. pyconvexity/data/__init__.py +17 -0
  10. pyconvexity/data/loaders/__init__.py +3 -0
  11. pyconvexity/data/loaders/cache.py +213 -0
  12. pyconvexity/data/schema/01_core_schema.sql +420 -0
  13. pyconvexity/data/schema/02_data_metadata.sql +120 -0
  14. pyconvexity/data/schema/03_validation_data.sql +507 -0
  15. pyconvexity/data/sources/__init__.py +5 -0
  16. pyconvexity/data/sources/gem.py +442 -0
  17. pyconvexity/io/__init__.py +26 -0
  18. pyconvexity/io/excel_exporter.py +1226 -0
  19. pyconvexity/io/excel_importer.py +1381 -0
  20. pyconvexity/io/netcdf_exporter.py +191 -0
  21. pyconvexity/io/netcdf_importer.py +1802 -0
  22. pyconvexity/models/__init__.py +195 -0
  23. pyconvexity/models/attributes.py +730 -0
  24. pyconvexity/models/carriers.py +159 -0
  25. pyconvexity/models/components.py +611 -0
  26. pyconvexity/models/network.py +503 -0
  27. pyconvexity/models/results.py +148 -0
  28. pyconvexity/models/scenarios.py +234 -0
  29. pyconvexity/solvers/__init__.py +29 -0
  30. pyconvexity/solvers/pypsa/__init__.py +30 -0
  31. pyconvexity/solvers/pypsa/api.py +446 -0
  32. pyconvexity/solvers/pypsa/batch_loader.py +296 -0
  33. pyconvexity/solvers/pypsa/builder.py +655 -0
  34. pyconvexity/solvers/pypsa/clearing_price.py +678 -0
  35. pyconvexity/solvers/pypsa/constraints.py +405 -0
  36. pyconvexity/solvers/pypsa/solver.py +1442 -0
  37. pyconvexity/solvers/pypsa/storage.py +2096 -0
  38. pyconvexity/timeseries.py +330 -0
  39. pyconvexity/validation/__init__.py +25 -0
  40. pyconvexity/validation/rules.py +312 -0
  41. pyconvexity-0.4.8.dist-info/METADATA +148 -0
  42. pyconvexity-0.4.8.dist-info/RECORD +44 -0
  43. pyconvexity-0.4.8.dist-info/WHEEL +5 -0
  44. pyconvexity-0.4.8.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1802 @@
1
+ """
2
+ NetCDF importer for PyConvexity energy system models.
3
+ Imports PyPSA NetCDF files into PyConvexity database format.
4
+ """
5
+
6
+ import pandas as pd
7
+ import numpy as np
8
+ from typing import Dict, Any, Optional, Callable, Tuple, List
9
+ from pathlib import Path
10
+ import random
11
+ import math
12
+
13
+ # Import functions directly from pyconvexity
14
+ from pyconvexity.core.database import open_connection, create_database_with_schema
15
+ from pyconvexity.core.types import (
16
+ StaticValue,
17
+ CreateNetworkRequest,
18
+ CreateComponentRequest,
19
+ )
20
+ from pyconvexity.core.errors import PyConvexityError as DbError, ValidationError
21
+ from pyconvexity.models import (
22
+ create_network,
23
+ create_carrier,
24
+ insert_component,
25
+ set_static_attribute,
26
+ get_bus_name_to_id_map,
27
+ set_timeseries_attribute,
28
+ get_component_type,
29
+ get_attribute,
30
+ get_network_time_periods,
31
+ )
32
+ from pyconvexity.validation import get_validation_rule
33
+ from pyconvexity.timeseries import set_timeseries
34
+
35
+
36
+ def _pandas_freq_to_iso8601(freq: str) -> str:
37
+ """
38
+ Convert pandas frequency code to ISO 8601 duration format.
39
+
40
+ Args:
41
+ freq: Pandas frequency code (e.g., "H", "30T", "2H", "15min", "D")
42
+
43
+ Returns:
44
+ ISO 8601 duration string (e.g., "PT1H", "PT30M", "PT2H", "PT15M", "P1D")
45
+ """
46
+ if not freq:
47
+ return "PT1H" # Default to hourly
48
+
49
+ freq = freq.strip().upper()
50
+
51
+ # Handle common pandas frequency codes
52
+ # Hourly patterns: "H", "1H", "2H", etc.
53
+ if freq == "H" or freq == "1H":
54
+ return "PT1H"
55
+ if freq.endswith("H"):
56
+ try:
57
+ hours = int(freq[:-1])
58
+ return f"PT{hours}H"
59
+ except ValueError:
60
+ pass
61
+
62
+ # Minute patterns: "T", "MIN", "30T", "30MIN", "15T", etc.
63
+ if freq == "T" or freq == "MIN" or freq == "1T" or freq == "1MIN":
64
+ return "PT1M"
65
+ if freq.endswith("T"):
66
+ try:
67
+ minutes = int(freq[:-1])
68
+ return f"PT{minutes}M"
69
+ except ValueError:
70
+ pass
71
+ if freq.endswith("MIN"):
72
+ try:
73
+ minutes = int(freq[:-3])
74
+ return f"PT{minutes}M"
75
+ except ValueError:
76
+ pass
77
+
78
+ # Second patterns: "S", "1S", "30S", etc.
79
+ if freq == "S" or freq == "1S":
80
+ return "PT1S"
81
+ if freq.endswith("S") and not freq.endswith("MS"):
82
+ try:
83
+ seconds = int(freq[:-1])
84
+ return f"PT{seconds}S"
85
+ except ValueError:
86
+ pass
87
+
88
+ # Daily patterns: "D", "1D", etc.
89
+ if freq == "D" or freq == "1D":
90
+ return "P1D"
91
+ if freq.endswith("D"):
92
+ try:
93
+ days = int(freq[:-1])
94
+ return f"P{days}D"
95
+ except ValueError:
96
+ pass
97
+
98
+ # Weekly patterns: "W", "1W", etc.
99
+ if freq == "W" or freq == "1W" or freq.startswith("W-"):
100
+ return "P1W"
101
+
102
+ # If we can't parse it, default to hourly
103
+ return "PT1H"
104
+
105
+
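For reference, a short sketch of the conversions this helper is written to produce (assuming the module path pyconvexity.io.netcdf_importer; the function is module-private):

    from pyconvexity.io.netcdf_importer import _pandas_freq_to_iso8601

    _pandas_freq_to_iso8601("H")      # "PT1H"  (hourly)
    _pandas_freq_to_iso8601("2H")     # "PT2H"
    _pandas_freq_to_iso8601("30T")    # "PT30M"
    _pandas_freq_to_iso8601("15min")  # "PT15M"
    _pandas_freq_to_iso8601("D")      # "P1D"
    _pandas_freq_to_iso8601("W-SUN")  # "P1W"
    _pandas_freq_to_iso8601("")       # "PT1H"  (unparseable input falls back to hourly)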
106
+ class NetCDFModelImporter:
107
+ """Import PyPSA NetCDF files into PyConvexity database format"""
108
+
109
+ def __init__(self):
110
+ # Set random seed for reproducible coordinate generation
111
+ random.seed(42)
112
+ np.random.seed(42)
113
+ self._used_names = set() # Global registry of all used names
114
+
115
+ def import_netcdf_to_database(
116
+ self,
117
+ netcdf_path: str,
118
+ db_path: str,
119
+ network_name: str,
120
+ network_description: Optional[str] = None,
121
+ progress_callback: Optional[Callable[[int, str], None]] = None,
122
+ strict_validation: bool = False,
123
+ ) -> Dict[str, Any]:
124
+ """
125
+ Import a PyPSA NetCDF file into a new database.
126
+
127
+ Args:
128
+ netcdf_path: Path to the PyPSA NetCDF file
129
+ db_path: Path where to create the database
130
+ network_name: Name for the imported network
131
+ network_description: Optional description
132
+ progress_callback: Optional callback for progress updates (progress: int, message: str)
133
+ strict_validation: Whether to fail on attributes not defined in the database schema.
134
+ If True, the import fails on any attribute not defined in the schema.
135
+ If False (default), undefined attributes are skipped with warnings.
136
+
137
+ Returns:
138
+ Dictionary with import results and statistics
139
+ """
140
+ try:
141
+ if progress_callback:
142
+ progress_callback(0, "Starting NetCDF import...")
143
+
144
+ # Import PyPSA
145
+ pypsa = self._import_pypsa()
146
+
147
+ if progress_callback:
148
+ progress_callback(5, "Loading PyPSA network from NetCDF...")
149
+
150
+ # Load the PyPSA network
151
+ network = pypsa.Network(netcdf_path)
152
+
153
+ if progress_callback:
154
+ progress_callback(
155
+ 15,
156
+ f"Loaded network: {len(network.buses)} buses, {len(network.generators)} generators",
157
+ )
158
+
159
+ # Use the shared import logic
160
+ return self._import_network_to_database(
161
+ network=network,
162
+ db_path=db_path,
163
+ network_name=network_name,
164
+ network_description=network_description,
165
+ progress_callback=progress_callback,
166
+ strict_validation=strict_validation,
167
+ import_source="NetCDF",
168
+ netcdf_path=netcdf_path,
169
+ )
170
+
171
+ except Exception as e:
172
+ if progress_callback:
173
+ progress_callback(None, f"Error: {str(e)}")
174
+ raise
175
+
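A minimal usage sketch for the method above (file names are hypothetical; PyPSA must be installed, since the importer loads the network via pypsa.Network):

    from pyconvexity.io.netcdf_importer import NetCDFModelImporter

    def on_progress(pct, msg):
        # pct is an int from 0-100, or None when an error message is reported
        print(pct, msg)

    importer = NetCDFModelImporter()
    result = importer.import_netcdf_to_database(
        netcdf_path="example_model.nc",   # hypothetical input file
        db_path="example_model.db",       # database file to create
        network_name="Example Network",
        progress_callback=on_progress,
        strict_validation=False,          # skip undefined attributes instead of failing
    )
    if result["success"]:
        print(result["stats"]["total_components"], "components imported")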
176
+ def import_csv_to_database(
177
+ self,
178
+ csv_directory: str,
179
+ db_path: str,
180
+ network_name: str,
181
+ network_description: Optional[str] = None,
182
+ progress_callback: Optional[Callable[[int, str], None]] = None,
183
+ strict_validation: bool = False,
184
+ ) -> Dict[str, Any]:
185
+ """
186
+ Import a PyPSA network from CSV files into a new database.
187
+
188
+ Args:
189
+ csv_directory: Path to the directory containing PyPSA CSV files
190
+ db_path: Path where to create the database
191
+ network_name: Name for the imported network
192
+ network_description: Optional description
193
+ progress_callback: Optional callback for progress updates (progress: int, message: str)
194
+ strict_validation: Whether to fail on attributes not defined in the database schema (False skips them with warnings)
195
+
196
+ Returns:
197
+ Dictionary with import results and statistics
198
+ """
199
+ try:
200
+ if progress_callback:
201
+ progress_callback(0, "Starting PyPSA CSV import...")
202
+
203
+ # Import PyPSA
204
+ pypsa = self._import_pypsa()
205
+
206
+ if progress_callback:
207
+ progress_callback(5, "Validating CSV files...")
208
+
209
+ # Validate CSV directory and files before attempting import
210
+ self._validate_csv_directory(csv_directory)
211
+
212
+ if progress_callback:
213
+ progress_callback(10, "Loading PyPSA network from CSV files...")
214
+
215
+ # Load the PyPSA network from CSV directory
216
+ network = pypsa.Network()
217
+
218
+ try:
219
+ network.import_from_csv_folder(csv_directory)
220
+ except Exception as e:
221
+ # Provide more helpful error message
222
+ error_msg = f"PyPSA CSV import failed: {str(e)}"
223
+ if "'name'" in str(e):
224
+ error_msg += "\n\nThis usually means one of your CSV files is missing a 'name' column. PyPSA CSV files require:\n"
225
+ error_msg += "- All component CSV files (buses.csv, generators.csv, etc.) must have a 'name' column as the first column\n"
226
+ error_msg += "- The 'name' column should contain unique identifiers for each component\n"
227
+ error_msg += "- Check that your CSV files follow the PyPSA CSV format specification"
228
+ elif "KeyError" in str(e):
229
+ error_msg += f"\n\nThis indicates a required column is missing from one of your CSV files. "
230
+ error_msg += "Please ensure your CSV files follow the PyPSA format specification."
231
+
232
+ raise ValueError(error_msg)
233
+
234
+ if progress_callback:
235
+ progress_callback(
236
+ 20,
237
+ f"Loaded network: {len(network.buses)} buses, {len(network.generators)} generators",
238
+ )
239
+
240
+ # Use the shared import logic
241
+ return self._import_network_to_database(
242
+ network=network,
243
+ db_path=db_path,
244
+ network_name=network_name,
245
+ network_description=network_description,
246
+ progress_callback=progress_callback,
247
+ strict_validation=strict_validation,
248
+ import_source="CSV",
249
+ )
250
+
251
+ except Exception as e:
252
+ if progress_callback:
253
+ progress_callback(None, f"Error: {str(e)}")
254
+ raise
255
+
256
+ def _import_pypsa(self):
257
+ """Import PyPSA with standard error handling."""
258
+ try:
259
+ import pypsa
260
+
261
+ return pypsa
262
+ except ImportError as e:
263
+ raise ImportError(
264
+ "PyPSA is not installed or could not be imported. "
265
+ "Please ensure it is installed correctly in the environment."
266
+ ) from e
267
+
268
+ def _validate_csv_directory(self, csv_directory: str) -> None:
269
+ """Validate that the CSV directory contains valid PyPSA CSV files"""
270
+ import os
271
+ import pandas as pd
272
+
273
+ csv_path = Path(csv_directory)
274
+ if not csv_path.exists():
275
+ raise ValueError(f"CSV directory does not exist: {csv_directory}")
276
+
277
+ if not csv_path.is_dir():
278
+ raise ValueError(f"Path is not a directory: {csv_directory}")
279
+
280
+ # Find CSV files
281
+ csv_files = list(csv_path.glob("*.csv"))
282
+ if not csv_files:
283
+ raise ValueError(f"No CSV files found in directory: {csv_directory}")
284
+
285
+ # Check each CSV file for basic validity
286
+ component_files = [
287
+ "buses.csv",
288
+ "generators.csv",
289
+ "loads.csv",
290
+ "lines.csv",
291
+ "links.csv",
292
+ "storage_units.csv",
293
+ "stores.csv",
294
+ ]
295
+ required_files = ["buses.csv"] # At minimum, we need buses
296
+
297
+ # Check for required files
298
+ existing_files = [f.name for f in csv_files]
299
+ missing_required = [f for f in required_files if f not in existing_files]
300
+ if missing_required:
301
+ raise ValueError(f"Missing required CSV files: {missing_required}")
302
+
303
+ # Validate each component CSV file that exists
304
+ for csv_file in csv_files:
305
+ if csv_file.name in component_files:
306
+ try:
307
+ df = pd.read_csv(csv_file, nrows=0) # Just read headers
308
+ if "name" not in df.columns:
309
+ raise ValueError(
310
+ f"CSV file '{csv_file.name}' is missing required 'name' column. Found columns: {list(df.columns)}"
311
+ )
312
+ except Exception as e:
313
+ raise ValueError(
314
+ f"Error reading CSV file '{csv_file.name}': {str(e)}"
315
+ )
316
+
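As a sketch, the smallest CSV folder that passes this validation contains a buses.csv with a 'name' column (values and the extra PyPSA-style columns below are illustrative; other component files such as generators.csv are optional and checked the same way):

    csv_model/
        buses.csv

    # buses.csv
    name,v_nom,carrier,x,y
    bus_north,380,AC,10.2,53.5
    bus_south,380,AC,11.1,48.1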
317
+ def _import_network_to_database(
318
+ self,
319
+ network,
320
+ db_path: str,
321
+ network_name: str,
322
+ network_description: Optional[str] = None,
323
+ progress_callback: Optional[Callable[[int, str], None]] = None,
324
+ strict_validation: bool = False,
325
+ import_source: str = "PyPSA",
326
+ netcdf_path: Optional[str] = None,
327
+ ) -> Dict[str, Any]:
328
+ """
329
+ Shared logic to import a PyPSA network object into a database.
330
+ This method is used by both NetCDF and CSV import functions.
331
+ """
332
+ try:
333
+ if progress_callback:
334
+ progress_callback(0, "Starting network import...")
335
+
336
+ # Create the database with schema using atomic utility
337
+ create_database_with_schema(db_path)
338
+
339
+ if progress_callback:
340
+ progress_callback(5, "Database schema created")
341
+
342
+ # Connect to database
343
+ conn = open_connection(db_path)
344
+
345
+ try:
346
+ # Load companion location CSV if available (for NetCDF imports only)
347
+ location_map = None
348
+ if import_source == "NetCDF" and netcdf_path:
349
+ location_map = self._detect_and_load_location_csv(netcdf_path)
350
+
351
+ # Create the network record
352
+ self._create_network_record(
353
+ conn, network, network_name, network_description
354
+ )
355
+
356
+ if progress_callback:
357
+ progress_callback(10, "Created network record")
358
+
359
+ # Note: In the new schema, the base network uses scenario_id = NULL
360
+ # No master scenario record is needed in the scenarios table
361
+
362
+ # Create network time periods from PyPSA snapshots
363
+ self._create_network_time_periods(conn, network)
364
+
365
+ if progress_callback:
366
+ progress_callback(15, f"Created network time periods")
367
+
368
+ # Import carriers
369
+ carriers_count = self._import_carriers(conn, network)
370
+
371
+ if progress_callback:
372
+ progress_callback(20, f"Imported {carriers_count} carriers")
373
+
374
+ # Import buses
375
+ buses_count = self._import_buses(conn, network, strict_validation)
376
+
377
+ if progress_callback:
378
+ progress_callback(25, f"Imported {buses_count} buses")
379
+
380
+ # Calculate scatter radius for non-bus components based on bus separation
381
+ bus_coordinates = self._get_bus_coordinates(conn)
382
+ scatter_radius = self._calculate_bus_separation_radius(bus_coordinates)
383
+
384
+ # Import generators
385
+ generators_count = self._import_generators(
386
+ conn, network, strict_validation, scatter_radius, location_map
387
+ )
388
+
389
+ if progress_callback:
390
+ progress_callback(30, f"Imported {generators_count} generators")
391
+
392
+ # Import loads
393
+ loads_count = self._import_loads(
394
+ conn, network, strict_validation, scatter_radius, location_map
395
+ )
396
+
397
+ if progress_callback:
398
+ progress_callback(35, f"Imported {loads_count} loads")
399
+
400
+ # Import lines
401
+ lines_count = self._import_lines(
402
+ conn, network, strict_validation, location_map
403
+ )
404
+
405
+ if progress_callback:
406
+ progress_callback(40, f"Imported {lines_count} lines")
407
+
408
+ # Import links
409
+ links_count = self._import_links(
410
+ conn, network, strict_validation, location_map
411
+ )
412
+
413
+ if progress_callback:
414
+ progress_callback(45, f"Imported {links_count} links")
415
+
416
+ # Import storage units
417
+ storage_units_count = self._import_storage_units(
418
+ conn, network, strict_validation, scatter_radius, location_map
419
+ )
420
+
421
+ if progress_callback:
422
+ progress_callback(
423
+ 50, f"Imported {storage_units_count} storage units"
424
+ )
425
+
426
+ # Import stores
427
+ stores_count = self._import_stores(
428
+ conn, network, strict_validation, scatter_radius, location_map
429
+ )
430
+
431
+ if progress_callback:
432
+ progress_callback(55, f"Imported {stores_count} stores")
433
+
434
+ conn.commit()
435
+
436
+ if progress_callback:
437
+ progress_callback(100, "Import completed successfully")
438
+
439
+ # Collect final statistics
440
+ stats = {
441
+ "network_name": network_name,
442
+ "carriers": carriers_count,
443
+ "buses": buses_count,
444
+ "generators": generators_count,
445
+ "loads": loads_count,
446
+ "lines": lines_count,
447
+ "links": links_count,
448
+ "storage_units": storage_units_count,
449
+ "stores": stores_count,
450
+ "total_components": (
451
+ buses_count
452
+ + generators_count
453
+ + loads_count
454
+ + lines_count
455
+ + links_count
456
+ + storage_units_count
457
+ + stores_count
458
+ ),
459
+ "snapshots": (
460
+ len(network.snapshots) if hasattr(network, "snapshots") else 0
461
+ ),
462
+ }
463
+
464
+ return {
465
+ "success": True,
466
+ "message": f"Network imported successfully from {import_source}",
467
+ "db_path": db_path,
468
+ "stats": stats,
469
+ }
470
+
471
+ finally:
472
+ conn.close()
473
+
474
+ except Exception as e:
475
+ if progress_callback:
476
+ progress_callback(None, f"Error: {str(e)}")
477
+ raise
478
+
479
+ # Helper methods for the import process
480
+ # Note: These are simplified versions of the methods from the original netcdf_importer.py
481
+ # The full implementation would include all the detailed import logic for each component type
482
+
483
+ def _extract_datetime_snapshots(self, network) -> pd.DatetimeIndex:
484
+ """Extract datetime snapshots from a PyPSA network"""
485
+ if not hasattr(network, "snapshots") or len(network.snapshots) == 0:
486
+ return pd.DatetimeIndex([])
487
+
488
+ snapshots = network.snapshots
489
+
490
+ try:
491
+ # Try direct conversion first (works for simple DatetimeIndex)
492
+ return pd.to_datetime(snapshots)
493
+ except (TypeError, ValueError):
494
+ # Handle MultiIndex case
495
+ if hasattr(snapshots, "nlevels") and snapshots.nlevels > 1:
496
+ # Try to use the timesteps attribute if available (common in multi-period networks)
497
+ if hasattr(network, "timesteps") and isinstance(
498
+ network.timesteps, pd.DatetimeIndex
499
+ ):
500
+ return network.timesteps
501
+
502
+ # Try to extract datetime from the last level of the MultiIndex
503
+ try:
504
+ # Get the last level (usually the timestep level)
505
+ last_level = snapshots.get_level_values(snapshots.nlevels - 1)
506
+ datetime_snapshots = pd.to_datetime(last_level)
507
+ return datetime_snapshots
508
+ except Exception:
509
+ pass
510
+
511
+ # Final fallback: create a default hourly range
512
+ default_start = pd.Timestamp("2024-01-01 00:00:00")
513
+ default_end = pd.Timestamp("2024-01-01 23:59:59")
514
+ return pd.date_range(start=default_start, end=default_end, freq="H")
515
+
516
+ def _create_network_record(
517
+ self,
518
+ conn,
519
+ network,
520
+ network_name: str,
521
+ network_description: Optional[str] = None,
522
+ ) -> None:
523
+ """Create the network record and return network ID"""
524
+
525
+ # Extract time information from PyPSA network using our robust helper
526
+ snapshots = self._extract_datetime_snapshots(network)
527
+
528
+ if len(snapshots) > 0:
529
+ time_start = snapshots.min().strftime("%Y-%m-%d %H:%M:%S")
530
+ time_end = snapshots.max().strftime("%Y-%m-%d %H:%M:%S")
531
+
532
+ # Try to infer time interval and convert to ISO 8601 format
533
+ if len(snapshots) > 1:
534
+ freq = pd.infer_freq(snapshots)
535
+ time_interval = _pandas_freq_to_iso8601(freq) if freq else "PT1H"
536
+ else:
537
+ time_interval = "PT1H"
538
+ else:
539
+ # Default time range if no snapshots
540
+ time_start = "2024-01-01 00:00:00"
541
+ time_end = "2024-01-01 23:59:59"
542
+ time_interval = "PT1H"
543
+
544
+ description = (
545
+ network_description
546
+ or f"Imported from PyPSA NetCDF on {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')}"
547
+ )
548
+
549
+ request = CreateNetworkRequest(
550
+ name=network_name,
551
+ description=description,
552
+ time_resolution=time_interval,
553
+ start_time=time_start,
554
+ end_time=time_end,
555
+ )
556
+ create_network(conn, request) # Single network per database
557
+
558
+ def _create_network_time_periods(self, conn, network) -> None:
559
+ """Create network time periods from PyPSA snapshots using optimized approach (single network per database)"""
560
+ # Use our robust helper to extract datetime snapshots
561
+ snapshots = self._extract_datetime_snapshots(network)
562
+
563
+ if len(snapshots) == 0:
564
+ return
565
+
566
+ # Insert optimized time periods metadata
567
+ period_count = len(snapshots)
568
+ start_timestamp = int(snapshots[0].timestamp())
569
+
570
+ # Calculate interval in seconds
571
+ if len(snapshots) > 1:
572
+ interval_seconds = int((snapshots[1] - snapshots[0]).total_seconds())
573
+ else:
574
+ interval_seconds = 3600 # Default to hourly
575
+
576
+ conn.execute(
577
+ """
578
+ INSERT INTO network_time_periods (period_count, start_timestamp, interval_seconds)
579
+ VALUES (?, ?, ?)
580
+ """,
581
+ (period_count, start_timestamp, interval_seconds),
582
+ )
583
+
584
+ # Placeholder methods - in a full implementation, these would contain
585
+ # the detailed import logic from the original netcdf_importer.py
586
+
587
+ def _import_carriers(self, conn, network) -> int:
588
+ """Import carriers from PyPSA network, discovering from both network and component levels (single network per database)"""
589
+ count = 0
590
+ created_carriers = set()
591
+
592
+ # Discover all carriers from components (not just n.carriers table)
593
+ all_carriers = set()
594
+
595
+ # Get carriers from network.carriers table if it exists
596
+ if hasattr(network, "carriers") and not network.carriers.empty:
597
+ all_carriers.update(network.carriers.index)
598
+
599
+ # Get carriers from generators
600
+ if (
601
+ hasattr(network, "generators")
602
+ and not network.generators.empty
603
+ and "carrier" in network.generators.columns
604
+ ):
605
+ component_carriers = set(network.generators.carrier.dropna().unique())
606
+ all_carriers.update(component_carriers)
607
+
608
+ # Get carriers from storage units
609
+ if (
610
+ hasattr(network, "storage_units")
611
+ and not network.storage_units.empty
612
+ and "carrier" in network.storage_units.columns
613
+ ):
614
+ component_carriers = set(network.storage_units.carrier.dropna().unique())
615
+ all_carriers.update(component_carriers)
616
+
617
+ # Get carriers from stores
618
+ if (
619
+ hasattr(network, "stores")
620
+ and not network.stores.empty
621
+ and "carrier" in network.stores.columns
622
+ ):
623
+ component_carriers = set(network.stores.carrier.dropna().unique())
624
+ all_carriers.update(component_carriers)
625
+
626
+ # Get carriers from loads (if they have carriers)
627
+ if (
628
+ hasattr(network, "loads")
629
+ and not network.loads.empty
630
+ and "carrier" in network.loads.columns
631
+ ):
632
+ component_carriers = set(network.loads.carrier.dropna().unique())
633
+ all_carriers.update(component_carriers)
634
+
635
+ # Get carriers from buses (if they have carriers)
636
+ if (
637
+ hasattr(network, "buses")
638
+ and not network.buses.empty
639
+ and "carrier" in network.buses.columns
640
+ ):
641
+ component_carriers = set(network.buses.carrier.dropna().unique())
642
+ all_carriers.update(component_carriers)
643
+
644
+ # Convert to sorted list for consistent ordering
645
+ all_carriers = sorted(list(all_carriers))
646
+
647
+ # Define a color palette similar to the Python code
648
+ color_palette = [
649
+ "#1f77b4", # C0 - blue
650
+ "#ff7f0e", # C1 - orange
651
+ "#2ca02c", # C2 - green
652
+ "#d62728", # C3 - red
653
+ "#9467bd", # C4 - purple
654
+ "#8c564b", # C5 - brown
655
+ "#e377c2", # C6 - pink
656
+ "#7f7f7f", # C7 - gray
657
+ "#bcbd22", # C8 - olive
658
+ "#17becf", # C9 - cyan
659
+ "#aec7e8", # light blue
660
+ "#ffbb78", # light orange
661
+ "#98df8a", # light green
662
+ "#ff9896", # light red
663
+ "#c5b0d5", # light purple
664
+ ]
665
+
666
+ # Create carriers from discovered list
667
+ for i, carrier_name in enumerate(all_carriers):
668
+ # Get carrier data from network.carriers if available
669
+ carrier_data = {}
670
+ if (
671
+ hasattr(network, "carriers")
672
+ and not network.carriers.empty
673
+ and carrier_name in network.carriers.index
674
+ ):
675
+ # Use .iloc with index position to avoid fragmentation
676
+ carrier_idx = network.carriers.index.get_loc(carrier_name)
677
+ carrier_data = network.carriers.iloc[carrier_idx]
678
+
679
+ # Extract attributes with defaults
680
+ co2_emissions = carrier_data.get("co2_emissions", 0.0)
681
+
682
+ # Use color from network.carriers if available, otherwise assign from palette
683
+ if "color" in carrier_data and pd.notna(carrier_data["color"]):
684
+ color = carrier_data["color"]
685
+ else:
686
+ color = color_palette[i % len(color_palette)]
687
+
688
+ nice_name = carrier_data.get("nice_name", None)
689
+
690
+ # Create the carrier
691
+ create_carrier(conn, carrier_name, co2_emissions, color, nice_name)
692
+ created_carriers.add(carrier_name)
693
+ count += 1
694
+
695
+ # Ensure we have essential carriers for bus validation
696
+ # Buses can only use AC, DC, heat, or gas carriers according to database constraints
697
+ essential_carriers = {
698
+ "AC": {
699
+ "co2_emissions": 0.0,
700
+ "color": "#3498db",
701
+ "nice_name": "AC Electricity",
702
+ },
703
+ "electricity": {
704
+ "co2_emissions": 0.0,
705
+ "color": "#2ecc71",
706
+ "nice_name": "Electricity",
707
+ },
708
+ }
709
+
710
+ for carrier_name, carrier_props in essential_carriers.items():
711
+ if carrier_name not in created_carriers:
712
+ create_carrier(
713
+ conn,
714
+ carrier_name,
715
+ carrier_props["co2_emissions"],
716
+ carrier_props["color"],
717
+ carrier_props["nice_name"],
718
+ )
719
+ created_carriers.add(carrier_name)
720
+ count += 1
721
+
722
+ return count
723
+
724
+ def _import_buses(self, conn, network, strict_validation: bool) -> int:
725
+ """Import buses from PyPSA network (single network per database)"""
726
+ count = 0
727
+
728
+ if not hasattr(network, "buses") or network.buses.empty:
729
+ return count
730
+
731
+ for bus_name, bus_data in network.buses.iterrows():
732
+ try:
733
+ # Generate a unique name for this bus
734
+ unique_name = self._generate_unique_name(str(bus_name), "BUS")
735
+
736
+ # Extract coordinate data
737
+ x_value = bus_data.get("x", None)
738
+ y_value = bus_data.get("y", None)
739
+
740
+ # Handle NaN/None values properly
741
+ longitude = (
742
+ None
743
+ if x_value is None
744
+ or (hasattr(x_value, "__iter__") and len(str(x_value)) == 0)
745
+ else float(x_value) if x_value != "" else None
746
+ )
747
+ latitude = (
748
+ None
749
+ if y_value is None
750
+ or (hasattr(y_value, "__iter__") and len(str(y_value)) == 0)
751
+ else float(y_value) if y_value != "" else None
752
+ )
753
+
754
+ # Additional check for pandas NaN values
755
+ if longitude is not None and pd.isna(longitude):
756
+ longitude = None
757
+ if latitude is not None and pd.isna(latitude):
758
+ latitude = None
759
+
760
+ # Get or create carrier
761
+ carrier_name = bus_data.get("carrier", "AC")
762
+ carrier_id = self._get_or_create_carrier(conn, carrier_name)
763
+
764
+ # Create component record using atomic function
765
+ # Note: PyPSA 'x'/'y' coordinates are mapped to 'longitude'/'latitude' columns here
766
+ request = CreateComponentRequest(
767
+ component_type="BUS",
768
+ name=unique_name, # Use globally unique name
769
+ latitude=latitude, # PyPSA y -> latitude
770
+ longitude=longitude, # PyPSA x -> longitude
771
+ carrier_id=carrier_id,
772
+ )
773
+ component_id = insert_component(conn, request)
774
+
775
+ # Import bus attributes (location/coordinate data is handled above, not as attributes)
776
+ self._import_component_attributes(
777
+ conn, component_id, bus_data, "BUS", strict_validation
778
+ )
779
+
780
+ # Import timeseries attributes for buses
781
+ self._import_component_timeseries(
782
+ conn, network, component_id, bus_name, "BUS", strict_validation
783
+ )
784
+
785
+ count += 1
786
+
787
+ except Exception as e:
788
+ if strict_validation:
789
+ raise
790
+ continue
791
+
792
+ return count
793
+
794
+ # Additional placeholder methods for other component types
795
+ def _import_generators(
796
+ self,
797
+ conn,
798
+ network,
799
+ strict_validation: bool,
800
+ scatter_radius: float,
801
+ location_map,
802
+ ) -> int:
803
+ """Import generators from PyPSA network (single network per database)"""
804
+ count = 0
805
+
806
+ if not hasattr(network, "generators") or network.generators.empty:
807
+ return count
808
+
809
+ # Get bus name to ID mapping
810
+ bus_name_to_id = get_bus_name_to_id_map(conn)
811
+
812
+ # Get master scenario ID
813
+ master_scenario_id = None
814
+
815
+ for gen_name, gen_data in network.generators.iterrows():
816
+ try:
817
+ # Get bus connection
818
+ bus_name = gen_data.get("bus")
819
+ bus_id = bus_name_to_id.get(bus_name) if bus_name else None
820
+
821
+ if not bus_id:
822
+ continue
823
+
824
+ # Get or create carrier
825
+ carrier_name = gen_data.get("carrier", "AC")
826
+ carrier_id = self._get_or_create_carrier(conn, carrier_name)
827
+
828
+ # Generate coordinates near the bus
829
+ latitude, longitude = self._generate_component_coordinates(
830
+ conn, bus_id, scatter_radius, location_map, gen_name
831
+ )
832
+
833
+ # Create component record
834
+ request = CreateComponentRequest(
835
+ component_type="GENERATOR",
836
+ name=str(gen_name),
837
+ latitude=latitude,
838
+ longitude=longitude,
839
+ carrier_id=carrier_id,
840
+ bus_id=bus_id,
841
+ )
842
+ component_id = insert_component(conn, request)
843
+
844
+ # Import generator attributes
845
+ self._import_component_attributes(
846
+ conn, component_id, gen_data, "GENERATOR", strict_validation
847
+ )
848
+
849
+ # Import timeseries attributes for generators
850
+ self._import_component_timeseries(
851
+ conn,
852
+ network,
853
+ component_id,
854
+ gen_name,
855
+ "GENERATOR",
856
+ strict_validation,
857
+ )
858
+
859
+ count += 1
860
+
861
+ except Exception as e:
862
+ if strict_validation:
863
+ raise
864
+ continue
865
+
866
+ return count
867
+
868
+ def _import_loads(
869
+ self,
870
+ conn,
871
+ network,
872
+ strict_validation: bool,
873
+ scatter_radius: float,
874
+ location_map,
875
+ ) -> int:
876
+ """Import loads from PyPSA network (single network per database)"""
877
+ count = 0
878
+
879
+ if not hasattr(network, "loads") or network.loads.empty:
880
+ return count
881
+
882
+ bus_map = get_bus_name_to_id_map(conn)
883
+ bus_coords = self._get_bus_coordinates_map(conn)
884
+
885
+ # Count components per bus for better distribution
886
+ components_per_bus = {}
887
+ for load_name, load_data in network.loads.iterrows():
888
+ bus_name = load_data["bus"]
889
+ components_per_bus[bus_name] = components_per_bus.get(bus_name, 0) + 1
890
+
891
+ bus_component_counters = {}
892
+
893
+ for load_name, load_data in network.loads.iterrows():
894
+ try:
895
+ bus_id = bus_map.get(load_data["bus"])
896
+ if bus_id is None:
897
+ continue
898
+
899
+ # Generate a unique name for this load
900
+ unique_name = self._generate_unique_name(str(load_name), "LOAD")
901
+
902
+ # Try to get coordinates from CSV first, then fall back to scattered coordinates
903
+ latitude, longitude = None, None
904
+
905
+ # Check CSV coordinates first
906
+ csv_coords = self._get_csv_coordinates(unique_name, location_map)
907
+ if csv_coords:
908
+ latitude, longitude = csv_coords
909
+ elif bus_id in bus_coords:
910
+ # Fall back to scattered coordinates around the connected bus
911
+ bus_lat, bus_lon = bus_coords[bus_id]
912
+ bus_name = load_data["bus"]
913
+
914
+ # Get component index for this bus
915
+ component_index = bus_component_counters.get(bus_name, 0)
916
+ bus_component_counters[bus_name] = component_index + 1
917
+
918
+ latitude, longitude = self._generate_scattered_coordinates(
919
+ bus_lat,
920
+ bus_lon,
921
+ scatter_radius,
922
+ components_per_bus[bus_name],
923
+ component_index,
924
+ )
925
+
926
+ # Get carrier ID if carrier is specified
927
+ carrier_id = None
928
+ if "carrier" in load_data and pd.notna(load_data["carrier"]):
929
+ carrier_id = self._get_or_create_carrier(conn, load_data["carrier"])
930
+
931
+ # Create component record using atomic function
932
+ request = CreateComponentRequest(
933
+ component_type="LOAD",
934
+ name=unique_name, # Use globally unique name
935
+ bus_id=bus_id,
936
+ carrier_id=carrier_id,
937
+ latitude=latitude,
938
+ longitude=longitude,
939
+ )
940
+ component_id = insert_component(conn, request)
941
+
942
+ # Import load attributes
943
+ self._import_component_attributes(
944
+ conn, component_id, load_data, "LOAD", strict_validation
945
+ )
946
+
947
+ # Import timeseries attributes for loads
948
+ self._import_component_timeseries(
949
+ conn, network, component_id, load_name, "LOAD", strict_validation
950
+ )
951
+
952
+ count += 1
953
+
954
+ except Exception as e:
955
+ if strict_validation:
956
+ raise
957
+ continue
958
+
959
+ return count
960
+
961
+ def _import_lines(
962
+ self, conn, network, strict_validation: bool, location_map
963
+ ) -> int:
964
+ """Import lines from PyPSA network (single network per database)"""
965
+ count = 0
966
+ name_counter = {} # Track duplicate names
967
+
968
+ if not hasattr(network, "lines") or network.lines.empty:
969
+ return count
970
+
971
+ bus_map = get_bus_name_to_id_map(conn)
972
+
973
+ for line_name, line_data in network.lines.iterrows():
974
+ try:
975
+ bus0_id = bus_map.get(line_data["bus0"])
976
+ bus1_id = bus_map.get(line_data["bus1"])
977
+
978
+ if bus0_id is None or bus1_id is None:
979
+ continue
980
+
981
+ # Handle duplicate names by appending counter
982
+ unique_name = line_name
983
+ if line_name in name_counter:
984
+ name_counter[line_name] += 1
985
+ unique_name = f"{line_name}_{name_counter[line_name]}"
986
+ else:
987
+ name_counter[line_name] = 0
988
+
989
+ # Check for CSV coordinates
990
+ latitude, longitude = None, None
991
+ csv_coords = self._get_csv_coordinates(unique_name, location_map)
992
+ if csv_coords:
993
+ latitude, longitude = csv_coords
994
+
995
+ # Lines always use AC carrier
996
+ carrier_id = self._get_or_create_carrier(conn, "AC")
997
+
998
+ # Create component record using atomic function
999
+ request = CreateComponentRequest(
1000
+ component_type="LINE",
1001
+ name=unique_name, # Use deduplicated name
1002
+ bus0_id=bus0_id,
1003
+ bus1_id=bus1_id,
1004
+ carrier_id=carrier_id,
1005
+ latitude=latitude,
1006
+ longitude=longitude,
1007
+ )
1008
+ component_id = insert_component(conn, request)
1009
+
1010
+ # Import line attributes
1011
+ self._import_component_attributes(
1012
+ conn, component_id, line_data, "LINE", strict_validation
1013
+ )
1014
+
1015
+ # Import timeseries attributes for lines
1016
+ self._import_component_timeseries(
1017
+ conn, network, component_id, line_name, "LINE", strict_validation
1018
+ )
1019
+
1020
+ count += 1
1021
+
1022
+ except Exception as e:
1023
+ if strict_validation:
1024
+ raise
1025
+ continue
1026
+
1027
+ return count
1028
+
1029
+ def _import_links(
1030
+ self, conn, network, strict_validation: bool, location_map
1031
+ ) -> int:
1032
+ """Import links from PyPSA network (single network per database)"""
1033
+ count = 0
1034
+
1035
+ if not hasattr(network, "links") or network.links.empty:
1036
+ return count
1037
+
1038
+ bus_map = get_bus_name_to_id_map(conn)
1039
+
1040
+ for link_name, link_data in network.links.iterrows():
1041
+ try:
1042
+ bus0_id = bus_map.get(link_data["bus0"])
1043
+ bus1_id = bus_map.get(link_data["bus1"])
1044
+
1045
+ if bus0_id is None or bus1_id is None:
1046
+ continue
1047
+
1048
+ # Generate a unique name for this link
1049
+ unique_name = self._generate_unique_name(str(link_name), "LINK")
1050
+
1051
+ # Check for CSV coordinates
1052
+ latitude, longitude = None, None
1053
+ csv_coords = self._get_csv_coordinates(unique_name, location_map)
1054
+ if csv_coords:
1055
+ latitude, longitude = csv_coords
1056
+
1057
+ # Get carrier ID if carrier is specified
1058
+ carrier_id = None
1059
+ if "carrier" in link_data and pd.notna(link_data["carrier"]):
1060
+ carrier_id = self._get_or_create_carrier(conn, link_data["carrier"])
1061
+ else:
1062
+ # Default to DC for links
1063
+ carrier_id = self._get_or_create_carrier(conn, "DC")
1064
+
1065
+ # Create component record using atomic function
1066
+ request = CreateComponentRequest(
1067
+ component_type="LINK",
1068
+ name=unique_name, # Use globally unique name
1069
+ bus0_id=bus0_id,
1070
+ bus1_id=bus1_id,
1071
+ carrier_id=carrier_id,
1072
+ latitude=latitude,
1073
+ longitude=longitude,
1074
+ )
1075
+ component_id = insert_component(conn, request)
1076
+
1077
+ # Import link attributes
1078
+ self._import_component_attributes(
1079
+ conn, component_id, link_data, "LINK", strict_validation
1080
+ )
1081
+
1082
+ # Import timeseries attributes for links
1083
+ self._import_component_timeseries(
1084
+ conn, network, component_id, link_name, "LINK", strict_validation
1085
+ )
1086
+
1087
+ count += 1
1088
+
1089
+ except Exception as e:
1090
+ if strict_validation:
1091
+ raise
1092
+ continue
1093
+
1094
+ return count
1095
+
1096
+ def _import_storage_units(
1097
+ self,
1098
+ conn,
1099
+ network,
1100
+ strict_validation: bool,
1101
+ scatter_radius: float,
1102
+ location_map,
1103
+ ) -> int:
1104
+ """Import storage units from PyPSA network"""
1105
+ count = 0
1106
+
1107
+ if not hasattr(network, "storage_units") or network.storage_units.empty:
1108
+ return count
1109
+
1110
+ bus_map = get_bus_name_to_id_map(conn)
1111
+ bus_coords = self._get_bus_coordinates_map(conn)
1112
+
1113
+ # Count components per bus for better distribution
1114
+ components_per_bus = {}
1115
+ for su_name, su_data in network.storage_units.iterrows():
1116
+ bus_name = su_data["bus"]
1117
+ components_per_bus[bus_name] = components_per_bus.get(bus_name, 0) + 1
1118
+
1119
+ bus_component_counters = {}
1120
+
1121
+ for su_name, su_data in network.storage_units.iterrows():
1122
+ try:
1123
+ bus_id = bus_map.get(su_data["bus"])
1124
+ if bus_id is None:
1125
+ continue
1126
+
1127
+ # Generate a unique name for this storage unit
1128
+ unique_name = self._generate_unique_name(str(su_name), "STORAGE_UNIT")
1129
+
1130
+ # Try to get coordinates from CSV first, then fall back to scattered coordinates
1131
+ latitude, longitude = None, None
1132
+
1133
+ # Check CSV coordinates first
1134
+ csv_coords = self._get_csv_coordinates(unique_name, location_map)
1135
+ if csv_coords:
1136
+ latitude, longitude = csv_coords
1137
+ elif bus_id in bus_coords:
1138
+ # Fall back to scattered coordinates around the connected bus
1139
+ bus_lat, bus_lon = bus_coords[bus_id]
1140
+ bus_name = su_data["bus"]
1141
+
1142
+ # Get component index for this bus
1143
+ component_index = bus_component_counters.get(bus_name, 0)
1144
+ bus_component_counters[bus_name] = component_index + 1
1145
+
1146
+ latitude, longitude = self._generate_scattered_coordinates(
1147
+ bus_lat,
1148
+ bus_lon,
1149
+ scatter_radius,
1150
+ components_per_bus[bus_name],
1151
+ component_index,
1152
+ )
1153
+
1154
+ # Get carrier ID if carrier is specified
1155
+ carrier_id = None
1156
+ if "carrier" in su_data and pd.notna(su_data["carrier"]):
1157
+ carrier_id = self._get_or_create_carrier(conn, su_data["carrier"])
1158
+
1159
+ # Create component record using atomic function
1160
+ request = CreateComponentRequest(
1161
+ component_type="STORAGE_UNIT",
1162
+ name=unique_name, # Use globally unique name
1163
+ bus_id=bus_id,
1164
+ carrier_id=carrier_id,
1165
+ latitude=latitude,
1166
+ longitude=longitude,
1167
+ )
1168
+ component_id = insert_component(conn, request)
1169
+
1170
+ # Import storage unit attributes
1171
+ self._import_component_attributes(
1172
+ conn, component_id, su_data, "STORAGE_UNIT", strict_validation
1173
+ )
1174
+
1175
+ # Import timeseries attributes for storage units
1176
+ self._import_component_timeseries(
1177
+ conn,
1178
+ network,
1179
+ component_id,
1180
+ su_name,
1181
+ "STORAGE_UNIT",
1182
+ strict_validation,
1183
+ )
1184
+
1185
+ count += 1
1186
+
1187
+ except Exception as e:
1188
+ if strict_validation:
1189
+ raise
1190
+ continue
1191
+
1192
+ return count
1193
+
1194
+ def _import_stores(
1195
+ self,
1196
+ conn,
1197
+ network,
1198
+ strict_validation: bool,
1199
+ scatter_radius: float,
1200
+ location_map,
1201
+ ) -> int:
1202
+ """Import stores from PyPSA network (single network per database)"""
1203
+ count = 0
1204
+ name_counter = {} # Track duplicate names
1205
+
1206
+ if not hasattr(network, "stores") or network.stores.empty:
1207
+ return count
1208
+
1209
+ bus_map = get_bus_name_to_id_map(conn)
1210
+ bus_coords = self._get_bus_coordinates_map(conn)
1211
+
1212
+ # Count components per bus for better distribution
1213
+ components_per_bus = {}
1214
+ for store_name, store_data in network.stores.iterrows():
1215
+ bus_name = store_data["bus"]
1216
+ components_per_bus[bus_name] = components_per_bus.get(bus_name, 0) + 1
1217
+
1218
+ bus_component_counters = (
1219
+ {}
1220
+ ) # Track how many components we've placed at each bus
1221
+
1222
+ for store_name, store_data in network.stores.iterrows():
1223
+ try:
1224
+ bus_id = bus_map.get(store_data["bus"])
1225
+ if bus_id is None:
1226
+ continue
1227
+
1228
+ # Handle duplicate names by appending counter
1229
+ unique_name = store_name
1230
+ if store_name in name_counter:
1231
+ name_counter[store_name] += 1
1232
+ unique_name = f"{store_name}_{name_counter[store_name]}"
1233
+ else:
1234
+ name_counter[store_name] = 0
1235
+
1236
+ # Try to get coordinates from CSV first, then fall back to scattered coordinates
1237
+ latitude, longitude = None, None
1238
+
1239
+ # Check CSV coordinates first
1240
+ csv_coords = self._get_csv_coordinates(unique_name, location_map)
1241
+ if csv_coords:
1242
+ latitude, longitude = csv_coords
1243
+ elif bus_id in bus_coords:
1244
+ # Fall back to scattered coordinates around the connected bus
1245
+ bus_lat, bus_lon = bus_coords[bus_id]
1246
+ bus_name = store_data["bus"]
1247
+
1248
+ # Get component index for this bus
1249
+ component_index = bus_component_counters.get(bus_name, 0)
1250
+ bus_component_counters[bus_name] = component_index + 1
1251
+
1252
+ latitude, longitude = self._generate_scattered_coordinates(
1253
+ bus_lat,
1254
+ bus_lon,
1255
+ scatter_radius,
1256
+ components_per_bus[bus_name],
1257
+ component_index,
1258
+ )
1259
+
1260
+ # Get carrier ID if carrier is specified
1261
+ carrier_id = None
1262
+ if "carrier" in store_data and pd.notna(store_data["carrier"]):
1263
+ carrier_id = self._get_or_create_carrier(
1264
+ conn, store_data["carrier"]
1265
+ )
1266
+
1267
+ # Create component record using atomic function
1268
+ request = CreateComponentRequest(
1269
+ component_type="STORE",
1270
+ name=unique_name, # Use deduplicated name
1271
+ bus_id=bus_id,
1272
+ carrier_id=carrier_id,
1273
+ latitude=latitude,
1274
+ longitude=longitude,
1275
+ )
1276
+ component_id = insert_component(conn, request)
1277
+
1278
+ # Import store attributes
1279
+ self._import_component_attributes(
1280
+ conn, component_id, store_data, "STORE", strict_validation
1281
+ )
1282
+
1283
+ # Import timeseries attributes for stores
1284
+ self._import_component_timeseries(
1285
+ conn, network, component_id, store_name, "STORE", strict_validation
1286
+ )
1287
+
1288
+ count += 1
1289
+
1290
+ except Exception as e:
1291
+ if strict_validation:
1292
+ raise
1293
+ continue
1294
+
1295
+ return count
1296
+
1297
+ def _get_bus_coordinates(self, conn) -> List[Tuple[float, float]]:
1298
+ """Get coordinates of all buses in the network that have valid coordinates (single network per database)"""
1299
+ cursor = conn.execute(
1300
+ """
1301
+ SELECT latitude, longitude FROM components
1302
+ WHERE component_type = 'BUS'
1303
+ AND latitude IS NOT NULL AND longitude IS NOT NULL
1304
+ AND NOT (latitude = 0 AND longitude = 0)
1305
+ """,
1306
+ (),
1307
+ )
1308
+
1309
+ coordinates = [(row[0], row[1]) for row in cursor.fetchall()]
1310
+ return coordinates
1311
+
1312
+ def _calculate_bus_separation_radius(
1313
+ self, bus_coordinates: List[Tuple[float, float]]
1314
+ ) -> float:
1315
+ """Calculate the minimum separation between buses and return a radius for scattering"""
1316
+ if len(bus_coordinates) < 2:
1317
+ return 0.01 # ~1km at equator
1318
+
1319
+ min_distance_degrees = float("inf")
1320
+ min_separation_threshold = 0.001 # ~100m threshold to exclude co-located buses
1321
+
1322
+ for i, (lat1, lon1) in enumerate(bus_coordinates):
1323
+ for j, (lat2, lon2) in enumerate(bus_coordinates[i + 1 :], i + 1):
1324
+ # Simple Euclidean distance in degrees
1325
+ distance_degrees = math.sqrt((lat2 - lat1) ** 2 + (lon2 - lon1) ** 2)
1326
+
1327
+ if distance_degrees > min_separation_threshold:
1328
+ min_distance_degrees = min(min_distance_degrees, distance_degrees)
1329
+
1330
+ if min_distance_degrees == float("inf"):
1331
+ scatter_radius_degrees = 0.05 # ~5km default
1332
+ else:
1333
+ scatter_radius_degrees = min_distance_degrees * 0.25
1334
+
1335
+ # Ensure reasonable bounds: between 1km and 100km equivalent in degrees
1336
+ min_radius = 0.01 # ~1km
1337
+ max_radius = 1.0 # ~100km
1338
+ scatter_radius_degrees = max(
1339
+ min_radius, min(max_radius, scatter_radius_degrees)
1340
+ )
1341
+
1342
+ return scatter_radius_degrees
1343
+
1344
+ def _detect_and_load_location_csv(
1345
+ self, netcdf_path: str
1346
+ ) -> Optional[Dict[str, Tuple[float, float]]]:
1347
+ """
1348
+ Detect and load companion CSV file with component locations.
1349
+
1350
+ Args:
1351
+ netcdf_path: Path to the NetCDF file (e.g., /path/to/fileX.nc)
1352
+
1353
+ Returns:
1354
+ Dictionary mapping component names to (latitude, longitude) tuples, or None if no CSV found
1355
+ """
1356
+ try:
1357
+ # Construct expected CSV path: replace .nc with _locations.csv
1358
+ netcdf_file = Path(netcdf_path)
1359
+ csv_path = netcdf_file.parent / f"{netcdf_file.stem}_locations.csv"
1360
+
1361
+ if not csv_path.exists():
1362
+ return None
1363
+
1364
+ # Parse the CSV file
1365
+ try:
1366
+ location_df = pd.read_csv(csv_path)
1367
+
1368
+ # Validate required columns
1369
+ required_columns = {"name", "longitude", "latitude"}
1370
+ if not required_columns.issubset(location_df.columns):
1371
+ return None
1372
+
1373
+ # Create lookup dictionary
1374
+ location_map = {}
1375
+
1376
+ for _, row in location_df.iterrows():
1377
+ name = row["name"]
1378
+ longitude = row["longitude"]
1379
+ latitude = row["latitude"]
1380
+
1381
+ # Skip rows with missing data
1382
+ if pd.isna(name) or pd.isna(longitude) or pd.isna(latitude):
1383
+ continue
1384
+
1385
+ # Validate coordinate ranges
1386
+ if not (-180 <= longitude <= 180) or not (-90 <= latitude <= 90):
1387
+ continue
1388
+
1389
+ location_map[str(name).strip()] = (
1390
+ float(latitude),
1391
+ float(longitude),
1392
+ )
1393
+
1394
+ return location_map
1395
+
1396
+ except Exception:
1397
+ return None
1398
+
1399
+ except Exception:
1400
+ return None
1401
+
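For a NetCDF file named, say, example_model.nc (hypothetical), the companion file this method looks for would be example_model_locations.csv in the same directory, with name, longitude and latitude columns; rows with missing values or out-of-range coordinates are skipped. Illustrative contents:

    name,longitude,latitude
    wind_farm_1,8.55,53.12
    city_load_a,9.99,48.78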
1402
+ def _get_or_create_carrier(self, conn, carrier_name: str) -> int:
1403
+ """Get existing carrier ID or create new carrier (single network per database)"""
1404
+ # Try to find existing carrier
1405
+ cursor = conn.execute("SELECT id FROM carriers WHERE name = ?", (carrier_name,))
1406
+ result = cursor.fetchone()
1407
+ if result:
1408
+ return result[0]
1409
+
1410
+ # Create new carrier
1411
+ carrier_id = create_carrier(conn, carrier_name, 0.0, "#3498db", carrier_name)
1412
+ return carrier_id
1413
+
1414
+ def _generate_component_coordinates(
1415
+ self,
1416
+ conn,
1417
+ bus_id: int,
1418
+ scatter_radius: float,
1419
+ location_map: Optional[Dict],
1420
+ component_name: str,
1421
+ ) -> Tuple[Optional[float], Optional[float]]:
1422
+ """Generate coordinates for a component near its connected bus"""
1423
+ # Check location map first
1424
+ if location_map and component_name in location_map:
1425
+ return location_map[component_name]
1426
+
1427
+ # Get bus coordinates
1428
+ cursor = conn.execute(
1429
+ "SELECT latitude, longitude FROM components WHERE id = ?", (bus_id,)
1430
+ )
1431
+ result = cursor.fetchone()
1432
+ if not result or result[0] is None or result[1] is None:
1433
+ return None, None
1434
+
1435
+ bus_lat, bus_lon = result[0], result[1]
1436
+
1437
+ # Generate unique name-based offset
1438
+ name_hash = hash(component_name) % 1000
1439
+ angle = (name_hash / 1000.0) * 2 * math.pi
1440
+
1441
+ # Apply scatter radius
1442
+ lat_offset = scatter_radius * math.cos(angle)
1443
+ lon_offset = scatter_radius * math.sin(angle)
1444
+
1445
+ return bus_lat + lat_offset, bus_lon + lon_offset
1446
+
1447
+ def _import_component_attributes(
1448
+ self,
1449
+ conn,
1450
+ component_id: int,
1451
+ component_data: pd.Series,
1452
+ component_type: str,
1453
+ strict_validation: bool,
1454
+ ):
1455
+ """Import component attributes, excluding bus connection columns"""
1456
+
1457
+ # Get master scenario ID
1458
+ scenario_id = None
1459
+
1460
+ # Skip these columns as they're handled in the components table
1461
+ skip_columns = {
1462
+ "bus",
1463
+ "bus0",
1464
+ "bus1",
1465
+ "name", # Bus connections and name
1466
+ "x",
1467
+ "y",
1468
+ "location", # Coordinate/location data (stored as latitude/longitude columns)
1469
+ "carrier", # Carrier reference (stored as carrier_id column)
1470
+ }
1471
+
1472
+ attribute_count = 0
1473
+ skipped_count = 0
1474
+
1475
+ for attr_name, value in component_data.items():
1476
+ if attr_name in skip_columns:
1477
+ skipped_count += 1
1478
+ continue
1479
+
1480
+ if pd.isna(value):
1481
+ skipped_count += 1
1482
+ continue
1483
+
1484
+ # Convert value to appropriate format for our database and use smart attribute setting
1485
+ try:
1486
+ # Get validation rule to check expected data type
1487
+ try:
1488
+ rule = get_validation_rule(conn, component_type, attr_name)
1489
+ expected_type = rule.data_type
1490
+ except Exception:
1491
+ expected_type = None
1492
+
1493
+ # Convert based on expected type or infer from value
1494
+ if expected_type == "boolean":
1495
+ # Handle boolean attributes that might come as int/float from PyPSA
1496
+ if isinstance(value, (bool, np.bool_)):
1497
+ static_value = StaticValue(bool(value))
1498
+ elif isinstance(value, (int, np.integer)):
1499
+ static_value = StaticValue(bool(value)) # 0 -> False, 1 -> True
1500
+ elif isinstance(value, (float, np.floating)):
1501
+ static_value = StaticValue(
1502
+ bool(int(value))
1503
+ ) # 0.0 -> False, 1.0 -> True
1504
+ else:
1505
+ static_value = StaticValue(str(value).lower() == "true")
1506
+ elif expected_type == "int":
1507
+ # Handle integer attributes
1508
+ if isinstance(value, (int, np.integer)):
1509
+ static_value = StaticValue(int(value))
1510
+ elif isinstance(value, (float, np.floating)):
1511
+ if np.isfinite(value):
1512
+ static_value = StaticValue(int(value))
1513
+ else:
1514
+ skipped_count += 1
1515
+ continue
1516
+ elif isinstance(value, bool):
1517
+ static_value = StaticValue(int(value))
1518
+ else:
1519
+ static_value = StaticValue(int(float(str(value))))
1520
+ elif expected_type == "float":
1521
+ # Handle float attributes
1522
+ if isinstance(value, (float, np.floating)):
1523
+ if np.isfinite(value):
1524
+ static_value = StaticValue(float(value))
1525
+ else:
1526
+ skipped_count += 1
1527
+ continue
1528
+ elif isinstance(value, (int, np.integer)):
1529
+ static_value = StaticValue(float(value))
1530
+ elif isinstance(value, bool):
1531
+ static_value = StaticValue(float(value))
1532
+ else:
1533
+ static_value = StaticValue(float(str(value)))
1534
+ else:
1535
+ # Fallback to type inference for unknown or string types
1536
+ if isinstance(value, bool):
1537
+ static_value = StaticValue(bool(value))
1538
+ elif isinstance(value, (int, np.integer)):
1539
+ static_value = StaticValue(int(value))
1540
+ elif isinstance(value, (float, np.floating)):
1541
+ if np.isfinite(value):
1542
+ static_value = StaticValue(float(value))
1543
+ else:
1544
+ skipped_count += 1
1545
+ continue # Skip infinite/NaN values
1546
+ else:
1547
+ static_value = StaticValue(str(value))
1548
+
1549
+ # Use direct static attribute setting
1550
+ set_static_attribute(
1551
+ conn, component_id, attr_name, static_value, scenario_id
1552
+ )
1553
+ attribute_count += 1
1554
+
1555
+ except Exception as e:
1556
+ # Handle validation errors from db_utils functions
1557
+ if (
1558
+ "No validation rule found" in str(e)
1559
+ or "does not allow" in str(e)
1560
+ or "ValidationError" in str(type(e).__name__)
1561
+ ):
1562
+ if strict_validation:
1563
+ raise
1564
+ else:
1565
+ skipped_count += 1
1566
+ continue
1567
+ else:
1568
+ skipped_count += 1
1569
+
1570
+ def _import_component_timeseries(
1571
+ self,
1572
+ conn,
1573
+ network,
1574
+ component_id: int,
1575
+ component_name: str,
1576
+ component_type: str,
1577
+ strict_validation: bool,
1578
+ ):
1579
+ """Import timeseries attributes from PyPSA network"""
1580
+
1581
+ # Get master scenario ID
1582
+ scenario_id = None
1583
+
1584
+ # Map component types to their PyPSA timeseries DataFrames
1585
+ timeseries_map = {
1586
+ "BUS": getattr(network, "buses_t", {}),
1587
+ "GENERATOR": getattr(network, "generators_t", {}),
1588
+ "LOAD": getattr(network, "loads_t", {}),
1589
+ "LINE": getattr(network, "lines_t", {}),
1590
+ "LINK": getattr(network, "links_t", {}),
1591
+ "STORAGE_UNIT": getattr(network, "storage_units_t", {}),
1592
+ "STORE": getattr(network, "stores_t", {}),
1593
+ }
1594
+
1595
+ component_timeseries = timeseries_map.get(component_type, {})
1596
+
1597
+ if not component_timeseries:
1598
+ return
1599
+
1600
+ timeseries_count = 0
1601
+
1602
+ # Iterate through each timeseries attribute (e.g., 'p', 'q', 'p_set', 'p_max_pu', etc.)
1603
+ for attr_name, timeseries_df in component_timeseries.items():
1604
+ if component_name not in timeseries_df.columns:
1605
+ continue
1606
+
1607
+ # Get the timeseries data for this component
1608
+ component_series = timeseries_df[component_name]
1609
+
1610
+ # Skip if all values are NaN
1611
+ if component_series.isna().all():
1612
+ continue
1613
+
1614
+ try:
1615
+ # Convert pandas Series to list of values (using optimized approach)
1616
+ values = []
1617
+
1618
+ for value in component_series:
1619
+ # Skip NaN values by using 0.0 as default (PyPSA convention)
1620
+ if pd.isna(value):
1621
+ values.append(0.0)
1622
+ else:
1623
+ values.append(float(value))
1624
+
1625
+ if not values:
1626
+ continue
1627
+
1628
+ # Use optimized timeseries attribute setting
1629
+ set_timeseries_attribute(
1630
+ conn, component_id, attr_name, values, scenario_id
1631
+ )
1632
+ timeseries_count += 1
1633
+
1634
+ except Exception as e:
1635
+ if strict_validation:
1636
+ raise
1637
+ else:
1638
+ continue
1639
+
1640
+ def _generate_unique_name(self, base_name: str, component_type: str) -> str:
1641
+ """
1642
+ Generate a unique name for a component, ensuring no duplicates across all component types.
1643
+
1644
+ Args:
1645
+ base_name: The original name to start with
1646
+ component_type: The type of component (used in the suffix if needed)
1647
+
1648
+ Returns:
1649
+ A unique name that hasn't been used yet
1650
+ """
1651
+ # First try the base name
1652
+ if base_name not in self._used_names:
1653
+ self._used_names.add(base_name)
1654
+ return base_name
1655
+
1656
+ # If base name is taken, try appending the component type
1657
+ typed_name = f"{base_name}_{component_type.lower()}"
1658
+ if typed_name not in self._used_names:
1659
+ self._used_names.add(typed_name)
1660
+ return typed_name
1661
+
1662
+ # If that's taken too, start adding numbers
1663
+ counter = 1
1664
+ while True:
1665
+ unique_name = f"{base_name}_{counter}"
1666
+ if unique_name not in self._used_names:
1667
+ self._used_names.add(unique_name)
1668
+ return unique_name
1669
+ counter += 1
1670
+
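Illustrative behaviour of the de-duplication above (a sketch, not output captured from the package):

    importer = NetCDFModelImporter()
    importer._generate_unique_name("unit1", "BUS")        # "unit1"
    importer._generate_unique_name("unit1", "GENERATOR")  # "unit1_generator"
    importer._generate_unique_name("unit1", "GENERATOR")  # "unit1_1"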
1671
+ def _generate_scattered_coordinates(
1672
+ self,
1673
+ bus_lat: float,
1674
+ bus_lon: float,
1675
+ scatter_radius: float,
1676
+ component_count_at_bus: int,
1677
+ component_index: int,
1678
+ ) -> Tuple[float, float]:
1679
+ """
1680
+ Generate scattered coordinates around a bus location.
1681
+
1682
+ Args:
1683
+ bus_lat: Bus latitude
1684
+ bus_lon: Bus longitude
1685
+ scatter_radius: Radius in degrees to scatter within
1686
+ component_count_at_bus: Total number of components at this bus
1687
+ component_index: Index of this component (0-based)
1688
+
1689
+ Returns:
1690
+ Tuple of (latitude, longitude) for the scattered position
1691
+ """
1692
+ if component_count_at_bus == 1:
1693
+ # Single component - place it at a moderate distance from the bus
1694
+ angle = random.uniform(0, 2 * math.pi)
1695
+ distance = scatter_radius * random.uniform(
1696
+ 0.5, 0.8
1697
+ ) # 50-80% of scatter radius
1698
+ else:
1699
+ # Multiple components - arrange in a rough circle with some randomness
1700
+ base_angle = (2 * math.pi * component_index) / component_count_at_bus
1701
+ angle_jitter = random.uniform(
1702
+ -math.pi / 8, math.pi / 8
1703
+ ) # +/- 22.5 degrees jitter
1704
+ angle = base_angle + angle_jitter
1705
+
1706
+ # Vary distance randomly within the radius (use more of the available radius)
1707
+ distance = scatter_radius * random.uniform(
1708
+ 0.6, 1.0
1709
+ ) # 60-100% of scatter radius
1710
+
1711
+ # Calculate new coordinates
1712
+ new_lat = bus_lat + distance * math.cos(angle)
1713
+ new_lon = bus_lon + distance * math.sin(angle)
1714
+
1715
+ return new_lat, new_lon
1716
+
1717
+ def _get_bus_coordinates_map(self, conn) -> Dict[int, Tuple[float, float]]:
1718
+ """
1719
+ Get a mapping from bus component ID to coordinates.
1720
+
1721
+ Returns:
1722
+ Dictionary mapping bus component ID to (latitude, longitude) tuple
1723
+ """
1724
+ cursor = conn.execute(
1725
+ """
1726
+ SELECT id, latitude, longitude FROM components
1727
+ WHERE component_type = 'BUS'
1728
+ AND latitude IS NOT NULL AND longitude IS NOT NULL
1729
+ AND NOT (latitude = 0 AND longitude = 0)
1730
+ """,
1731
+ (),
1732
+ )
1733
+
1734
+ bus_coords = {row[0]: (row[1], row[2]) for row in cursor.fetchall()}
1735
+ return bus_coords
1736
+
1737
+ def _resolve_original_component_name(self, unique_name: str) -> str:
1738
+ """
1739
+ Resolve a potentially modified unique name back to its original name for CSV lookup.
1740
+
1741
+ Args:
1742
+ unique_name: The unique name that may have been modified (e.g., "component_1", "component_generator")
1743
+
1744
+ Returns:
1745
+ The original name for CSV lookup
1746
+ """
1747
+ # Remove common suffixes added by _generate_unique_name
1748
+ # Pattern 1: Remove "_NUMBER" suffix (e.g., "component_1" -> "component")
1749
+ import re
1750
+
1751
+ # First try removing "_NUMBER" pattern
1752
+ no_number_suffix = re.sub(r"_\d+$", "", unique_name)
1753
+ if no_number_suffix != unique_name:
1754
+ return no_number_suffix
1755
+
1756
+ # Then try removing "_COMPONENT_TYPE" pattern (e.g., "component_generator" -> "component")
1757
+ component_types = [
1758
+ "bus",
1759
+ "generator",
1760
+ "load",
1761
+ "line",
1762
+ "link",
1763
+ "storage_unit",
1764
+ "store",
1765
+ ]
1766
+ for comp_type in component_types:
1767
+ suffix = f"_{comp_type.lower()}"
1768
+ if unique_name.endswith(suffix):
1769
+ return unique_name[: -len(suffix)]
1770
+
1771
+ # If no patterns match, return the original name
1772
+ return unique_name
1773
+
1774
+ def _get_csv_coordinates(
1775
+ self,
1776
+ component_name: str,
1777
+ location_map: Optional[Dict[str, Tuple[float, float]]],
1778
+ ) -> Optional[Tuple[float, float]]:
1779
+ """
1780
+ Get coordinates for a component from the CSV location map.
1781
+
1782
+ Args:
1783
+ component_name: The component name (potentially modified for uniqueness)
1784
+ location_map: Dictionary mapping original names to coordinates
1785
+
1786
+ Returns:
1787
+ (latitude, longitude) tuple if found, None otherwise
1788
+ """
1789
+ if not location_map:
1790
+ return None
1791
+
1792
+ # Try exact match first
1793
+ if component_name in location_map:
1794
+ return location_map[component_name]
1795
+
1796
+ # Try resolving back to original name
1797
+ original_name = self._resolve_original_component_name(component_name)
1798
+ if original_name != component_name and original_name in location_map:
1799
+ return location_map[original_name]
1800
+
1801
+ # No match found
1802
+ return None