pyconvexity 0.1.3-py3-none-any.whl → 0.1.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of pyconvexity might be problematic.
- pyconvexity/__init__.py +27 -2
- pyconvexity/_version.py +1 -2
- pyconvexity/core/__init__.py +0 -2
- pyconvexity/core/database.py +158 -0
- pyconvexity/core/types.py +105 -18
- pyconvexity/data/__pycache__/__init__.cpython-313.pyc +0 -0
- pyconvexity/data/loaders/__pycache__/__init__.cpython-313.pyc +0 -0
- pyconvexity/data/loaders/__pycache__/cache.cpython-313.pyc +0 -0
- pyconvexity/data/schema/01_core_schema.sql +12 -12
- pyconvexity/data/schema/02_data_metadata.sql +17 -321
- pyconvexity/data/sources/__pycache__/__init__.cpython-313.pyc +0 -0
- pyconvexity/data/sources/__pycache__/gem.cpython-313.pyc +0 -0
- pyconvexity/data/sources/gem.py +5 -5
- pyconvexity/io/excel_exporter.py +34 -13
- pyconvexity/io/excel_importer.py +48 -51
- pyconvexity/io/netcdf_importer.py +1054 -51
- pyconvexity/models/attributes.py +209 -72
- pyconvexity/models/network.py +17 -15
- pyconvexity/solvers/pypsa/api.py +24 -1
- pyconvexity/solvers/pypsa/batch_loader.py +37 -44
- pyconvexity/solvers/pypsa/builder.py +62 -152
- pyconvexity/solvers/pypsa/solver.py +104 -253
- pyconvexity/solvers/pypsa/storage.py +740 -1373
- pyconvexity/timeseries.py +327 -0
- pyconvexity/validation/rules.py +2 -2
- {pyconvexity-0.1.3.dist-info → pyconvexity-0.1.4.dist-info}/METADATA +1 -1
- pyconvexity-0.1.4.dist-info/RECORD +46 -0
- pyconvexity-0.1.3.dist-info/RECORD +0 -45
- {pyconvexity-0.1.3.dist-info → pyconvexity-0.1.4.dist-info}/WHEEL +0 -0
- {pyconvexity-0.1.3.dist-info → pyconvexity-0.1.4.dist-info}/top_level.txt +0 -0
pyconvexity/models/attributes.py
CHANGED
@@ -8,14 +8,14 @@ with support for both static values and timeseries data.
 import sqlite3
 import json
 import logging
-from typing import Dict, Any, Optional, List
+from typing import Dict, Any, Optional, List, Union
 import pandas as pd
 from io import BytesIO
 import pyarrow as pa
 import pyarrow.parquet as pq
 
 from pyconvexity.core.types import (
-    StaticValue,
+    StaticValue, Timeseries, TimeseriesMetadata, AttributeValue, TimePeriod
 )
 from pyconvexity.core.errors import (
     ComponentNotFound, AttributeNotFound, ValidationError, TimeseriesError
@@ -86,7 +86,7 @@ def set_timeseries_attribute(
     conn: sqlite3.Connection,
     component_id: int,
     attribute_name: str,
-    timeseries: List[
+    timeseries: Union[Timeseries, List[float]],
     scenario_id: Optional[int] = None
 ) -> None:
     """
@@ -96,7 +96,7 @@ def set_timeseries_attribute(
         conn: Database connection
         component_id: Component ID
         attribute_name: Name of the attribute
-        timeseries:
+        timeseries: Timeseries object or list of float values
         scenario_id: Scenario ID (uses master scenario if None)
 
     Raises:
@@ -116,25 +116,34 @@ def set_timeseries_attribute(
     if not rule.allows_timeseries:
         raise ValidationError(f"Attribute '{attribute_name}' for {component_type} does not allow timeseries values")
 
-    # 4.
-
+    # 4. Convert input to values array
+    if isinstance(timeseries, Timeseries):
+        values = timeseries.values
+    elif isinstance(timeseries, list) and all(isinstance(v, (int, float)) for v in timeseries):
+        # Direct values array
+        values = [float(v) for v in timeseries]
+    else:
+        raise ValueError("timeseries must be Timeseries or List[float]")
 
-    # 5.
+    # 5. Serialize to binary format (ultra-fast, matches Rust exactly)
+    binary_data = serialize_values_to_binary(values)
+
+    # 6. Resolve scenario ID (get master scenario if None)
     resolved_scenario_id = resolve_scenario_id(conn, component_id, scenario_id)
 
-    #
+    # 7. Remove any existing attribute for this scenario
     cursor = conn.cursor()
     cursor.execute(
         "DELETE FROM component_attributes WHERE component_id = ? AND attribute_name = ? AND scenario_id = ?",
         (component_id, attribute_name, resolved_scenario_id)
     )
 
-    #
+    # 8. Insert new timeseries attribute
    cursor.execute(
        """INSERT INTO component_attributes
           (component_id, attribute_name, scenario_id, storage_type, timeseries_data, data_type, unit, is_input)
           VALUES (?, ?, ?, 'timeseries', ?, ?, ?, ?)""",
-        (component_id, attribute_name, resolved_scenario_id,
+        (component_id, attribute_name, resolved_scenario_id, binary_data,
        rule.data_type, rule.unit, rule.is_input)
    )
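
With this change the setter accepts either a Timeseries object or a plain List[float]; values are coerced to float and binary-serialized before the scenario-scoped delete/insert. A minimal usage sketch — the component ID and attribute name here are hypothetical, and assume a database already initialized with pyconvexity's schema:

    import sqlite3
    from pyconvexity.models.attributes import set_timeseries_attribute

    conn = sqlite3.connect("network.db")

    # A plain list of floats is accepted; each value is coerced to float
    # internally before serialization.
    hourly_profile = [0.0, 0.1, 0.35, 0.6, 0.45, 0.2]
    set_timeseries_attribute(conn, component_id=42, attribute_name="p_max_pu",
                             timeseries=hourly_profile)  # scenario_id=None -> master
    conn.commit()
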
@@ -239,22 +248,19 @@ def get_attribute(
         if not timeseries_data:
             raise ValidationError("Timeseries attribute missing data")
 
-        #
-
-        network_row = cursor.fetchone()
+        # Deserialize from binary format to new efficient Timeseries format
+        values = deserialize_values_from_binary(timeseries_data)
 
-
-
-
-
-
-
-
-
+        timeseries = Timeseries(
+            values=values,
+            length=len(values),
+            start_index=0,
+            data_type=data_type,
+            unit=unit,
+            is_input=True  # Default, could be enhanced with actual is_input from DB
+        )
 
-
-        timeseries_points = deserialize_timeseries_from_parquet(timeseries_data, network_time_periods)
-        return AttributeValue.timeseries(timeseries_points)
+        return AttributeValue.timeseries(timeseries)
 
     else:
         raise ValidationError(f"Unknown storage type: {storage_type}")
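
On the read side, get_attribute now returns the decoded values wrapped in a Timeseries instead of a list of per-period points. Continuing the hypothetical example above; note that values come back at Float32 precision:

    from pyconvexity.models.attributes import get_attribute

    attr_value = get_attribute(conn, component_id=42, attribute_name="p_max_pu")
    if attr_value.is_timeseries():
        ts = attr_value.as_timeseries()
        print(ts.length)      # 6
        print(ts.values[:2])  # [0.0, 0.10000000149011612] -- f32 round-trip
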
@@ -322,64 +328,195 @@ def get_master_scenario_id(conn: sqlite3.Connection, network_id: int) -> int:
     return result[0]
 
 
-#
+# ============================================================================
+# EFFICIENT TIMESERIES SERIALIZATION - MATCHES RUST IMPLEMENTATION EXACTLY
+# ============================================================================
 
-def
-    """
-
-    schema = pa.schema([
-        ('period_index', pa.int32()),
-        ('value', pa.float64())
-    ])
+def serialize_values_to_binary(values: List[float]) -> bytes:
+    """
+    Serialize f32 values to binary format - EXACT MATCH WITH RUST.
 
-
-
-
-
-
-
-
-    period_indices = [p.period_index for p in timeseries]
-    values = [p.value for p in timeseries]
-
-    # Create arrays with explicit types to ensure Int32 for period_index
-    period_array = pa.array(period_indices, type=pa.int32())
-    value_array = pa.array(values, type=pa.float64())
-
-    table = pa.table([period_array, value_array], schema=schema)
+    Ultra-fast binary format: just raw Float32 array, little-endian.
+    """
+    if not values:
+        return b''
+
+    import struct
+    buffer = bytearray(len(values) * 4)  # 4 bytes per Float32
 
-
-
-
-
+    for i, value in enumerate(values):
+        # Pack as little-endian Float32 to match Rust exactly
+        struct.pack_into('<f', buffer, i * 4, float(value))
+
+    return bytes(buffer)
 
 
-def
-    """
+def deserialize_values_from_binary(data: bytes) -> List[float]:
+    """
+    Deserialize f32 values from binary format - EXACT MATCH WITH RUST.
+
+    Ultra-fast deserialization: read raw Float32 values only.
+    """
     if not data:
         return []
 
-
-
+    # Ensure data length is multiple of 4 (Float32 size)
+    if len(data) % 4 != 0:
+        raise ValueError("Invalid binary data length - must be multiple of 4 bytes")
+
+    import struct
+    values = []
+
+    # Ultra-fast deserialization: read raw Float32 values
+    for i in range(0, len(data), 4):
+        value = struct.unpack('<f', data[i:i+4])[0]  # Little-endian Float32
+        values.append(value)
+
+    return values
+
+
+def get_timeseries_length_from_binary(data: bytes) -> int:
+    """Get the length of a timeseries without deserializing the full data."""
+    if not data:
+        return 0
+
+    # Ultra-fast: just divide by 4 bytes per Float32
+    if len(data) % 4 != 0:
+        raise ValueError("Invalid binary data length - must be multiple of 4 bytes")
+
+    return len(data) // 4
+
 
-
-
+# ============================================================================
+# UNIFIED TIMESERIES FUNCTIONS - MATCH RUST API
+# ============================================================================
 
-
-
-
+def get_timeseries(
+    conn: sqlite3.Connection,
+    component_id: int,
+    attribute_name: str,
+    scenario_id: Optional[int] = None,
+    start_index: Optional[int] = None,
+    end_index: Optional[int] = None,
+    max_points: Optional[int] = None
+) -> Timeseries:
+    """
+    Get timeseries data with unified interface matching Rust implementation.
+
+    Args:
+        conn: Database connection
+        component_id: Component ID
+        attribute_name: Name of the attribute
+        scenario_id: Scenario ID (uses master scenario if None)
+        start_index: Start index for range queries
+        end_index: End index for range queries
+        max_points: Maximum number of points (for sampling)
 
-
-
-        timestamp = network_time_periods[period_index].timestamp
-    else:
-        # Fallback: use period_index as timestamp (matching previous behavior for compatibility)
-        timestamp = period_index
+    Returns:
+        Timeseries object with efficient array-based data
 
-
-
-
-
-
+    Raises:
+        ComponentNotFound: If component doesn't exist
+        AttributeNotFound: If attribute doesn't exist
+    """
+    # Get the attribute value
+    attr_value = get_attribute(conn, component_id, attribute_name, scenario_id)
+
+    if not attr_value.is_timeseries():
+        raise ValueError(f"Attribute '{attribute_name}' is not a timeseries")
+
+    timeseries = attr_value.as_timeseries()
+    if not timeseries:
+        raise ValueError("Failed to get timeseries data")
+
+    # Apply range filtering if requested
+    if start_index is not None and end_index is not None:
+        timeseries = timeseries.slice(start_index, end_index)
+
+    # Apply sampling if requested
+    if max_points is not None:
+        timeseries = timeseries.sample(max_points)
+
+    return timeseries
+
 
-
+def get_timeseries_metadata(
+    conn: sqlite3.Connection,
+    component_id: int,
+    attribute_name: str,
+    scenario_id: Optional[int] = None
+) -> TimeseriesMetadata:
+    """
+    Get timeseries metadata without loading the full data.
+
+    Args:
+        conn: Database connection
+        component_id: Component ID
+        attribute_name: Name of the attribute
+        scenario_id: Scenario ID (uses master scenario if None)
+
+    Returns:
+        TimeseriesMetadata with length and type information
+    """
+    # Get basic attribute info without loading full data
+    cursor = conn.cursor()
+
+    # Get network_id from component
+    cursor.execute("SELECT network_id FROM components WHERE id = ?", (component_id,))
+    result = cursor.fetchone()
+    if not result:
+        raise ComponentNotFound(component_id)
+
+    network_id = result[0]
+
+    # Get master scenario ID
+    master_scenario_id = get_master_scenario_id(conn, network_id)
+    current_scenario_id = scenario_id if scenario_id is not None else master_scenario_id
+
+    # Get timeseries metadata
+    cursor.execute(
+        """SELECT timeseries_data, data_type, unit, is_input
+           FROM component_attributes
+           WHERE component_id = ? AND attribute_name = ? AND storage_type = 'timeseries' AND scenario_id = ?""",
+        (component_id, attribute_name, current_scenario_id)
+    )
+    result = cursor.fetchone()
+
+    # Try fallback to master scenario if not found
+    if not result and current_scenario_id != master_scenario_id:
+        cursor.execute(
+            """SELECT timeseries_data, data_type, unit, is_input
+               FROM component_attributes
+               WHERE component_id = ? AND attribute_name = ? AND storage_type = 'timeseries' AND scenario_id = ?""",
+            (component_id, attribute_name, master_scenario_id)
+        )
+        result = cursor.fetchone()
+
+    if not result:
+        raise AttributeNotFound(component_id, attribute_name)
+
+    timeseries_data, data_type, unit, is_input = result
+
+    # Get length without full deserialization
+    length = get_timeseries_length_from_binary(timeseries_data)
+
+    # Get time range from network time periods
+    try:
+        from pyconvexity.models.network import get_network_time_periods
+        time_periods = get_network_time_periods(conn, network_id)
+        start_time = time_periods[0].timestamp if time_periods else 0
+        end_time = time_periods[-1].timestamp if time_periods else 0
+    except Exception:
+        start_time = 0
+        end_time = length - 1
+
+    return TimeseriesMetadata(
+        length=length,
+        start_time=start_time,
+        end_time=end_time,
+        start_index=0,
+        end_index=length,
+        data_type=data_type,
+        unit=unit,
+        is_input=is_input
+    )
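
The storage format is deliberately trivial: a bare little-endian Float32 array with no header, so the byte length alone encodes the element count (which is why get_timeseries_length_from_binary is just a division). A self-contained sketch of the same encoding, independent of pyconvexity, showing the one caveat that values round-trip at 32-bit precision:

    import struct
    from typing import List

    def encode(values: List[float]) -> bytes:
        # Raw little-endian Float32 array: 4 bytes per value, no header.
        return struct.pack(f'<{len(values)}f', *values)

    def decode(data: bytes) -> List[float]:
        if len(data) % 4 != 0:
            raise ValueError("length must be a multiple of 4")
        return list(struct.unpack(f'<{len(data) // 4}f', data))

    blob = encode([1.0, 0.1, 2.5])
    print(len(blob))     # 12 -> 3 values, recoverable without decoding
    print(decode(blob))  # [1.0, 0.10000000149011612, 2.5]

The same blob can also be produced in bulk with numpy.asarray(values, dtype='<f4').tobytes(), the usual optimization if the per-element struct.pack_into loop ever shows up in profiles.
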
pyconvexity/models/network.py
CHANGED
@@ -9,7 +9,7 @@ import sqlite3
 import json
 import logging
 from typing import Dict, Any, Optional, List
-from datetime import datetime
+from datetime import datetime, timezone
 
 from pyconvexity.core.types import (
     CreateNetworkRequest, TimePeriod, Network
@@ -105,7 +105,7 @@ def get_network_time_periods(
     network_id: int
 ) -> List[TimePeriod]:
     """
-    Get network time periods.
+    Get network time periods using optimized storage.
 
     Args:
         conn: Database connection
@@ -115,28 +115,30 @@ def get_network_time_periods(
         List of TimePeriod objects ordered by period_index
     """
     cursor = conn.execute("""
-        SELECT
+        SELECT period_count, start_timestamp, interval_seconds
         FROM network_time_periods
-        WHERE network_id = ?
-        ORDER BY period_index
+        WHERE network_id = ?
     """, (network_id,))
 
+    row = cursor.fetchone()
+    if not row:
+        return []  # No time periods defined
+
+    period_count, start_timestamp, interval_seconds = row
+
+    # Generate all time periods computationally
     periods = []
-    for
-
+    for period_index in range(period_count):
+        timestamp = start_timestamp + (period_index * interval_seconds)
 
-        #
-
-
-            timestamp = int(dt.timestamp())
-        except ValueError:
-            # Fallback: use period_index as timestamp
-            timestamp = period_index
+        # Format timestamp as string for compatibility - ALWAYS use UTC to avoid DST duplicates
+        dt = datetime.fromtimestamp(timestamp, tz=timezone.utc)
+        formatted_time = dt.strftime("%Y-%m-%d %H:%M:%S")
 
         periods.append(TimePeriod(
             timestamp=timestamp,
             period_index=period_index,
-            formatted_time=
+            formatted_time=formatted_time
        ))
 
    return periods
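
The network_time_periods table now stores a single (period_count, start_timestamp, interval_seconds) row per network instead of one row per period, and periods are regenerated arithmetically. A standalone sketch of that expansion (assuming Unix-second timestamps, as the datetime.fromtimestamp call implies):

    from datetime import datetime, timezone

    def expand_periods(period_count: int, start_timestamp: int, interval_seconds: int):
        # Regenerate every period from the compact triple; formatting in UTC
        # avoids duplicate wall-clock times at DST transitions.
        for period_index in range(period_count):
            timestamp = start_timestamp + period_index * interval_seconds
            dt = datetime.fromtimestamp(timestamp, tz=timezone.utc)
            yield period_index, timestamp, dt.strftime("%Y-%m-%d %H:%M:%S")

    # Three hourly periods starting 2030-01-01 00:00:00 UTC:
    for row in expand_periods(3, 1893456000, 3600):
        print(row)
    # (0, 1893456000, '2030-01-01 00:00:00')
    # (1, 1893459600, '2030-01-01 01:00:00')
    # (2, 1893463200, '2030-01-01 02:00:00')
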
pyconvexity/solvers/pypsa/api.py
CHANGED
@@ -109,7 +109,30 @@ def solve_network(
     )
 
     if progress_callback:
-        progress_callback(
+        progress_callback(95, "Solve completed successfully")
+
+    # Optimize database after successful solve (if solve was successful)
+    if solve_result.get("success", False):
+        try:
+            if progress_callback:
+                progress_callback(98, "Optimizing database...")
+
+            from pyconvexity.core.database import should_optimize_database, optimize_database
+
+            # Only optimize if there's significant free space (>5% threshold for post-solve)
+            if should_optimize_database(conn, free_space_threshold_percent=5.0):
+                logger.info("Running database optimization after successful solve")
+                optimization_result = optimize_database(conn)
+                logger.info(f"Database optimization completed: {optimization_result['space_reclaimed']:,} bytes reclaimed")
+            else:
+                logger.debug("Skipping database optimization - insufficient free space")
+
+        except Exception as e:
+            # Don't fail the solve if optimization fails
+            logger.warning(f"Database optimization failed (non-critical): {e}")
+
+    if progress_callback:
+        progress_callback(100, "Complete")
 
     # Return simple status if requested (for sidecar/async usage)
     # Results are now stored in database regardless of this flag
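
should_optimize_database and optimize_database come from the new pyconvexity/core/database.py (+158 lines), whose body this diff does not show. A plausible shape for them, inferred only from the call site (the threshold keyword and the 'space_reclaimed' result key); the actual implementation may differ:

    import sqlite3
    from typing import Any, Dict

    def should_optimize_database(conn: sqlite3.Connection,
                                 free_space_threshold_percent: float = 5.0) -> bool:
        # Hypothetical sketch: compare SQLite's freelist to the total page count.
        page_count = conn.execute("PRAGMA page_count").fetchone()[0]
        freelist = conn.execute("PRAGMA freelist_count").fetchone()[0]
        return page_count > 0 and (freelist / page_count) * 100.0 > free_space_threshold_percent

    def optimize_database(conn: sqlite3.Connection) -> Dict[str, Any]:
        # Hypothetical sketch: VACUUM rewrites the file; report bytes freed.
        # Note: VACUUM cannot run inside an open transaction.
        page_size = conn.execute("PRAGMA page_size").fetchone()[0]
        before = conn.execute("PRAGMA page_count").fetchone()[0] * page_size
        conn.execute("VACUUM")
        after = conn.execute("PRAGMA page_count").fetchone()[0] * page_size
        return {"space_reclaimed": before - after}

Whatever the real bodies look like, wrapping the call in try/except as the diff does is sensible: VACUUM can fail on a busy database, and the solve results are already committed by this point.
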
pyconvexity/solvers/pypsa/batch_loader.py
CHANGED

@@ -1,7 +1,6 @@
 """
 PyPSA Batch Data Loader
-
-Eliminates N+1 query patterns for improved performance.
+Simplified to always create MultiIndex timeseries for consistent multi-period optimization.
 """
 
 import logging
@@ -9,16 +8,16 @@ import pandas as pd
 import json
 from typing import Dict, Any, List, Optional
 
-
-from pyconvexity.models
+from pyconvexity.models.attributes import get_timeseries
+from pyconvexity.models import get_network_time_periods
 
 logger = logging.getLogger(__name__)
 
 
 class PyPSABatchLoader:
     """
-
-
+    Simplified batch data loader for PyPSA network construction.
+    Always creates MultiIndex timeseries for consistent multi-period optimization.
     """
 
     def __init__(self):
@@ -148,23 +147,25 @@ class PyPSABatchLoader:
         }
 
     def batch_load_component_timeseries(self, conn, component_ids: List[int], scenario_id: Optional[int]) -> Dict[int, Dict[str, pd.Series]]:
-        """Batch load all timeseries attributes
+        """Batch load all timeseries attributes - always create MultiIndex for consistency"""
         if not component_ids:
             return {}
 
         # Get network time periods for proper timestamp alignment
-
-
-
-
-
-
-
-
-
-
-
-
+        cursor = conn.execute("SELECT network_id FROM components WHERE id = ? LIMIT 1", (component_ids[0],))
+        result = cursor.fetchone()
+        if not result:
+            return {comp_id: {} for comp_id in component_ids}
+
+        network_id = result[0]
+        network_time_periods = get_network_time_periods(conn, network_id)
+        if not network_time_periods:
+            logger.warning("No time periods found for network")
+            return {comp_id: {} for comp_id in component_ids}
+
+        # Convert to timestamps and extract years
+        timestamps = [pd.Timestamp(tp.formatted_time) for tp in network_time_periods]
+        years = sorted(list(set([ts.year for ts in timestamps])))
 
         # Build a single query to get all timeseries attributes for all components
         placeholders = ','.join(['?' for _ in component_ids])
@@ -188,7 +189,7 @@
         master_id = None
         if scenario_id is not None:
             # Get master scenario ID for fallback
-            cursor = conn.execute("SELECT id FROM scenarios WHERE network_id =
+            cursor = conn.execute("SELECT id FROM scenarios WHERE network_id = ? AND is_master = 1", (network_id,))
             result = cursor.fetchone()
             if result:
                 master_id = result[0]
@@ -197,7 +198,7 @@
             scenario_filter_values = [scenario_id]
         else:
             # Get master scenario ID
-            cursor = conn.execute("SELECT id FROM scenarios WHERE network_id =
+            cursor = conn.execute("SELECT id FROM scenarios WHERE network_id = ? AND is_master = 1", (network_id,))
             result = cursor.fetchone()
             if result:
                 master_id = result[0]
@@ -208,7 +209,6 @@
         scen_placeholders = ','.join(['?' for _ in scenario_filter_values])
 
         # Single query to get all timeseries
-        # CRITICAL: Order by scenario_id to prioritize current scenario over master
         query = f"""
             SELECT component_id, attribute_name, timeseries_data, scenario_id
             FROM component_attributes
@@ -246,31 +246,24 @@
 
             # Deserialize timeseries data
             try:
-
-                if
-
-
-
-
+                timeseries = get_timeseries(conn, comp_id, attr_name, row_scenario_id)
+                if timeseries and timeseries.values:
+                    values = timeseries.values
+
+                    # Always create MultiIndex following PyPSA multi-investment tutorial format
+                    # First level: investment periods (years), Second level: timesteps
+                    multi_snapshots = []
+                    for i, ts in enumerate(timestamps[:len(values)]):
+                        multi_snapshots.append((ts.year, ts))
 
-
-
-
-                    timestamps = []
-                    for point in timeseries_points:
-                        if point.period_index < len(network_time_periods):
-                            tp = network_time_periods[point.period_index]
-                            timestamps.append(pd.Timestamp(tp.formatted_time))
-                        else:
-                            logger.warning(f"Period index {point.period_index} out of range for network time periods")
-                            timestamps.append(pd.Timestamp.now())  # Fallback
-                    component_timeseries[comp_id][attr_name] = pd.Series(values, index=timestamps)
+                    if multi_snapshots:
+                        multi_index = pd.MultiIndex.from_tuples(multi_snapshots, names=['period', 'timestep'])
+                        component_timeseries[comp_id][attr_name] = pd.Series(values, index=multi_index)
                 else:
-
-
-                    component_timeseries[comp_id][attr_name] = pd.Series(values, index=period_indices)
+                    logger.warning(f"No valid timestamps for timeseries {attr_name}")
+
             except Exception as e:
-                logger.warning(f"Failed to
+                logger.warning(f"Failed to load timeseries {attr_name} for component {comp_id}: {e}")
                 continue
 
         return component_timeseries
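
The MultiIndex the loader now builds follows PyPSA's multi-investment-period convention: level 0 is the investment period (the year), level 1 the timestep. A minimal pandas illustration of the resulting shape, using synthetic timestamps and values:

    import pandas as pd

    timestamps = list(pd.date_range("2030-01-01", periods=3, freq="h")) + \
                 list(pd.date_range("2040-01-01", periods=3, freq="h"))
    values = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]

    # (year, timestamp) tuples, as in the loader's multi_snapshots list.
    snapshots = pd.MultiIndex.from_tuples(
        [(ts.year, ts) for ts in timestamps], names=["period", "timestep"])
    series = pd.Series(values, index=snapshots)

    print(series.loc[2040])  # the 2040 investment period's three timesteps

One detail worth noting in the diff itself: timestamps[:len(values)] pairs each value with the network timestamp at the same position and silently drops trailing timestamps when a stored series is shorter than the snapshot index, keeping period years aligned from the start of the horizon.
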