pyconvexity 0.1.2__tar.gz → 0.1.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of pyconvexity might be problematic; review the file-level changes below.

Files changed (55)
  1. {pyconvexity-0.1.2 → pyconvexity-0.1.4}/PKG-INFO +5 -2
  2. {pyconvexity-0.1.2 → pyconvexity-0.1.4}/pyproject.toml +7 -3
  3. {pyconvexity-0.1.2 → pyconvexity-0.1.4}/src/pyconvexity/__init__.py +57 -8
  4. pyconvexity-0.1.4/src/pyconvexity/_version.py +1 -0
  5. {pyconvexity-0.1.2 → pyconvexity-0.1.4}/src/pyconvexity/core/__init__.py +0 -2
  6. {pyconvexity-0.1.2 → pyconvexity-0.1.4}/src/pyconvexity/core/database.py +158 -0
  7. {pyconvexity-0.1.2 → pyconvexity-0.1.4}/src/pyconvexity/core/types.py +105 -18
  8. pyconvexity-0.1.4/src/pyconvexity/data/README.md +101 -0
  9. pyconvexity-0.1.4/src/pyconvexity/data/__init__.py +18 -0
  10. pyconvexity-0.1.4/src/pyconvexity/data/__pycache__/__init__.cpython-313.pyc +0 -0
  11. pyconvexity-0.1.4/src/pyconvexity/data/loaders/__init__.py +3 -0
  12. pyconvexity-0.1.4/src/pyconvexity/data/loaders/__pycache__/__init__.cpython-313.pyc +0 -0
  13. pyconvexity-0.1.4/src/pyconvexity/data/loaders/__pycache__/cache.cpython-313.pyc +0 -0
  14. pyconvexity-0.1.4/src/pyconvexity/data/loaders/cache.py +212 -0
  15. {pyconvexity-0.1.2 → pyconvexity-0.1.4}/src/pyconvexity/data/schema/01_core_schema.sql +12 -12
  16. pyconvexity-0.1.4/src/pyconvexity/data/schema/02_data_metadata.sql +250 -0
  17. pyconvexity-0.1.4/src/pyconvexity/data/sources/__init__.py +5 -0
  18. pyconvexity-0.1.4/src/pyconvexity/data/sources/__pycache__/__init__.cpython-313.pyc +0 -0
  19. pyconvexity-0.1.4/src/pyconvexity/data/sources/__pycache__/gem.cpython-313.pyc +0 -0
  20. pyconvexity-0.1.4/src/pyconvexity/data/sources/gem.py +412 -0
  21. pyconvexity-0.1.4/src/pyconvexity/io/__init__.py +32 -0
  22. pyconvexity-0.1.4/src/pyconvexity/io/excel_exporter.py +1012 -0
  23. pyconvexity-0.1.4/src/pyconvexity/io/excel_importer.py +1109 -0
  24. pyconvexity-0.1.4/src/pyconvexity/io/netcdf_exporter.py +192 -0
  25. pyconvexity-0.1.4/src/pyconvexity/io/netcdf_importer.py +1602 -0
  26. {pyconvexity-0.1.2 → pyconvexity-0.1.4}/src/pyconvexity/models/__init__.py +7 -0
  27. {pyconvexity-0.1.2 → pyconvexity-0.1.4}/src/pyconvexity/models/attributes.py +209 -72
  28. {pyconvexity-0.1.2 → pyconvexity-0.1.4}/src/pyconvexity/models/components.py +3 -0
  29. {pyconvexity-0.1.2 → pyconvexity-0.1.4}/src/pyconvexity/models/network.py +17 -15
  30. pyconvexity-0.1.4/src/pyconvexity/models/scenarios.py +177 -0
  31. pyconvexity-0.1.4/src/pyconvexity/solvers/__init__.py +29 -0
  32. pyconvexity-0.1.4/src/pyconvexity/solvers/pypsa/__init__.py +24 -0
  33. pyconvexity-0.1.4/src/pyconvexity/solvers/pypsa/api.py +421 -0
  34. pyconvexity-0.1.4/src/pyconvexity/solvers/pypsa/batch_loader.py +304 -0
  35. pyconvexity-0.1.4/src/pyconvexity/solvers/pypsa/builder.py +566 -0
  36. pyconvexity-0.1.4/src/pyconvexity/solvers/pypsa/constraints.py +321 -0
  37. pyconvexity-0.1.4/src/pyconvexity/solvers/pypsa/solver.py +1106 -0
  38. pyconvexity-0.1.4/src/pyconvexity/solvers/pypsa/storage.py +1574 -0
  39. pyconvexity-0.1.4/src/pyconvexity/timeseries.py +327 -0
  40. {pyconvexity-0.1.2 → pyconvexity-0.1.4}/src/pyconvexity/validation/rules.py +2 -2
  41. {pyconvexity-0.1.2 → pyconvexity-0.1.4}/src/pyconvexity.egg-info/PKG-INFO +5 -2
  42. pyconvexity-0.1.4/src/pyconvexity.egg-info/SOURCES.txt +50 -0
  43. {pyconvexity-0.1.2 → pyconvexity-0.1.4}/src/pyconvexity.egg-info/requires.txt +5 -1
  44. pyconvexity-0.1.2/src/pyconvexity/_version.py +0 -2
  45. pyconvexity-0.1.2/src/pyconvexity/data/schema/02_data_metadata.sql +0 -554
  46. pyconvexity-0.1.2/src/pyconvexity.egg-info/SOURCES.txt +0 -24
  47. {pyconvexity-0.1.2 → pyconvexity-0.1.4}/README.md +0 -0
  48. {pyconvexity-0.1.2 → pyconvexity-0.1.4}/setup.cfg +0 -0
  49. {pyconvexity-0.1.2 → pyconvexity-0.1.4}/src/pyconvexity/core/errors.py +0 -0
  50. {pyconvexity-0.1.2 → pyconvexity-0.1.4}/src/pyconvexity/data/schema/03_validation_data.sql +0 -0
  51. {pyconvexity-0.1.2 → pyconvexity-0.1.4}/src/pyconvexity/data/schema/04_scenario_schema.sql +0 -0
  52. {pyconvexity-0.1.2 → pyconvexity-0.1.4}/src/pyconvexity/validation/__init__.py +0 -0
  53. {pyconvexity-0.1.2 → pyconvexity-0.1.4}/src/pyconvexity.egg-info/dependency_links.txt +0 -0
  54. {pyconvexity-0.1.2 → pyconvexity-0.1.4}/src/pyconvexity.egg-info/top_level.txt +0 -0
  55. {pyconvexity-0.1.2 → pyconvexity-0.1.4}/tests/test_core_types.py +0 -0
--- pyconvexity-0.1.2/PKG-INFO
+++ pyconvexity-0.1.4/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pyconvexity
-Version: 0.1.2
+Version: 0.1.4
 Summary: Python library for energy system modeling and optimization with PyPSA
 Author-email: Convexity Team <info@convexity.com>
 License: MIT
@@ -32,6 +32,9 @@ Requires-Dist: xlsxwriter>=3.0.0; extra == "excel"
 Provides-Extra: netcdf
 Requires-Dist: netcdf4>=1.6.0; extra == "netcdf"
 Requires-Dist: xarray>=2022.3.0; extra == "netcdf"
+Provides-Extra: data
+Requires-Dist: country-converter>=1.0.0; extra == "data"
+Requires-Dist: pyyaml>=6.0.0; extra == "data"
 Provides-Extra: dev
 Requires-Dist: pytest>=7.0.0; extra == "dev"
 Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
@@ -40,7 +43,7 @@ Requires-Dist: isort>=5.10.0; extra == "dev"
 Requires-Dist: mypy>=1.0.0; extra == "dev"
 Requires-Dist: pre-commit>=2.20.0; extra == "dev"
 Provides-Extra: all
-Requires-Dist: pyconvexity[excel,netcdf,pypsa]; extra == "all"
+Requires-Dist: pyconvexity[data,excel,netcdf,pypsa]; extra == "all"
 
 # PyConvexity
 
--- pyconvexity-0.1.2/pyproject.toml
+++ pyconvexity-0.1.4/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "pyconvexity"
-version = "0.1.2"
+version = "0.1.4"
 description = "Python library for energy system modeling and optimization with PyPSA"
 readme = "README.md"
 license = {text = "MIT"}
@@ -44,6 +44,10 @@ netcdf = [
     "netcdf4>=1.6.0",
     "xarray>=2022.3.0",
 ]
+data = [
+    "country-converter>=1.0.0",
+    "pyyaml>=6.0.0",
+]
 dev = [
     "pytest>=7.0.0",
     "pytest-cov>=4.0.0",
@@ -53,7 +57,7 @@ dev = [
     "pre-commit>=2.20.0",
 ]
 all = [
-    "pyconvexity[pypsa,excel,netcdf]",
+    "pyconvexity[pypsa,excel,netcdf,data]",
 ]
 
 [project.urls]
@@ -77,7 +81,7 @@ profile = "black"
 line_length = 100
 
 [tool.mypy]
-python_version = "3.9"
+python_version = "0.1.4"
 warn_return_any = true
 warn_unused_configs = true
 disallow_untyped_defs = true
--- pyconvexity-0.1.2/src/pyconvexity/__init__.py
+++ pyconvexity-0.1.4/src/pyconvexity/__init__.py
@@ -21,7 +21,8 @@ from pyconvexity.core.errors import (
 
 from pyconvexity.core.types import (
     StaticValue,
-    TimeseriesPoint,
+    Timeseries,
+    TimeseriesMetadata,
     Component,
     Network,
     CreateNetworkRequest,
@@ -33,6 +34,12 @@ from pyconvexity.core.database import (
     database_context,
     open_connection,
     validate_database,
+    # Database maintenance functions
+    vacuum_database,
+    analyze_database,
+    optimize_database,
+    get_database_size_info,
+    should_optimize_database,
 )
 
 # Import main API functions
@@ -48,12 +55,21 @@ from pyconvexity.models import (
     create_network, get_network_info, get_network_time_periods, list_networks,
     create_carrier, list_carriers, get_network_config, set_network_config,
     get_master_scenario_id, resolve_scenario_id,
+
+    # Scenario operations
+    create_scenario, list_scenarios, get_scenario, delete_scenario,
 )
 
 from pyconvexity.validation import (
     get_validation_rule, list_validation_rules, validate_timeseries_alignment
 )
 
+# High-level timeseries API - recommended for new code
+from pyconvexity.timeseries import (
+    get_timeseries, set_timeseries, get_timeseries_metadata,
+    get_multiple_timeseries, timeseries_to_numpy, numpy_to_timeseries
+)
+
 # High-level API functions
 __all__ = [
     # Version info
@@ -62,7 +78,8 @@ __all__ = [
 
     # Core types
     "StaticValue",
-    "TimeseriesPoint",
+    "Timeseries",
+    "TimeseriesMetadata",
     "Component",
     "Network",
     "CreateNetworkRequest",
@@ -74,6 +91,13 @@ __all__ = [
     "open_connection",
     "validate_database",
 
+    # Database maintenance
+    "vacuum_database",
+    "analyze_database",
+    "optimize_database",
+    "get_database_size_info",
+    "should_optimize_database",
+
     # Exceptions
     "PyConvexityError",
     "DatabaseError",
@@ -93,28 +117,53 @@ __all__ = [
     "create_carrier", "list_carriers", "get_network_config", "set_network_config",
     "get_master_scenario_id", "resolve_scenario_id",
 
+    # Scenario operations
+    "create_scenario", "list_scenarios", "get_scenario", "delete_scenario",
+
     # Validation
     "get_validation_rule", "list_validation_rules", "validate_timeseries_alignment",
+
+    # High-level timeseries API
+    "get_timeseries", "set_timeseries", "get_timeseries_metadata",
+    "get_multiple_timeseries", "timeseries_to_numpy", "numpy_to_timeseries",
 ]
 
+# Data module imports
+try:
+    from pyconvexity import data
+    __all__.append("data")
+except ImportError:
+    # Data dependencies not available
+    pass
+
 # Optional imports with graceful fallbacks
 try:
-    from pyconvexity.solvers.pypsa import PyPSASolver
-    __all__.append("PyPSASolver")
+    from pyconvexity.solvers.pypsa import (
+        solve_network, build_pypsa_network, solve_pypsa_network,
+        load_network_components, apply_constraints, store_solve_results
+    )
+    __all__.extend([
+        "solve_network", "build_pypsa_network", "solve_pypsa_network",
+        "load_network_components", "apply_constraints", "store_solve_results"
+    ])
 except ImportError:
     # PyPSA not available
     pass
 
+# Excel I/O functionality
 try:
-    from pyconvexity.io.excel import ExcelImporter, ExcelExporter
-    __all__.extend(["ExcelImporter", "ExcelExporter"])
+    from pyconvexity.io import ExcelModelExporter, ExcelModelImporter
+    __all__.extend([
+        "ExcelModelExporter", "ExcelModelImporter"
+    ])
 except ImportError:
     # Excel dependencies not available
     pass
 
+
 try:
-    from pyconvexity.io.netcdf import NetCDFImporter, NetCDFExporter
-    __all__.extend(["NetCDFImporter", "NetCDFExporter"])
+    from pyconvexity.io import NetCDFModelExporter, NetCDFModelImporter
+    __all__.extend(["NetCDFModelExporter", "NetCDFModelImporter"])
 except ImportError:
     # NetCDF dependencies not available
     pass
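
The import changes above mean the top-level namespace now advertises optional features by appending to `__all__` at import time, so membership in `__all__` doubles as a capability check. A minimal sketch of probing for the new feature sets, using only names that appear in the diff above (whether each probe succeeds depends on which extras are installed):

```python
# Feature probe: optional APIs land in __all__ only when their extras import
# cleanly. Names are taken from the diff above; nothing beyond their presence
# or absence in __all__ is assumed here.
import pyconvexity as px

print(px.__version__)  # "0.1.4", per _version.py below

for name in ("solve_network",          # pypsa extra
             "ExcelModelExporter",     # excel extra
             "NetCDFModelImporter",    # netcdf extra
             "data"):                  # data extra (new in 0.1.4)
    status = "available" if name in px.__all__ else "extra not installed"
    print(f"{name}: {status}")
```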
--- /dev/null
+++ pyconvexity-0.1.4/src/pyconvexity/_version.py
@@ -0,0 +1 @@
+__version__ = "0.1.4"
--- pyconvexity-0.1.2/src/pyconvexity/core/__init__.py
+++ pyconvexity-0.1.4/src/pyconvexity/core/__init__.py
@@ -16,7 +16,6 @@ from pyconvexity.core.errors import (
 
 from pyconvexity.core.types import (
     StaticValue,
-    TimeseriesPoint,
     AttributeValue,
     ValidationRule,
     Component,
@@ -46,7 +45,6 @@ __all__ = [
 
     # Types
     "StaticValue",
-    "TimeseriesPoint",
    "AttributeValue",
    "ValidationRule",
    "Component",
--- pyconvexity-0.1.2/src/pyconvexity/core/database.py
+++ pyconvexity-0.1.4/src/pyconvexity/core/database.py
@@ -90,6 +90,13 @@ def open_connection(db_path: str, read_only: bool = False) -> sqlite3.Connection
     conn.row_factory = sqlite3.Row  # Enable column access by name
     conn.execute("PRAGMA foreign_keys = ON")  # Enable foreign key constraints
 
+    # Configure for concurrent access (WAL mode for better concurrency)
+    if not read_only:
+        conn.execute("PRAGMA journal_mode = WAL")  # Write-Ahead Logging for concurrency
+        conn.execute("PRAGMA synchronous = NORMAL")  # Faster than FULL, still safe
+        conn.execute("PRAGMA wal_autocheckpoint = 1000")  # Less frequent checkpoints
+        conn.execute("PRAGMA temp_store = MEMORY")  # Faster temporary operations
+
     # Set reasonable timeouts
     conn.execute("PRAGMA busy_timeout = 30000")  # 30 second timeout
 
@@ -183,6 +190,13 @@ def create_database_with_schema(db_path: str) -> None:
     # Enable foreign key constraints
     conn.execute("PRAGMA foreign_keys = ON")
 
+    # Configure for concurrent access
+    conn.execute("PRAGMA journal_mode = WAL")
+    conn.execute("PRAGMA synchronous = NORMAL")
+    conn.execute("PRAGMA wal_autocheckpoint = 1000")
+    conn.execute("PRAGMA temp_store = MEMORY")
+    conn.execute("PRAGMA busy_timeout = 30000")
+
     # Execute schemas in order
     for filename in schema_files:
         schema_file = schema_dir / filename
@@ -317,3 +331,147 @@ def check_database_compatibility(conn: sqlite3.Connection) -> dict:
         result["warnings"].append("No version information found in database")
 
     return result
+
+
+# ============================================================================
+# DATABASE MAINTENANCE FUNCTIONS
+# ============================================================================
+
+def vacuum_database(conn: sqlite3.Connection) -> None:
+    """
+    Run VACUUM to reclaim database space and defragment.
+
+    VACUUM rebuilds the database file, repacking it into a minimal amount of disk space.
+    This is useful after deleting large amounts of data or after many INSERT/UPDATE/DELETE operations.
+
+    Args:
+        conn: Database connection
+
+    Note:
+        VACUUM can take a significant amount of time on large databases and requires
+        temporary disk space up to twice the size of the original database.
+    """
+    import logging
+    logger = logging.getLogger(__name__)
+
+    logger.info("Running VACUUM to reclaim database space and defragment")
+    conn.execute("VACUUM")
+    logger.info("VACUUM completed successfully")
+
+
+def analyze_database(conn: sqlite3.Connection) -> None:
+    """
+    Run ANALYZE to update query planner statistics.
+
+    ANALYZE gathers statistics about the contents of tables and indices.
+    These statistics are used by the query planner to help make better choices
+    about how to perform queries.
+
+    Args:
+        conn: Database connection
+    """
+    import logging
+    logger = logging.getLogger(__name__)
+
+    logger.info("Running ANALYZE to update query planner statistics")
+    conn.execute("ANALYZE")
+    logger.info("ANALYZE completed successfully")
+
+
+def optimize_database(conn: sqlite3.Connection) -> dict:
+    """
+    Run complete database optimization (VACUUM + ANALYZE).
+
+    This performs both VACUUM and ANALYZE operations in the correct order:
+    1. VACUUM first to reclaim space and defragment
+    2. ANALYZE to update statistics with the new layout
+
+    Args:
+        conn: Database connection
+
+    Returns:
+        Dictionary with optimization results including before/after size information
+    """
+    import logging
+    import time
+    logger = logging.getLogger(__name__)
+
+    logger.info("Running database optimization (VACUUM + ANALYZE)")
+    start_time = time.time()
+
+    # Get size before optimization
+    size_before = get_database_size_info(conn)
+
+    # VACUUM first to reclaim space and defragment
+    vacuum_database(conn)
+
+    # Then ANALYZE to update statistics with the new layout
+    analyze_database(conn)
+
+    # Get size after optimization
+    size_after = get_database_size_info(conn)
+
+    optimization_time = time.time() - start_time
+
+    result = {
+        "success": True,
+        "optimization_time": optimization_time,
+        "size_before": size_before,
+        "size_after": size_after,
+        "space_reclaimed": size_before["total_size"] - size_after["total_size"],
+        "free_pages_reclaimed": size_before["free_pages"] - size_after["free_pages"]
+    }
+
+    logger.info(f"Database optimization completed in {optimization_time:.2f} seconds")
+    logger.info(f"Space reclaimed: {result['space_reclaimed']:,} bytes ({result['space_reclaimed']/1024/1024:.1f} MB)")
+
+    return result
+
+
+def get_database_size_info(conn: sqlite3.Connection) -> dict:
+    """
+    Get detailed information about database size and space usage.
+
+    Args:
+        conn: Database connection
+
+    Returns:
+        Dictionary with size information including total, used, and free space
+    """
+    # Get page count, page size, and freelist count
+    page_count = conn.execute("PRAGMA page_count").fetchone()[0]
+    page_size = conn.execute("PRAGMA page_size").fetchone()[0]
+    freelist_count = conn.execute("PRAGMA freelist_count").fetchone()[0]
+
+    total_size = page_count * page_size
+    free_size = freelist_count * page_size
+    used_size = total_size - free_size
+
+    return {
+        "total_size": total_size,
+        "used_size": used_size,
+        "free_size": free_size,
+        "page_count": page_count,
+        "page_size": page_size,
+        "free_pages": freelist_count,
+        "utilization_percent": (used_size / total_size * 100) if total_size > 0 else 0
+    }
+
+
+def should_optimize_database(conn: sqlite3.Connection, free_space_threshold_percent: float = 10.0) -> bool:
+    """
+    Check if database would benefit from optimization based on free space.
+
+    Args:
+        conn: Database connection
+        free_space_threshold_percent: Threshold percentage of free space to trigger optimization
+
+    Returns:
+        True if optimization is recommended, False otherwise
+    """
+    size_info = get_database_size_info(conn)
+
+    if size_info["total_size"] == 0:
+        return False
+
+    free_space_percent = (size_info["free_size"] / size_info["total_size"]) * 100
+    return free_space_percent >= free_space_threshold_percent
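
The maintenance helpers added above compose naturally: `should_optimize_database` is a cheap freelist check, while `optimize_database` wraps the expensive VACUUM + ANALYZE pass. A minimal sketch of a periodic maintenance call, using only the signatures shown in the diff; the database path is a placeholder, and it assumes `database_context` yields a `sqlite3.Connection` (as its position next to `open_connection` suggests):

```python
import pyconvexity as px

# Placeholder path; any pyconvexity SQLite database would work here.
with px.database_context("my_model.db") as conn:
    info = px.get_database_size_info(conn)
    print(f"{info['total_size']:,} bytes total, "
          f"{info['utilization_percent']:.1f}% utilized")

    # Only pay the VACUUM cost when >= 10% of pages sit on the freelist.
    if px.should_optimize_database(conn, free_space_threshold_percent=10.0):
        result = px.optimize_database(conn)  # VACUUM, then ANALYZE
        print(f"Reclaimed {result['space_reclaimed']:,} bytes "
              f"in {result['optimization_time']:.2f} s")
```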
--- pyconvexity-0.1.2/src/pyconvexity/core/types.py
+++ pyconvexity-0.1.4/src/pyconvexity/core/types.py
@@ -38,8 +38,14 @@ class StaticValue:
         Rust stores: 123.45, 42, true, "hello"
         Not: {"Float": 123.45}, {"Integer": 42}, etc.
         """
+        import math
+
         if "Float" in self.data:
-            return json.dumps(self.data["Float"])
+            float_val = self.data["Float"]
+            # Ensure finite values only
+            if not math.isfinite(float_val):
+                raise ValueError(f"Cannot serialize non-finite float value: {float_val}")
+            return json.dumps(float_val)
         elif "Integer" in self.data:
             return json.dumps(self.data["Integer"])
         elif "Boolean" in self.data:
@@ -100,21 +106,94 @@ class StaticValue:
 
 
 @dataclass
-class TimeseriesPoint:
+class Timeseries:
     """
-    A single point in a time series.
+    Efficient timeseries data structure matching the new Rust implementation.
 
-    Mirrors Rust TimeseriesPoint with exact field matching.
+    Stores values as a flat array for maximum performance, matching the
+    unified Rust Timeseries struct.
     """
-    timestamp: int
-    value: float
-    period_index: int
+    values: List[float]
+    length: int
+    start_index: int
+    data_type: str
+    unit: Optional[str]
+    is_input: bool
 
     def __post_init__(self):
-        # Ensure types are correct
-        self.timestamp = int(self.timestamp)
-        self.value = float(self.value)
-        self.period_index = int(self.period_index)
+        # Ensure length matches values array
+        self.length = len(self.values)
+        # Ensure all values are float32-compatible
+        self.values = [float(v) for v in self.values]
+
+    def get_value(self, index: int) -> Optional[float]:
+        """Get value at specific index."""
+        if 0 <= index < len(self.values):
+            return self.values[index]
+        return None
+
+    def get_range(self, start: int, end: int) -> List[float]:
+        """Get a range of values efficiently."""
+        end = min(end, len(self.values))
+        start = min(start, end)
+        return self.values[start:end]
+
+    def sample(self, max_points: int) -> 'Timeseries':
+        """Apply sampling if the timeseries is too large."""
+        if len(self.values) <= max_points:
+            return self
+
+        step = len(self.values) // max_points
+        sampled_values = []
+
+        for i in range(0, len(self.values), max(1, step)):
+            sampled_values.append(self.values[i])
+
+        # Always include the last point if not already included
+        if self.values and sampled_values[-1] != self.values[-1]:
+            sampled_values.append(self.values[-1])
+
+        return Timeseries(
+            values=sampled_values,
+            length=len(sampled_values),
+            start_index=self.start_index,
+            data_type=self.data_type,
+            unit=self.unit,
+            is_input=self.is_input
+        )
+
+    def slice(self, start_index: int, end_index: int) -> 'Timeseries':
+        """Apply range filtering."""
+        start = max(0, start_index - self.start_index)
+        end = max(0, end_index - self.start_index)
+        end = min(end, len(self.values))
+        start = min(start, end)
+
+        return Timeseries(
+            values=self.values[start:end],
+            length=end - start,
+            start_index=self.start_index + start,
+            data_type=self.data_type,
+            unit=self.unit,
+            is_input=self.is_input
+        )
+
+
+@dataclass
+class TimeseriesMetadata:
+    """
+    Metadata about a timeseries without loading the full data.
+
+    Mirrors Rust TimeseriesMetadata struct.
+    """
+    length: int
+    start_time: int
+    end_time: int
+    start_index: int
+    end_index: int
+    data_type: str
+    unit: Optional[str]
+    is_input: bool
 
 
 @dataclass
@@ -168,21 +247,22 @@ class AttributeValue:
     """
     Represents either a static value or timeseries data for a component attribute.
 
+    Uses efficient Timeseries format for optimal performance.
     Mirrors Rust AttributeValue enum.
     """
 
-    def __init__(self, value: Union[StaticValue, List[TimeseriesPoint]]):
+    def __init__(self, value: Union[StaticValue, Timeseries]):
         if isinstance(value, StaticValue):
             self.variant = "Static"
             self.static_value = value
             self.timeseries_value = None
-        elif isinstance(value, list) and all(isinstance(p, TimeseriesPoint) for p in value):
+        elif isinstance(value, Timeseries):
            self.variant = "Timeseries"
            self.static_value = None
            self.timeseries_value = value
        else:
            raise ValueError(
-                f"AttributeValue must be StaticValue or List[TimeseriesPoint], got {type(value)}"
+                f"AttributeValue must be StaticValue or Timeseries, got {type(value)}"
            )
 
     @classmethod
@@ -191,9 +271,10 @@ class AttributeValue:
         return cls(value)
 
     @classmethod
-    def timeseries(cls, points: List[TimeseriesPoint]) -> 'AttributeValue':
-        """Create a timeseries attribute value"""
-        return cls(points)
+    def timeseries(cls, timeseries: Timeseries) -> 'AttributeValue':
+        """Create a timeseries attribute value (new format)"""
+        return cls(timeseries)
+
 
     def is_static(self) -> bool:
         """Check if this is a static value"""
@@ -203,11 +284,17 @@ class AttributeValue:
         """Check if this is a timeseries value"""
         return self.variant == "Timeseries"
 
+    def as_timeseries(self) -> Optional[Timeseries]:
+        """Get the timeseries data in new format"""
+        return self.timeseries_value if self.is_timeseries() else None
+
+
     def __repr__(self) -> str:
         if self.is_static():
             return f"AttributeValue.static({self.static_value})"
         else:
-            return f"AttributeValue.timeseries({len(self.timeseries_value)} points)"
+            length = len(self.timeseries_value.values) if self.timeseries_value else 0
+            return f"AttributeValue.timeseries({length} points)"
 
 
 @dataclass
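
Since the new `Timeseries` and `AttributeValue` definitions appear in full above, their behavior can be sketched directly from the diff. The `data_type` string below is illustrative; the diff does not fix its vocabulary:

```python
from pyconvexity.core.types import AttributeValue, Timeseries

ts = Timeseries(
    values=[0.0, 0.4, 0.9, 1.0, 0.7, 0.2],  # e.g. a dispatch profile in MW
    length=0,            # ignored: __post_init__ recomputes it from values
    start_index=0,
    data_type="float",   # illustrative tag; not specified by the diff
    unit="MW",
    is_input=True,
)

assert ts.length == 6
assert ts.get_value(2) == 0.9 and ts.get_value(99) is None

window = ts.slice(2, 5)   # values [0.9, 1.0, 0.7], start_index becomes 2
smaller = ts.sample(3)    # step 2 keeps indices 0, 2, 4, then appends the
                          # final 0.2 -> 4 points, not exactly max_points

attr = AttributeValue.timeseries(ts)
assert attr.is_timeseries() and attr.as_timeseries() is ts
print(attr)               # AttributeValue.timeseries(6 points)
```

Note that replacing the per-point `TimeseriesPoint` list with one flat `values` array moves timestamp bookkeeping into `start_index`/`length`, which is what makes the `slice` and `sample` operations cheap.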
--- /dev/null
+++ pyconvexity-0.1.4/src/pyconvexity/data/README.md
@@ -0,0 +1,101 @@
+# PyConvexity Data Module
+
+The `pyconvexity.data` module provides functions for loading external energy data and integrating it with PyConvexity models. This is a simple, expert-friendly toolbox for working with real-world energy data.
+
+## Installation
+
+Install PyConvexity with data dependencies:
+
+```bash
+pip install pyconvexity[data]
+```
+
+## Current Data Sources
+
+### Global Energy Monitor (GEM)
+
+Load power plant data from GEM's Global Integrated Power dataset.
+
+**Setup:**
+1. Download the GEM Excel file: `Global-Integrated-Power-August-2025.xlsx`
+2. Place it in a `data/raw/global-energy-monitor/` directory, or set the path manually
+
+**Usage:**
+
+```python
+import pyconvexity as px
+
+# Load generators for a specific country
+generators = px.data.get_generators_from_gem(
+    country="USA",                                  # ISO 3-letter country code
+    technology_types=["solar", "wind", "nuclear"],  # Optional filter
+    min_capacity_mw=100.0                           # Optional minimum capacity
+)
+
+# Create a network and add generators
+px.create_database_with_schema("my_model.db")
+
+with px.database_context("my_model.db") as conn:
+    network_id = px.create_network(conn, network_req)
+
+    # Create carriers
+    carriers = {}
+    for carrier_name in generators['carrier'].unique():
+        carriers[carrier_name] = px.create_carrier(conn, network_id, carrier_name)
+
+    # Add generators to network
+    generator_ids = px.data.add_gem_generators_to_network(
+        conn, network_id, generators, carrier_mapping=carriers
+    )
+```
+
+## Data Output Format
+
+The `get_generators_from_gem()` function returns a pandas DataFrame with these columns:
+
+- `plant_name`: Name of the power plant
+- `country_iso_3`: ISO 3-letter country code
+- `category`: Energy category (nuclear, thermal, renewables, storage, etc.)
+- `carrier`: Energy carrier (coal, gas, solar, wind, nuclear, etc.)
+- `type`: Technology type (subcritical, combined-cycle, photovoltaic, etc.)
+- `capacity_mw`: Capacity in megawatts
+- `start_year`: Year the plant started operation
+- `latitude`: Latitude coordinate
+- `longitude`: Longitude coordinate
+
+## Technology Mapping
+
+GEM technologies are automatically mapped to a standardized schema:
+
+- **Nuclear**: pressurized-water-reactor, boiling-water-reactor, small-modular-reactor
+- **Thermal**: subcritical, supercritical, combined-cycle, gas-turbine
+- **Renewables**: photovoltaic, thermal (solar), onshore/offshore (wind), run-of-river (hydro)
+- **Storage**: lithium-ion (battery), pumped-hydro
+- **Bioenergy**: biomass, biogas
+
+## Caching
+
+Data is automatically cached for 7 days to improve performance. You can:
+
+```python
+# Disable caching
+generators = px.data.get_generators_from_gem(country="USA", use_cache=False)
+
+# Clear cache
+cache = px.data.DataCache()
+cache.clear_cache('gem_generators')
+```
+
+## Examples
+
+See `examples/gem_data_example.py` for a complete working example.
+
+## Future Data Sources
+
+The framework is designed to be extensible. Planned additions include:
+
+- IRENA Global Energy Atlas (renewable resource data)
+- World Bank energy statistics
+- IEA World Energy Outlook data
+- OpenStreetMap transmission infrastructure
+- NASA weather data for renewable profiles
--- /dev/null
+++ pyconvexity-0.1.4/src/pyconvexity/data/__init__.py
@@ -0,0 +1,18 @@
+"""
+PyConvexity Data Module
+
+Provides functions for loading external energy data and integrating it with PyConvexity models.
+This module offers a simple, expert-friendly toolbox for working with real-world energy data.
+"""
+
+from .sources.gem import get_generators_from_gem, add_gem_generators_to_network
+from .loaders.cache import DataCache
+
+__all__ = [
+    # GEM (Global Energy Monitor) functions
+    "get_generators_from_gem",
+    "add_gem_generators_to_network",
+
+    # Caching utilities
+    "DataCache",
+]