pyconvexity 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

pyconvexity/__init__.py CHANGED
@@ -21,7 +21,8 @@ from pyconvexity.core.errors import (
 
 from pyconvexity.core.types import (
     StaticValue,
-    TimeseriesPoint,
+    Timeseries,
+    TimeseriesMetadata,
     Component,
     Network,
     CreateNetworkRequest,
@@ -33,6 +34,12 @@ from pyconvexity.core.database import (
     database_context,
     open_connection,
     validate_database,
+    # Database maintenance functions
+    vacuum_database,
+    analyze_database,
+    optimize_database,
+    get_database_size_info,
+    should_optimize_database,
 )
 
 # Import main API functions
@@ -57,6 +64,12 @@ from pyconvexity.validation import (
     get_validation_rule, list_validation_rules, validate_timeseries_alignment
 )
 
+# High-level timeseries API - recommended for new code
+from pyconvexity.timeseries import (
+    get_timeseries, set_timeseries, get_timeseries_metadata,
+    get_multiple_timeseries, timeseries_to_numpy, numpy_to_timeseries
+)
+
 # High-level API functions
 __all__ = [
     # Version info
@@ -65,7 +78,8 @@ __all__ = [
 
     # Core types
     "StaticValue",
-    "TimeseriesPoint",
+    "Timeseries",
+    "TimeseriesMetadata",
     "Component",
     "Network",
     "CreateNetworkRequest",
@@ -77,6 +91,13 @@ __all__ = [
     "open_connection",
     "validate_database",
 
+    # Database maintenance
+    "vacuum_database",
+    "analyze_database",
+    "optimize_database",
+    "get_database_size_info",
+    "should_optimize_database",
+
     # Exceptions
     "PyConvexityError",
     "DatabaseError",
@@ -101,6 +122,10 @@ __all__ = [
 
     # Validation
    "get_validation_rule", "list_validation_rules", "validate_timeseries_alignment",
+
+    # High-level timeseries API
+    "get_timeseries", "set_timeseries", "get_timeseries_metadata",
+    "get_multiple_timeseries", "timeseries_to_numpy", "numpy_to_timeseries",
 ]
 
 # Data module imports
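
TimeseriesPoint is no longer exported, so code that built attribute data point by point must switch to the flat-array Timeseries type added later in this diff. A minimal construction sketch using only the dataclass fields shown below; the data_type and unit strings are illustrative placeholders, not values confirmed by the package:

# Sketch: field names come from the Timeseries dataclass in core/types.py;
# "float" and "MW" are assumed placeholder values.
from pyconvexity import Timeseries

ts = Timeseries(
    values=[0.0, 1.5, 3.0, 4.5],  # flat array replaces a list of TimeseriesPoint
    length=4,                     # recomputed from values in __post_init__
    start_index=0,
    data_type="float",            # assumed label
    unit="MW",                    # assumed unit
    is_input=True,
)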
pyconvexity/_version.py CHANGED
@@ -1,2 +1 @@
-# This file is automatically updated by GitHub Actions during release
-__version__ = "0.1.2" # Default version for local development
+__version__ = "0.1.4"
@@ -16,7 +16,6 @@ from pyconvexity.core.errors import (
 
 from pyconvexity.core.types import (
     StaticValue,
-    TimeseriesPoint,
     AttributeValue,
     ValidationRule,
     Component,
@@ -46,7 +45,6 @@ __all__ = [
 
     # Types
     "StaticValue",
-    "TimeseriesPoint",
    "AttributeValue",
     "ValidationRule",
     "Component",
@@ -90,6 +90,13 @@ def open_connection(db_path: str, read_only: bool = False) -> sqlite3.Connection
     conn.row_factory = sqlite3.Row # Enable column access by name
     conn.execute("PRAGMA foreign_keys = ON") # Enable foreign key constraints
 
+    # Configure for concurrent access (WAL mode for better concurrency)
+    if not read_only:
+        conn.execute("PRAGMA journal_mode = WAL") # Write-Ahead Logging for concurrency
+        conn.execute("PRAGMA synchronous = NORMAL") # Faster than FULL, still safe
+        conn.execute("PRAGMA wal_autocheckpoint = 1000") # Less frequent checkpoints
+        conn.execute("PRAGMA temp_store = MEMORY") # Faster temporary operations
+
     # Set reasonable timeouts
     conn.execute("PRAGMA busy_timeout = 30000") # 30 second timeout
 
@@ -183,6 +190,13 @@ def create_database_with_schema(db_path: str) -> None:
     # Enable foreign key constraints
     conn.execute("PRAGMA foreign_keys = ON")
 
+    # Configure for concurrent access
+    conn.execute("PRAGMA journal_mode = WAL")
+    conn.execute("PRAGMA synchronous = NORMAL")
+    conn.execute("PRAGMA wal_autocheckpoint = 1000")
+    conn.execute("PRAGMA temp_store = MEMORY")
+    conn.execute("PRAGMA busy_timeout = 30000")
+
     # Execute schemas in order
     for filename in schema_files:
         schema_file = schema_dir / filename
@@ -317,3 +331,147 @@ def check_database_compatibility(conn: sqlite3.Connection) -> dict:
         result["warnings"].append("No version information found in database")
 
     return result
+
+
+# ============================================================================
+# DATABASE MAINTENANCE FUNCTIONS
+# ============================================================================
+
+def vacuum_database(conn: sqlite3.Connection) -> None:
+    """
+    Run VACUUM to reclaim database space and defragment.
+
+    VACUUM rebuilds the database file, repacking it into a minimal amount of disk space.
+    This is useful after deleting large amounts of data or after many INSERT/UPDATE/DELETE operations.
+
+    Args:
+        conn: Database connection
+
+    Note:
+        VACUUM can take a significant amount of time on large databases and requires
+        temporary disk space up to twice the size of the original database.
+    """
+    import logging
+    logger = logging.getLogger(__name__)
+
+    logger.info("Running VACUUM to reclaim database space and defragment")
+    conn.execute("VACUUM")
+    logger.info("VACUUM completed successfully")
+
+
+def analyze_database(conn: sqlite3.Connection) -> None:
+    """
+    Run ANALYZE to update query planner statistics.
+
+    ANALYZE gathers statistics about the contents of tables and indices.
+    These statistics are used by the query planner to help make better choices about how to perform queries.
+
+    Args:
+        conn: Database connection
+    """
+    import logging
+    logger = logging.getLogger(__name__)
+
+    logger.info("Running ANALYZE to update query planner statistics")
+    conn.execute("ANALYZE")
+    logger.info("ANALYZE completed successfully")
+
+
+def optimize_database(conn: sqlite3.Connection) -> dict:
+    """
+    Run complete database optimization (VACUUM + ANALYZE).
+
+    This performs both VACUUM and ANALYZE operations in the correct order:
+    1. VACUUM first to reclaim space and defragment
+    2. ANALYZE to update statistics with the new layout
+
+    Args:
+        conn: Database connection
+
+    Returns:
+        Dictionary with optimization results including before/after size information
+    """
+    import logging
+    import time
+    logger = logging.getLogger(__name__)
+
+    logger.info("Running database optimization (VACUUM + ANALYZE)")
+    start_time = time.time()
+
+    # Get size before optimization
+    size_before = get_database_size_info(conn)
+
+    # VACUUM first to reclaim space and defragment
+    vacuum_database(conn)
+
+    # Then ANALYZE to update statistics with the new layout
+    analyze_database(conn)
+
+    # Get size after optimization
+    size_after = get_database_size_info(conn)
+
+    optimization_time = time.time() - start_time
+
+    result = {
+        "success": True,
+        "optimization_time": optimization_time,
+        "size_before": size_before,
+        "size_after": size_after,
+        "space_reclaimed": size_before["total_size"] - size_after["total_size"],
+        "free_pages_reclaimed": size_before["free_pages"] - size_after["free_pages"]
+    }
+
+    logger.info(f"Database optimization completed in {optimization_time:.2f} seconds")
+    logger.info(f"Space reclaimed: {result['space_reclaimed']:,} bytes ({result['space_reclaimed']/1024/1024:.1f} MB)")
+
+    return result
+
+
+def get_database_size_info(conn: sqlite3.Connection) -> dict:
+    """
+    Get detailed information about database size and space usage.
+
+    Args:
+        conn: Database connection
+
+    Returns:
+        Dictionary with size information including total, used, and free space
+    """
+    # Get page count, page size, and freelist count
+    page_count = conn.execute("PRAGMA page_count").fetchone()[0]
+    page_size = conn.execute("PRAGMA page_size").fetchone()[0]
+    freelist_count = conn.execute("PRAGMA freelist_count").fetchone()[0]
+
+    total_size = page_count * page_size
+    free_size = freelist_count * page_size
+    used_size = total_size - free_size
+
+    return {
+        "total_size": total_size,
+        "used_size": used_size,
+        "free_size": free_size,
+        "page_count": page_count,
+        "page_size": page_size,
+        "free_pages": freelist_count,
+        "utilization_percent": (used_size / total_size * 100) if total_size > 0 else 0
+    }
+
+
+def should_optimize_database(conn: sqlite3.Connection, free_space_threshold_percent: float = 10.0) -> bool:
+    """
+    Check if database would benefit from optimization based on free space.
+
+    Args:
+        conn: Database connection
+        free_space_threshold_percent: Threshold percentage of free space to trigger optimization
+
+    Returns:
+        True if optimization is recommended, False otherwise
+    """
+    size_info = get_database_size_info(conn)
+
+    if size_info["total_size"] == 0:
+        return False
+
+    free_space_percent = (size_info["free_size"] / size_info["total_size"]) * 100
+    return free_space_percent >= free_space_threshold_percent
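
The new maintenance helpers in pyconvexity.core.database compose with the existing open_connection entry point. A usage sketch, assuming "network.db" is an existing pyconvexity database (the path is a placeholder):

# Names and signatures are those added in this release; the path is made up.
from pyconvexity import open_connection, should_optimize_database, optimize_database

conn = open_connection("network.db")  # read_only defaults to False, so WAL mode is applied
try:
    # Only pay the VACUUM cost when at least 10% of pages sit on the freelist
    if should_optimize_database(conn, free_space_threshold_percent=10.0):
        stats = optimize_database(conn)
        print(f"reclaimed {stats['space_reclaimed']:,} bytes "
              f"in {stats['optimization_time']:.2f}s")
finally:
    conn.close()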
pyconvexity/core/types.py CHANGED
@@ -38,8 +38,14 @@ class StaticValue:
         Rust stores: 123.45, 42, true, "hello"
         Not: {"Float": 123.45}, {"Integer": 42}, etc.
         """
+        import math
+
         if "Float" in self.data:
-            return json.dumps(self.data["Float"])
+            float_val = self.data["Float"]
+            # Ensure finite values only
+            if not math.isfinite(float_val):
+                raise ValueError(f"Cannot serialize non-finite float value: {float_val}")
+            return json.dumps(float_val)
         elif "Integer" in self.data:
             return json.dumps(self.data["Integer"])
         elif "Boolean" in self.data:
@@ -100,21 +106,94 @@ class StaticValue:
 
 
 @dataclass
-class TimeseriesPoint:
+class Timeseries:
     """
-    A single point in a time series.
+    Efficient timeseries data structure matching the new Rust implementation.
 
-    Mirrors Rust TimeseriesPoint with exact field matching.
+    Stores values as a flat array for maximum performance, matching the
+    unified Rust Timeseries struct.
     """
-    timestamp: int
-    value: float
-    period_index: int
+    values: List[float]
+    length: int
+    start_index: int
+    data_type: str
+    unit: Optional[str]
+    is_input: bool
 
     def __post_init__(self):
-        # Ensure types are correct
-        self.timestamp = int(self.timestamp)
-        self.value = float(self.value)
-        self.period_index = int(self.period_index)
+        # Ensure length matches values array
+        self.length = len(self.values)
+        # Ensure all values are float32-compatible
+        self.values = [float(v) for v in self.values]
+
+    def get_value(self, index: int) -> Optional[float]:
+        """Get value at specific index."""
+        if 0 <= index < len(self.values):
+            return self.values[index]
+        return None
+
+    def get_range(self, start: int, end: int) -> List[float]:
+        """Get a range of values efficiently."""
+        end = min(end, len(self.values))
+        start = min(start, end)
+        return self.values[start:end]
+
+    def sample(self, max_points: int) -> 'Timeseries':
+        """Apply sampling if the timeseries is too large."""
+        if len(self.values) <= max_points:
+            return self
+
+        step = len(self.values) // max_points
+        sampled_values = []
+
+        for i in range(0, len(self.values), max(1, step)):
+            sampled_values.append(self.values[i])
+
+        # Always include the last point if not already included
+        if self.values and sampled_values[-1] != self.values[-1]:
+            sampled_values.append(self.values[-1])
+
+        return Timeseries(
+            values=sampled_values,
+            length=len(sampled_values),
+            start_index=self.start_index,
+            data_type=self.data_type,
+            unit=self.unit,
+            is_input=self.is_input
+        )
+
+    def slice(self, start_index: int, end_index: int) -> 'Timeseries':
+        """Apply range filtering."""
+        start = max(0, start_index - self.start_index)
+        end = max(0, end_index - self.start_index)
+        end = min(end, len(self.values))
+        start = min(start, end)
+
+        return Timeseries(
+            values=self.values[start:end],
+            length=end - start,
+            start_index=self.start_index + start,
+            data_type=self.data_type,
+            unit=self.unit,
+            is_input=self.is_input
+        )
+
+
+@dataclass
+class TimeseriesMetadata:
+    """
+    Metadata about a timeseries without loading the full data.
+
+    Mirrors Rust TimeseriesMetadata struct.
+    """
+    length: int
+    start_time: int
+    end_time: int
+    start_index: int
+    end_index: int
+    data_type: str
+    unit: Optional[str]
+    is_input: bool
 
 
 @dataclass
@@ -168,21 +247,22 @@ class AttributeValue:
     """
     Represents either a static value or timeseries data for a component attribute.
 
+    Uses efficient Timeseries format for optimal performance.
     Mirrors Rust AttributeValue enum.
     """
 
-    def __init__(self, value: Union[StaticValue, List[TimeseriesPoint]]):
+    def __init__(self, value: Union[StaticValue, Timeseries]):
        if isinstance(value, StaticValue):
            self.variant = "Static"
            self.static_value = value
            self.timeseries_value = None
-        elif isinstance(value, list) and all(isinstance(p, TimeseriesPoint) for p in value):
+        elif isinstance(value, Timeseries):
            self.variant = "Timeseries"
            self.static_value = None
            self.timeseries_value = value
        else:
            raise ValueError(
-                f"AttributeValue must be StaticValue or List[TimeseriesPoint], got {type(value)}"
+                f"AttributeValue must be StaticValue or Timeseries, got {type(value)}"
            )
 
     @classmethod
@@ -191,9 +271,10 @@ class AttributeValue:
         return cls(value)
 
     @classmethod
-    def timeseries(cls, points: List[TimeseriesPoint]) -> 'AttributeValue':
-        """Create a timeseries attribute value"""
-        return cls(points)
+    def timeseries(cls, timeseries: Timeseries) -> 'AttributeValue':
+        """Create a timeseries attribute value (new format)"""
+        return cls(timeseries)
+
 
     def is_static(self) -> bool:
         """Check if this is a static value"""
@@ -203,11 +284,17 @@ class AttributeValue:
         """Check if this is a timeseries value"""
         return self.variant == "Timeseries"
 
+    def as_timeseries(self) -> Optional[Timeseries]:
+        """Get the timeseries data in new format"""
+        return self.timeseries_value if self.is_timeseries() else None
+
+
     def __repr__(self) -> str:
         if self.is_static():
             return f"AttributeValue.static({self.static_value})"
         else:
-            return f"AttributeValue.timeseries({len(self.timeseries_value)} points)"
+            length = len(self.timeseries_value.values) if self.timeseries_value else 0
+            return f"AttributeValue.timeseries({length} points)"
 
 
 @dataclass
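
A short sketch of how the reworked AttributeValue interacts with the new Timeseries type, exercising only the constructors and methods shown above (the concrete values are illustrative):

# Sketch: downsampling and windowing with the new flat-array timeseries.
from pyconvexity.core.types import AttributeValue, Timeseries

ts = Timeseries(
    values=[float(i) for i in range(8760)],  # e.g. one hourly year
    length=8760,
    start_index=0,
    data_type="float",  # assumed label
    unit=None,
    is_input=True,
)

attr = AttributeValue.timeseries(ts)
assert attr.is_timeseries()

plot_series = attr.as_timeseries().sample(max_points=1000)  # coarse series for plotting
day_two = ts.slice(start_index=24, end_index=48)            # hours 24-47 only
print(len(plot_series.values), day_two.start_index, day_two.length)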
@@ -34,25 +34,25 @@ CREATE TABLE networks (
 CREATE INDEX idx_networks_name ON networks(name);
 CREATE INDEX idx_networks_created_at ON networks(created_at);
 
--- Network time periods - computed from time axis definition
--- This table is populated automatically based on network time axis
+-- Network time periods - optimized storage using computed timestamps
+-- Instead of storing 75k+ timestamp strings, we compute them from the time axis
+-- This reduces storage from ~3.4MB to ~24 bytes per network
 CREATE TABLE network_time_periods (
-    id INTEGER PRIMARY KEY AUTOINCREMENT,
     network_id INTEGER NOT NULL,
-    timestamp DATETIME NOT NULL,
-    period_index INTEGER NOT NULL, -- 0-based index for array operations
+    period_count INTEGER NOT NULL, -- Total number of periods (e.g., 8760 for hourly year)
+    start_timestamp INTEGER NOT NULL, -- Unix timestamp of first period
+    interval_seconds INTEGER NOT NULL, -- Seconds between periods (3600 for hourly)
+
+    PRIMARY KEY (network_id),
 
     CONSTRAINT fk_time_periods_network
        FOREIGN KEY (network_id) REFERENCES networks(id) ON DELETE CASCADE,
-    CONSTRAINT uq_time_period_network_index
-        UNIQUE (network_id, period_index),
-    CONSTRAINT uq_time_period_network_timestamp
-        UNIQUE (network_id, timestamp)
+    CONSTRAINT valid_period_count CHECK (period_count > 0),
+    CONSTRAINT valid_interval CHECK (interval_seconds > 0)
 );
 
-CREATE INDEX idx_time_periods_network ON network_time_periods(network_id);
-CREATE INDEX idx_time_periods_timestamp ON network_time_periods(timestamp);
-CREATE INDEX idx_time_periods_index ON network_time_periods(network_id, period_index);
+-- No additional indexes needed - primary key on network_id is sufficient
+-- Timestamps are computed as: start_timestamp + (period_index * interval_seconds)
 
 -- Network locks - prevents concurrent modifications
 CREATE TABLE network_locks (
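
Because the time-axis table now stores only a period count, a start timestamp, and an interval, individual period timestamps must be reconstructed on read. A sketch of that computation, following the formula in the schema comment (the row values are invented for illustration):

# timestamp = start_timestamp + period_index * interval_seconds
from datetime import datetime, timezone

start_timestamp = 1704067200   # example row: 2024-01-01 00:00:00 UTC
interval_seconds = 3600        # hourly periods
period_count = 8760            # e.g. one year of hourly periods

def period_timestamp(period_index: int) -> int:
    if not 0 <= period_index < period_count:
        raise IndexError(f"period_index {period_index} out of range")
    return start_timestamp + period_index * interval_seconds

print(datetime.fromtimestamp(period_timestamp(0), tz=timezone.utc))     # first period
print(datetime.fromtimestamp(period_timestamp(8759), tz=timezone.utc))  # last period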