pyconvexity 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

pyconvexity/__init__.py CHANGED
@@ -21,7 +21,8 @@ from pyconvexity.core.errors import (
 
 from pyconvexity.core.types import (
     StaticValue,
-    TimeseriesPoint,
+    Timeseries,
+    TimeseriesMetadata,
     Component,
     Network,
     CreateNetworkRequest,
@@ -33,6 +34,12 @@ from pyconvexity.core.database import (
     database_context,
     open_connection,
     validate_database,
+    # Database maintenance functions
+    vacuum_database,
+    analyze_database,
+    optimize_database,
+    get_database_size_info,
+    should_optimize_database,
 )
 
 # Import main API functions
@@ -57,6 +64,12 @@ from pyconvexity.validation import (
     get_validation_rule, list_validation_rules, validate_timeseries_alignment
 )
 
+# High-level timeseries API - recommended for new code
+from pyconvexity.timeseries import (
+    get_timeseries, set_timeseries, get_timeseries_metadata,
+    get_multiple_timeseries, timeseries_to_numpy, numpy_to_timeseries
+)
+
 # High-level API functions
 __all__ = [
     # Version info
@@ -65,7 +78,8 @@ __all__ = [
 
     # Core types
     "StaticValue",
-    "TimeseriesPoint",
+    "Timeseries",
+    "TimeseriesMetadata",
     "Component",
     "Network",
     "CreateNetworkRequest",
@@ -77,6 +91,13 @@ __all__ = [
     "open_connection",
     "validate_database",
 
+    # Database maintenance
+    "vacuum_database",
+    "analyze_database",
+    "optimize_database",
+    "get_database_size_info",
+    "should_optimize_database",
+
     # Exceptions
     "PyConvexityError",
     "DatabaseError",
@@ -101,6 +122,10 @@ __all__ = [
 
     # Validation
    "get_validation_rule", "list_validation_rules", "validate_timeseries_alignment",
+
+    # High-level timeseries API
+    "get_timeseries", "set_timeseries", "get_timeseries_metadata",
+    "get_multiple_timeseries", "timeseries_to_numpy", "numpy_to_timeseries",
 ]
 
 # Data module imports
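
TimeseriesPoint is no longer exported, so code that built attribute data point by point must switch to the flat-array Timeseries type added later in this diff. A minimal construction sketch using only the dataclass fields shown below; the data_type and unit strings are illustrative placeholders, not values confirmed by the package:

# Sketch: field names come from the Timeseries dataclass in core/types.py;
# "float" and "MW" are assumed placeholder values.
from pyconvexity import Timeseries

ts = Timeseries(
    values=[0.0, 1.5, 3.0, 4.5],  # flat array replaces a list of TimeseriesPoint
    length=4,                     # recomputed from values in __post_init__
    start_index=0,
    data_type="float",            # assumed label
    unit="MW",                    # assumed unit
    is_input=True,
)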
pyconvexity/_version.py CHANGED
@@ -1,2 +1 @@
-# This file is automatically updated by GitHub Actions during release
-__version__ = "0.1.2" # Default version for local development
+__version__ = "0.1.4"
@@ -16,7 +16,6 @@ from pyconvexity.core.errors import (
 
 from pyconvexity.core.types import (
     StaticValue,
-    TimeseriesPoint,
     AttributeValue,
     ValidationRule,
     Component,
@@ -46,7 +45,6 @@ __all__ = [
 
     # Types
     "StaticValue",
-    "TimeseriesPoint",
    "AttributeValue",
     "ValidationRule",
     "Component",
@@ -90,6 +90,13 @@ def open_connection(db_path: str, read_only: bool = False) -> sqlite3.Connection
     conn.row_factory = sqlite3.Row # Enable column access by name
     conn.execute("PRAGMA foreign_keys = ON") # Enable foreign key constraints
 
+    # Configure for concurrent access (WAL mode for better concurrency)
+    if not read_only:
+        conn.execute("PRAGMA journal_mode = WAL") # Write-Ahead Logging for concurrency
+        conn.execute("PRAGMA synchronous = NORMAL") # Faster than FULL, still safe
+        conn.execute("PRAGMA wal_autocheckpoint = 1000") # Less frequent checkpoints
+        conn.execute("PRAGMA temp_store = MEMORY") # Faster temporary operations
+
     # Set reasonable timeouts
     conn.execute("PRAGMA busy_timeout = 30000") # 30 second timeout
 
@@ -183,6 +190,13 @@ def create_database_with_schema(db_path: str) -> None:
     # Enable foreign key constraints
     conn.execute("PRAGMA foreign_keys = ON")
 
+    # Configure for concurrent access
+    conn.execute("PRAGMA journal_mode = WAL")
+    conn.execute("PRAGMA synchronous = NORMAL")
+    conn.execute("PRAGMA wal_autocheckpoint = 1000")
+    conn.execute("PRAGMA temp_store = MEMORY")
+    conn.execute("PRAGMA busy_timeout = 30000")
+
     # Execute schemas in order
     for filename in schema_files:
         schema_file = schema_dir / filename
@@ -317,3 +331,147 @@ def check_database_compatibility(conn: sqlite3.Connection) -> dict:
         result["warnings"].append("No version information found in database")
 
     return result
+
+
+# ============================================================================
+# DATABASE MAINTENANCE FUNCTIONS
+# ============================================================================
+
+def vacuum_database(conn: sqlite3.Connection) -> None:
+    """
+    Run VACUUM to reclaim database space and defragment.
+
+    VACUUM rebuilds the database file, repacking it into a minimal amount of disk space.
+    This is useful after deleting large amounts of data or after many INSERT/UPDATE/DELETE operations.
+
+    Args:
+        conn: Database connection
+
+    Note:
+        VACUUM can take a significant amount of time on large databases and requires
+        temporary disk space up to twice the size of the original database.
+    """
+    import logging
+    logger = logging.getLogger(__name__)
+
+    logger.info("Running VACUUM to reclaim database space and defragment")
+    conn.execute("VACUUM")
+    logger.info("VACUUM completed successfully")
+
+
+def analyze_database(conn: sqlite3.Connection) -> None:
+    """
+    Run ANALYZE to update query planner statistics.
+
+    ANALYZE gathers statistics about the contents of tables and indices.
+    These statistics are used by the query planner to help make better choices about how to perform queries.
+
+    Args:
+        conn: Database connection
+    """
+    import logging
+    logger = logging.getLogger(__name__)
+
+    logger.info("Running ANALYZE to update query planner statistics")
+    conn.execute("ANALYZE")
+    logger.info("ANALYZE completed successfully")
+
+
+def optimize_database(conn: sqlite3.Connection) -> dict:
+    """
+    Run complete database optimization (VACUUM + ANALYZE).
+
+    This performs both VACUUM and ANALYZE operations in the correct order:
+    1. VACUUM first to reclaim space and defragment
+    2. ANALYZE to update statistics with the new layout
+
+    Args:
+        conn: Database connection
+
+    Returns:
+        Dictionary with optimization results including before/after size information
+    """
+    import logging
+    import time
+    logger = logging.getLogger(__name__)
+
+    logger.info("Running database optimization (VACUUM + ANALYZE)")
+    start_time = time.time()
+
+    # Get size before optimization
+    size_before = get_database_size_info(conn)
+
+    # VACUUM first to reclaim space and defragment
+    vacuum_database(conn)
+
+    # Then ANALYZE to update statistics with the new layout
+    analyze_database(conn)
+
+    # Get size after optimization
+    size_after = get_database_size_info(conn)
+
+    optimization_time = time.time() - start_time
+
+    result = {
+        "success": True,
+        "optimization_time": optimization_time,
+        "size_before": size_before,
+        "size_after": size_after,
+        "space_reclaimed": size_before["total_size"] - size_after["total_size"],
+        "free_pages_reclaimed": size_before["free_pages"] - size_after["free_pages"]
+    }
+
+    logger.info(f"Database optimization completed in {optimization_time:.2f} seconds")
+    logger.info(f"Space reclaimed: {result['space_reclaimed']:,} bytes ({result['space_reclaimed']/1024/1024:.1f} MB)")
+
+    return result
+
+
+def get_database_size_info(conn: sqlite3.Connection) -> dict:
+    """
+    Get detailed information about database size and space usage.
+
+    Args:
+        conn: Database connection
+
+    Returns:
+        Dictionary with size information including total, used, and free space
+    """
+    # Get page count, page size, and freelist count
+    page_count = conn.execute("PRAGMA page_count").fetchone()[0]
+    page_size = conn.execute("PRAGMA page_size").fetchone()[0]
+    freelist_count = conn.execute("PRAGMA freelist_count").fetchone()[0]
+
+    total_size = page_count * page_size
+    free_size = freelist_count * page_size
+    used_size = total_size - free_size
+
+    return {
+        "total_size": total_size,
+        "used_size": used_size,
+        "free_size": free_size,
+        "page_count": page_count,
+        "page_size": page_size,
+        "free_pages": freelist_count,
+        "utilization_percent": (used_size / total_size * 100) if total_size > 0 else 0
+    }
+
+
+def should_optimize_database(conn: sqlite3.Connection, free_space_threshold_percent: float = 10.0) -> bool:
+    """
+    Check if database would benefit from optimization based on free space.
+
+    Args:
+        conn: Database connection
+        free_space_threshold_percent: Threshold percentage of free space to trigger optimization
+
+    Returns:
+        True if optimization is recommended, False otherwise
+    """
+    size_info = get_database_size_info(conn)
+
+    if size_info["total_size"] == 0:
+        return False
+
+    free_space_percent = (size_info["free_size"] / size_info["total_size"]) * 100
+    return free_space_percent >= free_space_threshold_percent
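
The new maintenance helpers in pyconvexity.core.database compose with the existing open_connection entry point. A usage sketch, assuming "network.db" is an existing pyconvexity database (the path is a placeholder):

# Names and signatures are those added in this release; the path is made up.
from pyconvexity import open_connection, should_optimize_database, optimize_database

conn = open_connection("network.db")  # read_only defaults to False, so WAL mode is applied
try:
    # Only pay the VACUUM cost when at least 10% of pages sit on the freelist
    if should_optimize_database(conn, free_space_threshold_percent=10.0):
        stats = optimize_database(conn)
        print(f"reclaimed {stats['space_reclaimed']:,} bytes "
              f"in {stats['optimization_time']:.2f}s")
finally:
    conn.close()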
pyconvexity/core/types.py CHANGED
@@ -38,8 +38,14 @@ class StaticValue:
         Rust stores: 123.45, 42, true, "hello"
         Not: {"Float": 123.45}, {"Integer": 42}, etc.
         """
+        import math
+
         if "Float" in self.data:
-            return json.dumps(self.data["Float"])
+            float_val = self.data["Float"]
+            # Ensure finite values only
+            if not math.isfinite(float_val):
+                raise ValueError(f"Cannot serialize non-finite float value: {float_val}")
+            return json.dumps(float_val)
         elif "Integer" in self.data:
             return json.dumps(self.data["Integer"])
         elif "Boolean" in self.data:
@@ -100,21 +106,94 @@ class StaticValue:
 
 
 @dataclass
-class TimeseriesPoint:
+class Timeseries:
     """
-    A single point in a time series.
+    Efficient timeseries data structure matching the new Rust implementation.
 
-    Mirrors Rust TimeseriesPoint with exact field matching.
+    Stores values as a flat array for maximum performance, matching the
+    unified Rust Timeseries struct.
     """
-    timestamp: int
-    value: float
-    period_index: int
+    values: List[float]
+    length: int
+    start_index: int
+    data_type: str
+    unit: Optional[str]
+    is_input: bool
 
     def __post_init__(self):
-        # Ensure types are correct
-        self.timestamp = int(self.timestamp)
-        self.value = float(self.value)
-        self.period_index = int(self.period_index)
+        # Ensure length matches values array
+        self.length = len(self.values)
+        # Ensure all values are float32-compatible
+        self.values = [float(v) for v in self.values]
+
+    def get_value(self, index: int) -> Optional[float]:
+        """Get value at specific index."""
+        if 0 <= index < len(self.values):
+            return self.values[index]
+        return None
+
+    def get_range(self, start: int, end: int) -> List[float]:
+        """Get a range of values efficiently."""
+        end = min(end, len(self.values))
+        start = min(start, end)
+        return self.values[start:end]
+
+    def sample(self, max_points: int) -> 'Timeseries':
+        """Apply sampling if the timeseries is too large."""
+        if len(self.values) <= max_points:
+            return self
+
+        step = len(self.values) // max_points
+        sampled_values = []
+
+        for i in range(0, len(self.values), max(1, step)):
+            sampled_values.append(self.values[i])
+
+        # Always include the last point if not already included
+        if self.values and sampled_values[-1] != self.values[-1]:
+            sampled_values.append(self.values[-1])
+
+        return Timeseries(
+            values=sampled_values,
+            length=len(sampled_values),
+            start_index=self.start_index,
+            data_type=self.data_type,
+            unit=self.unit,
+            is_input=self.is_input
+        )
+
+    def slice(self, start_index: int, end_index: int) -> 'Timeseries':
+        """Apply range filtering."""
+        start = max(0, start_index - self.start_index)
+        end = max(0, end_index - self.start_index)
+        end = min(end, len(self.values))
+        start = min(start, end)
+
+        return Timeseries(
+            values=self.values[start:end],
+            length=end - start,
+            start_index=self.start_index + start,
+            data_type=self.data_type,
+            unit=self.unit,
+            is_input=self.is_input
+        )
+
+
+@dataclass
+class TimeseriesMetadata:
+    """
+    Metadata about a timeseries without loading the full data.
+
+    Mirrors Rust TimeseriesMetadata struct.
+    """
+    length: int
+    start_time: int
+    end_time: int
+    start_index: int
+    end_index: int
+    data_type: str
+    unit: Optional[str]
+    is_input: bool
 
 
 @dataclass
@@ -168,21 +247,22 @@ class AttributeValue:
     """
     Represents either a static value or timeseries data for a component attribute.
 
+    Uses efficient Timeseries format for optimal performance.
     Mirrors Rust AttributeValue enum.
     """
 
-    def __init__(self, value: Union[StaticValue, List[TimeseriesPoint]]):
+    def __init__(self, value: Union[StaticValue, Timeseries]):
        if isinstance(value, StaticValue):
            self.variant = "Static"
            self.static_value = value
            self.timeseries_value = None
-        elif isinstance(value, list) and all(isinstance(p, TimeseriesPoint) for p in value):
+        elif isinstance(value, Timeseries):
            self.variant = "Timeseries"
            self.static_value = None
            self.timeseries_value = value
        else:
            raise ValueError(
-                f"AttributeValue must be StaticValue or List[TimeseriesPoint], got {type(value)}"
+                f"AttributeValue must be StaticValue or Timeseries, got {type(value)}"
            )
 
     @classmethod
@@ -191,9 +271,10 @@ class AttributeValue:
         return cls(value)
 
     @classmethod
-    def timeseries(cls, points: List[TimeseriesPoint]) -> 'AttributeValue':
-        """Create a timeseries attribute value"""
-        return cls(points)
+    def timeseries(cls, timeseries: Timeseries) -> 'AttributeValue':
+        """Create a timeseries attribute value (new format)"""
+        return cls(timeseries)
+
 
     def is_static(self) -> bool:
         """Check if this is a static value"""
@@ -203,11 +284,17 @@ class AttributeValue:
         """Check if this is a timeseries value"""
         return self.variant == "Timeseries"
 
+    def as_timeseries(self) -> Optional[Timeseries]:
+        """Get the timeseries data in new format"""
+        return self.timeseries_value if self.is_timeseries() else None
+
+
     def __repr__(self) -> str:
         if self.is_static():
             return f"AttributeValue.static({self.static_value})"
         else:
-            return f"AttributeValue.timeseries({len(self.timeseries_value)} points)"
+            length = len(self.timeseries_value.values) if self.timeseries_value else 0
+            return f"AttributeValue.timeseries({length} points)"
 
 
 @dataclass
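
A short sketch of how the reworked AttributeValue interacts with the new Timeseries type, exercising only the constructors and methods shown above (the concrete values are illustrative):

# Sketch: downsampling and windowing with the new flat-array timeseries.
from pyconvexity.core.types import AttributeValue, Timeseries

ts = Timeseries(
    values=[float(i) for i in range(8760)],  # e.g. one hourly year
    length=8760,
    start_index=0,
    data_type="float",  # assumed label
    unit=None,
    is_input=True,
)

attr = AttributeValue.timeseries(ts)
assert attr.is_timeseries()

plot_series = attr.as_timeseries().sample(max_points=1000)  # coarse series for plotting
day_two = ts.slice(start_index=24, end_index=48)            # hours 24-47 only
print(len(plot_series.values), day_two.start_index, day_two.length)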
@@ -34,25 +34,25 @@ CREATE TABLE networks (
 CREATE INDEX idx_networks_name ON networks(name);
 CREATE INDEX idx_networks_created_at ON networks(created_at);
 
--- Network time periods - computed from time axis definition
--- This table is populated automatically based on network time axis
+-- Network time periods - optimized storage using computed timestamps
+-- Instead of storing 75k+ timestamp strings, we compute them from the time axis
+-- This reduces storage from ~3.4MB to ~24 bytes per network
 CREATE TABLE network_time_periods (
-    id INTEGER PRIMARY KEY AUTOINCREMENT,
     network_id INTEGER NOT NULL,
-    timestamp DATETIME NOT NULL,
-    period_index INTEGER NOT NULL, -- 0-based index for array operations
+    period_count INTEGER NOT NULL, -- Total number of periods (e.g., 8760 for hourly year)
+    start_timestamp INTEGER NOT NULL, -- Unix timestamp of first period
+    interval_seconds INTEGER NOT NULL, -- Seconds between periods (3600 for hourly)
+
+    PRIMARY KEY (network_id),
 
     CONSTRAINT fk_time_periods_network
        FOREIGN KEY (network_id) REFERENCES networks(id) ON DELETE CASCADE,
-    CONSTRAINT uq_time_period_network_index
-        UNIQUE (network_id, period_index),
-    CONSTRAINT uq_time_period_network_timestamp
-        UNIQUE (network_id, timestamp)
+    CONSTRAINT valid_period_count CHECK (period_count > 0),
+    CONSTRAINT valid_interval CHECK (interval_seconds > 0)
 );
 
-CREATE INDEX idx_time_periods_network ON network_time_periods(network_id);
-CREATE INDEX idx_time_periods_timestamp ON network_time_periods(timestamp);
-CREATE INDEX idx_time_periods_index ON network_time_periods(network_id, period_index);
+-- No additional indexes needed - primary key on network_id is sufficient
+-- Timestamps are computed as: start_timestamp + (period_index * interval_seconds)
 
 -- Network locks - prevents concurrent modifications
 CREATE TABLE network_locks (
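
Because the time-axis table now stores only a period count, a start timestamp, and an interval, individual period timestamps must be reconstructed on read. A sketch of that computation, following the formula in the schema comment (the row values are invented for illustration):

# timestamp = start_timestamp + period_index * interval_seconds
from datetime import datetime, timezone

start_timestamp = 1704067200   # example row: 2024-01-01 00:00:00 UTC
interval_seconds = 3600        # hourly periods
period_count = 8760            # e.g. one year of hourly periods

def period_timestamp(period_index: int) -> int:
    if not 0 <= period_index < period_count:
        raise IndexError(f"period_index {period_index} out of range")
    return start_timestamp + period_index * interval_seconds

print(datetime.fromtimestamp(period_timestamp(0), tz=timezone.utc))     # first period
print(datetime.fromtimestamp(period_timestamp(8759), tz=timezone.utc))  # last period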