pyconvexity 0.3.8.post7__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyconvexity/__init__.py +87 -46
- pyconvexity/_version.py +1 -1
- pyconvexity/core/__init__.py +3 -5
- pyconvexity/core/database.py +111 -103
- pyconvexity/core/errors.py +16 -10
- pyconvexity/core/types.py +61 -54
- pyconvexity/data/__init__.py +0 -1
- pyconvexity/data/loaders/cache.py +65 -64
- pyconvexity/data/schema/01_core_schema.sql +134 -234
- pyconvexity/data/schema/02_data_metadata.sql +38 -168
- pyconvexity/data/schema/03_validation_data.sql +327 -264
- pyconvexity/data/sources/gem.py +169 -139
- pyconvexity/io/__init__.py +4 -10
- pyconvexity/io/excel_exporter.py +694 -480
- pyconvexity/io/excel_importer.py +817 -545
- pyconvexity/io/netcdf_exporter.py +66 -61
- pyconvexity/io/netcdf_importer.py +850 -619
- pyconvexity/models/__init__.py +109 -59
- pyconvexity/models/attributes.py +197 -178
- pyconvexity/models/carriers.py +70 -67
- pyconvexity/models/components.py +260 -236
- pyconvexity/models/network.py +202 -284
- pyconvexity/models/results.py +65 -55
- pyconvexity/models/scenarios.py +58 -88
- pyconvexity/solvers/__init__.py +5 -5
- pyconvexity/solvers/pypsa/__init__.py +3 -3
- pyconvexity/solvers/pypsa/api.py +150 -134
- pyconvexity/solvers/pypsa/batch_loader.py +165 -162
- pyconvexity/solvers/pypsa/builder.py +390 -291
- pyconvexity/solvers/pypsa/constraints.py +184 -162
- pyconvexity/solvers/pypsa/solver.py +968 -666
- pyconvexity/solvers/pypsa/storage.py +1377 -671
- pyconvexity/timeseries.py +63 -60
- pyconvexity/validation/__init__.py +14 -6
- pyconvexity/validation/rules.py +95 -84
- pyconvexity-0.4.1.dist-info/METADATA +46 -0
- pyconvexity-0.4.1.dist-info/RECORD +42 -0
- pyconvexity/data/__pycache__/__init__.cpython-313.pyc +0 -0
- pyconvexity/data/loaders/__pycache__/__init__.cpython-313.pyc +0 -0
- pyconvexity/data/loaders/__pycache__/cache.cpython-313.pyc +0 -0
- pyconvexity/data/schema/04_scenario_schema.sql +0 -122
- pyconvexity/data/schema/migrate_add_geometries.sql +0 -73
- pyconvexity/data/sources/__pycache__/__init__.cpython-313.pyc +0 -0
- pyconvexity/data/sources/__pycache__/gem.cpython-313.pyc +0 -0
- pyconvexity-0.3.8.post7.dist-info/METADATA +0 -138
- pyconvexity-0.3.8.post7.dist-info/RECORD +0 -49
- {pyconvexity-0.3.8.post7.dist-info → pyconvexity-0.4.1.dist-info}/WHEEL +0 -0
- {pyconvexity-0.3.8.post7.dist-info → pyconvexity-0.4.1.dist-info}/top_level.txt +0 -0
pyconvexity/core/types.py
CHANGED
@@ -13,11 +13,11 @@ from typing import Dict, Any, Optional, List, Union
 class StaticValue:
     """
     Represents a static (non-time-varying) attribute value.
-
+
     Mirrors the Rust StaticValue enum while providing Python conveniences.
     Supports float, int, bool, and string values with proper type conversion.
     """
-
+
     def __init__(self, value: Union[float, int, bool, str]):
         # Check bool before int since bool is subclass of int in Python
         if isinstance(value, bool):
@@ -30,21 +30,23 @@
             self.data = {"String": value}
         else:
             raise ValueError(f"Unsupported value type: {type(value)}")
-
+
     def to_json(self) -> str:
         """
         Return raw value as JSON to match Rust serialization format.
-
+
         Rust stores: 123.45, 42, true, "hello"
         Not: {"Float": 123.45}, {"Integer": 42}, etc.
         """
         import math
-
+
         if "Float" in self.data:
             float_val = self.data["Float"]
             # Ensure finite values only
             if not math.isfinite(float_val):
-                raise ValueError(
+                raise ValueError(
+                    f"Cannot serialize non-finite float value: {float_val}"
+                )
             return json.dumps(float_val)
         elif "Integer" in self.data:
             return json.dumps(self.data["Integer"])
@@ -55,7 +57,7 @@
         else:
             # Fallback to original format if unknown
             return json.dumps(self.data)
-
+
     def data_type(self) -> str:
         """Get data type name - mirrors Rust implementation"""
         if "Float" in self.data:
@@ -68,7 +70,7 @@
             return "string"
         else:
             return "unknown"
-
+
     def as_f64(self) -> float:
         """Convert to float, mirroring Rust implementation"""
         if "Float" in self.data:
@@ -82,7 +84,7 @@
                 return float(self.data["String"])
             except ValueError:
                 return 0.0
-
+
     def value(self) -> Union[float, int, bool, str]:
         """Get the raw Python value"""
         if "Float" in self.data:
@@ -95,10 +97,10 @@
             return self.data["String"]
         else:
             raise ValueError("Unknown data type in StaticValue")
-
+
     def __repr__(self) -> str:
         return f"StaticValue({self.value()})"
-
+
     def __eq__(self, other) -> bool:
         if isinstance(other, StaticValue):
             return self.data == other.data
@@ -109,73 +111,74 @@
 class Timeseries:
     """
     Efficient timeseries data structure matching the new Rust implementation.
-
-    Stores values as a flat array for maximum performance, matching the
+
+    Stores values as a flat array for maximum performance, matching the
     unified Rust Timeseries struct.
     """
+
     values: List[float]
     length: int
     start_index: int
     data_type: str
     unit: Optional[str]
     is_input: bool
-
+
     def __post_init__(self):
         # Ensure length matches values array
         self.length = len(self.values)
         # Ensure all values are float32-compatible
         self.values = [float(v) for v in self.values]
-
+
     def get_value(self, index: int) -> Optional[float]:
         """Get value at specific index."""
         if 0 <= index < len(self.values):
             return self.values[index]
         return None
-
+
     def get_range(self, start: int, end: int) -> List[float]:
         """Get a range of values efficiently."""
         end = min(end, len(self.values))
         start = min(start, end)
         return self.values[start:end]
-
-    def sample(self, max_points: int) ->
+
+    def sample(self, max_points: int) -> "Timeseries":
         """Apply sampling if the timeseries is too large."""
         if len(self.values) <= max_points:
             return self
-
+
         step = len(self.values) // max_points
         sampled_values = []
-
+
         for i in range(0, len(self.values), max(1, step)):
             sampled_values.append(self.values[i])
-
+
         # Always include the last point if not already included
         if self.values and sampled_values[-1] != self.values[-1]:
             sampled_values.append(self.values[-1])
-
+
         return Timeseries(
             values=sampled_values,
             length=len(sampled_values),
             start_index=self.start_index,
             data_type=self.data_type,
             unit=self.unit,
-            is_input=self.is_input
+            is_input=self.is_input,
         )
-
-    def slice(self, start_index: int, end_index: int) ->
+
+    def slice(self, start_index: int, end_index: int) -> "Timeseries":
         """Apply range filtering."""
         start = max(0, start_index - self.start_index)
         end = max(0, end_index - self.start_index)
         end = min(end, len(self.values))
         start = min(start, end)
-
+
         return Timeseries(
             values=self.values[start:end],
             length=end - start,
             start_index=self.start_index + start,
             data_type=self.data_type,
             unit=self.unit,
-            is_input=self.is_input
+            is_input=self.is_input,
         )


@@ -183,9 +186,10 @@ class Timeseries:
 class TimeseriesMetadata:
     """
     Metadata about a timeseries without loading the full data.
-
+
     Mirrors Rust TimeseriesMetadata struct.
     """
+
     length: int
     start_time: int
     end_time: int
@@ -200,9 +204,10 @@ class TimeseriesMetadata:
 class TimePeriod:
     """
     Represents a time period in the network's time axis.
-
+
     Mirrors Rust TimePeriod structure.
     """
+
     timestamp: int
     period_index: int
     formatted_time: str
@@ -212,9 +217,10 @@ class TimePeriod:
 class TimeseriesValidationResult:
     """
     Result of validating timeseries alignment with network time periods.
-
+
     Mirrors Rust TimeseriesValidationResult.
     """
+
     is_valid: bool
     missing_periods: List[int]
     extra_periods: List[int]
@@ -226,9 +232,10 @@ class TimeseriesValidationResult:
 class ValidationRule:
     """
     Validation rule for component attributes.
-
+
     Mirrors Rust ValidationRule with all fields.
     """
+
     component_type: str
     attribute_name: str
     data_type: str
@@ -246,11 +253,11 @@ class ValidationRule:
 class AttributeValue:
     """
     Represents either a static value or timeseries data for a component attribute.
-
+
     Uses efficient Timeseries format for optimal performance.
     Mirrors Rust AttributeValue enum.
     """
-
+
     def __init__(self, value: Union[StaticValue, Timeseries]):
         if isinstance(value, StaticValue):
             self.variant = "Static"
@@ -264,31 +271,29 @@ class AttributeValue:
             raise ValueError(
                 f"AttributeValue must be StaticValue or Timeseries, got {type(value)}"
             )
-
+
     @classmethod
-    def static(cls, value: StaticValue) ->
+    def static(cls, value: StaticValue) -> "AttributeValue":
         """Create a static attribute value"""
         return cls(value)
-
+
     @classmethod
-    def timeseries(cls, timeseries: Timeseries) ->
+    def timeseries(cls, timeseries: Timeseries) -> "AttributeValue":
         """Create a timeseries attribute value (new format)"""
         return cls(timeseries)
-
-
+
     def is_static(self) -> bool:
         """Check if this is a static value"""
         return self.variant == "Static"
-
+
     def is_timeseries(self) -> bool:
         """Check if this is a timeseries value"""
         return self.variant == "Timeseries"
-
+
     def as_timeseries(self) -> Optional[Timeseries]:
         """Get the timeseries data in new format"""
         return self.timeseries_value if self.is_timeseries() else None
-
-
+
     def __repr__(self) -> str:
         if self.is_static():
             return f"AttributeValue.static({self.static_value})"
@@ -300,12 +305,12 @@ class AttributeValue:
 @dataclass
 class Component:
     """
-    Represents a component in the energy system model.
-
-    Mirrors Rust Component struct
+    Represents a component in the energy system model (single network per database).
+
+    Mirrors Rust Component struct.
     """
+
     id: int
-    network_id: int
     component_type: str
     name: str
     longitude: Optional[float] = None
@@ -320,9 +325,10 @@ class Component:
 class Network:
     """
     Represents a network/model in the system.
-
+
     Enhanced version of network information with additional metadata.
     """
+
     id: int
     name: str
     description: Optional[str] = None
@@ -336,11 +342,11 @@ class Network:
 @dataclass
 class CreateComponentRequest:
     """
-    Request structure for creating a new component.
-
+    Request structure for creating a new component (single network per database).
+
     Mirrors Rust CreateComponentRequest.
     """
-
+
     component_type: str
     name: str
     description: Optional[str] = None
@@ -356,9 +362,10 @@ class CreateComponentRequest:
 class CreateNetworkRequest:
     """
     Request structure for creating a new network.
-
+
     Mirrors Rust CreateNetworkRequest.
     """
+
     name: str
     description: Optional[str] = None
     time_resolution: Optional[str] = None
@@ -371,8 +378,8 @@ class Carrier:
     """
     Represents an energy carrier (e.g., electricity, heat, gas).
     """
+
     id: int
-    network_id: int
     name: str
     co2_emissions: float = 0.0
     color: Optional[str] = None
@@ -384,8 +391,8 @@ class Scenario:
     """
     Represents a scenario within a network.
     """
+
     id: int
-    network_id: int
     name: str
     description: Optional[str] = None
     is_master: bool = False
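Most of the hunks above are mechanical reformatting (blank-line and whitespace cleanup, wrapped calls, trailing commas); the substantive change is the removal of the network_id field from Component, Carrier, and Scenario, with the docstrings now noting a single network per database. The snippet below is a minimal, illustrative sketch of the StaticValue and Timeseries API surface visible in this diff, not part of the package's documentation; the import path mirrors pyconvexity/core/types.py, and the data_type and unit strings are placeholder values.

from pyconvexity.core.types import StaticValue, Timeseries

# StaticValue serializes the raw value, matching the Rust format noted in
# to_json()'s docstring (123.45, 42, true, "hello").
capacity = StaticValue(123.45)
print(capacity.to_json())   # "123.45"
print(capacity.as_f64())    # 123.45
print(capacity.value())     # 123.45

# Timeseries is constructed with the same keyword arguments that sample()
# and slice() use above; __post_init__ recomputes length from the values list.
ts = Timeseries(
    values=[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
    length=8,
    start_index=0,
    data_type="float",  # placeholder label
    unit="MW",          # placeholder unit
    is_input=True,
)
print(ts.get_range(2, 5))          # [3.0, 4.0, 5.0]
print(ts.sample(4).values)         # downsampled; last point always retained
print(ts.slice(2, 6).start_index)  # 2

Either form can then be wrapped for a component attribute via the AttributeValue.static() and AttributeValue.timeseries() classmethods shown in the hunks above.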
pyconvexity/data/loaders/cache.py
CHANGED
@@ -14,112 +14,113 @@ from datetime import datetime, timedelta
 
 logger = logging.getLogger(__name__)
 
+
 class DataCache:
     """Manages caching of processed datasets."""
-
+
     def __init__(self, cache_dir: Optional[str] = None):
         """
         Initialize the cache manager.
-
+
         Args:
             cache_dir: Directory to store cache files. Defaults to 'data/cache'
         """
         if cache_dir is None:
             cache_dir = "data/cache"
-
+
         self.cache_dir = Path(cache_dir)
         self.cache_dir.mkdir(parents=True, exist_ok=True)
-
+
         # Cache metadata file
         self.metadata_file = self.cache_dir / "cache_metadata.json"
         self._load_metadata()
-
+
     def _load_metadata(self):
         """Load cache metadata from file."""
         if self.metadata_file.exists():
             try:
-                with open(self.metadata_file,
+                with open(self.metadata_file, "r") as f:
                     self.metadata = json.load(f)
             except (json.JSONDecodeError, FileNotFoundError):
                 self.metadata = {}
         else:
             self.metadata = {}
-
+
     def _save_metadata(self):
         """Save cache metadata to file."""
-        with open(self.metadata_file,
+        with open(self.metadata_file, "w") as f:
             json.dump(self.metadata, f, indent=2)
-
+
     def _get_cache_key(self, dataset_name: str, filters: Dict[str, Any]) -> str:
         """Generate a unique cache key for a dataset and filters combination."""
         # Create a hash of the filters
         filters_str = json.dumps(filters, sort_keys=True)
         filters_hash = hashlib.md5(filters_str.encode()).hexdigest()
-
+
         return f"{dataset_name}_{filters_hash}"
-
+
     def _get_cache_file_path(self, cache_key: str) -> Path:
         """Get the file path for a cache key."""
         return self.cache_dir / f"{cache_key}.parquet"
-
+
     def get_cached_data(
-        self,
-        dataset_name: str,
-        filters: Dict[str, Any]
+        self, dataset_name: str, filters: Dict[str, Any]
     ) -> Optional[pd.DataFrame]:
         """
         Retrieve cached data if available and not expired.
-
+
         Args:
             dataset_name: Name of the dataset
             filters: Filters applied to the dataset
-
+
         Returns:
             pandas.DataFrame or None: Cached data if available and valid
         """
         cache_key = self._get_cache_key(dataset_name, filters)
         cache_file = self._get_cache_file_path(cache_key)
-
+
         # Check if cache file exists
         if not cache_file.exists():
             return None
-
+
         # Check if cache entry exists in metadata
         if cache_key not in self.metadata:
             # Clean up orphaned cache file
             cache_file.unlink(missing_ok=True)
             return None
-
+
         # Check if cache is expired (default: 7 days)
         cache_info = self.metadata[cache_key]
-        created_time = datetime.fromisoformat(cache_info[
-        max_age = timedelta(days=cache_info.get(
-
+        created_time = datetime.fromisoformat(cache_info["created"])
+        max_age = timedelta(days=cache_info.get("max_age_days", 7))
+
         if datetime.now() - created_time > max_age:
             logger.info(f"Cache expired for '{dataset_name}', removing...")
             self._remove_cache_entry(cache_key)
             return None
-
+
         # Load cached data
         try:
             cached_data = pd.read_parquet(cache_file)
-            logger.info(
+            logger.info(
+                f"Loaded cached data for '{dataset_name}' ({len(cached_data)} rows)"
+            )
             return cached_data
         except Exception as e:
             logger.warning(f"Failed to load cached data for '{dataset_name}': {e}")
             self._remove_cache_entry(cache_key)
             return None
-
+
     def cache_data(
-        self,
-        dataset_name: str,
-        data: pd.DataFrame,
+        self,
+        dataset_name: str,
+        data: pd.DataFrame,
         filters: Dict[str, Any],
-        max_age_days: int = 7
+        max_age_days: int = 7,
     ):
         """
         Cache processed data.
-
+
         Args:
             dataset_name: Name of the dataset
            data: Processed pandas DataFrame
@@ -128,84 +129,84 @@ class DataCache:
         """
         cache_key = self._get_cache_key(dataset_name, filters)
         cache_file = self._get_cache_file_path(cache_key)
-
+
         # Save data to parquet file
         data.to_parquet(cache_file, index=False)
-
+
         # Update metadata
         self.metadata[cache_key] = {
-
-
-
-
-
-
+            "dataset_name": dataset_name,
+            "filters": filters,
+            "created": datetime.now().isoformat(),
+            "max_age_days": max_age_days,
+            "rows": len(data),
+            "columns": list(data.columns),
         }
-
+
         self._save_metadata()
         logger.info(f"Cached data for '{dataset_name}' ({len(data)} rows)")
-
+
     def _remove_cache_entry(self, cache_key: str):
         """Remove a cache entry and its file."""
         cache_file = self._get_cache_file_path(cache_key)
         cache_file.unlink(missing_ok=True)
-
+
         if cache_key in self.metadata:
             del self.metadata[cache_key]
             self._save_metadata()
-
+
     def clear_cache(self, dataset_name: Optional[str] = None):
         """
         Clear cache entries.
-
+
         Args:
             dataset_name: If provided, only clear cache for this dataset
         """
         keys_to_remove = []
-
+
         for cache_key, info in self.metadata.items():
-            if dataset_name is None or info[
+            if dataset_name is None or info["dataset_name"] == dataset_name:
                 keys_to_remove.append(cache_key)
-
+
         for key in keys_to_remove:
             self._remove_cache_entry(key)
-
+
         logger.info(f"Cleared {len(keys_to_remove)} cache entries")
-
+
     def get_cache_info(self) -> Dict[str, Any]:
         """Get information about the cache."""
         total_size = 0
         dataset_counts = {}
-
+
         for cache_key, info in self.metadata.items():
-            dataset_name = info[
+            dataset_name = info["dataset_name"]
             dataset_counts[dataset_name] = dataset_counts.get(dataset_name, 0) + 1
-
+
             cache_file = self._get_cache_file_path(cache_key)
             if cache_file.exists():
                 total_size += cache_file.stat().st_size
-
+
         return {
-
-
-
-
+            "total_entries": len(self.metadata),
+            "total_size_mb": round(total_size / (1024 * 1024), 2),
+            "dataset_counts": dataset_counts,
+            "cache_dir": str(self.cache_dir),
         }
-
+
     def cleanup_expired_cache(self):
         """Remove expired cache entries."""
         expired_keys = []
-
+
         for cache_key, info in self.metadata.items():
-            created_time = datetime.fromisoformat(info[
-            max_age = timedelta(days=info.get(
-
+            created_time = datetime.fromisoformat(info["created"])
+            max_age = timedelta(days=info.get("max_age_days", 7))
+
             if datetime.now() - created_time > max_age:
                 expired_keys.append(cache_key)
-
+
         for key in expired_keys:
             self._remove_cache_entry(key)
-
+
         if expired_keys:
             logger.info(f"Cleaned up {len(expired_keys)} expired cache entries")
         else: