domainiac 0.1.5__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {domainiac-0.1.5 → domainiac-0.2.0}/PKG-INFO +1 -1
- {domainiac-0.1.5 → domainiac-0.2.0}/domainiac/managers/masterdata_manager.py +31 -5
- {domainiac-0.1.5 → domainiac-0.2.0}/domainiac/managers/plant_manager.py +2 -0
- domainiac-0.2.0/domainiac/managers/resource_manager.py +112 -0
- {domainiac-0.1.5 → domainiac-0.2.0}/domainiac/managers/unit_manager.py +2 -0
- domainiac-0.2.0/domainiac/wrappers/__init__.py +1 -0
- domainiac-0.2.0/domainiac/wrappers/cache_wrapper.py +16 -0
- {domainiac-0.1.5 → domainiac-0.2.0}/pyproject.toml +1 -1
- domainiac-0.1.5/domainiac/managers/resource_manager.py +0 -79
- {domainiac-0.1.5 → domainiac-0.2.0}/domainiac/__init__.py +0 -0
- {domainiac-0.1.5 → domainiac-0.2.0}/domainiac/managers/__init__.py +0 -0
- {domainiac-0.1.5 → domainiac-0.2.0}/domainiac/managers/metering_manager.py +0 -0
@@ -2,6 +2,8 @@ import datamazing.pandas as pdz
|
|
2
2
|
import pandas as pd
|
3
3
|
from typeguard import typechecked
|
4
4
|
|
5
|
+
from ..wrappers import cache_decorator
|
6
|
+
|
5
7
|
|
6
8
|
class MasterdataManager:
|
7
9
|
"""
|
@@ -13,27 +15,51 @@ class MasterdataManager:
|
|
13
15
|
db: pdz.Database,
|
14
16
|
time_interval: pdz.TimeInterval,
|
15
17
|
resolution: pd.Timedelta,
|
18
|
+
cache_masterdata: bool = False,
|
16
19
|
) -> None:
|
17
20
|
self.db = db
|
18
21
|
self.time_interval = time_interval
|
19
22
|
self.resolution = resolution
|
23
|
+
self.cache_masterdata = cache_masterdata
|
24
|
+
|
25
|
+
masterdata_cache = {}
|
26
|
+
|
27
|
+
@typechecked
|
28
|
+
def _get_operational_entities(self, table: str) -> pd.DataFrame:
|
29
|
+
filters = {"standing_entity_state": "InOperation"}
|
30
|
+
df = self.db.query(table, filters=filters)
|
31
|
+
return df
|
20
32
|
|
21
33
|
@typechecked
|
22
|
-
def get_operational_entities(self, table: str
|
34
|
+
def get_operational_entities(self, table: str) -> pd.DataFrame:
|
23
35
|
"""Gets the operational data for a given table."""
|
24
36
|
|
25
|
-
|
26
|
-
|
37
|
+
if self.cache_masterdata:
|
38
|
+
cached_query = cache_decorator(self.masterdata_cache)(
|
39
|
+
self._get_operational_entities
|
40
|
+
)
|
41
|
+
df = cached_query(table)
|
42
|
+
else:
|
43
|
+
df = self._get_operational_entities(table)
|
44
|
+
|
27
45
|
return df
|
28
46
|
|
29
47
|
@typechecked
|
30
48
|
def get_data(
|
31
|
-
self,
|
49
|
+
self,
|
50
|
+
table: str,
|
51
|
+
filters: dict = {},
|
52
|
+
columns: list = [],
|
32
53
|
) -> pd.DataFrame:
|
33
54
|
"""Gets the data for a given table.
|
34
55
|
Filters for rows valid at the end of time interval.
|
35
56
|
"""
|
36
|
-
|
57
|
+
# Get operational entities
|
58
|
+
df = self.get_operational_entities(table)
|
59
|
+
|
60
|
+
# Apply the filters
|
61
|
+
for column, value in filters.items():
|
62
|
+
df = df[df[column] == value].reset_index()
|
37
63
|
|
38
64
|
for column in columns:
|
39
65
|
if column not in df.columns:
|
@@ -14,10 +14,12 @@ class PlantManager(MasterdataManager):
|
|
14
14
|
db: pdz.Database,
|
15
15
|
time_interval: pdz.TimeInterval,
|
16
16
|
resolution: pd.Timedelta,
|
17
|
+
cache_masterdata: bool = False,
|
17
18
|
) -> None:
|
18
19
|
self.db = db
|
19
20
|
self.time_interval = time_interval
|
20
21
|
self.resolution = resolution
|
22
|
+
self.cache_masterdata = cache_masterdata
|
21
23
|
|
22
24
|
def get_plants(
|
23
25
|
self,
|
@@ -0,0 +1,112 @@
|
|
1
|
+
import datamazing.pandas as pdz
|
2
|
+
import pandas as pd
|
3
|
+
from typeguard import typechecked
|
4
|
+
|
5
|
+
from ..wrappers import cache_decorator
|
6
|
+
|
7
|
+
DEFAULT_RESOLUTION = pd.Timedelta("PT5M")
|
8
|
+
|
9
|
+
|
10
|
+
class ResourceManager:
|
11
|
+
"""
|
12
|
+
Manager which simplifies the process of handling resource schedules.
|
13
|
+
Scheduled resources are delivered in a 5 min resolution. If a coarser
|
14
|
+
resolution is chosen,
|
15
|
+
the result will be the most conservative value in the time interval,
|
16
|
+
i.e. max of minimum capacity and min of maximum capacity.
|
17
|
+
If no resolution is given, the default is 5 min.
|
18
|
+
If resolution is less than 5 min, the resulting time series will
|
19
|
+
default to 5 min resolution.
|
20
|
+
"""
|
21
|
+
|
22
|
+
def __init__(
|
23
|
+
self,
|
24
|
+
db: pdz.Database,
|
25
|
+
time_interval: pdz.TimeInterval,
|
26
|
+
resolution: pd.Timedelta = DEFAULT_RESOLUTION,
|
27
|
+
cache_reource_schedules: bool = False,
|
28
|
+
) -> None:
|
29
|
+
self.db = db
|
30
|
+
self.time_interval = time_interval
|
31
|
+
self.resolution = resolution
|
32
|
+
self.cache_reource_schedules = cache_reource_schedules
|
33
|
+
|
34
|
+
resource_schedules_cache = {}
|
35
|
+
|
36
|
+
@typechecked
|
37
|
+
def _query_resource_schedules(self, table: str) -> pd.DataFrame:
|
38
|
+
return self.db.query(
|
39
|
+
table_name=table,
|
40
|
+
time_interval=self.time_interval,
|
41
|
+
)
|
42
|
+
|
43
|
+
@typechecked
|
44
|
+
def query_resource_schedules(self, table: str) -> pd.DataFrame:
|
45
|
+
if self.cache_reource_schedules:
|
46
|
+
cached_query = cache_decorator(self.resource_schedules_cache)(
|
47
|
+
self._query_resource_schedules
|
48
|
+
)
|
49
|
+
df = cached_query(table)
|
50
|
+
else:
|
51
|
+
df = self._query_resource_schedules(table)
|
52
|
+
return df
|
53
|
+
|
54
|
+
@typechecked
|
55
|
+
def get_resource_schedules(self, resource_gsrn: str | list[str]) -> pd.DataFrame:
|
56
|
+
"""Gets resource schedules for a given list of resource gsrns."""
|
57
|
+
df_resource_schedules = self.query_resource_schedules(
|
58
|
+
"scheduleResourcePowerPlan"
|
59
|
+
)
|
60
|
+
|
61
|
+
if isinstance(resource_gsrn, str):
|
62
|
+
resource_gsrn = [resource_gsrn]
|
63
|
+
df_resource_schedules = df_resource_schedules[
|
64
|
+
df_resource_schedules["resource_gsrn"].isin(resource_gsrn)
|
65
|
+
]
|
66
|
+
|
67
|
+
if df_resource_schedules.empty:
|
68
|
+
raise ValueError(f"No resource schedules found for gsrn {resource_gsrn}.")
|
69
|
+
|
70
|
+
if self.resolution != DEFAULT_RESOLUTION:
|
71
|
+
df_resource_schedules = (
|
72
|
+
pdz.group(
|
73
|
+
df_resource_schedules,
|
74
|
+
by=[
|
75
|
+
"market_participant",
|
76
|
+
"created_time_utc",
|
77
|
+
"price_area",
|
78
|
+
"resource_gsrn",
|
79
|
+
],
|
80
|
+
)
|
81
|
+
.resample(on="time_utc", resolution=self.resolution)
|
82
|
+
.agg(
|
83
|
+
{
|
84
|
+
"schedule_power_MW": "mean",
|
85
|
+
"schedule_capacity_min_MW": "max",
|
86
|
+
"schedule_capacity_max_MW": "min",
|
87
|
+
}
|
88
|
+
)
|
89
|
+
.dropna()
|
90
|
+
)
|
91
|
+
return df_resource_schedules.drop(
|
92
|
+
columns=["masterdata_gsrn", "datahub_gsrn_e18"], errors="ignore"
|
93
|
+
)
|
94
|
+
|
95
|
+
@typechecked
|
96
|
+
def get_latest_resource_schedules(
|
97
|
+
self,
|
98
|
+
resource_gsrn: str | list[str],
|
99
|
+
) -> pd.DataFrame:
|
100
|
+
"""Gets the lastest resource schedules for a given list of resource gsrns."""
|
101
|
+
|
102
|
+
df_resource_schedules = self.get_resource_schedules(resource_gsrn=resource_gsrn)
|
103
|
+
|
104
|
+
df_latest_created_time = pdz.group(
|
105
|
+
df=df_resource_schedules, by=["resource_gsrn", "time_utc"]
|
106
|
+
).agg({"created_time_utc": "max"})
|
107
|
+
|
108
|
+
df_resource_latest = df_latest_created_time.merge(
|
109
|
+
df_resource_schedules, on=list(df_latest_created_time.columns)
|
110
|
+
)
|
111
|
+
|
112
|
+
return df_resource_latest
|
@@ -14,10 +14,12 @@ class UnitManager(MasterdataManager):
|
|
14
14
|
db: pdz.Database,
|
15
15
|
time_interval: pdz.TimeInterval,
|
16
16
|
resolution: pd.Timedelta,
|
17
|
+
cache_masterdata: bool = False,
|
17
18
|
) -> None:
|
18
19
|
self.db = db
|
19
20
|
self.time_interval = time_interval
|
20
21
|
self.resolution = resolution
|
22
|
+
self.cache_masterdata = cache_masterdata
|
21
23
|
|
22
24
|
def get_units(
|
23
25
|
self,
|
@@ -0,0 +1 @@
|
|
1
|
+
from .cache_wrapper import cache_decorator
|
@@ -0,0 +1,16 @@
|
|
1
|
+
from functools import wraps
|
2
|
+
|
3
|
+
|
4
|
+
def cache_decorator(cache):
|
5
|
+
def decorator(func):
|
6
|
+
@wraps(func)
|
7
|
+
def wrapper(*args, **kwargs):
|
8
|
+
if args in cache:
|
9
|
+
return cache[args]
|
10
|
+
result = func(*args, **kwargs)
|
11
|
+
cache[args] = result
|
12
|
+
return result
|
13
|
+
|
14
|
+
return wrapper
|
15
|
+
|
16
|
+
return decorator
|
@@ -1,79 +0,0 @@
|
|
1
|
-
import datamazing.pandas as pdz
|
2
|
-
import pandas as pd
|
3
|
-
from typeguard import typechecked
|
4
|
-
|
5
|
-
|
6
|
-
class ResourceManager:
|
7
|
-
"""
|
8
|
-
Manager which simplifies the process of handling resource schedules.
|
9
|
-
Scheduled resources are delivered in a 5 min resolution. If a higher
|
10
|
-
resolution is chosen,
|
11
|
-
the result will be the most conservative value in the time interval,
|
12
|
-
e.i. max of minimum capacity and min of maximum capacity.
|
13
|
-
If no resolution is given, the default is 5 min.
|
14
|
-
If resolution is less than 5 min, the resulting time series will
|
15
|
-
default to 5 min resolution.
|
16
|
-
"""
|
17
|
-
|
18
|
-
def __init__(
|
19
|
-
self,
|
20
|
-
db: pdz.Database,
|
21
|
-
time_interval: pdz.TimeInterval,
|
22
|
-
resolution: pd.Timedelta = pd.Timedelta("PT5M"),
|
23
|
-
) -> None:
|
24
|
-
self.db = db
|
25
|
-
self.time_interval = time_interval
|
26
|
-
self.resolution = resolution
|
27
|
-
|
28
|
-
@typechecked
|
29
|
-
def get_resource_schedules(self, resource_gsrn: str | list[str]) -> pd.DataFrame:
|
30
|
-
"""Gets resource schedules for a given list of resource gsrns."""
|
31
|
-
df_resource_schedules = self.db.query(
|
32
|
-
table_name="scheduleResourcePowerPlan",
|
33
|
-
time_interval=self.time_interval,
|
34
|
-
filters={"resource_gsrn": resource_gsrn},
|
35
|
-
)
|
36
|
-
|
37
|
-
if df_resource_schedules.empty:
|
38
|
-
raise ValueError(f"No resource schedules found for gsrn {resource_gsrn}.")
|
39
|
-
|
40
|
-
df_resource_resampled = (
|
41
|
-
pdz.group(
|
42
|
-
df_resource_schedules,
|
43
|
-
by=[
|
44
|
-
"market_participant",
|
45
|
-
"created_time_utc",
|
46
|
-
"price_area",
|
47
|
-
"resource_gsrn",
|
48
|
-
],
|
49
|
-
)
|
50
|
-
.resample(on="time_utc", resolution=self.resolution)
|
51
|
-
.agg(
|
52
|
-
{
|
53
|
-
"schedule_power_MW": "mean",
|
54
|
-
"schedule_capacity_min_MW": "max",
|
55
|
-
"schedule_capacity_max_MW": "min",
|
56
|
-
}
|
57
|
-
)
|
58
|
-
.dropna()
|
59
|
-
)
|
60
|
-
return df_resource_resampled
|
61
|
-
|
62
|
-
@typechecked
|
63
|
-
def get_latest_resource_schedules(
|
64
|
-
self,
|
65
|
-
resource_gsrn: str | list[str],
|
66
|
-
) -> pd.DataFrame:
|
67
|
-
"""Gets the lastest resource schedules for a given list of resource gsrns."""
|
68
|
-
|
69
|
-
df_resource_schedules = self.get_resource_schedules(resource_gsrn=resource_gsrn)
|
70
|
-
|
71
|
-
df_latest_created_time = pdz.group(
|
72
|
-
df=df_resource_schedules, by=["resource_gsrn", "time_utc"]
|
73
|
-
).agg({"created_time_utc": "max"})
|
74
|
-
|
75
|
-
df_resource_latest = df_latest_created_time.merge(
|
76
|
-
df_resource_schedules, on=list(df_latest_created_time.columns)
|
77
|
-
)
|
78
|
-
|
79
|
-
return df_resource_latest
|
File without changes
|
File without changes
|
File without changes
|