domainiac 0.1.5__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: domainiac
3
- Version: 0.1.5
3
+ Version: 0.2.0
4
4
  Summary: Package for working with Energinet data, but with specialized functions used for Enigma.
5
5
  Author: Team Enigma
6
6
  Author-email: gridop-enigma@energinet.dk
@@ -2,6 +2,8 @@ import datamazing.pandas as pdz
2
2
  import pandas as pd
3
3
  from typeguard import typechecked
4
4
 
5
+ from ..wrappers import cache_decorator
6
+
5
7
 
6
8
  class MasterdataManager:
7
9
  """
@@ -13,27 +15,51 @@ class MasterdataManager:
13
15
  db: pdz.Database,
14
16
  time_interval: pdz.TimeInterval,
15
17
  resolution: pd.Timedelta,
18
+ cache_masterdata: bool = False,
16
19
  ) -> None:
17
20
  self.db = db
18
21
  self.time_interval = time_interval
19
22
  self.resolution = resolution
23
+ self.cache_masterdata = cache_masterdata
24
+
25
+ masterdata_cache = {}
26
+
27
@typechecked
def _get_operational_entities(self, table: str) -> pd.DataFrame:
    """Query ``table`` for the rows whose standing entity state is "InOperation".

    This is the uncached query; ``get_operational_entities`` decides whether
    the result is served through the masterdata cache.
    """
    return self.db.query(table, filters={"standing_entity_state": "InOperation"})
20
32
 
21
33
@typechecked
def get_operational_entities(self, table: str) -> pd.DataFrame:
    """Gets the operational data for a given table.

    When ``self.cache_masterdata`` is set, the query goes through
    ``cache_decorator`` backed by ``self.masterdata_cache``; otherwise the
    database is queried directly.
    """
    # Guard clause: caching is opt-in, so the plain query is the short path.
    if not self.cache_masterdata:
        return self._get_operational_entities(table)

    memoized_query = cache_decorator(self.masterdata_cache)(
        self._get_operational_entities
    )
    return memoized_query(table)
28
46
 
29
47
  @typechecked
30
48
  def get_data(
31
- self, table: str, filters: dict = {}, columns: list = []
49
+ self,
50
+ table: str,
51
+ filters: dict = {},
52
+ columns: list = [],
32
53
  ) -> pd.DataFrame:
33
54
  """Gets the data for a given table.
34
55
  Filters for rows valid at the end of time interval.
35
56
  """
36
- df = self.get_operational_entities(table, filters)
57
+ # Get operational entities
58
+ df = self.get_operational_entities(table)
59
+
60
+ # Apply the filters
61
+ for column, value in filters.items():
62
+ df = df[df[column] == value].reset_index()
37
63
 
38
64
  for column in columns:
39
65
  if column not in df.columns:
@@ -14,10 +14,12 @@ class PlantManager(MasterdataManager):
14
14
  db: pdz.Database,
15
15
  time_interval: pdz.TimeInterval,
16
16
  resolution: pd.Timedelta,
17
+ cache_masterdata: bool = False,
17
18
  ) -> None:
18
19
  self.db = db
19
20
  self.time_interval = time_interval
20
21
  self.resolution = resolution
22
+ self.cache_masterdata = cache_masterdata
21
23
 
22
24
  def get_plants(
23
25
  self,
@@ -0,0 +1,112 @@
1
+ import datamazing.pandas as pdz
2
+ import pandas as pd
3
+ from typeguard import typechecked
4
+
5
+ from ..wrappers import cache_decorator
6
+
7
+ DEFAULT_RESOLUTION = pd.Timedelta("PT5M")
8
+
9
+
10
class ResourceManager:
    """
    Manager which simplifies the process of handling resource schedules.

    Scheduled resources are delivered in a 5 min resolution. If a coarser
    resolution (a longer time step) is chosen, the result will be the most
    conservative value in the time interval, i.e. max of minimum capacity
    and min of maximum capacity; the scheduled power is averaged.
    If no resolution is given, the default is 5 min.
    If resolution is less than 5 min, the resulting time series will
    default to 5 min resolution.
    NOTE(review): the sub-5-min fallback is not enforced explicitly in this
    class; it presumably follows from the resample + dropna step - confirm.
    """

    # Class-level fallback, kept so that `ResourceManager.resource_schedules_cache`
    # and subclasses that skip this __init__ keep working. Instances created
    # through __init__ get their own dict (see below).
    resource_schedules_cache = {}

    def __init__(
        self,
        db: pdz.Database,
        time_interval: pdz.TimeInterval,
        resolution: pd.Timedelta = DEFAULT_RESOLUTION,
        cache_reource_schedules: bool = False,
    ) -> None:
        """Store the query context and set up the schedule cache.

        Args:
            db: Database the schedules are queried from.
            time_interval: Time interval passed to every schedule query.
            resolution: Resolution of the returned time series.
            cache_reource_schedules: If true, each table is queried once per
                manager and later calls reuse the stored frame. (The
                misspelled name is part of the public interface and is kept
                for backward compatibility.)
        """
        self.db = db
        self.time_interval = time_interval
        self.resolution = resolution
        self.cache_reource_schedules = cache_reource_schedules
        # The cache key is only the table name, while the cached frame also
        # depends on this instance's db and time_interval. A dict shared by
        # all instances would hand one manager's data to another, so every
        # instance gets its own cache.
        self.resource_schedules_cache = {}

    @typechecked
    def _query_resource_schedules(self, table: str) -> pd.DataFrame:
        """Query ``table`` over ``self.time_interval`` without any caching."""
        return self.db.query(
            table_name=table,
            time_interval=self.time_interval,
        )

    @typechecked
    def query_resource_schedules(self, table: str) -> pd.DataFrame:
        """Query ``table``, going through the cache when caching is enabled.

        With caching enabled the stored frame itself is returned, not a copy.
        """
        if self.cache_reource_schedules:
            cached_query = cache_decorator(self.resource_schedules_cache)(
                self._query_resource_schedules
            )
            df = cached_query(table)
        else:
            df = self._query_resource_schedules(table)
        return df

    @typechecked
    def get_resource_schedules(self, resource_gsrn: str | list[str]) -> pd.DataFrame:
        """Gets resource schedules for a given list of resource gsrns.

        Args:
            resource_gsrn: A single gsrn or a list of gsrns to keep.

        Returns:
            The matching schedules, resampled when ``self.resolution`` differs
            from the default, with the ``masterdata_gsrn`` and
            ``datahub_gsrn_e18`` columns dropped if present.

        Raises:
            ValueError: If no schedule matches the requested gsrn(s).
        """
        df_resource_schedules = self.query_resource_schedules(
            "scheduleResourcePowerPlan"
        )

        # The whole table is fetched (and possibly cached); the gsrn
        # selection is applied afterwards in pandas.
        if isinstance(resource_gsrn, str):
            resource_gsrn = [resource_gsrn]
        df_resource_schedules = df_resource_schedules[
            df_resource_schedules["resource_gsrn"].isin(resource_gsrn)
        ]

        if df_resource_schedules.empty:
            raise ValueError(f"No resource schedules found for gsrn {resource_gsrn}.")

        # Data already comes in the default resolution, so resampling is
        # only needed when another resolution was requested.
        if self.resolution != DEFAULT_RESOLUTION:
            df_resource_schedules = (
                pdz.group(
                    df_resource_schedules,
                    by=[
                        "market_participant",
                        "created_time_utc",
                        "price_area",
                        "resource_gsrn",
                    ],
                )
                .resample(on="time_utc", resolution=self.resolution)
                .agg(
                    {
                        "schedule_power_MW": "mean",
                        # Conservative bounds: highest minimum, lowest maximum.
                        "schedule_capacity_min_MW": "max",
                        "schedule_capacity_max_MW": "min",
                    }
                )
                .dropna()
            )
        return df_resource_schedules.drop(
            columns=["masterdata_gsrn", "datahub_gsrn_e18"], errors="ignore"
        )

    @typechecked
    def get_latest_resource_schedules(
        self,
        resource_gsrn: str | list[str],
    ) -> pd.DataFrame:
        """Gets the latest resource schedules for a given list of resource gsrns.

        For every (resource_gsrn, time_utc) pair, only the rows carrying the
        maximum ``created_time_utc`` are kept.
        """

        df_resource_schedules = self.get_resource_schedules(resource_gsrn=resource_gsrn)

        df_latest_created_time = pdz.group(
            df=df_resource_schedules, by=["resource_gsrn", "time_utc"]
        ).agg({"created_time_utc": "max"})

        # Joining back on every column of the aggregated frame selects the
        # full schedule rows that carry the latest creation time.
        df_resource_latest = df_latest_created_time.merge(
            df_resource_schedules, on=list(df_latest_created_time.columns)
        )

        return df_resource_latest
@@ -14,10 +14,12 @@ class UnitManager(MasterdataManager):
14
14
  db: pdz.Database,
15
15
  time_interval: pdz.TimeInterval,
16
16
  resolution: pd.Timedelta,
17
+ cache_masterdata: bool = False,
17
18
  ) -> None:
18
19
  self.db = db
19
20
  self.time_interval = time_interval
20
21
  self.resolution = resolution
22
+ self.cache_masterdata = cache_masterdata
21
23
 
22
24
  def get_units(
23
25
  self,
@@ -0,0 +1 @@
1
+ from .cache_wrapper import cache_decorator
@@ -0,0 +1,16 @@
1
+ from functools import wraps
2
+
3
+
4
# Marker placed between positional and keyword arguments inside a cache key,
# so a call using keywords can never collide with a purely positional call.
# Module-level on purpose: wrappers created by separate `cache_decorator`
# calls that share one cache must build identical keys.
_KWARGS_MARK = object()


def cache_decorator(cache):
    """Build a memoizing decorator backed by the caller-supplied mapping.

    Args:
        cache: Mutable mapping (e.g. a ``dict``) used as storage. It is owned
            by the caller, so several wrappers may share the same mapping.

    Returns:
        A decorator. The wrapped function is called at most once per distinct
        set of arguments; later calls return the stored result object.

    Raises:
        TypeError: When the wrapper is called with an unhashable argument.

    Calls that use positional arguments only are stored under the plain
    ``args`` tuple. Keyword arguments are folded into the key as well, so
    calls that differ only in keyword values no longer share an entry.
    """

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            key = args
            if kwargs:
                # Sorted by name, so keyword order does not affect the key.
                key = args + (_KWARGS_MARK,) + tuple(sorted(kwargs.items()))
            if key in cache:
                return cache[key]
            result = func(*args, **kwargs)
            cache[key] = result
            return result

        return wrapper

    return decorator
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "domainiac"
3
- version = "0.1.5"
3
+ version = "0.2.0"
4
4
  description = "Package for working with Energinet data, but with specialized functions used for Enigma."
5
5
  authors = ["Team Enigma <gridop-enigma@energinet.dk>"]
6
6
  packages = [
@@ -1,79 +0,0 @@
1
- import datamazing.pandas as pdz
2
- import pandas as pd
3
- from typeguard import typechecked
4
-
5
-
6
class ResourceManager:
    """
    Manager which simplifies the process of handling resource schedules.
    Scheduled resources are delivered in a 5 min resolution. If a higher
    resolution is chosen,
    the result will be the most conservative value in the time interval,
    i.e. max of minimum capacity and min of maximum capacity.
    If no resolution is given, the default is 5 min.
    If resolution is less than 5 min, the resulting time series will
    default to 5 min resolution.
    """

    def __init__(
        self,
        db: pdz.Database,
        time_interval: pdz.TimeInterval,
        resolution: pd.Timedelta = pd.Timedelta("PT5M"),
    ) -> None:
        # Query context shared by all methods of this manager.
        self.db = db
        self.time_interval = time_interval
        self.resolution = resolution

    @typechecked
    def get_resource_schedules(self, resource_gsrn: str | list[str]) -> pd.DataFrame:
        """Gets resource schedules for a given list of resource gsrns.

        Queries "scheduleResourcePowerPlan" over ``self.time_interval``
        filtered on ``resource_gsrn``, then resamples the result to
        ``self.resolution``.

        Raises:
            ValueError: If the query returns no rows.
        """
        df_resource_schedules = self.db.query(
            table_name="scheduleResourcePowerPlan",
            time_interval=self.time_interval,
            filters={"resource_gsrn": resource_gsrn},
        )

        if df_resource_schedules.empty:
            raise ValueError(f"No resource schedules found for gsrn {resource_gsrn}.")

        # Resampling is always applied here, also for the default resolution.
        df_resource_resampled = (
            pdz.group(
                df_resource_schedules,
                by=[
                    "market_participant",
                    "created_time_utc",
                    "price_area",
                    "resource_gsrn",
                ],
            )
            .resample(on="time_utc", resolution=self.resolution)
            .agg(
                {
                    "schedule_power_MW": "mean",
                    # Conservative bounds: highest minimum, lowest maximum.
                    "schedule_capacity_min_MW": "max",
                    "schedule_capacity_max_MW": "min",
                }
            )
            .dropna()
        )
        return df_resource_resampled

    @typechecked
    def get_latest_resource_schedules(
        self,
        resource_gsrn: str | list[str],
    ) -> pd.DataFrame:
        """Gets the latest resource schedules for a given list of resource gsrns."""

        df_resource_schedules = self.get_resource_schedules(resource_gsrn=resource_gsrn)

        # Latest creation time per (resource_gsrn, time_utc) pair.
        df_latest_created_time = pdz.group(
            df=df_resource_schedules, by=["resource_gsrn", "time_utc"]
        ).agg({"created_time_utc": "max"})

        # Merge back on all columns of the aggregated frame to recover the
        # full schedule rows that carry that latest creation time.
        df_resource_latest = df_latest_created_time.merge(
            df_resource_schedules, on=list(df_latest_created_time.columns)
        )

        return df_resource_latest