domainiac 9.3.1.tar.gz → 10.0.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. {domainiac-9.3.1 → domainiac-10.0.0}/PKG-INFO +2 -1
  2. {domainiac-9.3.1 → domainiac-10.0.0}/domainiac/__init__.py +1 -2
  3. {domainiac-9.3.1 → domainiac-10.0.0}/domainiac/managers/__init__.py +0 -2
  4. domainiac-10.0.0/domainiac/managers/masterdata_manager.py +229 -0
  5. domainiac-10.0.0/domainiac/managers/schemas/masterdata_manager.json +40 -0
  6. {domainiac-9.3.1 → domainiac-10.0.0}/domainiac/modeling/plant.py +4 -4
  7. {domainiac-9.3.1 → domainiac-10.0.0}/pyproject.toml +2 -1
  8. domainiac-9.3.1/domainiac/managers/masterdata_manager.py +0 -79
  9. domainiac-9.3.1/domainiac/managers/plant_manager.py +0 -160
  10. domainiac-9.3.1/domainiac/managers/unit_manager.py +0 -42
  11. {domainiac-9.3.1 → domainiac-10.0.0}/domainiac/functions/__init__.py +0 -0
  12. {domainiac-9.3.1 → domainiac-10.0.0}/domainiac/functions/conversions.py +0 -0
  13. {domainiac-9.3.1 → domainiac-10.0.0}/domainiac/functions/interpolation.py +0 -0
  14. {domainiac-9.3.1 → domainiac-10.0.0}/domainiac/functions/solar.py +0 -0
  15. {domainiac-9.3.1 → domainiac-10.0.0}/domainiac/functions/temperature.py +0 -0
  16. {domainiac-9.3.1 → domainiac-10.0.0}/domainiac/functions/typing.py +0 -0
  17. {domainiac-9.3.1 → domainiac-10.0.0}/domainiac/functions/wind.py +0 -0
  18. {domainiac-9.3.1 → domainiac-10.0.0}/domainiac/managers/availability_manager.py +0 -0
  19. {domainiac-9.3.1 → domainiac-10.0.0}/domainiac/managers/metering_manager.py +0 -0
  20. {domainiac-9.3.1 → domainiac-10.0.0}/domainiac/managers/nwp_manager.py +0 -0
  21. {domainiac-9.3.1 → domainiac-10.0.0}/domainiac/managers/outage_manager.py +0 -0
  22. {domainiac-9.3.1 → domainiac-10.0.0}/domainiac/managers/resource_manager.py +0 -0
  23. {domainiac-9.3.1 → domainiac-10.0.0}/domainiac/modeling/__init__.py +0 -0
  24. {domainiac-9.3.1 → domainiac-10.0.0}/domainiac/modeling/nwp.py +0 -0
  25. {domainiac-9.3.1 → domainiac-10.0.0}/domainiac/wrappers/__init__.py +0 -0
  26. {domainiac-9.3.1 → domainiac-10.0.0}/domainiac/wrappers/cache_wrapper.py +0 -0
{domainiac-9.3.1 → domainiac-10.0.0}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: domainiac
-Version: 9.3.1
+Version: 10.0.0
 Summary: Package for working with Energinet data, but with specialized functions used for Enigma.
 Author: Team Enigma
 Author-email: enigma@energinet.dk
@@ -17,3 +17,4 @@ Requires-Dist: pvlib (>=0.13.1)
 Requires-Dist: scikit-learn (>=1.3.0)
 Requires-Dist: scipy (>=1.15.3)
 Requires-Dist: typeguard (>=4.2.1)
+Requires-Dist: utm (>=0.8.1)
{domainiac-9.3.1 → domainiac-10.0.0}/domainiac/__init__.py
@@ -1,9 +1,8 @@
 from .managers import (
     AvailabilityManager,
+    MasterdataManager,
     MeteringManager,
     NWPManager,
-    PlantManager,
     ResourceManager,
-    UnitManager,
 )
 from .modeling import Coordinate, Group, Neighborhood, NWPParameter, NWPProvider, Plant
{domainiac-9.3.1 → domainiac-10.0.0}/domainiac/managers/__init__.py
@@ -3,6 +3,4 @@ from .masterdata_manager import MasterdataManager
 from .metering_manager import MeteringManager
 from .nwp_manager import NWPManager
 from .outage_manager import OutageManager
-from .plant_manager import PlantManager
 from .resource_manager import ResourceManager
-from .unit_manager import UnitManager
domainiac-10.0.0/domainiac/managers/masterdata_manager.py
@@ -0,0 +1,229 @@
+import json
+from pathlib import Path
+
+import datamazing.pandas as pdz
+import pandas as pd
+import utm
+
+CONNECTION_POINT_BEHIND_THE_METER = "installationstilsluttet (I)"
+
+
+class MasterdataManager:
+
+    SCHEMA = json.loads(
+        (Path(__file__).parent / "schemas/masterdata_manager.json").read_bytes()
+    )
+
+    def __init__(
+        self,
+        db: pdz.Database,
+        time_interval: pdz.TimeInterval = None,
+        as_of_time: pd.Timestamp = None,
+    ) -> None:
+        self.db = db
+        self.time_interval = time_interval
+        self.as_of_time = as_of_time
+
+        if as_of_time is not None and as_of_time.utcoffset().total_seconds() != 0:
+            raise ValueError("'as_of_time' must be UTC")
+
+        if as_of_time is not None and time_interval is not None:
+            raise ValueError("Cannot provide both 'as_of_time' and 'time_interval'")
+        if as_of_time is None and time_interval is None:
+            raise ValueError(
+                "Either 'as_of_time' must be provided, or 'time_interval' must be "
+                "provided"
+            )
+
+    @property
+    def start_time(self) -> pd.Timestamp:
+        if self.as_of_time:
+            return self.as_of_time
+        else:
+            return self.time_interval.left
+
+    @property
+    def end_time(self) -> pd.Timestamp:
+        if self.as_of_time:
+            return self.as_of_time + pdz.get_epsilon(dtype=pd.DatetimeTZDtype(tz="UTC"))
+        else:
+            return self.time_interval.right
+
+    @staticmethod
+    def _intersect_time_and_comission_intervals(df: pd.DataFrame) -> pd.DataFrame:
+        df = df.copy()
+        # take max of start time and commission time (if the latter is not null)
+        df["start_time_utc"] = df[["start_time_utc", "commission_time_utc"]].max(axis=1)
+        # take min of end time and decommission time (if the latter is not null)
+        df["end_time_utc"] = df[["end_time_utc", "decommission_time_utc"]].min(axis=1)
+        # filter out rows where start time is after end time
+        df = df[df["start_time_utc"] <= df["end_time_utc"]]
+
+        df = df.drop(columns=["commission_time_utc", "decommission_time_utc"])
+        return df
+
+    def _filter_interval(self, df: pd.DataFrame) -> pd.DataFrame:
+        df["start_time_utc"] = df["start_time_utc"].clip(lower=self.start_time)
+        df["end_time_utc"] = df["end_time_utc"].clip(upper=self.end_time)
+        df = df[df["start_time_utc"] < df["end_time_utc"]]
+        return df
+
+    def _get_table(self, table_name: str) -> pd.DataFrame:
+        df = self.db.query(table_name)
+        df = df.drop(columns=["emda_version", "created_time_utc"], errors="ignore")
+        return df
+
+    def get_plant_masterdata(self) -> pd.DataFrame:
+        df_plant = self._get_table("masterdata_emda_plants")
+        df_unit = self._get_table("masterdata_emda_units")
+        df_market_participant = self._get_table("masterdata_emda_market_participants")
+
+        df_plant = self._intersect_time_and_comission_intervals(df_plant)
+        df_unit = self._intersect_time_and_comission_intervals(df_unit)
+
+        df_plant = self._filter_interval(df_plant)
+        df_unit = self._filter_interval(df_unit)
+        df_market_participant = self._filter_interval(df_market_participant)
+
+        df_unit = df_unit.sort_values(by="capacity_max_MW", ascending=False)
+
+        df_unit_summary = pdz.group_interval(
+            df_unit,
+            by=["plant_id"],
+            interval=("start_time_utc", "end_time_utc"),
+        ).agg(
+            {
+                "power_system_resource_type": "first",  # from largest unit
+                "asset_type": "first",  # from largest unit
+                "power_system_resource": "first",  # from largest unit
+                "coordinate_x_utm": "mean",
+                "coordinate_y_utm": "mean",
+                "hub_height_m": "mean",
+                "c11": "first",  # assumed unique per group
+            }
+        )
+
+        df_plant = pdz.merge_interval_interval(
+            df_plant,
+            df_unit_summary,
+            on=["plant_id"],
+            interval=("start_time_utc", "end_time_utc"),
+            how="left",
+        )
+
+        df_plant = pdz.merge_interval_interval(
+            df_plant,
+            df_market_participant,
+            on=["market_participant_id"],
+            interval=("start_time_utc", "end_time_utc"),
+            how="left",
+        )
+
+        df_address = self._get_table("masterdata_address")
+        df_address = df_address.rename(
+            columns={
+                "street_name": "address_street_name",
+                "house_number": "address_house_number",
+                "postal_code": "address_postal_code",
+                "latitude": "address_latitude",
+                "longitude": "address_longitude",
+            }
+        )
+
+        df_plant = pdz.merge(
+            df_plant,
+            df_address,
+            on=["address_street_name", "address_house_number", "address_postal_code"],
+            how="left",
+        )
+
+        df_plant["is_household"] = is_household(df_plant)
+
+        df_plant["latitude"], df_plant["longitude"] = coordinates(df_plant)
+        df_plant = df_plant.drop(columns=["address_latitude", "address_longitude"])
+
+        return df_plant
+
+
+def is_household(df: pd.DataFrame) -> pd.Series:
+    """
+    Determine if a plant corresponds to a household installation.
+    """
+    # due to the inadequate quality of masterdata,
+    # we apply several filters to estimate whether
+    # an installation is a household or not,
+    # based on the description found in
+    # https://ens.dk/sites/ens.dk/files/Stoette_vedvarende_energi/energistyrelsens_vejledning_om_beregning_af_nettoafregning_og_opgoerelse_.pdf
+
+    # household installations will be behind-the-meter
+    # ("installationstilsluttet")
+    is_behind_the_meter = df["connection_point"] == CONNECTION_POINT_BEHIND_THE_METER
+
+    # household installations will be in the yearly
+    # settlement group 6 ("årsbaseret nettoafregning")
+    is_settlement_group_6 = df["settlement_group"] == 6
+
+    # also remove installations with installed power
+    # below 200 kW, as these will also most likely
+    # be behind-the-meter installations (this should
+    # already be captured by the filters above, but
+    # masterdata is not fully reliable)
+    # an example of a 200 kW plant can be found at coordinates 55.691, 9.397
+    is_small = df["capacity_max_MW"] <= 0.2
+
+    # if the installation is connected to the TSO,
+    # it is definitely not a household installation
+    is_tso_connected = df["operation_type"] == "Tso"
+
+    return (is_behind_the_meter | is_settlement_group_6 | is_small) & ~is_tso_connected
+
+
+def coordinates(
+    df: pd.DataFrame,
+) -> tuple[pd.Series, pd.Series]:
+    # translate UTM coordinates to lat/lon where they exist
+    latitude, longitude = utm_to_latlon(
+        df["coordinate_x_utm"],
+        df["coordinate_y_utm"],
+        df["price_area"],
+    )
+
+    # prefer translated UTM coordinates, fall back to address coordinates
+    latitude = latitude.combine_first(df["address_latitude"])
+    longitude = longitude.combine_first(df["address_longitude"])
+
+    return latitude, longitude
+
+
+def utm_to_latlon(
+    x: pd.Series, y: pd.Series, price_area: pd.Series
+) -> tuple[pd.Series, pd.Series]:
+    """
+    Convert UTM coordinates to latitude and longitude, given the price area.
+    Ideally, we would use the zone number, but this is currently not available
+    in masterdata. Instead, we use a workaround based on the price area, which
+    should be sufficient for now. It follows this logic:
+    - If the price area is DK1, use UTM zone 32.
+    - If the price area is DK2, use UTM zone 33, unless the resulting longitude
+      is above 16.0 (about 50 km east of Bornholm): in that case, we assume
+      the real zone is actually 32, since no entities should be placed there.
+    - If the price area is undefined, use UTM zone 32, since this covers the
+      majority of the area of Denmark.
+    """
+    if x.empty or y.empty or x.isnull().all() or y.isnull().all():
+        return pd.Series(dtype=float), pd.Series(dtype=float)
+
+    lat_32, lon_32 = utm.to_latlon(x, y, zone_number=32, northern=True, strict=False)
+
+    lat_33, lon_33 = utm.to_latlon(x, y, zone_number=33, northern=True, strict=False)
+
+    in_DK1 = price_area == "DK1"
+    in_DK2 = price_area == "DK2"
+    undefined = price_area.isnull()
+
+    is_zone_32 = in_DK1 | undefined | (in_DK2 & (lon_33 >= 16.0))
+
+    lat = lat_32.where(is_zone_32, lat_33)
+    lon = lon_32.where(is_zone_32, lon_33)
+
+    return lat, lon
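
For orientation (not part of the diff): a minimal usage sketch of the new MasterdataManager, assuming a configured datamazing database handle. Per the constructor above, exactly one of 'as_of_time' (a tz-aware UTC timestamp) or 'time_interval' must be given; passing both or neither raises a ValueError.

    import datamazing.pandas as pdz
    import pandas as pd

    from domainiac import MasterdataManager

    def load_plants_as_of(db: pdz.Database) -> pd.DataFrame:
        # 'as_of_time' must be tz-aware UTC, otherwise __init__ raises ValueError
        manager = MasterdataManager(db=db, as_of_time=pd.Timestamp("2024-01-01T00:00:00Z"))
        # one consolidated frame: plants joined with unit summaries,
        # market participants, addresses, plus derived lat/lon and is_household
        return manager.get_plant_masterdata()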
domainiac-10.0.0/domainiac/managers/schemas/masterdata_manager.json
@@ -0,0 +1,40 @@
+{
+    "address_city": "object",
+    "address_country": "object",
+    "address_house_number": "object",
+    "address_municipality": "object",
+    "address_postal_code": "object",
+    "address_street_name": "object",
+    "asset_type": "object",
+    "c11": "object",
+    "capacity_max_MW": "float64",
+    "capacity_min_MW": "float64",
+    "connection_point": "object",
+    "coordinate_x_utm": "float64",
+    "coordinate_y_utm": "float64",
+    "datahub_gsrn_e17": "object",
+    "datahub_gsrn_e18": "object",
+    "end_time_utc": "datetime64[ns, UTC]",
+    "hub_height_m": "float64",
+    "market_participant_ecp_eic": "object",
+    "market_participant_eic": "object",
+    "market_participant_gln": "object",
+    "market_participant_id": "object",
+    "market_participant_name": "object",
+    "market_participant_short_name": "object",
+    "operation_type": "object",
+    "plant_gsrn": "object",
+    "plant_id": "object",
+    "plant_name": "object",
+    "plant_short_name": "object",
+    "plant_type": "object",
+    "power_system_resource": "object",
+    "power_system_resource_type": "object",
+    "price_area": "object",
+    "settlement_group": "object",
+    "start_time_utc": "datetime64[ns, UTC]",
+    "substation_id": "object",
+    "is_household": "bool",
+    "latitude": "float64",
+    "longitude": "float64"
+}
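
The schema file maps each column of the consolidated plant frame to a pandas dtype string and is loaded into MasterdataManager.SCHEMA at import time. The diff does not show SCHEMA being used anywhere else; one plausible use (an assumption, not the package's documented API) is dtype coercion of the returned frame:

    import pandas as pd

    from domainiac.managers.masterdata_manager import MasterdataManager

    def coerce_to_schema(df: pd.DataFrame) -> pd.DataFrame:
        # cast the columns present in the frame to their declared dtypes;
        # illustrative helper only, not part of the package
        dtypes = {
            col: dtype
            for col, dtype in MasterdataManager.SCHEMA.items()
            if col in df.columns
        }
        return df.astype(dtypes)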
{domainiac-9.3.1 → domainiac-10.0.0}/domainiac/modeling/plant.py
@@ -12,7 +12,7 @@ class Plant:
     datahub_gsrn_e18: str
     price_area: str
     coordinate: Coordinate
-    installed_power_MW: float
+    capacity_max_MW: float
     power_system_resource_type: str

     @classmethod
@@ -40,7 +40,7 @@ class Plant:
                 longitude=row["longitude"],
                 altitude=altitude,
             ),
-            installed_power_MW=row["installed_power_MW"],
+            capacity_max_MW=row["capacity_max_MW"],
             power_system_resource_type=psrt,
         )
         plants.append(plant)
@@ -50,7 +50,7 @@
 @dataclass(frozen=True)
 class Group:
     coordinate: Coordinate
-    installed_power_MW: float
+    capacity_max_MW: float
     identifiers: dict[str, str]

     @classmethod
@@ -65,7 +65,7 @@ class Group:
         identifiers = {identifier: row[identifier] for identifier in identifiers}
         group = cls(
             identifiers=identifiers,
-            installed_power_MW=row["installed_power_MW"],
+            capacity_max_MW=row["capacity_max_MW"],
             coordinate=Coordinate(
                 latitude=row["latitude"],
                 longitude=row["longitude"],
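
The rename from installed_power_MW to capacity_max_MW is a breaking change for any caller that constructs Plant or Group directly or reads the old attribute, consistent with the major version bump. A hypothetical downstream migration:

    def plant_capacity_MW(plant) -> float:
        # 9.3.1:  return plant.installed_power_MW
        # 10.0.0: the dataclass field and the source column are both capacity_max_MW
        return plant.capacity_max_MW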
{domainiac-9.3.1 → domainiac-10.0.0}/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "domainiac"
-version = "9.3.1"
+version = "10.0.0"
 description = "Package for working with Energinet data, but with specialized functions used for Enigma."
 authors = [{ name = "Team Enigma", email = "enigma@energinet.dk" }]
 requires-python = ">=3.10"
@@ -16,6 +16,7 @@ typeguard = ">=4.2.1"
 scikit-learn = ">=1.3.0"
 scipy = ">=1.15.3"
 pvlib = ">=0.13.1"
+utm = ">=0.8.1"

 [tool.poetry.group.dev.dependencies]
 pre-commit = ">=2.20.0"
domainiac-9.3.1/domainiac/managers/masterdata_manager.py
@@ -1,79 +0,0 @@
-import datamazing.pandas as pdz
-import pandas as pd
-from typeguard import typechecked
-
-from ..wrappers import cache_decorator
-
-
-class MasterdataManager:
-    """
-    Manager which simplifies the process of getting units from masterdata.
-    """
-
-    def __init__(
-        self,
-        db: pdz.Database,
-        time_interval: pdz.TimeInterval,
-        resolution: pd.Timedelta,
-        cache_masterdata: bool = False,
-    ) -> None:
-        self.db = db
-        self.time_interval = time_interval
-        self.resolution = resolution
-        self.cache_masterdata = cache_masterdata
-
-    masterdata_cache = {}
-
-    @typechecked
-    def _get_operational_entities(self, table: str) -> pd.DataFrame:
-        filters = {"standing_entity_state": "InOperation"}
-        df = self.db.query(table, filters=filters)
-        df = df[df["decommission_date_utc"].isna()].reset_index(drop=True)
-        return df
-
-    @typechecked
-    def get_operational_entities(self, table: str) -> pd.DataFrame:
-        """Gets the operational data for a given table."""
-
-        if self.cache_masterdata:
-            cached_query = cache_decorator(self.masterdata_cache)(
-                self._get_operational_entities
-            )
-            df = cached_query(table)
-        else:
-            df = self._get_operational_entities(table)
-
-        return df
-
-    @typechecked
-    def get_data(
-        self,
-        table: str,
-        filters: dict = {},
-        columns: list = [],
-    ) -> pd.DataFrame:
-        """Gets the data for a given table.
-        Filters for rows valid at the end of time interval.
-        """
-        # Get operational entities
-        df = self.get_operational_entities(table)
-
-        # Apply the filters
-        for column, value in filters.items():
-            if isinstance(value, list):
-                df = df[df[column].isin(value)].reset_index()
-            else:
-                df = df[df[column] == value].reset_index()
-
-        for column in columns:
-            if column not in df.columns:
-                raise KeyError(f"Column {column} not found in {table}")
-
-        df = pdz.as_of_time(
-            df=df,
-            period=("valid_from_date_utc", "valid_to_date_utc"),
-            at=self.time_interval.right,
-        )
-        df = df.filter(columns)
-
-        return df
domainiac-9.3.1/domainiac/managers/plant_manager.py
@@ -1,160 +0,0 @@
-import datamazing.pandas as pdz
-import pandas as pd
-
-from .masterdata_manager import MasterdataManager
-from .unit_manager import UnitManager
-
-
-class PlantManager(MasterdataManager):
-    """
-    Manager which simplifies the process of getting plants from masterdata.
-    """
-
-    def __init__(
-        self,
-        db: pdz.Database,
-        time_interval: pdz.TimeInterval,
-        resolution: pd.Timedelta,
-        cache_masterdata: bool = False,
-    ) -> None:
-        self.db = db
-        self.time_interval = time_interval
-        self.resolution = resolution
-        self.cache_masterdata = cache_masterdata
-        self.unit_manager = UnitManager(db, time_interval, resolution, cache_masterdata)
-
-    def get_plants(
-        self,
-        filters: dict = {},
-        columns: list | None = None,
-    ) -> pd.DataFrame:
-        """Gets the plants for a given plant type.
-        Filters for plants valid at the end of time interval.
-        Filters by default for plants in operation.
-        """
-        default_columns = [
-            "plant_id",
-            "masterdata_gsrn",
-            "datahub_gsrn_e18",
-            "installed_power_MW",
-            "price_area",
-            "is_tso_connected",
-            "valid_from_date_utc",
-            "valid_to_date_utc",
-            "primary_net_component_id",
-        ]
-        if not columns:
-            columns = default_columns
-
-        # TODO: masterdata_plant table doesn't have net_component_id column
-        # Find a better way to do this in future.
-        plant_columns = [col for col in columns if col != "primary_net_component_id"]
-        df_plant = self.get_data(
-            "masterdataPlant", filters=filters, columns=plant_columns
-        )
-        df_psr = self._get_power_system_resource()
-        df = df_plant.merge(
-            df_psr, on=["plant_id"], how="left", validate="m:1"
-        ).drop_duplicates()
-
-        df = df[columns]
-
-        return df
-
-    def get_installed_power_timeseries(self, gsrn: str) -> pd.DataFrame:
-        """Gets the installed power timeseries for a plant."""
-
-        df_times = self.time_interval.to_range(self.resolution).to_frame(
-            index=False, name="time_utc"
-        )
-
-        # explode plant to time series
-        df_plant = self.get_operational_entities("masterdataPlant")
-        df_plant = df_plant.query(f"masterdata_gsrn == '{gsrn}'")
-
-        df_plant = pdz.merge_point_interval(
-            df_times,
-            df_plant,
-            left_point="time_utc",
-            right_interval=("valid_from_date_utc", "valid_to_date_utc"),
-        )
-
-        return df_plant.filter(["time_utc", "installed_power_MW"]).reset_index(
-            drop=True
-        )
-
-    def _get_corrected_installed_power(
-        self, gsrn: str, df_invalid_periods: pd.DataFrame
-    ):
-        df_times = self.time_interval.to_range(self.resolution).to_frame(
-            index=False, name="time_utc"
-        )
-        df = self.get_installed_power_timeseries(gsrn=gsrn)
-
-        # explode invalid periods to time series
-        df_invalid_periods = df_invalid_periods.query(f"masterdata_gsrn == '{gsrn}'")
-        df_invalid_periods = pdz.merge(
-            df_times,
-            df_invalid_periods,
-            left_time="time_utc",
-            right_period=("start_date_utc", "end_date_utc"),
-        )
-
-        df = pdz.merge(
-            df,
-            df_invalid_periods,
-            on="time_utc",
-            how="left",
-        )
-
-        # correct installed power for invalid periods
-        df["installed_power_MW"] = df["installed_power_MW"].where(
-            df["corrected_installed_power_MW"].isnull(),
-            df["corrected_installed_power_MW"],
-        )
-
-        df = df[["time_utc", "installed_power_MW"]]
-
-        return df
-
-    def _get_power_system_resource(self) -> pd.DataFrame:
-
-        df_unit = self.unit_manager.get_units(
-            columns=["masterdata_gsrn", "capacity_min_MW", "capacity_max_MW"]
-        )
-
-        df_psr_mapping = self.db.query("masterdataAggregatedUnit")[
-            ["unit_gsrn", "net_component_id"]
-        ]
-
-        df = pd.merge(
-            df_psr_mapping,
-            df_unit,
-            left_on="unit_gsrn",
-            right_on="masterdata_gsrn",
-            how="left",
-            validate="1:m",
-        )
-
-        # for a small number of plants, the underlying unit can
-        # be associated with different net components. To avoid
-        # this issue, we choose for each plant the net component
-        # for which the underlying units amount to the largest
-        # capacity
-        df = pdz.group(df, by=["net_component_id", "plant_id"]).agg(
-            {"capacity_min_MW": "sum", "capacity_max_MW": "sum"}
-        )
-
-        df["capacity_range_MW"] = df["capacity_max_MW"] - df["capacity_min_MW"]
-
-        df = df.sort_values(
-            ["plant_id", "capacity_range_MW"], ascending=False
-        ).drop_duplicates(subset=["plant_id"], keep="first")
-
-        df = df.rename(
-            columns={
-                "net_component_id": "primary_net_component_id",
-            }
-        )
-
-        return df
domainiac-9.3.1/domainiac/managers/unit_manager.py
@@ -1,42 +0,0 @@
-import datamazing.pandas as pdz
-import pandas as pd
-
-from .masterdata_manager import MasterdataManager
-
-
-class UnitManager(MasterdataManager):
-    """
-    Manager which simplifies the process of getting units from masterdata.
-    """
-
-    def __init__(
-        self,
-        db: pdz.Database,
-        time_interval: pdz.TimeInterval,
-        resolution: pd.Timedelta,
-        cache_masterdata: bool = False,
-    ) -> None:
-        self.db = db
-        self.time_interval = time_interval
-        self.resolution = resolution
-        self.cache_masterdata = cache_masterdata
-
-    def get_units(
-        self,
-        filters: dict = {},
-        columns: list | None = None,
-    ) -> pd.DataFrame:
-        """Gets the units for a given unit type.
-        Filters for units valid at the end of time interval.
-        Filters by default for units in operation.
-        """
-        default_columns = [
-            "masterdata_gsrn",
-            "plant_id",
-            "power_system_resource_type",
-        ]
-        if not columns:
-            columns = default_columns
-        else:
-            columns = list(set(default_columns + columns))
-        return self.get_data("masterdataUnit", filters=filters, columns=columns)
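
In summary, 10.0.0 removes PlantManager and UnitManager together with their filter/column-based accessors; the nearest replacement in this diff is the rewritten MasterdataManager. A hedged migration sketch, assuming a time-interval-based workflow (note that the old 'resolution' and 'cache_masterdata' arguments have no counterpart in the new constructor):

    import datamazing.pandas as pdz
    import pandas as pd

    from domainiac import MasterdataManager

    def get_plants_v10(db: pdz.Database, time_interval: pdz.TimeInterval) -> pd.DataFrame:
        # 9.3.1:  PlantManager(db, time_interval, resolution).get_plants(filters=...)
        # 10.0.0: one consolidated masterdata frame; filter the result instead
        manager = MasterdataManager(db=db, time_interval=time_interval)
        df = manager.get_plant_masterdata()
        return df[df["price_area"] == "DK1"]  # example filter, replaces the filters= kwarg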