mosaik-emissions 0.1.1__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff shows the content of publicly available package versions as they were released to their respective public registries. It is provided for informational purposes only and reflects the changes between the two versions.
@@ -1,3 +1,12 @@
+import mosaik_api_v3
 from mosaik_components.emissions.emission_simulator import Simulator
 
 __all__ = ["Simulator"]
+
+
+def main():
+    mosaik_api_v3.start_simulation(Simulator(), "run the emission simulator for mosaik")
+
+
+if __name__ == "__main__":
+    main()
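
Editor's note: the new `main()` wraps `mosaik_api_v3.start_simulation`, so as of 1.0.0 the simulator can also be launched as a standalone process speaking the mosaik API. As a rough sketch of how a scenario might use it (the sim-config key "EmissionSim" and the parameter values below are illustrative assumptions, not taken from the package docs):

    import mosaik

    SIM_CONFIG = {
        # run in-process; Simulator is re-exported from the package root
        "EmissionSim": {"python": "mosaik_components.emissions:Simulator"},
    }

    world = mosaik.World(SIM_CONFIG)
    esim = world.start("EmissionSim", start="2020-01-01 00:00:00", step_size=900)

The `main()` entry point additionally makes it possible to run the same simulator as a separate process via mosaik's command-based connection style.
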
@@ -1,38 +1,43 @@
 from __future__ import annotations
 
+from typing import TYPE_CHECKING, Any
+from typing_extensions import override
+
 import os
-import numpy as np
+import glob
+import zipfile
 import pandas as pd
-import copy
-import arrow
-import functools
-from os.path import abspath
-from pathlib import Path
-from dataclasses import dataclass, field
-from typing import Any, Callable, Dict, Iterable, List, Optional, Set, Tuple
 import mosaik_api_v3
-from collections import OrderedDict
-from mosaik_api_v3.types import (
-    CreateResult,
-    CreateResultChild,
-    Meta,
-    ModelDescription,
-    OutputData,
-    OutputRequest,
-)
-
-DEFAULT_STEP_SIZE = 15 * 60  # minutes
-DATE_FORMAT = "YYYY-MM-DD HH:mm:ss"
-DEFAULT_DATA_FILE = Path(abspath(__file__)).parent / 'data' / 'data.csv'
-DEFAULT_CONFIG = OrderedDict([  # the order makes sense!
-    ('method', None),  # a callable that directly transforms input data to output
-    ('co2_emission_factor', None),  # a factor that represents [tonnes CO₂eq. / MWh]
-    ('fuel', None),  # a certain type of fuel used to produce electricity
-    ('state', None),  # a certain state of the country to filter the carbon intensity database out
-                      # it should be defined along with the country
-    ('country', None),  # just country to filter the carbon intensity database out
-    ('coefficient', 1.0)  # multiplies emissions output
-])
+from rapidfuzz import process
+from pandas.tseries.frequencies import to_offset
+import warnings
+
+
+def nearest_value(series: pd.Series, target: Any) -> Any:
+    try:
+        # Works for numeric, datetime, etc.
+        return series.iloc[(series - target).abs().argmin()]
+    except:  # noqa: E722
+        # Fallback for non-subtractable types: compare string similarity
+        matched = process.extractOne(str(target), series)
+        if matched is not None:
+            return matched[0]
+
+
+if TYPE_CHECKING:
+    from mosaik_api_v3.types import (
+        CreateResult,
+        InputData,
+        OutputData,
+        OutputRequest,
+        Time,
+    )
+
+warnings.simplefilter("ignore", SyntaxWarning)
+
+DEFAULT_STEP_SIZE = 15 * 60  # seconds (15 minutes)
+DEFAULT_DATE_FORMAT = "ISO8601"  # "%Y-%m-%d %H:%M:%S"
+DEFAULT_DATA_FOLDER = os.path.abspath(os.path.join(os.path.dirname(__file__), "data"))
 
 META = {
     "api_version": "3.0",
@@ -41,123 +46,294 @@ META = {
         "Emission": {
             "public": True,
             "any_inputs": True,
-            #"persistent": [],
-            "params": list(DEFAULT_CONFIG.keys()),
-            "attrs": ["P[MW]",  # input/output from generator/external grid (p_mw float active power supply at the external grid [MW])
-                      #"Q[MVar]",  # input from generator/external grid (q_mvar float reactive power supply at the external grid [MVar])
-                      "E[tCO2eq]"  # output estimated total tonnes CO₂eq.
-                      ],
+            "params": [
+                "id",
+                "year",
+                "country_code",
+                "state_code",
+                "scope",
+                "fuel",
+            ],
+            "attrs": [
+                "P[MW]",  # input/output power
+                "E[tCO₂eq]",  # output estimated total emissions
+                "I[tCO₂eq/MWh]",  # output estimated carbon intensity
+            ],
         }
     },
 }
 
-class Simulator(mosaik_api_v3.Simulator):
 
+def read_zip(
+    zip_file_path: str,
+    use_file_name: str | None = None,
+    use_file_index: int = 0,
+    print_file_names: bool = False,
+    return_io: bool = False,
+    **kwargs: Any,
+):
+    fname = None
+    zf = zipfile.ZipFile(zip_file_path)
+    if use_file_name is not None:
+        for f in zf.infolist():
+            f = f.filename
+            if print_file_names:
+                print(f)
+            if use_file_name in f:
+                fname = f
+                break
+    else:
+        fname = zf.infolist()[use_file_index].filename
+        if print_file_names:
+            print(fname)
+    if fname is not None:
+        if return_io:
+            return zf.open(fname)
+        if ".xls" in fname.lower():
+            return pd.read_excel(zf.open(fname), **kwargs)
+        return pd.read_csv(zf.open(fname), **kwargs)
+
+
+class Simulator(mosaik_api_v3.Simulator):
     def __init__(self) -> None:
         super().__init__(META)
-
-    def init(self, sid: str, time_resolution: float, start: str, end: int,
-             step_size: int = DEFAULT_STEP_SIZE,
-             data_file: str = DEFAULT_DATA_FILE):
+        self.entities = {}
+        self.database = {}
+        self._cache = {}
+
+    @override
+    def init(
+        self,
+        sid: str = "EmissionSim-0",
+        time_resolution: float = 1.0,
+        start: str = "2020-01-01 00:00:00",
+        step_size: int = DEFAULT_STEP_SIZE,
+        date_format: str = DEFAULT_DATE_FORMAT,
+        data_folder: str = DEFAULT_DATA_FOLDER,
+        keep_database: bool = False,
+        **sim_params: dict[str, Any],
+    ):
+        self._sid = sid
         self._time_resolution = time_resolution
-        self._data_file = data_file
+        self._keep_database = keep_database
+        self._data_folder = data_folder
         self._step_size = step_size
-        self._start = start
-        self._end = end
-        self._sid = sid
-        self.entities = {}
-        self.current_step = pd.to_datetime(arrow.get(self._start, DATE_FORMAT).datetime, utc=True) - pd.Timedelta(self._step_size, unit='seconds')
-        self.database = pd.read_csv(data_file, parse_dates=True, sep=';', low_memory=False, dtype={'year' : 'Int64'})
-        self.database['datetime'] = pd.to_datetime(self.database['datetime'], utc=True)
-        self.database.set_index('datetime', inplace=True)
+
+        if not self._step_size:
+            self.meta["type"] = "event-based"
+
+        self._start = pd.to_datetime(start, format=date_format, utc=True)
+        self.current_step = self._start
         return self.meta
 
-    def create(self, num: int, model: str, **model_params: Any) -> List[CreateResult]:
+    def get_stored_values(self, **kwargs: Any):
+        id = kwargs.get("id", None)
+        if id is not None:
+            if id not in self.database:  # database full scan
+                # print('COUNT DB')
+                file_list = [
+                    i
+                    for i in glob.glob(os.path.join(self._data_folder, "*.csv"))
+                    if os.path.isfile(i)
+                ] + [
+                    i
+                    for i in glob.glob(os.path.join(self._data_folder, "*.zip"))
+                    if os.path.isfile(i)
+                ]
+
+                if len(file_list) == 0:
+                    warnings.warn(
+                        f"The data folder '{self._data_folder}' is seemingly empty.",
+                        UserWarning,
+                    )
+
+                for f in file_list:
+                    if f.endswith(".zip"):
+                        f = read_zip(
+                            f,
+                            sep=";",
+                            parse_dates=True,
+                            low_memory=False,
+                            dtype={"year": "Int64"},
+                        )
+                    elif f.endswith(".csv"):
+                        f = pd.read_csv(
+                            f,
+                            sep=";",
+                            parse_dates=True,
+                            low_memory=False,
+                            dtype={"year": "Int64"},
+                        )
+                    if isinstance(f, pd.DataFrame) and "id" in f.columns:
+                        f["datetime"] = pd.to_datetime(f["datetime"], utc=True)
+                        f.set_index("datetime", inplace=True)
+                        self.database[str(f.id.iloc[0])] = f
+
+                if len(self.database) == 0:
+                    warnings.warn(
+                        f"The database {self._data_folder} is seemingly empty.",
+                        UserWarning,
+                    )
+
+            if id not in self.database:
+                warnings.warn(f"ID '{id}' is not in the database.", UserWarning)
+
+        cache_key = ", ".join(
+            [f"{k}:{v}" for k, v in sorted(kwargs.items(), key=lambda i: i[0])]
+        )  # make a cache key from model params
+
+        if cache_key not in self._cache:  # mind setup_done() cleanup
+            params = {"year": self._start.year}
+            params.update(kwargs)
+            if id is not None:
+                db = self.database[id]
+
+                for p in reversed(db.columns):
+                    if p == "id":
+                        db = db.drop("id", axis=1)
+                        break
+
+                    if p in params:
+                        if params[p] not in db[p].values:
+                            params[p] = nearest_value(db[p], params[p])
+
+                        if p == "year":  # go forward from starting year
+                            db = db[db[p] >= params[p]].drop(p, axis=1)
+                        else:
+                            db = db[db[p] == params[p]].drop(p, axis=1)
+                        # db = db[db[p] == params[p]].drop(p, axis=1)
+
+                if self._start not in db.index:  # re-index from _start
+                    warnings.warn(
+                        f"No timeseries starts from {self._start}, the closest is used for {self._sid + '.' + kwargs.get('eid', '')}.",
+                        UserWarning,
+                    )
+                    diff = self._start - db.index[0]
+                    db.index += diff
+
+                intensity = db["carbon_intensity[gCO₂eq/kWh]"]  # equal to tCO₂eq/MWh
+                intensity = intensity[~intensity.index.duplicated()]  # index check
+
+                if len(intensity) > 1:  # infer step size, if there are enough records
+                    step_size = pd.to_timedelta(
+                        to_offset(intensity.index.inferred_freq)
+                    )
+                    if step_size is None:
+                        step_size = pd.to_timedelta(
+                            intensity.index[1] - intensity.index[0]
+                        )
+
+                else:
+                    step_size = pd.to_timedelta(
+                        pd.Timestamp(intensity.index[0].year, 12, 31).dayofyear,
+                        unit="days",
+                    )
+                new_step_size = pd.to_timedelta(
+                    self._step_size * self._time_resolution, unit="seconds"
+                )
+                if new_step_size <= step_size:  # resample timeseries
+                    intensity = (
+                        intensity.fillna(0).resample(new_step_size).ffill().fillna(0)
+                    )
+                else:
+                    intensity = (
+                        intensity.fillna(0).resample(new_step_size).mean().fillna(0)
+                    )
+                params.update({"intensity": intensity})
+            self._cache[cache_key] = params
+        return self._cache[cache_key]
+
+    def create(self, num: int, model: str, **model_params: Any) -> list[CreateResult]:
         new_entities = []
-        if not len(model_params):
-            raise ValueError(f"No methods specified")
-        params = OrderedDict(DEFAULT_CONFIG)
-        params.update(model_params)
-        coefficient = params.pop('coefficient', 1.0)
         for n in range(len(self.entities), len(self.entities) + num):
             eid = f"{model}-{n}"
-            self.entities.update({eid: {'params' : params,
-                                        'coefficient' : coefficient,
-                                        'cache' : {},
-                                        }})
-            new_entities.append({
-                "eid": eid,
-                "type": model,
-            })
+            self.entities.update(
+                {
+                    eid: {
+                        "cache": {},
+                        "multiplier": 1.0,
+                        **self.get_stored_values(**model_params, eid=eid),
+                    }
+                }
+            )
+            new_entities.append(
+                {
+                    "eid": eid,
+                    "type": model,
+                }
+            )
         return new_entities
 
-    @functools.cache
-    def get_stored_values(self, **kwargs):
-        data = self.database.copy()
-        try:
-            # filter database with model_params
-            for key, value in kwargs.items():
-                if pd.notna(value) and key in data:
-                    if key == 'fuel':
-                        data = data[data[key] == value][['year', 'carbon_emission_factor']]
-                        # [kg CO₂eq. / TJ] -> [1 TJ = 277.7778 MWh] -> [tonnes CO₂eq. / MWh]
-                        data['carbon_emission_factor'] = data['carbon_emission_factor'] / 1000 / 277.7778
-                        break
-                    elif key == 'state':  # it should be defined along with the country
-                        data = data[(data[key] == value) & (data['country'] == kwargs['country'])][['year', 'carbon_intensity_factor']]
-                        break
-                    elif key == 'country':
-                        data = data[pd.isna(data['state']) & (data[key] == kwargs['country'])][['year', 'carbon_intensity_factor']]
-                        break
-                    else:
-                        data = data[data[key] == value]
-
-            # change history year to current one
-            filtered_data = data[data['year'] == self.current_step.year]
-            if len(filtered_data) == 0:
-                filtered_data = data[data['year'] == data['year'].max()]
-                ydiff = self.current_step.year - filtered_data.index[0].year
-                filtered_data.index += pd.offsets.DateOffset(years=ydiff)
-                filtered_data['year'] += ydiff
-
-            if len(filtered_data) > 0:
-                filtered_data = filtered_data.drop('year', axis=1)
-                return filtered_data
-
-            raise ValueError(f"No data for: {kwargs}")
-        except Exception as e:
-            raise ValueError(f"Getting value error for: {kwargs}, error: {str(e)}")
-
-    def get_emission_factor(self, eid, attr, entity):
-        params = self.entities[eid]['params']
-        if attr in ['P[MW]']:
-            if 'method' in params and callable(params['method']):
-                return params['method'](self, eid, attr, entity, self.current_step, params)
-            elif 'co2_emission_factor' in params and pd.notna(params['co2_emission_factor']):
-                return params['co2_emission_factor']
-            else:
-                factor = self.get_stored_values(**params)
-                index = factor.index.get_indexer([self.current_step], method='nearest')[0]
-                if index < 0:
-                    index = 0
-                factor = factor.iloc[index].values[0]
-                return factor
+    @override
+    def setup_done(self):
+        if not self._keep_database:
+            self.database = {}
+            self._cache = {}
+
+    def get_emission_factor(
+        self, eid: str, sender_eid: str, attr: str, value: float, time: Time
+    ):
+        params = self.entities[eid].copy()
+        params.update(
+            {
+                "sender_eid": sender_eid,
+                "attr": attr,
+                "value": value,
+                "step_size": self._step_size,
+                "current_step": self.current_step,
+                "current_time": time,
+            }
+        )
+        method = params.pop("method", None)
+        if callable(method):
+            params.pop("cache", None)
+            return method(**params) * params["multiplier"]
+        elif "emission_factor" in params:
+            return params["emission_factor"] * params["multiplier"]
+
+        elif "id" in params:
+            intensity = self.get_stored_values(**params)["intensity"]
+            index = intensity.index.get_indexer([self.current_step], method="nearest")[
+                0
+            ]
+            intensity = intensity.iloc[index if index >= 0 else 0]
+            return intensity * params["multiplier"]
+
         else:
-            raise ValueError(f"No appropriate method assigned for '{attr}'")
+            raise ValueError("The model parameters are not properly specified!")
 
-    def step(self, time, inputs, max_advance):
+    @override
+    def step(self, time: Time, inputs: InputData, max_advance: Time = 0) -> Time | None:
         # {'Emission-0': {'P[MW]': {'Grid-0.Gen-0': 1.0}}}
-        self.current_step += pd.Timedelta(self._step_size, unit='seconds')
+        self.current_step = self._start + pd.to_timedelta(time, unit="seconds")
         for eid, data in inputs.items():
-            self.entities[eid]['cache']['E[tCO2eq]'] = 0
+            total_power = 0
+            total_emissions = 0
             for attr, values in data.items():
-                self.entities[eid]['cache'][attr] = 0
-                for k, v in values.items():
-                    self.entities[eid]['cache'][attr] += v
-                    self.entities[eid]['cache']['E[tCO2eq]'] += v * self.get_emission_factor(eid, attr, k) * self.entities[eid]['coefficient']
-        return time + self._step_size
-
+                if attr == "P[MW]":
+                    for k, v in values.items():
+                        total_power += abs(v)
+                        total_emissions += abs(v) * self.get_emission_factor(
+                            eid, k, attr, v, time
+                        )
+            self.entities[eid]["cache"]["P[MW]"] = total_power
+            self.entities[eid]["cache"]["I[tCO₂eq/MWh]"] = (
+                total_emissions / total_power if total_power > 0 else 0
+            )
+            self.entities[eid]["cache"]["E[tCO₂eq]"] = (
+                total_emissions * self._step_size * self._time_resolution / 3600
+            )
+
+        if self._step_size:
+            return time + self._step_size
+
+    @override
     def get_data(self, outputs: OutputRequest) -> OutputData:
-        return {eid: {attr: self.entities[eid]['cache'][attr]
-                      for attr in attrs
-                      } for eid, attrs in outputs.items()}
+        return {
+            eid: {
+                attr: self.entities[eid]["cache"][attr]
+                for attr in attrs
+                if attr in self.entities[eid]["cache"]
+            }
+            for eid, attrs in outputs.items()
+        }
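
Editor's note on the reworked `step()`: per entity it now sums |P| over all senders, converts the summed rate (MW × tCO₂eq/MWh = tCO₂eq/h) into energy-based tonnes via `step_size * time_resolution / 3600`, and reports the power-weighted intensity. A stand-alone sketch of the same arithmetic, with invented numbers:

    # Reproduces the step() bookkeeping outside the simulator; values are made up.
    step_size, time_resolution = 900, 1.0      # default 15-minute step
    flows = {"Grid-0.Gen-0": 2.0}              # P[MW] inputs per sender
    factor = 0.4                               # assumed carbon intensity [tCO₂eq/MWh]

    total_power = sum(abs(p) for p in flows.values())
    total_emissions = sum(abs(p) * factor for p in flows.values())

    E = total_emissions * step_size * time_resolution / 3600   # 0.2 tCO₂eq per step
    I = total_emissions / total_power if total_power else 0    # 0.4 tCO₂eq/MWh

So a constant 2 MW flow at 0.4 tCO₂eq/MWh yields 0.2 tCO₂eq per 15-minute step, i.e. 0.8 tCO₂eq per hour, matching the `E[tCO₂eq]` and `I[tCO₂eq/MWh]` attributes cached for `get_data`.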