mosaik-emissions 0.1.1__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff shows the content of publicly available package versions as they were released to their respective public registries. It is provided for informational purposes only and reflects the changes between the two versions.
@@ -1,3 +1,12 @@
+import mosaik_api_v3
 from mosaik_components.emissions.emission_simulator import Simulator
 
 __all__ = ["Simulator"]
+
+
+def main():
+    mosaik_api_v3.start_simulation(Simulator(), "run the emission simulator for mosaik")
+
+
+if __name__ == "__main__":
+    main()
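
Editor's note: the new `main()` wraps `mosaik_api_v3.start_simulation`, so as of 1.0.0 the simulator can also be launched as a standalone process speaking the mosaik API. As a rough sketch of how a scenario might use it (the sim-config key "EmissionSim" and the parameter values below are illustrative assumptions, not taken from the package docs):

    import mosaik

    SIM_CONFIG = {
        # run in-process; Simulator is re-exported from the package root
        "EmissionSim": {"python": "mosaik_components.emissions:Simulator"},
    }

    world = mosaik.World(SIM_CONFIG)
    esim = world.start("EmissionSim", start="2020-01-01 00:00:00", step_size=900)

The `main()` entry point additionally makes it possible to run the same simulator as a separate process via mosaik's command-based connection style.
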
@@ -1,38 +1,43 @@
 from __future__ import annotations
 
+from typing import TYPE_CHECKING, Any
+from typing_extensions import override
+
 import os
-import numpy as np
+import glob
+import zipfile
 import pandas as pd
-import copy
-import arrow
-import functools
-from os.path import abspath
-from pathlib import Path
-from dataclasses import dataclass, field
-from typing import Any, Callable, Dict, Iterable, List, Optional, Set, Tuple
 import mosaik_api_v3
-from collections import OrderedDict
-from mosaik_api_v3.types import (
-    CreateResult,
-    CreateResultChild,
-    Meta,
-    ModelDescription,
-    OutputData,
-    OutputRequest,
-)
-
-DEFAULT_STEP_SIZE = 15 * 60  # minutes
-DATE_FORMAT = "YYYY-MM-DD HH:mm:ss"
-DEFAULT_DATA_FILE = Path(abspath(__file__)).parent / 'data' / 'data.csv'
-DEFAULT_CONFIG = OrderedDict([  # the order makes sense!
-    ('method', None),  # a callable that directly transforms input data to output
-    ('co2_emission_factor', None),  # a factor that represents [tonnes CO₂eq. / MWh]
-    ('fuel', None),  # a certain type of fuel used to produce electricity
-    ('state', None),  # a certain state of the country to filter the carbon intensity database out
-                      # it should be defined along with the country
-    ('country', None),  # just country to filter the carbon intensity database out
-    ('coefficient', 1.0)  # multiplies emissions output
-])
+from rapidfuzz import process
+from pandas.tseries.frequencies import to_offset
+import warnings
+
+
+def nearest_value(series: pd.Series, target: Any) -> Any:
+    try:
+        # Works for numeric, datetime, etc.
+        return series.iloc[(series - target).abs().argmin()]
+    except:  # noqa: E722
+        # Fallback for non-subtractable types: compare string similarity
+        matched = process.extractOne(str(target), series)
+        if matched is not None:
+            return matched[0]
+
+
+if TYPE_CHECKING:
+    from mosaik_api_v3.types import (
+        CreateResult,
+        InputData,
+        OutputData,
+        OutputRequest,
+        Time,
+    )
+
+warnings.simplefilter("ignore", SyntaxWarning)
+
+DEFAULT_STEP_SIZE = 15 * 60  # seconds (15 minutes)
+DEFAULT_DATE_FORMAT = "ISO8601"  # "%Y-%m-%d %H:%M:%S"
+DEFAULT_DATA_FOLDER = os.path.abspath(os.path.join(os.path.dirname(__file__), "data"))
 
 META = {
     "api_version": "3.0",
@@ -41,123 +46,294 @@ META = {
         "Emission": {
             "public": True,
             "any_inputs": True,
-            #"persistent": [],
-            "params": list(DEFAULT_CONFIG.keys()),
-            "attrs": ["P[MW]",  # input/output from generator/external grid (p_mw float active power supply at the external grid [MW])
-                      #"Q[MVar]",  # input from generator/external grid (q_mvar float reactive power supply at the external grid [MVar])
-                      "E[tCO2eq]"  # output estimated total tonnes CO₂eq.
-                      ],
+            "params": [
+                "id",
+                "year",
+                "country_code",
+                "state_code",
+                "scope",
+                "fuel",
+            ],
+            "attrs": [
+                "P[MW]",  # input/output power
+                "E[tCO₂eq]",  # output estimated total emissions
+                "I[tCO₂eq/MWh]",  # output estimated carbon intensity
+            ],
         }
     },
 }
 
-class Simulator(mosaik_api_v3.Simulator):
 
+def read_zip(
+    zip_file_path: str,
+    use_file_name: str | None = None,
+    use_file_index: int = 0,
+    print_file_names: bool = False,
+    return_io: bool = False,
+    **kwargs: Any,
+):
+    fname = None
+    zf = zipfile.ZipFile(zip_file_path)
+    if use_file_name is not None:
+        for f in zf.infolist():
+            f = f.filename
+            if print_file_names:
+                print(f)
+            if use_file_name in f:
+                fname = f
+                break
+    else:
+        fname = zf.infolist()[use_file_index].filename
+        if print_file_names:
+            print(fname)
+    if fname is not None:
+        if return_io:
+            return zf.open(fname)
+        if ".xls" in fname.lower():
+            return pd.read_excel(zf.open(fname), **kwargs)
+        return pd.read_csv(zf.open(fname), **kwargs)
+
+
+class Simulator(mosaik_api_v3.Simulator):
     def __init__(self) -> None:
         super().__init__(META)
-
-    def init(self, sid: str, time_resolution: float, start: str, end: int,
-             step_size: int = DEFAULT_STEP_SIZE,
-             data_file: str = DEFAULT_DATA_FILE):
+        self.entities = {}
+        self.database = {}
+        self._cache = {}
+
+    @override
+    def init(
+        self,
+        sid: str = "EmissionSim-0",
+        time_resolution: float = 1.0,
+        start: str = "2020-01-01 00:00:00",
+        step_size: int = DEFAULT_STEP_SIZE,
+        date_format: str = DEFAULT_DATE_FORMAT,
+        data_folder: str = DEFAULT_DATA_FOLDER,
+        keep_database: bool = False,
+        **sim_params: dict[str, Any],
+    ):
+        self._sid = sid
         self._time_resolution = time_resolution
-        self._data_file = data_file
+        self._keep_database = keep_database
+        self._data_folder = data_folder
         self._step_size = step_size
-        self._start = start
-        self._end = end
-        self._sid = sid
-        self.entities = {}
-        self.current_step = pd.to_datetime(arrow.get(self._start, DATE_FORMAT).datetime, utc=True) - pd.Timedelta(self._step_size, unit='seconds')
-        self.database = pd.read_csv(data_file, parse_dates=True, sep=';', low_memory=False, dtype={'year' : 'Int64'})
-        self.database['datetime'] = pd.to_datetime(self.database['datetime'], utc=True)
-        self.database.set_index('datetime', inplace=True)
+
+        if not self._step_size:
+            self.meta["type"] = "event-based"
+
+        self._start = pd.to_datetime(start, format=date_format, utc=True)
+        self.current_step = self._start
         return self.meta
 
-    def create(self, num: int, model: str, **model_params: Any) -> List[CreateResult]:
+    def get_stored_values(self, **kwargs: Any):
+        id = kwargs.get("id", None)
+        if id is not None:
+            if id not in self.database:  # database full scan
+                # print('COUNT DB')
+                file_list = [
+                    i
+                    for i in glob.glob(os.path.join(self._data_folder, "*.csv"))
+                    if os.path.isfile(i)
+                ] + [
+                    i
+                    for i in glob.glob(os.path.join(self._data_folder, "*.zip"))
+                    if os.path.isfile(i)
+                ]
+
+                if len(file_list) == 0:
+                    warnings.warn(
+                        f"The data folder '{self._data_folder}' is seemingly empty.",
+                        UserWarning,
+                    )
+
+                for f in file_list:
+                    if f.endswith(".zip"):
+                        f = read_zip(
+                            f,
+                            sep=";",
+                            parse_dates=True,
+                            low_memory=False,
+                            dtype={"year": "Int64"},
+                        )
+                    elif f.endswith(".csv"):
+                        f = pd.read_csv(
+                            f,
+                            sep=";",
+                            parse_dates=True,
+                            low_memory=False,
+                            dtype={"year": "Int64"},
+                        )
+                    if isinstance(f, pd.DataFrame) and "id" in f.columns:
+                        f["datetime"] = pd.to_datetime(f["datetime"], utc=True)
+                        f.set_index("datetime", inplace=True)
+                        self.database[str(f.id.iloc[0])] = f
+
+                if len(self.database) == 0:
+                    warnings.warn(
+                        f"The database {self._data_folder} is seemingly empty.",
+                        UserWarning,
+                    )
+
+            if id not in self.database:
+                warnings.warn(f"ID '{id}' is not in the database.", UserWarning)
+
+        cache_key = ", ".join(
+            [f"{k}:{v}" for k, v in sorted(kwargs.items(), key=lambda i: i[0])]
+        )  # make a cache key from model params
+
+        if cache_key not in self._cache:  # mind setup_done() cleanup
+            params = {"year": self._start.year}
+            params.update(kwargs)
+            if id is not None:
+                db = self.database[id]
+
+                for p in reversed(db.columns):
+                    if p == "id":
+                        db = db.drop("id", axis=1)
+                        break
+
+                    if p in params:
+                        if params[p] not in db[p].values:
+                            params[p] = nearest_value(db[p], params[p])
+
+                        if p == "year":  # go forward from starting year
+                            db = db[db[p] >= params[p]].drop(p, axis=1)
+                        else:
+                            db = db[db[p] == params[p]].drop(p, axis=1)
+                        # db = db[db[p] == params[p]].drop(p, axis=1)
+
+                if self._start not in db.index:  # re-index from _start
+                    warnings.warn(
+                        f"No timeseries starts from {self._start}, the closest is used for {self._sid + '.' + kwargs.get('eid', '')}.",
+                        UserWarning,
+                    )
+                    diff = self._start - db.index[0]
+                    db.index += diff
+
+                intensity = db["carbon_intensity[gCO₂eq/kWh]"]  # equal to tCO₂eq/MWh
+                intensity = intensity[~intensity.index.duplicated()]  # index check
+
+                if len(intensity) > 1:  # infer step size, if there are enough records
+                    step_size = pd.to_timedelta(
+                        to_offset(intensity.index.inferred_freq)
+                    )
+                    if step_size is None:
+                        step_size = pd.to_timedelta(
+                            intensity.index[1] - intensity.index[0]
+                        )
+
+                else:
+                    step_size = pd.to_timedelta(
+                        pd.Timestamp(intensity.index[0].year, 12, 31).dayofyear,
+                        unit="days",
+                    )
+                new_step_size = pd.to_timedelta(
+                    self._step_size * self._time_resolution, unit="seconds"
+                )
+                if new_step_size <= step_size:  # resample timeseries
+                    intensity = (
+                        intensity.fillna(0).resample(new_step_size).ffill().fillna(0)
+                    )
+                else:
+                    intensity = (
+                        intensity.fillna(0).resample(new_step_size).mean().fillna(0)
+                    )
+                params.update({"intensity": intensity})
+            self._cache[cache_key] = params
+        return self._cache[cache_key]
+
+    def create(self, num: int, model: str, **model_params: Any) -> list[CreateResult]:
         new_entities = []
-        if not len(model_params):
-            raise ValueError(f"No methods specified")
-        params = OrderedDict(DEFAULT_CONFIG)
-        params.update(model_params)
-        coefficient = params.pop('coefficient', 1.0)
         for n in range(len(self.entities), len(self.entities) + num):
             eid = f"{model}-{n}"
-            self.entities.update({eid: {'params' : params,
-                                        'coefficient' : coefficient,
-                                        'cache' : {},
-                                        }})
-            new_entities.append({
-                "eid": eid,
-                "type": model,
-            })
+            self.entities.update(
+                {
+                    eid: {
+                        "cache": {},
+                        "multiplier": 1.0,
+                        **self.get_stored_values(**model_params, eid=eid),
+                    }
+                }
+            )
+            new_entities.append(
+                {
+                    "eid": eid,
+                    "type": model,
+                }
+            )
         return new_entities
 
-    @functools.cache
-    def get_stored_values(self, **kwargs):
-        data = self.database.copy()
-        try:
-            # filter database with model_params
-            for key, value in kwargs.items():
-                if pd.notna(value) and key in data:
-                    if key == 'fuel':
-                        data = data[data[key] == value][['year', 'carbon_emission_factor']]
-                        # [kg CO₂eq. / TJ] -> [1 TJ = 277.7778 MWh] -> [tonnes CO₂eq. / MWh]
-                        data['carbon_emission_factor'] = data['carbon_emission_factor'] / 1000 / 277.7778
-                        break
-                    elif key == 'state':  # it should be defined along with the country
-                        data = data[(data[key] == value) & (data['country'] == kwargs['country'])][['year', 'carbon_intensity_factor']]
-                        break
-                    elif key == 'country':
-                        data = data[pd.isna(data['state']) & (data[key] == kwargs['country'])][['year', 'carbon_intensity_factor']]
-                        break
-                    else:
-                        data = data[data[key] == value]
-
-            # change history year to current one
-            filtered_data = data[data['year'] == self.current_step.year]
-            if len(filtered_data) == 0:
-                filtered_data = data[data['year'] == data['year'].max()]
-                ydiff = self.current_step.year - filtered_data.index[0].year
-                filtered_data.index += pd.offsets.DateOffset(years=ydiff)
-                filtered_data['year'] += ydiff
-
-            if len(filtered_data) > 0:
-                filtered_data = filtered_data.drop('year', axis=1)
-                return filtered_data
-
-            raise ValueError(f"No data for: {kwargs}")
-        except Exception as e:
-            raise ValueError(f"Getting value error for: {kwargs}, error: {str(e)}")
-
-    def get_emission_factor(self, eid, attr, entity):
-        params = self.entities[eid]['params']
-        if attr in ['P[MW]']:
-            if 'method' in params and callable(params['method']):
-                return params['method'](self, eid, attr, entity, self.current_step, params)
-            elif 'co2_emission_factor' in params and pd.notna(params['co2_emission_factor']):
-                return params['co2_emission_factor']
-            else:
-                factor = self.get_stored_values(**params)
-                index = factor.index.get_indexer([self.current_step], method='nearest')[0]
-                if index < 0:
-                    index = 0
-                factor = factor.iloc[index].values[0]
-                return factor
+    @override
+    def setup_done(self):
+        if not self._keep_database:
+            self.database = {}
+            self._cache = {}
+
+    def get_emission_factor(
+        self, eid: str, sender_eid: str, attr: str, value: float, time: Time
+    ):
+        params = self.entities[eid].copy()
+        params.update(
+            {
+                "sender_eid": sender_eid,
+                "attr": attr,
+                "value": value,
+                "step_size": self._step_size,
+                "current_step": self.current_step,
+                "current_time": time,
+            }
+        )
+        method = params.pop("method", None)
+        if callable(method):
+            params.pop("cache", None)
+            return method(**params) * params["multiplier"]
+        elif "emission_factor" in params:
+            return params["emission_factor"] * params["multiplier"]
+
+        elif "id" in params:
+            intensity = self.get_stored_values(**params)["intensity"]
+            index = intensity.index.get_indexer([self.current_step], method="nearest")[
+                0
+            ]
+            intensity = intensity.iloc[index if index >= 0 else 0]
+            return intensity * params["multiplier"]
+
         else:
-            raise ValueError(f"No appropriate method assigned for '{attr}'")
+            raise ValueError("The model parameters are not properly specified!")
 
-    def step(self, time, inputs, max_advance):
+    @override
+    def step(self, time: Time, inputs: InputData, max_advance: Time = 0) -> Time | None:
         # {'Emission-0': {'P[MW]': {'Grid-0.Gen-0': 1.0}}}
-        self.current_step += pd.Timedelta(self._step_size, unit='seconds')
+        self.current_step = self._start + pd.to_timedelta(time, unit="seconds")
         for eid, data in inputs.items():
-            self.entities[eid]['cache']['E[tCO2eq]'] = 0
+            total_power = 0
+            total_emissions = 0
             for attr, values in data.items():
-                self.entities[eid]['cache'][attr] = 0
-                for k, v in values.items():
-                    self.entities[eid]['cache'][attr] += v
-                    self.entities[eid]['cache']['E[tCO2eq]'] += v * self.get_emission_factor(eid, attr, k) * self.entities[eid]['coefficient']
-        return time + self._step_size
-
+                if attr == "P[MW]":
+                    for k, v in values.items():
+                        total_power += abs(v)
+                        total_emissions += abs(v) * self.get_emission_factor(
+                            eid, k, attr, v, time
+                        )
+            self.entities[eid]["cache"]["P[MW]"] = total_power
+            self.entities[eid]["cache"]["I[tCO₂eq/MWh]"] = (
+                total_emissions / total_power if total_power > 0 else 0
+            )
+            self.entities[eid]["cache"]["E[tCO₂eq]"] = (
+                total_emissions * self._step_size * self._time_resolution / 3600
+            )
+
+        if self._step_size:
+            return time + self._step_size
+
+    @override
     def get_data(self, outputs: OutputRequest) -> OutputData:
-        return {eid: {attr: self.entities[eid]['cache'][attr]
-                      for attr in attrs
-                      } for eid, attrs in outputs.items()}
+        return {
+            eid: {
+                attr: self.entities[eid]["cache"][attr]
+                for attr in attrs
+                if attr in self.entities[eid]["cache"]
+            }
+            for eid, attrs in outputs.items()
+        }
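
Editor's note on the reworked `step()`: per entity it now sums |P| over all senders, converts the summed rate (MW × tCO₂eq/MWh = tCO₂eq/h) into energy-based tonnes via `step_size * time_resolution / 3600`, and reports the power-weighted intensity. A stand-alone sketch of the same arithmetic, with invented numbers:

    # Reproduces the step() bookkeeping outside the simulator; values are made up.
    step_size, time_resolution = 900, 1.0      # default 15-minute step
    flows = {"Grid-0.Gen-0": 2.0}              # P[MW] inputs per sender
    factor = 0.4                               # assumed carbon intensity [tCO₂eq/MWh]

    total_power = sum(abs(p) for p in flows.values())
    total_emissions = sum(abs(p) * factor for p in flows.values())

    E = total_emissions * step_size * time_resolution / 3600   # 0.2 tCO₂eq per step
    I = total_emissions / total_power if total_power else 0    # 0.4 tCO₂eq/MWh

So a constant 2 MW flow at 0.4 tCO₂eq/MWh yields 0.2 tCO₂eq per 15-minute step, i.e. 0.8 tCO₂eq per hour, matching the `E[tCO₂eq]` and `I[tCO₂eq/MWh]` attributes cached for `get_data`.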