PyPI - openenergyid - Versions diffs - 0.1.6__py2.py3-none-any.whl → 0.1.8__py2.py3-none-any.whl - Mend

openenergyid 0.1.6__py2.py3-none-any.whl → 0.1.8__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of openenergyid might be problematic. Click here for more details.

Files changed (12)

openenergyid/__init__.py +1 -1
openenergyid/const.py +13 -0
openenergyid/models.py +42 -0
openenergyid/mvlr/__init__.py +12 -5
openenergyid/mvlr/main.py +29 -0
openenergyid/mvlr/models.py +126 -7
openenergyid/mvlr/mvlr.py +42 -44
{openenergyid-0.1.6.dist-info → openenergyid-0.1.8.dist-info}/METADATA +1 -1
openenergyid-0.1.8.dist-info/RECORD +13 -0
{openenergyid-0.1.6.dist-info → openenergyid-0.1.8.dist-info}/WHEEL +1 -1
openenergyid-0.1.6.dist-info/RECORD +0 -11
{openenergyid-0.1.6.dist-info → openenergyid-0.1.8.dist-info}/licenses/LICENSE +0 -0

openenergyid/__init__.py CHANGED Viewed

@@ -1,6 +1,6 @@
 """Open Energy ID Python SDK."""
-__version__ = "0.1.6"
+__version__ = "0.1.8"
 from .enums import Granularity
 from .models import TimeSeries

openenergyid/const.py ADDED Viewed

@@ -0,0 +1,13 @@
+"""Constants for the Open Energy ID package."""
+from typing import Literal
+# METRICS
+ELECTRICITY_DELIVERED: Literal["electricity_delivered"] = "electricity_delivered"
+ELECTRICITY_EXPORTED: Literal["electricity_exported"] = "electricity_exported"
+ELECTRICITY_PRODUCED: Literal["electricity_produced"] = "electricity_produced"
+PRICE_DAY_AHEAD: Literal["price_day_ahead"] = "price_day_ahead"
+PRICE_IMBALANCE_UPWARD: Literal["price_imbalance_upward"] = "price_imbalance_upward"
+PRICE_IMBALANCE_DOWNWARD: Literal["price_imbalance_downward"] = "price_imbalance_downward"

openenergyid/models.py CHANGED Viewed

@@ -1,6 +1,7 @@
 """Data models for the Open Energy ID."""
 import datetime as dt
+from typing import Optional, overload
 import pandas as pd
 from pydantic import BaseModel
@@ -23,3 +24,44 @@ class TimeSeries(BaseModel):
         frame = pd.DataFrame(self.data, columns=self.columns, index=self.index)
         frame.index = pd.to_datetime(frame.index, utc=True)
         return frame.tz_convert(timezone)
+    @overload
+    def to_json(self, path: None = None, **kwargs) -> str:
+        ...
+    @overload
+    def to_json(self, path: str, **kwargs) -> None:
+        ...
+    def to_json(self, path: Optional[str] = None, **kwargs) -> Optional[str]:
+        """Save the TimeSeries to a JSON file or return as string."""
+        if path is None:
+            return self.model_dump_json(**kwargs)
+        else:
+            encoding = kwargs.pop("encoding", "UTF-8")
+            with open(path, "w", encoding=encoding) as file:
+                file.write(self.model_dump_json(**kwargs))
+    @overload
+    @classmethod
+    def from_json(cls, string: str, **kwargs) -> "TimeSeries":
+        ...
+    @overload
+    @classmethod
+    def from_json(cls, path: str, **kwargs) -> "TimeSeries":
+        ...
+    @classmethod
+    def from_json(
+        cls, string: Optional[str] = None, path: Optional[str] = None, **kwargs
+    ) -> "TimeSeries":
+        """Load the TimeSeries from a JSON file or string."""
+        if string:
+            return cls.model_validate_json(string, **kwargs)
+        elif path:
+            encoding = kwargs.pop("encoding", "UTF-8")
+            with open(path, "r", encoding=encoding) as file:
+                return cls.model_validate_json(file.read(), **kwargs)
+        else:
+            raise ValueError("Either string or path must be provided.")

openenergyid/mvlr/__init__.py CHANGED Viewed

@@ -1,12 +1,19 @@
 """Multi-variable linear regression (MVLR) module."""
-from .mvlr import MultiVariableLinearRegression, find_best_mvlr, ValidationParameters
-from .models import IndependentVariable, MultiVariableRegressionResult
+from .main import find_best_mvlr
+from .models import (
+    IndependentVariableInput,
+    MultiVariableRegressionInput,
+    MultiVariableRegressionResult,
+    ValidationParameters,
+    IndependentVariableResult,
+)
 __all__ = [
-    "MultiVariableLinearRegression",
-    "MultiVariableRegressionResult",
-    "IndependentVariable",
     "find_best_mvlr",
+    "IndependentVariableInput",
+    "MultiVariableRegressionInput",
+    "MultiVariableRegressionResult",
     "ValidationParameters",
+    "IndependentVariableResult",
 ]

openenergyid/mvlr/main.py ADDED Viewed

@@ -0,0 +1,29 @@
+"""Main module for the MultiVariableLinearRegression class."""
+from .models import MultiVariableRegressionInput, MultiVariableRegressionResult
+from .helpers import resample_input_data
+from .mvlr import MultiVariableLinearRegression
+def find_best_mvlr(
+    data: MultiVariableRegressionInput,
+) -> MultiVariableRegressionResult:
+    """Cycle through multiple granularities and return the best model."""
+    for granularity in data.granularities:
+        frame = data.data_frame()
+        frame = resample_input_data(data=frame, granularity=granularity)
+        mvlr = MultiVariableLinearRegression(
+            data=frame,
+            y=data.dependent_variable,
+            granularity=granularity,
+            allow_negative_predictions=data.allow_negative_predictions,
+            single_use_exog_prefixes=data.single_use_exog_prefixes,
+        )
+        mvlr.do_analysis()
+        if mvlr.validate(
+            min_rsquared=data.validation_parameters.rsquared,
+            max_f_pvalue=data.validation_parameters.f_pvalue,
+            max_pvalues=data.validation_parameters.pvalues,
+        ):
+            return MultiVariableRegressionResult.from_mvlr(mvlr)
+    raise ValueError("No valid model found.")

openenergyid/mvlr/models.py CHANGED Viewed

@@ -1,5 +1,6 @@
 """Models for multivariable linear regression."""
-from typing import Optional
+from typing import Any, List, Optional
+import pandas as pd
 from pydantic import BaseModel, Field, ConfigDict
 import statsmodels.formula.api as fm
@@ -10,6 +11,124 @@ from openenergyid.models import TimeSeries
 from .mvlr import MultiVariableLinearRegression
+COLUMN_TEMPERATUREEQUIVALENT = "temperatureEquivalent"
+######################
+# MVLR Input Models #
+######################
+class ValidationParameters(BaseModel):
+    """Parameters for validation of a multivariable linear regression model."""
+    rsquared: float = Field(
+        0.75, ge=0, le=1, description="Minimum acceptable value for the adjusted R-squared"
+    )
+    f_pvalue: float = Field(
+        0.05, ge=0, le=1, description="Maximum acceptable value for the F-statistic"
+    )
+    pvalues: float = Field(
+        0.05, ge=0, le=1, description="Maximum acceptable value for the p-values of the t-statistic"
+    )
+class IndependentVariableInput(BaseModel):
+    """
+    Independent variable.
+    Has to corresponds to a column in the data frame.
+    """
+    name: str = Field(
+        description="Name of the independent variable. "
+        "If the name is `temperatureEquivalent`, "
+        "it will be unpacked into columns according to the variants."
+    )
+    variants: Optional[list[str]] = Field(
+        default=None,
+        description="Variants of the `temperatureEquivalent` independent variable. "
+        "Eg. `HDD_16.5` will be Heating Degree Days with a base temperature of 16.5°C, "
+        "`CDD_0` will be Cooling Degree Days with a base temperature of 0°C.",
+    )
+class MultiVariableRegressionInput(BaseModel):
+    """Multi-variable regression input."""
+    timezone: str = Field(alias="timeZone")
+    independent_variables: List[IndependentVariableInput] = Field(
+        alias="independentVariables", min_length=1
+    )
+    dependent_variable: str = Field(alias="dependentVariable")
+    frame: TimeSeries
+    granularities: list[Granularity]
+    allow_negative_predictions: bool = Field(alias="allowNegativePredictions", default=False)
+    validation_parameters: ValidationParameters = Field(
+        alias="validationParameters", default=ValidationParameters()
+    )
+    single_use_exog_prefixes: Optional[List[str]] = Field(
+        # default=["HDD", "CDD", "FDD"],
+        default=None,
+        alias="singleUseExogPrefixes",
+        description="List of prefixes to be used as single-use exogenous variables.",
+    )
+    def model_post_init(self, __context: Any) -> None:
+        """Post init hook."""
+        # Check if all independent variables are present in the data frame
+        for iv in self.independent_variables:  # pylint: disable=not-an-iterable
+            if iv.name not in self.frame.columns:
+                raise ValueError(f"Independent variable {iv.name} not found in the data frame.")
+        return super().model_post_init(__context)
+    def _data_frame(self) -> pd.DataFrame:
+        """Convert the data to a pandas DataFrame."""
+        return self.frame.to_pandas(timezone=self.timezone)
+    def data_frame(self) -> pd.DataFrame:
+        """
+        Return the data frame ready for analysis.
+        Unpacks degree days and removes unnecessary columns.
+        If an independent variable named `temperatureEquivalent` is present,
+        it will be unpacked into columns according to the variants.
+        Eg. Variant "HDD_16.5" will be Heating Degree Days
+        with a base temperature of 16.5°C,
+        "CDD_0" will be Cooling Degree Days with a base temperature of 0°C.
+        """
+        frame = self._data_frame()
+        columns_to_retain = [self.dependent_variable]
+        for iv in self.independent_variables:  # pylint: disable=not-an-iterable
+            if iv.name == COLUMN_TEMPERATUREEQUIVALENT and iv.variants is not None:
+                for variant in iv.variants:
+                    prefix, base_temperature = variant.split("_")
+                    if prefix == "CDD":
+                        frame[variant] = frame[COLUMN_TEMPERATUREEQUIVALENT] - float(
+                            base_temperature
+                        )
+                    else:
+                        frame[variant] = (
+                            float(base_temperature) - frame[COLUMN_TEMPERATUREEQUIVALENT]
+                        )
+                    frame[variant] = frame[variant].clip(lower=0)
+                    columns_to_retain.append(variant)
+                frame.drop(columns=[COLUMN_TEMPERATUREEQUIVALENT], inplace=True)
+            else:
+                columns_to_retain.append(iv.name)
+        frame = frame[columns_to_retain].copy()
+        return frame
+######################
+# MVLR Result Models #
+######################
 class ConfidenceInterval(BaseModel):
     """Confidence interval for a coefficient."""
@@ -18,7 +137,7 @@ class ConfidenceInterval(BaseModel):
     upper: float
-class IndependentVariable(BaseModel):
+class IndependentVariableResult(BaseModel):
     """Independent variable for a multivariable linear regression model."""
     name: str
@@ -33,7 +152,7 @@ class IndependentVariable(BaseModel):
     model_config = ConfigDict(populate_by_name=True)
     @classmethod
-    def from_fit(cls, fit: fm.ols, name: str) -> "IndependentVariable":
+    def from_fit(cls, fit: fm.ols, name: str) -> "IndependentVariableResult":
         """Create an IndependentVariable from a fit."""
         return cls(
             name=name,
@@ -53,12 +172,12 @@ class MultiVariableRegressionResult(BaseModel):
     """Result of a multivariable regression model."""
     dependent_variable: str = Field(alias="dependentVariable")
-    independent_variables: list[IndependentVariable] = Field(alias="independentVariables")
+    independent_variables: list[IndependentVariableResult] = Field(alias="independentVariables")
     r2: float = Field(ge=0, le=1, alias="rSquared")
     r2_adj: float = Field(ge=0, le=1, alias="rSquaredAdjusted")
     f_stat: float = Field(ge=0, alias="fStat")
     prob_f_stat: float = Field(ge=0, le=1, alias="probFStat")
-    intercept: IndependentVariable
+    intercept: IndependentVariableResult
     granularity: Granularity
     frame: TimeSeries
@@ -73,7 +192,7 @@ class MultiVariableRegressionResult(BaseModel):
         param_keys.remove("Intercept")
         independent_variables = []
         for k in param_keys:
-            independent_variables.append(IndependentVariable.from_fit(mvlr.fit, k))
+            independent_variables.append(IndependentVariableResult.from_fit(mvlr.fit, k))
         # Create resulting TimeSeries
         cols_to_keep = list(param_keys)
@@ -88,7 +207,7 @@ class MultiVariableRegressionResult(BaseModel):
             r2_adj=mvlr.fit.rsquared_adj,
             f_stat=mvlr.fit.fvalue,
             prob_f_stat=mvlr.fit.f_pvalue,
-            intercept=IndependentVariable.from_fit(mvlr.fit, "Intercept"),
+            intercept=IndependentVariableResult.from_fit(mvlr.fit, "Intercept"),
             granularity=mvlr.granularity,
             frame=TimeSeries.from_pandas(frame),
         )

openenergyid/mvlr/mvlr.py CHANGED Viewed

@@ -3,29 +3,12 @@ and Ordinary Least Squares (ols)."""
 import numpy as np
 import pandas as pd
-from pydantic import BaseModel, Field
 import statsmodels.formula.api as fm
 from patsy import LookupFactor, ModelDesc, Term  # pylint: disable=no-name-in-module
 from statsmodels.sandbox.regression.predstd import wls_prediction_std
 from openenergyid.enums import Granularity
-from .helpers import resample_input_data
-class ValidationParameters(BaseModel):
-    """Parameters for validation of a multivariable linear regression model."""
-    rsquared: float = Field(
-        0.75, ge=0, le=1, description="Minimum acceptable value for the adjusted R-squared"
-    )
-    f_pvalue: float = Field(
-        0.05, ge=0, le=1, description="Maximum acceptable value for the F-statistic"
-    )
-    pvalues: float = Field(
-        0.05, ge=0, le=1, description="Maximum acceptable value for the p-values of the t-statistic"
-    )
 class MultiVariableLinearRegression:
     """Multi-variable linear regression.
@@ -56,8 +39,8 @@ class MultiVariableLinearRegression:
         confint: float = 0.95,
         cross_validation: bool = False,
         allow_negative_predictions: bool = False,
-        validation_params: ValidationParameters = None,
         granularity: Granularity = None,
+        single_use_exog_prefixes: list[str] = None,
     ):
         """Parameters
         ----------
@@ -80,8 +63,15 @@ class MultiVariableLinearRegression:
             If True, allow predictions to be negative.
             For gas consumption or PV production, this is not physical
             so allow_negative_predictions should be False
-        validation_params : ValidationParameters, default=None
-            Parameters to validate the model.
+        granularity : Granularity, default=None
+            Granularity of the data. Is only used for the output of the model.
+            If None, the granularity is not set.
+        single_use_exog_prefixes : list of str, default=None
+            List of variable prefixes that indicate a variable type that should only be used once.
+            For example, if the list contains "HDD", only one of the columns "HDD1", "HDD2", "HDD3" etc.
+            will be used as an independent variable.
+            Once the best fit using a variable with a given prefix is found, the other variables with the same
+            prefix will not be used as independent variables.
         """
         self.data = data.copy()
         if y not in self.data.columns:
@@ -95,8 +85,8 @@ class MultiVariableLinearRegression:
         self.confint = confint
         self.cross_validation = cross_validation
         self.allow_negative_predictions = allow_negative_predictions
-        self.validation_params = validation_params or ValidationParameters()
         self.granularity = granularity
+        self.single_use_exog_prefixes = single_use_exog_prefixes
         self._fit = None
         self._list_of_fits = []
         self.list_of_cverrors = []
@@ -187,6 +177,18 @@ class MultiVariableLinearRegression:
             else:
                 self._list_of_fits.append(best_fit)
                 all_model_terms_dict.pop(best_x)
+                # Check if `best_x` starts with a prefix that should only be used once
+                # If so, remove all other variables with the same prefix from the list of candidates
+                if self.single_use_exog_prefixes:
+                    for prefix in self.single_use_exog_prefixes:
+                        if best_x.startswith(prefix):
+                            all_model_terms_dict = {
+                                k: v
+                                for k, v in all_model_terms_dict.items()
+                                if not k.startswith(prefix)
+                            }
         self._fit = self._list_of_fits[-1]
     def _do_analysis_cross_validation(self):
@@ -258,6 +260,17 @@ class MultiVariableLinearRegression:
             # next iteration with the found exog removed
             all_model_terms_dict.pop(best_x)
+            # Check if `best_x` starts with a prefix that should only be used once
+            # If so, remove all other variables with the same prefix from the list of candidates
+            if self.single_use_exog_prefixes:
+                for prefix in self.single_use_exog_prefixes:
+                    if best_x.startswith(prefix):
+                        all_model_terms_dict = {
+                            k: v
+                            for k, v in all_model_terms_dict.items()
+                            if not k.startswith(prefix)
+                        }
         self._fit = self._list_of_fits[-1]
     def _prune(self, fit: fm.ols, p_max: float) -> fm.ols:
@@ -299,7 +312,7 @@ class MultiVariableLinearRegression:
         pars_to_prune = fit.pvalues.where(fit.pvalues > p_max).dropna().index.tolist()
         try:
             pars_to_prune.remove("Intercept")
-        except KeyError:
+        except ValueError:
             pass
         while pars_to_prune:
             corrected_model_desc = remove_from_model_desc(
@@ -310,7 +323,7 @@ class MultiVariableLinearRegression:
             pars_to_prune = fit.pvalues.where(fit.pvalues > p_max).dropna().index.tolist()
             try:
                 pars_to_prune.remove("Intercept")
-            except KeyError:
+            except ValueError:
                 pass
         return fit
@@ -400,40 +413,25 @@ class MultiVariableLinearRegression:
         """
         self.data = self._predict(fit=self.fit, data=self.data)
-    @property
-    def is_valid(self) -> bool:
+    def validate(
+        self, min_rsquared: float = 0.75, max_f_pvalue: float = 0.05, max_pvalues: float = 0.05
+    ) -> bool:
         """Checks if the model is valid.
         Returns
         -------
             bool: True if the model is valid, False otherwise.
         """
-        if self.fit.rsquared_adj < self.validation_params.rsquared:
+        if self.fit.rsquared_adj < min_rsquared:
             return False
-        if self.fit.f_pvalue > self.validation_params.f_pvalue:
+        if self.fit.f_pvalue > max_f_pvalue:
             return False
         param_keys = self.fit.pvalues.keys().tolist()
         param_keys.remove("Intercept")
         for k in param_keys:
-            if self.fit.pvalues[k] > self.validation_params.pvalues:
+            if self.fit.pvalues[k] > max_pvalues:
                 return False
         return True
-def find_best_mvlr(
-    data: pd.DataFrame,
-    y: str,
-    granularities: list[Granularity],
-    **kwargs,
-) -> MultiVariableLinearRegression:
-    """Cycle through multiple granularities and return the best model."""
-    for granularity in granularities:
-        data = resample_input_data(data=data, granularity=granularity)
-        mvlr = MultiVariableLinearRegression(data=data, y=y, granularity=granularity, **kwargs)
-        mvlr.do_analysis()
-        if mvlr.is_valid:
-            return mvlr
-    raise ValueError("No valid model found.")

{openenergyid-0.1.6.dist-info → openenergyid-0.1.8.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: openenergyid
-Version: 0.1.6
+Version: 0.1.8
 Summary: Open Source Python library for energy analytics and simulations
 Project-URL: Homepage, https://energyid.eu
 Project-URL: Repository, https://github.com/EnergieID/OpenEnergyID

openenergyid-0.1.8.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,13 @@
+openenergyid/__init__.py,sha256=SddUHMaNL5tWsrK3W-8JyGXmxdYaeyxBhXqd1TsBChc,160
+openenergyid/const.py,sha256=bF-U-r0Qj2GWCYBBxReg8fbv2D0V1JzfPMwSEQ5ZWds,569
+openenergyid/enums.py,sha256=jdw4CB1gkisx0re_SesrTEyh_T-UxYp6uieE7iYlHdA,357
+openenergyid/models.py,sha256=pUJpQCodph0NukiIpFdc9X6Zj6qEGQPSWoztYDwqyuE,2214
+openenergyid/mvlr/__init__.py,sha256=Glrc218oqa8tq_Y2G9LXaSoN4Yba-vsjXUi9r9iPzaY,471
+openenergyid/mvlr/helpers.py,sha256=fsx-gSvBdU31BjncFkRd1RySmSPPYgwflCnmSFzox2Q,961
+openenergyid/mvlr/main.py,sha256=dwkl71u8HnlMAq-cmkwvI7z-XtlmqpvZRFoDc9CN-gg,1210
+openenergyid/mvlr/models.py,sha256=ncQ0W0LLCP7IZ4rDgLwIPZRQpK4-xC-qA17BW9tMwio,7878
+openenergyid/mvlr/mvlr.py,sha256=UbMuoWdepnGd1_heVtFOnLoBxVUB7WrPRLyOaDELxlI,18030
+openenergyid-0.1.8.dist-info/METADATA,sha256=7S_S8PFQ8VtflEhBFwCCxVt46Nkqzyh_UL8T89PWXm4,2431
+openenergyid-0.1.8.dist-info/WHEEL,sha256=ccEkY-EGGllEs7ySpwBlD8G4u70wR77CNej8Q6tzIqA,105
+openenergyid-0.1.8.dist-info/licenses/LICENSE,sha256=NgRdcNHwyXVCXZ8sJwoTp0DCowThJ9LWWl4xhbV1IUY,1074
+openenergyid-0.1.8.dist-info/RECORD,,

{openenergyid-0.1.6.dist-info → openenergyid-0.1.8.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: hatchling 1.18.0
+Generator: hatchling 1.21.1
 Root-Is-Purelib: true
 Tag: py2-none-any
 Tag: py3-none-any

openenergyid-0.1.6.dist-info/RECORD DELETED Viewed

@@ -1,11 +0,0 @@
-openenergyid/__init__.py,sha256=Y0iURqfHv7UMooQ4KmINfopw-53Qf0TwSUBM4CONpcY,160
-openenergyid/enums.py,sha256=jdw4CB1gkisx0re_SesrTEyh_T-UxYp6uieE7iYlHdA,357
-openenergyid/models.py,sha256=w6YJHi1fysmLZYEI6peTfQAbMS92Kf5sk0VtXw7HrAM,813
-openenergyid/mvlr/__init__.py,sha256=PzHuv0_uBTiAmzNrZKVObO5pxOYGJ2GFZCzK5Y82bGU,378
-openenergyid/mvlr/helpers.py,sha256=fsx-gSvBdU31BjncFkRd1RySmSPPYgwflCnmSFzox2Q,961
-openenergyid/mvlr/models.py,sha256=1yLcpVtA7ruPOmwSLY-Tg9Nd2lTbYH-nN3cb_CWXegs,3382
-openenergyid/mvlr/mvlr.py,sha256=mHdn_dh6GWxc79TWBhj61sp20PlACn6AfJX16dRmR6E,17407
-openenergyid-0.1.6.dist-info/METADATA,sha256=lx7sXCt-TrVC43NQXFpZr_2dr1avmh9R3CLV8uxn7x4,2431
-openenergyid-0.1.6.dist-info/WHEEL,sha256=fagL_Tj29mg80flwlxJNW45nBDbboxF04Tnbc_jt3Bg,105
-openenergyid-0.1.6.dist-info/licenses/LICENSE,sha256=NgRdcNHwyXVCXZ8sJwoTp0DCowThJ9LWWl4xhbV1IUY,1074
-openenergyid-0.1.6.dist-info/RECORD,,

{openenergyid-0.1.6.dist-info → openenergyid-0.1.8.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

openenergyid 0.1.6__py2.py3-none-any.whl → 0.1.8__py2.py3-none-any.whl

Potentially problematic release.

openenergyid 0.1.6__py2.py3-none-any.whl → 0.1.8__py2.py3-none-any.whl