openenergyid 0.1.6__py2.py3-none-any.whl → 0.1.8__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of openenergyid might be problematic. Click here for more details.
- openenergyid/__init__.py +1 -1
- openenergyid/const.py +13 -0
- openenergyid/models.py +42 -0
- openenergyid/mvlr/__init__.py +12 -5
- openenergyid/mvlr/main.py +29 -0
- openenergyid/mvlr/models.py +126 -7
- openenergyid/mvlr/mvlr.py +42 -44
- {openenergyid-0.1.6.dist-info → openenergyid-0.1.8.dist-info}/METADATA +1 -1
- openenergyid-0.1.8.dist-info/RECORD +13 -0
- {openenergyid-0.1.6.dist-info → openenergyid-0.1.8.dist-info}/WHEEL +1 -1
- openenergyid-0.1.6.dist-info/RECORD +0 -11
- {openenergyid-0.1.6.dist-info → openenergyid-0.1.8.dist-info}/licenses/LICENSE +0 -0
openenergyid/__init__.py
CHANGED
openenergyid/const.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""Constants for the Open Energy ID package."""
|
|
2
|
+
|
|
3
|
+
from typing import Literal
|
|
4
|
+
|
|
5
|
+
# METRICS
|
|
6
|
+
|
|
7
|
+
ELECTRICITY_DELIVERED: Literal["electricity_delivered"] = "electricity_delivered"
|
|
8
|
+
ELECTRICITY_EXPORTED: Literal["electricity_exported"] = "electricity_exported"
|
|
9
|
+
ELECTRICITY_PRODUCED: Literal["electricity_produced"] = "electricity_produced"
|
|
10
|
+
|
|
11
|
+
PRICE_DAY_AHEAD: Literal["price_day_ahead"] = "price_day_ahead"
|
|
12
|
+
PRICE_IMBALANCE_UPWARD: Literal["price_imbalance_upward"] = "price_imbalance_upward"
|
|
13
|
+
PRICE_IMBALANCE_DOWNWARD: Literal["price_imbalance_downward"] = "price_imbalance_downward"
|
openenergyid/models.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"""Data models for the Open Energy ID."""
|
|
2
2
|
|
|
3
3
|
import datetime as dt
|
|
4
|
+
from typing import Optional, overload
|
|
4
5
|
|
|
5
6
|
import pandas as pd
|
|
6
7
|
from pydantic import BaseModel
|
|
@@ -23,3 +24,44 @@ class TimeSeries(BaseModel):
|
|
|
23
24
|
frame = pd.DataFrame(self.data, columns=self.columns, index=self.index)
|
|
24
25
|
frame.index = pd.to_datetime(frame.index, utc=True)
|
|
25
26
|
return frame.tz_convert(timezone)
|
|
27
|
+
|
|
28
|
+
@overload
|
|
29
|
+
def to_json(self, path: None = None, **kwargs) -> str:
|
|
30
|
+
...
|
|
31
|
+
|
|
32
|
+
@overload
|
|
33
|
+
def to_json(self, path: str, **kwargs) -> None:
|
|
34
|
+
...
|
|
35
|
+
|
|
36
|
+
def to_json(self, path: Optional[str] = None, **kwargs) -> Optional[str]:
|
|
37
|
+
"""Save the TimeSeries to a JSON file or return as string."""
|
|
38
|
+
if path is None:
|
|
39
|
+
return self.model_dump_json(**kwargs)
|
|
40
|
+
else:
|
|
41
|
+
encoding = kwargs.pop("encoding", "UTF-8")
|
|
42
|
+
with open(path, "w", encoding=encoding) as file:
|
|
43
|
+
file.write(self.model_dump_json(**kwargs))
|
|
44
|
+
|
|
45
|
+
@overload
|
|
46
|
+
@classmethod
|
|
47
|
+
def from_json(cls, string: str, **kwargs) -> "TimeSeries":
|
|
48
|
+
...
|
|
49
|
+
|
|
50
|
+
@overload
|
|
51
|
+
@classmethod
|
|
52
|
+
def from_json(cls, path: str, **kwargs) -> "TimeSeries":
|
|
53
|
+
...
|
|
54
|
+
|
|
55
|
+
@classmethod
|
|
56
|
+
def from_json(
|
|
57
|
+
cls, string: Optional[str] = None, path: Optional[str] = None, **kwargs
|
|
58
|
+
) -> "TimeSeries":
|
|
59
|
+
"""Load the TimeSeries from a JSON file or string."""
|
|
60
|
+
if string:
|
|
61
|
+
return cls.model_validate_json(string, **kwargs)
|
|
62
|
+
elif path:
|
|
63
|
+
encoding = kwargs.pop("encoding", "UTF-8")
|
|
64
|
+
with open(path, "r", encoding=encoding) as file:
|
|
65
|
+
return cls.model_validate_json(file.read(), **kwargs)
|
|
66
|
+
else:
|
|
67
|
+
raise ValueError("Either string or path must be provided.")
|
openenergyid/mvlr/__init__.py
CHANGED
|
@@ -1,12 +1,19 @@
|
|
|
1
1
|
"""Multi-variable linear regression (MVLR) module."""
|
|
2
2
|
|
|
3
|
-
from .mvlr import MultiVariableLinearRegression, find_best_mvlr, ValidationParameters
|
|
4
|
-
from .models import IndependentVariable, MultiVariableRegressionResult
|
|
3
|
+
from .main import find_best_mvlr
|
|
4
|
+
from .models import (
|
|
5
|
+
IndependentVariableInput,
|
|
6
|
+
MultiVariableRegressionInput,
|
|
7
|
+
MultiVariableRegressionResult,
|
|
8
|
+
ValidationParameters,
|
|
9
|
+
IndependentVariableResult,
|
|
10
|
+
)
|
|
5
11
|
|
|
6
12
|
__all__ = [
|
|
7
|
-
"MultiVariableLinearRegression",
|
|
8
|
-
"MultiVariableRegressionResult",
|
|
9
|
-
"IndependentVariable",
|
|
10
13
|
"find_best_mvlr",
|
|
14
|
+
"IndependentVariableInput",
|
|
15
|
+
"MultiVariableRegressionInput",
|
|
16
|
+
"MultiVariableRegressionResult",
|
|
11
17
|
"ValidationParameters",
|
|
18
|
+
"IndependentVariableResult",
|
|
12
19
|
]
|
openenergyid/mvlr/main.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""Main module for the MultiVariableLinearRegression class."""
|
|
2
|
+
|
|
3
|
+
from .models import MultiVariableRegressionInput, MultiVariableRegressionResult
|
|
4
|
+
from .helpers import resample_input_data
|
|
5
|
+
from .mvlr import MultiVariableLinearRegression
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def find_best_mvlr(
|
|
9
|
+
data: MultiVariableRegressionInput,
|
|
10
|
+
) -> MultiVariableRegressionResult:
|
|
11
|
+
"""Cycle through multiple granularities and return the best model."""
|
|
12
|
+
for granularity in data.granularities:
|
|
13
|
+
frame = data.data_frame()
|
|
14
|
+
frame = resample_input_data(data=frame, granularity=granularity)
|
|
15
|
+
mvlr = MultiVariableLinearRegression(
|
|
16
|
+
data=frame,
|
|
17
|
+
y=data.dependent_variable,
|
|
18
|
+
granularity=granularity,
|
|
19
|
+
allow_negative_predictions=data.allow_negative_predictions,
|
|
20
|
+
single_use_exog_prefixes=data.single_use_exog_prefixes,
|
|
21
|
+
)
|
|
22
|
+
mvlr.do_analysis()
|
|
23
|
+
if mvlr.validate(
|
|
24
|
+
min_rsquared=data.validation_parameters.rsquared,
|
|
25
|
+
max_f_pvalue=data.validation_parameters.f_pvalue,
|
|
26
|
+
max_pvalues=data.validation_parameters.pvalues,
|
|
27
|
+
):
|
|
28
|
+
return MultiVariableRegressionResult.from_mvlr(mvlr)
|
|
29
|
+
raise ValueError("No valid model found.")
|
openenergyid/mvlr/models.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""Models for multivariable linear regression."""
|
|
2
|
-
from typing import Optional
|
|
2
|
+
from typing import Any, List, Optional
|
|
3
|
+
import pandas as pd
|
|
3
4
|
|
|
4
5
|
from pydantic import BaseModel, Field, ConfigDict
|
|
5
6
|
import statsmodels.formula.api as fm
|
|
@@ -10,6 +11,124 @@ from openenergyid.models import TimeSeries
|
|
|
10
11
|
from .mvlr import MultiVariableLinearRegression
|
|
11
12
|
|
|
12
13
|
|
|
14
|
+
COLUMN_TEMPERATUREEQUIVALENT = "temperatureEquivalent"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
######################
|
|
18
|
+
# MVLR Input Models #
|
|
19
|
+
######################
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class ValidationParameters(BaseModel):
|
|
23
|
+
"""Parameters for validation of a multivariable linear regression model."""
|
|
24
|
+
|
|
25
|
+
rsquared: float = Field(
|
|
26
|
+
0.75, ge=0, le=1, description="Minimum acceptable value for the adjusted R-squared"
|
|
27
|
+
)
|
|
28
|
+
f_pvalue: float = Field(
|
|
29
|
+
0.05, ge=0, le=1, description="Maximum acceptable value for the F-statistic"
|
|
30
|
+
)
|
|
31
|
+
pvalues: float = Field(
|
|
32
|
+
0.05, ge=0, le=1, description="Maximum acceptable value for the p-values of the t-statistic"
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class IndependentVariableInput(BaseModel):
|
|
37
|
+
"""
|
|
38
|
+
Independent variable.
|
|
39
|
+
|
|
40
|
+
Has to correspond to a column in the data frame.
|
|
41
|
+
"""
|
|
42
|
+
|
|
43
|
+
name: str = Field(
|
|
44
|
+
description="Name of the independent variable. "
|
|
45
|
+
"If the name is `temperatureEquivalent`, "
|
|
46
|
+
"it will be unpacked into columns according to the variants."
|
|
47
|
+
)
|
|
48
|
+
variants: Optional[list[str]] = Field(
|
|
49
|
+
default=None,
|
|
50
|
+
description="Variants of the `temperatureEquivalent` independent variable. "
|
|
51
|
+
"Eg. `HDD_16.5` will be Heating Degree Days with a base temperature of 16.5°C, "
|
|
52
|
+
"`CDD_0` will be Cooling Degree Days with a base temperature of 0°C.",
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class MultiVariableRegressionInput(BaseModel):
|
|
57
|
+
"""Multi-variable regression input."""
|
|
58
|
+
|
|
59
|
+
timezone: str = Field(alias="timeZone")
|
|
60
|
+
independent_variables: List[IndependentVariableInput] = Field(
|
|
61
|
+
alias="independentVariables", min_length=1
|
|
62
|
+
)
|
|
63
|
+
dependent_variable: str = Field(alias="dependentVariable")
|
|
64
|
+
frame: TimeSeries
|
|
65
|
+
granularities: list[Granularity]
|
|
66
|
+
allow_negative_predictions: bool = Field(alias="allowNegativePredictions", default=False)
|
|
67
|
+
validation_parameters: ValidationParameters = Field(
|
|
68
|
+
alias="validationParameters", default=ValidationParameters()
|
|
69
|
+
)
|
|
70
|
+
single_use_exog_prefixes: Optional[List[str]] = Field(
|
|
71
|
+
# default=["HDD", "CDD", "FDD"],
|
|
72
|
+
default=None,
|
|
73
|
+
alias="singleUseExogPrefixes",
|
|
74
|
+
description="List of prefixes to be used as single-use exogenous variables.",
|
|
75
|
+
)
|
|
76
|
+
|
|
77
|
+
def model_post_init(self, __context: Any) -> None:
|
|
78
|
+
"""Post init hook."""
|
|
79
|
+
# Check if all independent variables are present in the data frame
|
|
80
|
+
for iv in self.independent_variables: # pylint: disable=not-an-iterable
|
|
81
|
+
if iv.name not in self.frame.columns:
|
|
82
|
+
raise ValueError(f"Independent variable {iv.name} not found in the data frame.")
|
|
83
|
+
|
|
84
|
+
return super().model_post_init(__context)
|
|
85
|
+
|
|
86
|
+
def _data_frame(self) -> pd.DataFrame:
|
|
87
|
+
"""Convert the data to a pandas DataFrame."""
|
|
88
|
+
return self.frame.to_pandas(timezone=self.timezone)
|
|
89
|
+
|
|
90
|
+
def data_frame(self) -> pd.DataFrame:
|
|
91
|
+
"""
|
|
92
|
+
Return the data frame ready for analysis.
|
|
93
|
+
|
|
94
|
+
Unpacks degree days and removes unnecessary columns.
|
|
95
|
+
|
|
96
|
+
If an independent variable named `temperatureEquivalent` is present,
|
|
97
|
+
it will be unpacked into columns according to the variants.
|
|
98
|
+
Eg. Variant "HDD_16.5" will be Heating Degree Days
|
|
99
|
+
with a base temperature of 16.5°C,
|
|
100
|
+
"CDD_0" will be Cooling Degree Days with a base temperature of 0°C.
|
|
101
|
+
"""
|
|
102
|
+
frame = self._data_frame()
|
|
103
|
+
columns_to_retain = [self.dependent_variable]
|
|
104
|
+
for iv in self.independent_variables: # pylint: disable=not-an-iterable
|
|
105
|
+
if iv.name == COLUMN_TEMPERATUREEQUIVALENT and iv.variants is not None:
|
|
106
|
+
for variant in iv.variants:
|
|
107
|
+
prefix, base_temperature = variant.split("_")
|
|
108
|
+
if prefix == "CDD":
|
|
109
|
+
frame[variant] = frame[COLUMN_TEMPERATUREEQUIVALENT] - float(
|
|
110
|
+
base_temperature
|
|
111
|
+
)
|
|
112
|
+
else:
|
|
113
|
+
frame[variant] = (
|
|
114
|
+
float(base_temperature) - frame[COLUMN_TEMPERATUREEQUIVALENT]
|
|
115
|
+
)
|
|
116
|
+
frame[variant] = frame[variant].clip(lower=0)
|
|
117
|
+
columns_to_retain.append(variant)
|
|
118
|
+
frame.drop(columns=[COLUMN_TEMPERATUREEQUIVALENT], inplace=True)
|
|
119
|
+
else:
|
|
120
|
+
columns_to_retain.append(iv.name)
|
|
121
|
+
|
|
122
|
+
frame = frame[columns_to_retain].copy()
|
|
123
|
+
|
|
124
|
+
return frame
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
######################
|
|
128
|
+
# MVLR Result Models #
|
|
129
|
+
######################
|
|
130
|
+
|
|
131
|
+
|
|
13
132
|
class ConfidenceInterval(BaseModel):
|
|
14
133
|
"""Confidence interval for a coefficient."""
|
|
15
134
|
|
|
@@ -18,7 +137,7 @@ class ConfidenceInterval(BaseModel):
|
|
|
18
137
|
upper: float
|
|
19
138
|
|
|
20
139
|
|
|
21
|
-
class IndependentVariable(BaseModel):
|
|
140
|
+
class IndependentVariableResult(BaseModel):
|
|
22
141
|
"""Independent variable for a multivariable linear regression model."""
|
|
23
142
|
|
|
24
143
|
name: str
|
|
@@ -33,7 +152,7 @@ class IndependentVariable(BaseModel):
|
|
|
33
152
|
model_config = ConfigDict(populate_by_name=True)
|
|
34
153
|
|
|
35
154
|
@classmethod
|
|
36
|
-
def from_fit(cls, fit: fm.ols, name: str) -> "IndependentVariable":
|
|
155
|
+
def from_fit(cls, fit: fm.ols, name: str) -> "IndependentVariableResult":
|
|
37
156
|
"""Create an IndependentVariable from a fit."""
|
|
38
157
|
return cls(
|
|
39
158
|
name=name,
|
|
@@ -53,12 +172,12 @@ class MultiVariableRegressionResult(BaseModel):
|
|
|
53
172
|
"""Result of a multivariable regression model."""
|
|
54
173
|
|
|
55
174
|
dependent_variable: str = Field(alias="dependentVariable")
|
|
56
|
-
independent_variables: list[IndependentVariable] = Field(alias="independentVariables")
|
|
175
|
+
independent_variables: list[IndependentVariableResult] = Field(alias="independentVariables")
|
|
57
176
|
r2: float = Field(ge=0, le=1, alias="rSquared")
|
|
58
177
|
r2_adj: float = Field(ge=0, le=1, alias="rSquaredAdjusted")
|
|
59
178
|
f_stat: float = Field(ge=0, alias="fStat")
|
|
60
179
|
prob_f_stat: float = Field(ge=0, le=1, alias="probFStat")
|
|
61
|
-
intercept: IndependentVariable
|
|
180
|
+
intercept: IndependentVariableResult
|
|
62
181
|
granularity: Granularity
|
|
63
182
|
frame: TimeSeries
|
|
64
183
|
|
|
@@ -73,7 +192,7 @@ class MultiVariableRegressionResult(BaseModel):
|
|
|
73
192
|
param_keys.remove("Intercept")
|
|
74
193
|
independent_variables = []
|
|
75
194
|
for k in param_keys:
|
|
76
|
-
independent_variables.append(IndependentVariable.from_fit(mvlr.fit, k))
|
|
195
|
+
independent_variables.append(IndependentVariableResult.from_fit(mvlr.fit, k))
|
|
77
196
|
|
|
78
197
|
# Create resulting TimeSeries
|
|
79
198
|
cols_to_keep = list(param_keys)
|
|
@@ -88,7 +207,7 @@ class MultiVariableRegressionResult(BaseModel):
|
|
|
88
207
|
r2_adj=mvlr.fit.rsquared_adj,
|
|
89
208
|
f_stat=mvlr.fit.fvalue,
|
|
90
209
|
prob_f_stat=mvlr.fit.f_pvalue,
|
|
91
|
-
intercept=IndependentVariable.from_fit(mvlr.fit, "Intercept"),
|
|
210
|
+
intercept=IndependentVariableResult.from_fit(mvlr.fit, "Intercept"),
|
|
92
211
|
granularity=mvlr.granularity,
|
|
93
212
|
frame=TimeSeries.from_pandas(frame),
|
|
94
213
|
)
|
openenergyid/mvlr/mvlr.py
CHANGED
|
@@ -3,29 +3,12 @@ and Ordinary Least Squares (ols)."""
|
|
|
3
3
|
|
|
4
4
|
import numpy as np
|
|
5
5
|
import pandas as pd
|
|
6
|
-
from pydantic import BaseModel, Field
|
|
7
6
|
import statsmodels.formula.api as fm
|
|
8
7
|
from patsy import LookupFactor, ModelDesc, Term # pylint: disable=no-name-in-module
|
|
9
8
|
from statsmodels.sandbox.regression.predstd import wls_prediction_std
|
|
10
9
|
|
|
11
10
|
from openenergyid.enums import Granularity
|
|
12
11
|
|
|
13
|
-
from .helpers import resample_input_data
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
class ValidationParameters(BaseModel):
|
|
17
|
-
"""Parameters for validation of a multivariable linear regression model."""
|
|
18
|
-
|
|
19
|
-
rsquared: float = Field(
|
|
20
|
-
0.75, ge=0, le=1, description="Minimum acceptable value for the adjusted R-squared"
|
|
21
|
-
)
|
|
22
|
-
f_pvalue: float = Field(
|
|
23
|
-
0.05, ge=0, le=1, description="Maximum acceptable value for the F-statistic"
|
|
24
|
-
)
|
|
25
|
-
pvalues: float = Field(
|
|
26
|
-
0.05, ge=0, le=1, description="Maximum acceptable value for the p-values of the t-statistic"
|
|
27
|
-
)
|
|
28
|
-
|
|
29
12
|
|
|
30
13
|
class MultiVariableLinearRegression:
|
|
31
14
|
"""Multi-variable linear regression.
|
|
@@ -56,8 +39,8 @@ class MultiVariableLinearRegression:
|
|
|
56
39
|
confint: float = 0.95,
|
|
57
40
|
cross_validation: bool = False,
|
|
58
41
|
allow_negative_predictions: bool = False,
|
|
59
|
-
validation_params: ValidationParameters = None,
|
|
60
42
|
granularity: Granularity = None,
|
|
43
|
+
single_use_exog_prefixes: list[str] = None,
|
|
61
44
|
):
|
|
62
45
|
"""Parameters
|
|
63
46
|
----------
|
|
@@ -80,8 +63,15 @@ class MultiVariableLinearRegression:
|
|
|
80
63
|
If True, allow predictions to be negative.
|
|
81
64
|
For gas consumption or PV production, this is not physical
|
|
82
65
|
so allow_negative_predictions should be False
|
|
83
|
-
|
|
84
|
-
|
|
66
|
+
granularity : Granularity, default=None
|
|
67
|
+
Granularity of the data. Is only used for the output of the model.
|
|
68
|
+
If None, the granularity is not set.
|
|
69
|
+
single_use_exog_prefixes : list of str, default=None
|
|
70
|
+
List of variable prefixes that indicate a variable type that should only be used once.
|
|
71
|
+
For example, if the list contains "HDD", only one of the columns "HDD1", "HDD2", "HDD3" etc.
|
|
72
|
+
will be used as an independent variable.
|
|
73
|
+
Once the best fit using a variable with a given prefix is found, the other variables with the same
|
|
74
|
+
prefix will not be used as independent variables.
|
|
85
75
|
"""
|
|
86
76
|
self.data = data.copy()
|
|
87
77
|
if y not in self.data.columns:
|
|
@@ -95,8 +85,8 @@ class MultiVariableLinearRegression:
|
|
|
95
85
|
self.confint = confint
|
|
96
86
|
self.cross_validation = cross_validation
|
|
97
87
|
self.allow_negative_predictions = allow_negative_predictions
|
|
98
|
-
self.validation_params = validation_params or ValidationParameters()
|
|
99
88
|
self.granularity = granularity
|
|
89
|
+
self.single_use_exog_prefixes = single_use_exog_prefixes
|
|
100
90
|
self._fit = None
|
|
101
91
|
self._list_of_fits = []
|
|
102
92
|
self.list_of_cverrors = []
|
|
@@ -187,6 +177,18 @@ class MultiVariableLinearRegression:
|
|
|
187
177
|
else:
|
|
188
178
|
self._list_of_fits.append(best_fit)
|
|
189
179
|
all_model_terms_dict.pop(best_x)
|
|
180
|
+
|
|
181
|
+
# Check if `best_x` starts with a prefix that should only be used once
|
|
182
|
+
# If so, remove all other variables with the same prefix from the list of candidates
|
|
183
|
+
if self.single_use_exog_prefixes:
|
|
184
|
+
for prefix in self.single_use_exog_prefixes:
|
|
185
|
+
if best_x.startswith(prefix):
|
|
186
|
+
all_model_terms_dict = {
|
|
187
|
+
k: v
|
|
188
|
+
for k, v in all_model_terms_dict.items()
|
|
189
|
+
if not k.startswith(prefix)
|
|
190
|
+
}
|
|
191
|
+
|
|
190
192
|
self._fit = self._list_of_fits[-1]
|
|
191
193
|
|
|
192
194
|
def _do_analysis_cross_validation(self):
|
|
@@ -258,6 +260,17 @@ class MultiVariableLinearRegression:
|
|
|
258
260
|
# next iteration with the found exog removed
|
|
259
261
|
all_model_terms_dict.pop(best_x)
|
|
260
262
|
|
|
263
|
+
# Check if `best_x` starts with a prefix that should only be used once
|
|
264
|
+
# If so, remove all other variables with the same prefix from the list of candidates
|
|
265
|
+
if self.single_use_exog_prefixes:
|
|
266
|
+
for prefix in self.single_use_exog_prefixes:
|
|
267
|
+
if best_x.startswith(prefix):
|
|
268
|
+
all_model_terms_dict = {
|
|
269
|
+
k: v
|
|
270
|
+
for k, v in all_model_terms_dict.items()
|
|
271
|
+
if not k.startswith(prefix)
|
|
272
|
+
}
|
|
273
|
+
|
|
261
274
|
self._fit = self._list_of_fits[-1]
|
|
262
275
|
|
|
263
276
|
def _prune(self, fit: fm.ols, p_max: float) -> fm.ols:
|
|
@@ -299,7 +312,7 @@ class MultiVariableLinearRegression:
|
|
|
299
312
|
pars_to_prune = fit.pvalues.where(fit.pvalues > p_max).dropna().index.tolist()
|
|
300
313
|
try:
|
|
301
314
|
pars_to_prune.remove("Intercept")
|
|
302
|
-
except
|
|
315
|
+
except ValueError:
|
|
303
316
|
pass
|
|
304
317
|
while pars_to_prune:
|
|
305
318
|
corrected_model_desc = remove_from_model_desc(
|
|
@@ -310,7 +323,7 @@ class MultiVariableLinearRegression:
|
|
|
310
323
|
pars_to_prune = fit.pvalues.where(fit.pvalues > p_max).dropna().index.tolist()
|
|
311
324
|
try:
|
|
312
325
|
pars_to_prune.remove("Intercept")
|
|
313
|
-
except
|
|
326
|
+
except ValueError:
|
|
314
327
|
pass
|
|
315
328
|
return fit
|
|
316
329
|
|
|
@@ -400,40 +413,25 @@ class MultiVariableLinearRegression:
|
|
|
400
413
|
"""
|
|
401
414
|
self.data = self._predict(fit=self.fit, data=self.data)
|
|
402
415
|
|
|
403
|
-
|
|
404
|
-
|
|
416
|
+
def validate(
|
|
417
|
+
self, min_rsquared: float = 0.75, max_f_pvalue: float = 0.05, max_pvalues: float = 0.05
|
|
418
|
+
) -> bool:
|
|
405
419
|
"""Checks if the model is valid.
|
|
406
420
|
|
|
407
421
|
Returns
|
|
408
422
|
-------
|
|
409
423
|
bool: True if the model is valid, False otherwise.
|
|
410
424
|
"""
|
|
411
|
-
if self.fit.rsquared_adj < self.validation_params.rsquared:
|
|
425
|
+
if self.fit.rsquared_adj < min_rsquared:
|
|
412
426
|
return False
|
|
413
427
|
|
|
414
|
-
if self.fit.f_pvalue > self.validation_params.f_pvalue:
|
|
428
|
+
if self.fit.f_pvalue > max_f_pvalue:
|
|
415
429
|
return False
|
|
416
430
|
|
|
417
431
|
param_keys = self.fit.pvalues.keys().tolist()
|
|
418
432
|
param_keys.remove("Intercept")
|
|
419
433
|
for k in param_keys:
|
|
420
|
-
if self.fit.pvalues[k] > self.validation_params.pvalues:
|
|
434
|
+
if self.fit.pvalues[k] > max_pvalues:
|
|
421
435
|
return False
|
|
422
436
|
|
|
423
437
|
return True
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
def find_best_mvlr(
|
|
427
|
-
data: pd.DataFrame,
|
|
428
|
-
y: str,
|
|
429
|
-
granularities: list[Granularity],
|
|
430
|
-
**kwargs,
|
|
431
|
-
) -> MultiVariableLinearRegression:
|
|
432
|
-
"""Cycle through multiple granularities and return the best model."""
|
|
433
|
-
for granularity in granularities:
|
|
434
|
-
data = resample_input_data(data=data, granularity=granularity)
|
|
435
|
-
mvlr = MultiVariableLinearRegression(data=data, y=y, granularity=granularity, **kwargs)
|
|
436
|
-
mvlr.do_analysis()
|
|
437
|
-
if mvlr.is_valid:
|
|
438
|
-
return mvlr
|
|
439
|
-
raise ValueError("No valid model found.")
|
openenergyid-0.1.8.dist-info/RECORD
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
openenergyid/__init__.py,sha256=SddUHMaNL5tWsrK3W-8JyGXmxdYaeyxBhXqd1TsBChc,160
|
|
2
|
+
openenergyid/const.py,sha256=bF-U-r0Qj2GWCYBBxReg8fbv2D0V1JzfPMwSEQ5ZWds,569
|
|
3
|
+
openenergyid/enums.py,sha256=jdw4CB1gkisx0re_SesrTEyh_T-UxYp6uieE7iYlHdA,357
|
|
4
|
+
openenergyid/models.py,sha256=pUJpQCodph0NukiIpFdc9X6Zj6qEGQPSWoztYDwqyuE,2214
|
|
5
|
+
openenergyid/mvlr/__init__.py,sha256=Glrc218oqa8tq_Y2G9LXaSoN4Yba-vsjXUi9r9iPzaY,471
|
|
6
|
+
openenergyid/mvlr/helpers.py,sha256=fsx-gSvBdU31BjncFkRd1RySmSPPYgwflCnmSFzox2Q,961
|
|
7
|
+
openenergyid/mvlr/main.py,sha256=dwkl71u8HnlMAq-cmkwvI7z-XtlmqpvZRFoDc9CN-gg,1210
|
|
8
|
+
openenergyid/mvlr/models.py,sha256=ncQ0W0LLCP7IZ4rDgLwIPZRQpK4-xC-qA17BW9tMwio,7878
|
|
9
|
+
openenergyid/mvlr/mvlr.py,sha256=UbMuoWdepnGd1_heVtFOnLoBxVUB7WrPRLyOaDELxlI,18030
|
|
10
|
+
openenergyid-0.1.8.dist-info/METADATA,sha256=7S_S8PFQ8VtflEhBFwCCxVt46Nkqzyh_UL8T89PWXm4,2431
|
|
11
|
+
openenergyid-0.1.8.dist-info/WHEEL,sha256=ccEkY-EGGllEs7ySpwBlD8G4u70wR77CNej8Q6tzIqA,105
|
|
12
|
+
openenergyid-0.1.8.dist-info/licenses/LICENSE,sha256=NgRdcNHwyXVCXZ8sJwoTp0DCowThJ9LWWl4xhbV1IUY,1074
|
|
13
|
+
openenergyid-0.1.8.dist-info/RECORD,,
|
openenergyid-0.1.6.dist-info/RECORD
REMOVED
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
openenergyid/__init__.py,sha256=Y0iURqfHv7UMooQ4KmINfopw-53Qf0TwSUBM4CONpcY,160
|
|
2
|
-
openenergyid/enums.py,sha256=jdw4CB1gkisx0re_SesrTEyh_T-UxYp6uieE7iYlHdA,357
|
|
3
|
-
openenergyid/models.py,sha256=w6YJHi1fysmLZYEI6peTfQAbMS92Kf5sk0VtXw7HrAM,813
|
|
4
|
-
openenergyid/mvlr/__init__.py,sha256=PzHuv0_uBTiAmzNrZKVObO5pxOYGJ2GFZCzK5Y82bGU,378
|
|
5
|
-
openenergyid/mvlr/helpers.py,sha256=fsx-gSvBdU31BjncFkRd1RySmSPPYgwflCnmSFzox2Q,961
|
|
6
|
-
openenergyid/mvlr/models.py,sha256=1yLcpVtA7ruPOmwSLY-Tg9Nd2lTbYH-nN3cb_CWXegs,3382
|
|
7
|
-
openenergyid/mvlr/mvlr.py,sha256=mHdn_dh6GWxc79TWBhj61sp20PlACn6AfJX16dRmR6E,17407
|
|
8
|
-
openenergyid-0.1.6.dist-info/METADATA,sha256=lx7sXCt-TrVC43NQXFpZr_2dr1avmh9R3CLV8uxn7x4,2431
|
|
9
|
-
openenergyid-0.1.6.dist-info/WHEEL,sha256=fagL_Tj29mg80flwlxJNW45nBDbboxF04Tnbc_jt3Bg,105
|
|
10
|
-
openenergyid-0.1.6.dist-info/licenses/LICENSE,sha256=NgRdcNHwyXVCXZ8sJwoTp0DCowThJ9LWWl4xhbV1IUY,1074
|
|
11
|
-
openenergyid-0.1.6.dist-info/RECORD,,
|
|
File without changes
|