openenergyid 0.1.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of openenergyid might be problematic. Click here for more details.
- openenergyid/__init__.py +8 -0
- openenergyid/baseload/__init__.py +15 -0
- openenergyid/baseload/analysis.py +173 -0
- openenergyid/baseload/exceptions.py +9 -0
- openenergyid/baseload/models.py +31 -0
- openenergyid/capacity/__init__.py +6 -0
- openenergyid/capacity/main.py +102 -0
- openenergyid/capacity/models.py +30 -0
- openenergyid/const.py +18 -0
- openenergyid/dyntar/__init__.py +20 -0
- openenergyid/dyntar/const.py +31 -0
- openenergyid/dyntar/main.py +312 -0
- openenergyid/dyntar/models.py +110 -0
- openenergyid/energysharing/__init__.py +12 -0
- openenergyid/energysharing/const.py +8 -0
- openenergyid/energysharing/data_formatting.py +69 -0
- openenergyid/energysharing/main.py +111 -0
- openenergyid/energysharing/models.py +79 -0
- openenergyid/enums.py +16 -0
- openenergyid/models.py +164 -0
- openenergyid/mvlr/__init__.py +19 -0
- openenergyid/mvlr/helpers.py +30 -0
- openenergyid/mvlr/main.py +34 -0
- openenergyid/mvlr/models.py +228 -0
- openenergyid/mvlr/mvlr.py +450 -0
- openenergyid-0.1.21.dist-info/METADATA +50 -0
- openenergyid-0.1.21.dist-info/RECORD +29 -0
- openenergyid-0.1.21.dist-info/WHEEL +5 -0
- openenergyid-0.1.21.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"""Data models for energy sharing."""
|
|
2
|
+
|
|
3
|
+
from enum import Enum
|
|
4
|
+
from typing import Annotated, Any
|
|
5
|
+
|
|
6
|
+
from pydantic import BaseModel, Field, confloat
|
|
7
|
+
import pandas as pd
|
|
8
|
+
|
|
9
|
+
from openenergyid import TimeDataFrame
|
|
10
|
+
from .data_formatting import create_multi_index_input_frame
|
|
11
|
+
from .const import NET_INJECTION, NET_OFFTAKE, SHARED_ENERGY
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class CalculationMethod(Enum):
    """Enumerate the calculation methods available for energy sharing.

    Each member's value is the capitalised name of the method.
    """

    FIXED = "Fixed"
    RELATIVE = "Relative"
    OPTIMAL = "Optimal"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class KeyInput(TimeDataFrame):
    """Energy sharing keys, one column per participant.

    Every value is constrained to the closed interval [0, 1], and every
    row has to add up to 1 once rounded to three decimals.
    """

    data: Annotated[
        list[list[confloat(ge=0.0, le=1.0)]],  # type: ignore
        Field(
            description="Key data, column per participant. "
            "Must be between 0 and 1. "
            "Each row must sum to 1."
        ),
    ]

    def model_post_init(self, __context: Any) -> None:
        """Validate the row totals after the model has been initialised.

        Raises:
            ValueError: if any row does not sum to 1 (rounded to 3 decimals).
        """
        # Row totals are compared after rounding to three decimals.
        if any(round(sum(shares), 3) != 1.0 for shares in self.data):
            raise ValueError("Each row must sum to 1.")
        return super().model_post_init(__context)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class EnergySharingInput(BaseModel):
    """Gross injection, gross offtake and sharing keys for a calculation."""

    gross_injection: Annotated[
        TimeDataFrame,
        Field(alias="grossInjection", description="Gross injection data, column per participant"),
    ]
    gross_offtake: Annotated[
        TimeDataFrame,
        Field(alias="grossOfftake", description="Gross offtake data, column per participant"),
    ]
    key: KeyInput

    def to_pandas(self) -> pd.DataFrame:
        """Merge the three inputs into one DataFrame.

        Each member is converted with its own ``to_pandas`` and the results
        are passed on to ``create_multi_index_input_frame``.
        """
        injection = self.gross_injection.to_pandas()
        offtake = self.gross_offtake.to_pandas()
        keys = self.key.to_pandas()
        return create_multi_index_input_frame(
            gross_injection=injection,
            gross_offtake=offtake,
            key=keys,
        )
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class EnergySharingOutput(BaseModel):
    """Net injection, net offtake and shared energy produced by a calculation."""

    net_injection: TimeDataFrame = Field(alias="netInjection")
    net_offtake: TimeDataFrame = Field(alias="netOfftake")
    shared_energy: TimeDataFrame = Field(alias="sharedEnergy")

    @classmethod
    def from_calculation_result(cls, result: pd.DataFrame) -> "EnergySharingOutput":
        """Build the output model from a calculation result frame.

        The ``NET_INJECTION``, ``NET_OFFTAKE`` and ``SHARED_ENERGY`` entries
        of ``result`` are each converted with ``TimeDataFrame.from_pandas``.
        The model itself is assembled through ``model_construct`` rather than
        the regular constructor.
        """
        net_injection = TimeDataFrame.from_pandas(result[NET_INJECTION])
        net_offtake = TimeDataFrame.from_pandas(result[NET_OFFTAKE])
        shared_energy = TimeDataFrame.from_pandas(result[SHARED_ENERGY])
        return cls.model_construct(
            net_injection=net_injection,
            net_offtake=net_offtake,
            shared_energy=shared_energy,
        )
|
openenergyid/enums.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""Static enums for Open Energy ID."""
|
|
2
|
+
|
|
3
|
+
from enum import Enum
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Granularity(Enum):
    """Granularity of a time series.

    Every member's value is identical to its name.
    """

    # 1 year
    P1Y = "P1Y"
    # 1 month
    P1M = "P1M"
    # 7 days
    P7D = "P7D"
    # 1 day
    P1D = "P1D"
    # 1 hour
    PT1H = "PT1H"
    # 15 minutes
    PT15M = "PT15M"
    # 5 minutes
    PT5M = "PT5M"
    # 1 minute
    PT1M = "PT1M"
|
openenergyid/models.py
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
"""Data models for the Open Energy ID."""
|
|
2
|
+
|
|
3
|
+
import datetime as dt
|
|
4
|
+
from typing import overload
|
|
5
|
+
|
|
6
|
+
from typing import Self
|
|
7
|
+
|
|
8
|
+
import pandas as pd
|
|
9
|
+
from pydantic import BaseModel
|
|
10
|
+
import polars as pl
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class TimeSeriesBase(BaseModel):
    """Common pydantic base for time-indexed models.

    Stores the datetime index and offers JSON dump/load helpers. The pandas
    conversions are placeholders that subclasses are expected to override.
    """

    index: list[dt.datetime]

    @classmethod
    def from_pandas(cls, data: pd.Series | pd.DataFrame) -> Self:
        """Build an instance from a pandas object (not implemented here)."""
        raise NotImplementedError

    def to_pandas(self, timezone: str = "UTC") -> pd.Series | pd.DataFrame:
        """Return the data as a pandas object (not implemented here)."""
        raise NotImplementedError

    @overload
    def to_json(self, path: None = None, **kwargs) -> str:
        """Dump to a JSON string."""

    @overload
    def to_json(self, path: str, **kwargs) -> None:
        """Dump to a JSON file."""

    def to_json(self, path: str | None = None, **kwargs) -> str | None:
        """Serialise the model to JSON.

        Without ``path`` the JSON string is returned. With ``path`` the JSON
        is written to that file and None is returned; an ``encoding`` keyword
        (default "UTF-8") is then consumed for opening the file. All remaining
        keyword arguments are forwarded to ``model_dump_json``.
        """
        if path is not None:
            file_encoding = kwargs.pop("encoding", "UTF-8")
            with open(path, "w", encoding=file_encoding) as handle:
                handle.write(self.model_dump_json(**kwargs))
            return None
        return self.model_dump_json(**kwargs)

    @overload
    @classmethod
    def from_json(cls, string: str, **kwargs) -> Self:
        """Load from a JSON string."""

    @overload
    @classmethod
    def from_json(cls, *, path: str, **kwargs) -> Self:
        """Load from a JSON file."""

    @classmethod
    def from_json(cls, string: str | None = None, path: str | None = None, **kwargs) -> Self:
        """Deserialise the model from a JSON string or a JSON file.

        A truthy ``string`` takes precedence over ``path``. When reading from
        ``path``, an ``encoding`` keyword (default "UTF-8") is consumed for
        opening the file. Remaining keyword arguments are forwarded to
        ``model_validate_json``.

        Raises:
            ValueError: if neither ``string`` nor ``path`` is truthy.
        """
        if string:
            return cls.model_validate_json(string, **kwargs)
        if not path:
            raise ValueError("Either string or path must be provided.")
        file_encoding = kwargs.pop("encoding", "UTF-8")
        with open(path, encoding=file_encoding) as handle:
            return cls.model_validate_json(handle.read(), **kwargs)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class TimeSeries(TimeSeriesBase):
    """
    Represents a single, optionally named, time series.

    Attributes:
        name (str | None): The name of the time series.
        data (list[float | None]): The data points of the time series;
            missing values are stored as None.

    Methods:
        from_pandas(cls, data: pd.Series) -> Self:
            Create a TimeSeries object from a Pandas Series.
        to_pandas(self, timezone: str = "UTC") -> pd.Series:
            Convert the TimeSeries object to a Pandas Series.
        from_polars(cls, data: pl.DataFrame | pl.LazyFrame) -> Self:
            Create a TimeSeries object from Polars data.
        to_polars(self, timezone: str = "UTC") -> pl.LazyFrame:
            Convert the TimeSeries object to a Polars LazyFrame.
    """

    name: str | None = None
    data: list[float | None]

    @classmethod
    def from_pandas(cls, data: pd.Series) -> Self:
        """Create from a Pandas Series.

        An unnamed Series keeps ``name=None`` instead of becoming the literal
        string "None", and missing values (NaN/NA) are stored as None, in
        line with ``TimeDataFrame.from_pandas``.
        """
        series_name = None if data.name is None else str(data.name)
        values = [value if pd.notnull(value) else None for value in data.tolist()]
        return cls(name=series_name, data=values, index=data.index.tolist())

    def to_pandas(self, timezone: str = "UTC") -> pd.Series:
        """Convert to a Pandas Series.

        The index is parsed as UTC datetimes and then converted to
        ``timezone``.
        """
        series = pd.Series(self.data, name=self.name, index=self.index)
        series.index = pd.to_datetime(series.index, utc=True)
        return series.tz_convert(timezone)

    @classmethod
    def from_polars(cls, data: pl.DataFrame | pl.LazyFrame) -> Self:
        """Create from Polars data.

        The input must have exactly two columns. The one called "timestamp"
        becomes the index (converted to UTC); the other one provides the name
        and the values (cast to Float64).

        Raises:
            ValueError: if the input does not have exactly two columns.
        """
        # Always work with DataFrame
        df = data.collect() if isinstance(data, pl.LazyFrame) else data

        if len(df.columns) != 2:
            raise ValueError("Must contain exactly two columns: timestamp and value")

        value_col = [col for col in df.columns if col != "timestamp"][0]
        return cls(
            name=value_col,
            data=df[value_col].cast(pl.Float64).to_list(),  # Ensure float type
            index=df["timestamp"].cast(pl.Datetime).dt.convert_time_zone("UTC").to_list(),
        )

    def to_polars(self, timezone: str = "UTC") -> pl.LazyFrame:
        """Convert to Polars LazyFrame.

        The frame has a "timestamp" column converted to ``timezone`` and one
        value column named after the series ("total" when the name is None).
        """
        # Always return LazyFrame as specified in return type
        df = pl.DataFrame(
            {
                "timestamp": pl.Series(self.index, dtype=pl.Datetime).dt.convert_time_zone(
                    timezone
                ),
                "total" if self.name is None else self.name: pl.Series(self.data, dtype=pl.Float64),
            }
        )
        return df.lazy()
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class TimeDataFrame(TimeSeriesBase):
    """Time series data with multiple columns.

    ``data`` is row-major: one inner list per entry of ``index``, each holding
    one value per entry of ``columns``.
    """

    columns: list[str]
    data: list[list[float | None]]

    @classmethod
    def from_pandas(cls, data: pd.DataFrame) -> Self:
        """Create from a Pandas DataFrame.

        Values are cast to float; null values become None.
        """
        # Cast values to float | None
        values = [
            [float(x) if pd.notnull(x) else None for x in row] for row in data.values.tolist()
        ]
        return cls(columns=data.columns.tolist(), data=values, index=data.index.tolist())

    def to_pandas(self, timezone: str = "UTC") -> pd.DataFrame:
        """Convert to a Pandas DataFrame.

        The index is parsed as UTC datetimes and then converted to
        ``timezone``.
        """
        frame = pd.DataFrame(self.data, columns=self.columns, index=self.index)
        frame.index = pd.to_datetime(frame.index, utc=True)
        return frame.tz_convert(timezone)

    @classmethod
    def from_timeseries(cls, data: list[TimeSeries]) -> Self:
        """Create from a list of TimeSeries objects.

        Each series becomes one column (a None name becomes ""). The index of
        the first series is used for the whole frame.

        Raises:
            IndexError: if ``data`` is empty.
            ValueError: if the series do not all have the same length.
        """
        # Each series holds one column of values, while ``self.data`` is
        # row-major (see to_pandas / to_timeseries), so transpose the columns
        # into rows. ``strict=True`` rejects series of unequal length instead
        # of silently truncating to the shortest one.
        rows = [list(row) for row in zip(*(series.data for series in data), strict=True)]
        return cls(
            columns=[series.name or "" for series in data],  # Handle None names
            data=rows,
            index=data[0].index,
        )

    def to_timeseries(self) -> list[TimeSeries]:
        """Convert to a list of TimeSeries objects, one per column."""
        return [
            TimeSeries(name=col, data=[row[i] for row in self.data], index=self.index)
            for i, col in enumerate(self.columns)
        ]
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""Multi-variable linear regression (MVLR) module."""
|
|
2
|
+
|
|
3
|
+
from .main import find_best_mvlr
|
|
4
|
+
from .models import (
|
|
5
|
+
IndependentVariableInput,
|
|
6
|
+
MultiVariableRegressionInput,
|
|
7
|
+
MultiVariableRegressionResult,
|
|
8
|
+
ValidationParameters,
|
|
9
|
+
IndependentVariableResult,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"find_best_mvlr",
|
|
14
|
+
"IndependentVariableInput",
|
|
15
|
+
"MultiVariableRegressionInput",
|
|
16
|
+
"MultiVariableRegressionResult",
|
|
17
|
+
"ValidationParameters",
|
|
18
|
+
"IndependentVariableResult",
|
|
19
|
+
]
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""Miscellaneous helper functions for the MVLR app."""
|
|
2
|
+
|
|
3
|
+
import pandas as pd
|
|
4
|
+
|
|
5
|
+
from openenergyid.enums import Granularity
|
|
6
|
+
|
|
7
|
+
# Pandas resample rule for each supported granularity; resample_input_data
# raises NotImplementedError for granularities that are not listed here.
pandas_granularity_map = {Granularity.P7D: "W-MON", Granularity.P1M: "MS", Granularity.P1D: "D"}
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def resample_input_data(
    data: pd.DataFrame,
    granularity: Granularity,
    aggregation_methods: dict = None,
) -> pd.DataFrame:
    """Resample ``data`` to the requested granularity.

    Columns are summed unless ``aggregation_methods`` maps the column name to
    another aggregation. The caller's dictionary is never modified.

    Raises:
        NotImplementedError: if ``granularity`` has no entry in
            ``pandas_granularity_map``.
    """
    if granularity not in pandas_granularity_map:
        raise NotImplementedError("Granularity not implemented.")

    # Work on a private copy so caller-supplied entries keep their position
    # and the argument stays untouched.
    methods = aggregation_methods.copy() if aggregation_methods else {}
    for column in data.columns:
        methods.setdefault(column, "sum")

    rule = pandas_granularity_map[granularity]
    return data.resample(rule=rule).agg(methods)
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""Main module for the MultiVariableLinearRegression class."""
|
|
2
|
+
|
|
3
|
+
from .models import MultiVariableRegressionInput, MultiVariableRegressionResult
|
|
4
|
+
from .helpers import resample_input_data
|
|
5
|
+
from .mvlr import MultiVariableLinearRegression
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def find_best_mvlr(
    data: MultiVariableRegressionInput,
) -> MultiVariableRegressionResult:
    """Try each requested granularity and return the first model that validates.

    Granularities are tried in the order of ``data.granularities``. For each
    one the input frame is resampled, a regression is fitted and the result is
    checked against ``data.validation_parameters``. The highest adjusted R²
    among the rejected models is tracked for the error message only.

    Raises:
        ValueError: if none of the granularities yields a valid model.
    """
    top_rsquared = 0
    for step in data.granularities:
        resampled = resample_input_data(data=data.data_frame(), granularity=step)
        model = MultiVariableLinearRegression(
            data=resampled,
            y=data.dependent_variable,
            granularity=step,
            allow_negative_predictions=data.allow_negative_predictions,
            single_use_exog_prefixes=data.single_use_exog_prefixes or [],
            exogs__disallow_negative_coefficient=data.get_disallowed_negative_coefficients(),
        )
        model.do_analysis()
        accepted = model.validate(
            min_rsquared=data.validation_parameters.rsquared,
            max_f_pvalue=data.validation_parameters.f_pvalue,
            max_pvalues=data.validation_parameters.pvalues,
        )
        if accepted:
            return MultiVariableRegressionResult.from_mvlr(model)
        top_rsquared = max(top_rsquared, model.fit.rsquared_adj)

    message = (
        f"No valid model found. Best R²: {top_rsquared:.3f} (need ≥{data.validation_parameters.rsquared})"
    )
    raise ValueError(message)
|
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
"""Models for multivariable linear regression."""
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
import pandas as pd
|
|
5
|
+
|
|
6
|
+
from pydantic import BaseModel, Field, ConfigDict
|
|
7
|
+
import statsmodels.formula.api as fm
|
|
8
|
+
|
|
9
|
+
from openenergyid.enums import Granularity
|
|
10
|
+
from openenergyid.models import TimeDataFrame
|
|
11
|
+
|
|
12
|
+
from .mvlr import MultiVariableLinearRegression
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# Name of the independent variable / frame column that is unpacked into
# degree-day variant columns by MultiVariableRegressionInput.data_frame.
COLUMN_TEMPERATUREEQUIVALENT = "temperatureEquivalent"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
######################
|
|
19
|
+
# MVLR Input Models #
|
|
20
|
+
######################
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class ValidationParameters(BaseModel):
    """Thresholds used to validate a multivariable linear regression model.

    All three values are constrained to the interval [0, 1].
    """

    rsquared: float = Field(
        default=0.75,
        ge=0,
        le=1,
        description="Minimum acceptable value for the adjusted R-squared",
    )
    f_pvalue: float = Field(
        default=0.05,
        ge=0,
        le=1,
        description="Maximum acceptable value for the F-statistic",
    )
    pvalues: float = Field(
        default=0.05,
        ge=0,
        le=1,
        description="Maximum acceptable value for the p-values of the t-statistic",
    )
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class IndependentVariableInput(BaseModel):
    """
    Description of one independent variable.

    The name has to correspond to a column in the data frame.
    """

    name: str = Field(
        description=(
            "Name of the independent variable. "
            "If the name is `temperatureEquivalent`, "
            "it will be unpacked into columns according to the variants."
        )
    )
    variants: list[str] | None = Field(
        default=None,
        description=(
            "Variants of the `temperatureEquivalent` independent variable. "
            "Eg. `HDD_16.5` will be Heating Degree Days with a base temperature of 16.5°C, "
            "`CDD_0` will be Cooling Degree Days with a base temperature of 0°C."
        ),
    )
    allow_negative_coefficient: bool = Field(
        default=True,
        alias="allowNegativeCoefficient",
        description="Whether the coefficient can be negative.",
    )
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class MultiVariableRegressionInput(BaseModel):
    """Input for a multi-variable regression.

    Combines the raw time frame with the names of the dependent and
    independent variables, the granularities to try and the validation
    thresholds.
    """

    timezone: str = Field(alias="timeZone")
    independent_variables: list[IndependentVariableInput] = Field(
        alias="independentVariables", min_length=1
    )
    dependent_variable: str = Field(alias="dependentVariable")
    frame: TimeDataFrame
    granularities: list[Granularity]
    allow_negative_predictions: bool = Field(alias="allowNegativePredictions", default=False)
    validation_parameters: ValidationParameters = Field(
        alias="validationParameters", default=ValidationParameters()
    )
    single_use_exog_prefixes: list[str] | None = Field(
        # default=["HDD", "CDD", "FDD"],
        default=None,
        alias="singleUseExogPrefixes",
        description="List of prefixes to be used as single-use exogenous variables.",
    )

    def model_post_init(self, __context: Any) -> None:
        """Verify that every independent variable names a column of the frame.

        Raises:
            ValueError: for the first independent variable that is missing.
        """
        for iv in self.independent_variables:  # pylint: disable=not-an-iterable
            if iv.name in self.frame.columns:
                continue
            raise ValueError(f"Independent variable {iv.name} not found in the data frame.")

        return super().model_post_init(__context)

    def _data_frame(self) -> pd.DataFrame:
        """Return the raw frame as a pandas DataFrame in the model's timezone."""
        return self.frame.to_pandas(timezone=self.timezone)

    def data_frame(self) -> pd.DataFrame:
        """
        Return the data frame ready for analysis.

        Only the dependent variable and the (unpacked) independent variables
        are retained.

        An independent variable named `temperatureEquivalent` that has
        variants is replaced by one column per variant. A variant is written
        as "<prefix>_<base temperature>": prefix "CDD" yields the temperature
        minus the base temperature, any other prefix yields the base
        temperature minus the temperature; negative results are clipped to 0.
        Eg. "HDD_16.5" gives Heating Degree Days with a base temperature of
        16.5°C, "CDD_0" gives Cooling Degree Days with a base temperature
        of 0°C.
        """
        frame = self._data_frame()
        selected = [self.dependent_variable]
        for iv in self.independent_variables:  # pylint: disable=not-an-iterable
            if iv.name != COLUMN_TEMPERATUREEQUIVALENT or iv.variants is None:
                selected.append(iv.name)
                continue

            for variant in iv.variants:
                prefix, base_temperature = variant.split("_")
                if prefix == "CDD":
                    degree_days = frame[COLUMN_TEMPERATUREEQUIVALENT] - float(base_temperature)
                else:
                    degree_days = float(base_temperature) - frame[COLUMN_TEMPERATUREEQUIVALENT]
                frame[variant] = degree_days.clip(lower=0)
                selected.append(variant)
            # The source column has been replaced by its variants.
            frame.drop(columns=[COLUMN_TEMPERATUREEQUIVALENT], inplace=True)

        return frame[selected].copy()

    def get_disallowed_negative_coefficients(self) -> list[str]:
        """List the column names whose coefficient may not be negative.

        For `temperatureEquivalent` with variants the variant names are
        reported; otherwise the variable's own name is reported.
        """
        blocked = []
        for iv in self.independent_variables:  # pylint: disable=not-an-iterable
            if iv.allow_negative_coefficient:
                continue
            if iv.name == COLUMN_TEMPERATUREEQUIVALENT and iv.variants is not None:
                blocked.extend(iv.variants)
            else:
                blocked.append(iv.name)
        return blocked
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
######################
|
|
145
|
+
# MVLR Result Models #
|
|
146
|
+
######################
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
class ConfidenceInterval(BaseModel):
    """Lower and upper bound of a coefficient at a given confidence level."""

    # Confidence level, constrained to [0, 1].
    confidence: float = Field(ge=0, le=1)
    # Bounds of the interval.
    lower: float
    upper: float
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
class IndependentVariableResult(BaseModel):
    """Fitted statistics of one term of a multivariable linear regression model."""

    name: str
    coef: float
    t_stat: float | None = Field(default=None, alias="tStat")
    p_value: float | None = Field(ge=0, le=1, default=None, alias="pValue")
    std_err: float | None = Field(default=None, alias="stdErr")
    confidence_interval: ConfidenceInterval | None = Field(default=None, alias="confidenceInterval")

    model_config = ConfigDict(populate_by_name=True)

    @classmethod
    def from_fit(cls, fit: fm.ols, name: str) -> "IndependentVariableResult":
        """Create an IndependentVariableResult for the term ``name`` of a fit.

        Reads the coefficient, t-value, p-value and standard error of the term
        from ``fit`` and reports the bounds returned by ``fit.conf_int()``
        with a confidence of 0.95.
        """
        # Compute the confidence intervals once and pick the row of this term,
        # instead of recomputing and transposing the whole table per bound.
        bounds = fit.conf_int().loc[name]
        return cls(
            name=name,
            coef=fit.params[name],
            t_stat=fit.tvalues[name],
            p_value=fit.pvalues[name],
            std_err=fit.bse[name],
            confidence_interval=ConfidenceInterval(
                confidence=0.95,
                lower=bounds[0],
                upper=bounds[1],
            ),
        )
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
class MultiVariableRegressionResult(BaseModel):
    """Outcome of a fitted multivariable regression model."""

    dependent_variable: str = Field(alias="dependentVariable")
    independent_variables: list[IndependentVariableResult] = Field(alias="independentVariables")
    r2: float = Field(ge=0, le=1, alias="rSquared")
    r2_adj: float = Field(ge=0, le=1, alias="rSquaredAdjusted")
    f_stat: float = Field(ge=0, alias="fStat")
    prob_f_stat: float = Field(ge=0, le=1, alias="probFStat")
    intercept: IndependentVariableResult
    granularity: Granularity
    frame: TimeDataFrame

    model_config = ConfigDict(populate_by_name=True)

    @classmethod
    def from_mvlr(cls, mvlr: MultiVariableLinearRegression) -> "MultiVariableRegressionResult":
        """Create a MultiVariableRegressionResult from a MultiVariableLinearRegression.

        Every fitted parameter except "Intercept" is reported as an
        independent variable. The returned frame keeps only the columns of
        ``mvlr.data`` that are fitted parameters or the dependent variable.
        """
        # Fitted terms, with the intercept handled separately below.
        param_keys = mvlr.fit.params.keys().tolist()
        param_keys.remove("Intercept")
        independent_variables = [
            IndependentVariableResult.from_fit(mvlr.fit, key) for key in param_keys
        ]

        # Reduce the data to the columns that take part in the model.
        cols_to_keep = [*param_keys, mvlr.y]
        cols_to_remove = [col for col in mvlr.data.columns.values if col not in cols_to_keep]
        frame = mvlr.data.drop(cols_to_remove, axis=1)

        return cls(
            dependent_variable=mvlr.y,
            independent_variables=independent_variables,
            r2=mvlr.fit.rsquared,
            r2_adj=mvlr.fit.rsquared_adj,
            f_stat=mvlr.fit.fvalue,
            prob_f_stat=mvlr.fit.f_pvalue,
            intercept=IndependentVariableResult.from_fit(mvlr.fit, "Intercept"),
            granularity=mvlr.granularity,
            frame=TimeDataFrame.from_pandas(frame),
        )
|