openenergyid 0.1.31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openenergyid/__init__.py +8 -0
- openenergyid/abstractsim/__init__.py +5 -0
- openenergyid/abstractsim/abstract.py +102 -0
- openenergyid/baseload/__init__.py +15 -0
- openenergyid/baseload/analysis.py +190 -0
- openenergyid/baseload/exceptions.py +9 -0
- openenergyid/baseload/models.py +32 -0
- openenergyid/capacity/__init__.py +6 -0
- openenergyid/capacity/main.py +103 -0
- openenergyid/capacity/models.py +32 -0
- openenergyid/const.py +29 -0
- openenergyid/dyntar/__init__.py +20 -0
- openenergyid/dyntar/const.py +31 -0
- openenergyid/dyntar/main.py +313 -0
- openenergyid/dyntar/models.py +101 -0
- openenergyid/elia/__init__.py +4 -0
- openenergyid/elia/api.py +91 -0
- openenergyid/elia/const.py +18 -0
- openenergyid/energysharing/__init__.py +12 -0
- openenergyid/energysharing/const.py +8 -0
- openenergyid/energysharing/data_formatting.py +77 -0
- openenergyid/energysharing/main.py +122 -0
- openenergyid/energysharing/models.py +80 -0
- openenergyid/enums.py +16 -0
- openenergyid/models.py +174 -0
- openenergyid/mvlr/__init__.py +19 -0
- openenergyid/mvlr/helpers.py +30 -0
- openenergyid/mvlr/main.py +34 -0
- openenergyid/mvlr/models.py +227 -0
- openenergyid/mvlr/mvlr.py +450 -0
- openenergyid/pvsim/__init__.py +8 -0
- openenergyid/pvsim/abstract.py +60 -0
- openenergyid/pvsim/elia/__init__.py +3 -0
- openenergyid/pvsim/elia/main.py +89 -0
- openenergyid/pvsim/main.py +49 -0
- openenergyid/pvsim/pvlib/__init__.py +11 -0
- openenergyid/pvsim/pvlib/main.py +115 -0
- openenergyid/pvsim/pvlib/models.py +235 -0
- openenergyid/pvsim/pvlib/quickscan.py +99 -0
- openenergyid/pvsim/pvlib/weather.py +91 -0
- openenergyid/sim/__init__.py +5 -0
- openenergyid/sim/main.py +67 -0
- openenergyid/simeval/__init__.py +6 -0
- openenergyid/simeval/main.py +148 -0
- openenergyid/simeval/models.py +162 -0
- openenergyid-0.1.31.dist-info/METADATA +32 -0
- openenergyid-0.1.31.dist-info/RECORD +50 -0
- openenergyid-0.1.31.dist-info/WHEEL +5 -0
- openenergyid-0.1.31.dist-info/licenses/LICENSE +21 -0
- openenergyid-0.1.31.dist-info/top_level.txt +1 -0
openenergyid/energysharing/main.py
ADDED
@@ -0,0 +1,122 @@
"""Main Calculation Module for Energy Sharing."""

import pandas as pd

from .const import (
    GROSS_INJECTION,
    GROSS_OFFTAKE,
    KEY,
    NET_INJECTION,
    NET_OFFTAKE,
    SHARED_ENERGY,
)
from .data_formatting import (
    create_multi_index_output_frame,
    result_to_input_for_reiteration,
)
from .models import CalculationMethod


def _calculate(df: pd.DataFrame, method: CalculationMethod) -> pd.DataFrame:
    """Calculate the energy sharing for the given input data. This function is not iterative."""
    # Step 1: Calculate the maximum available gross injection that can be shared.
    # A participant cannot share their injection with themselves:
    # take the injection of each participant and divide it over the others as per their key.

    injections_to_share = []
    rest = {}

    for participant in df[GROSS_INJECTION].columns:
        injection_to_share = df[GROSS_INJECTION][participant].copy()

        key = df[KEY].copy()
        if method in (CalculationMethod.RELATIVE, CalculationMethod.OPTIMAL):
            # Set the key of the current participant to 0
            # and re-normalize the keys of the other participants
            if participant in df[KEY].columns:
                key.loc[:, participant] = 0
                key = key.div(key.sum(axis=1), axis=0)

        # Multiply injection_to_share with the key of each participant
        shared_by_participant = (injection_to_share * key.T).T
        shared_by_participant.fillna(0, inplace=True)

        # Set the value for the current participant to 0
        if participant in shared_by_participant.columns:
            shared_by_participant.loc[:, participant] = 0

        # Put the injection that was not shared in the rest
        rest[participant] = injection_to_share - shared_by_participant.sum(axis=1)

        injections_to_share.append(shared_by_participant)

    # Sum the injections to share
    max_allocated_injection = sum(injections_to_share)

    # Concat the rest
    injection_that_cannot_be_shared = pd.concat(rest, axis=1)

    # Step 2: Calculate the net offtake by assigning the injections to each participant.
    # A participant cannot receive more than their offtake.

    net_offtake = df[GROSS_OFFTAKE] - max_allocated_injection

    # Sum all negative values (allocations exceeding a participant's offtake)
    not_shared_after_assignment = net_offtake.clip(upper=0).sum(axis=1).abs()

    # Clip the values to 0
    net_offtake = net_offtake.clip(lower=0)

    # The actual shared energy is the difference between gross and net offtake
    shared_energy = df[GROSS_OFFTAKE] - net_offtake

    # Step 3: Assign the rests back to the original injectors,
    # proportional to their share of the original injection.

    re_distributed_not_shared = (
        (df[GROSS_INJECTION].T / df[GROSS_INJECTION].sum(axis=1)) * not_shared_after_assignment
    ).T
    re_distributed_not_shared.fillna(0, inplace=True)

    # The net injection is the sum of:
    # the injection that cannot be shared to begin with
    # (because participants cannot share with themselves)
    # and the injection that cannot be shared after assignment
    # (because participants cannot receive more than their offtake)

    net_injection = injection_that_cannot_be_shared + re_distributed_not_shared

    result = create_multi_index_output_frame(
        net_injection=net_injection, net_offtake=net_offtake, shared_energy=shared_energy
    )

    return result


def calculate(df: pd.DataFrame, method: CalculationMethod) -> pd.DataFrame:
    """Calculate the energy sharing for the given input data.

    This function is iterative if the method is optimal."""
    result = _calculate(df, method)

    if method in [CalculationMethod.FIXED, CalculationMethod.RELATIVE]:
        return result

    # Optimal method: iterate until no shared energy remains
    final_result = result.copy()
    while not result[SHARED_ENERGY].eq(0).all().all():
        df = result_to_input_for_reiteration(result, df[KEY])
        result = _calculate(df, method)

        # Merge into the final result:
        # overwrite NET_INJECTION and NET_OFFTAKE, sum SHARED_ENERGY
        final_result[NET_INJECTION] = result[NET_INJECTION]
        final_result[NET_OFFTAKE] = result[NET_OFFTAKE]
        final_result[SHARED_ENERGY] += result[SHARED_ENERGY]

    return final_result
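
For orientation, a minimal usage sketch (illustrative only, not part of the wheel contents; it assumes calculate and CalculationMethod are re-exported by openenergyid/energysharing/__init__.py, and uses create_multi_index_input_frame from data_formatting.py as referenced in models.py below):

import pandas as pd

from openenergyid.energysharing import CalculationMethod, calculate  # assumed re-exports
from openenergyid.energysharing.const import SHARED_ENERGY
from openenergyid.energysharing.data_formatting import create_multi_index_input_frame

index = pd.date_range("2024-01-01", periods=2, freq="15min", tz="UTC")

# One sub-column per participant under each top-level group
df = create_multi_index_input_frame(
    gross_injection=pd.DataFrame({"P1": [2.0, 1.0], "P2": [0.0, 0.0]}, index=index),
    gross_offtake=pd.DataFrame({"P1": [0.0, 0.5], "P2": [1.5, 1.0]}, index=index),
    key=pd.DataFrame({"P1": [0.5, 0.5], "P2": [0.5, 0.5]}, index=index),
)

result = calculate(df, CalculationMethod.RELATIVE)
print(result[SHARED_ENERGY])  # energy each participant received from the pool
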
openenergyid/energysharing/models.py
ADDED
@@ -0,0 +1,80 @@
"""Data models for energy sharing."""

from enum import Enum
from typing import Annotated, Any

import pandas as pd
from pydantic import BaseModel, Field, confloat

from openenergyid import TimeDataFrame

from .const import NET_INJECTION, NET_OFFTAKE, SHARED_ENERGY
from .data_formatting import create_multi_index_input_frame


class CalculationMethod(Enum):
    """Calculation method for energy sharing."""

    FIXED = "Fixed"
    RELATIVE = "Relative"
    OPTIMAL = "Optimal"


class KeyInput(TimeDataFrame):
    """Energy Sharing Keys."""

    data: Annotated[
        list[list[confloat(ge=0.0, le=1.0)]],  # type: ignore
        Field(
            description="Key data, column per participant. "
            "Must be between 0 and 1. "
            "Each row must sum to 1."
        ),
    ]

    def model_post_init(self, __context: Any) -> None:
        """Post-initialization validation."""
        for row in self.data:
            if round(sum(row), 3) != 1.0:
                raise ValueError("Each row must sum to 1.")
        return super().model_post_init(__context)


class EnergySharingInput(BaseModel):
    """Input data for energy sharing."""

    gross_injection: Annotated[
        TimeDataFrame,
        Field(alias="grossInjection", description="Gross injection data, column per participant"),
    ]
    gross_offtake: Annotated[
        TimeDataFrame,
        Field(alias="grossOfftake", description="Gross offtake data, column per participant"),
    ]
    key: KeyInput

    def to_pandas(self) -> pd.DataFrame:
        """Return the data as a combined DataFrame."""
        df = create_multi_index_input_frame(
            gross_injection=self.gross_injection.to_pandas(),
            gross_offtake=self.gross_offtake.to_pandas(),
            key=self.key.to_pandas(),
        )
        return df


class EnergySharingOutput(BaseModel):
    """Output data for energy sharing."""

    net_injection: TimeDataFrame = Field(alias="netInjection")
    net_offtake: TimeDataFrame = Field(alias="netOfftake")
    shared_energy: TimeDataFrame = Field(alias="sharedEnergy")

    @classmethod
    def from_calculation_result(cls, result: pd.DataFrame) -> "EnergySharingOutput":
        """Create an output model from a calculation result."""
        return cls.model_construct(
            net_injection=TimeDataFrame.from_pandas(result[NET_INJECTION]),
            net_offtake=TimeDataFrame.from_pandas(result[NET_OFFTAKE]),
            shared_energy=TimeDataFrame.from_pandas(result[SHARED_ENERGY]),
        )
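
Illustrative only: a JSON-style payload validated through these models and fed to calculate(). Field aliases are as declared above; the index/columns/data wire format comes from TimeDataFrame in openenergyid/models.py (shown later in this diff), and the re-exports are assumed as before:

from openenergyid.energysharing import CalculationMethod, calculate  # assumed re-exports
from openenergyid.energysharing.models import EnergySharingInput, EnergySharingOutput

index = ["2024-01-01T00:00:00Z", "2024-01-01T00:15:00Z"]
payload = {
    "grossInjection": {"index": index, "columns": ["P1", "P2"], "data": [[2.0, 0.0], [1.0, 0.0]]},
    "grossOfftake": {"index": index, "columns": ["P1", "P2"], "data": [[0.0, 1.5], [0.5, 1.0]]},
    "key": {"index": index, "columns": ["P1", "P2"], "data": [[0.5, 0.5], [0.6, 0.4]]},  # rows sum to 1
}

model = EnergySharingInput.model_validate(payload)
result = calculate(model.to_pandas(), CalculationMethod.FIXED)
output = EnergySharingOutput.from_calculation_result(result)
print(output.model_dump_json(by_alias=True))
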
openenergyid/enums.py
ADDED
@@ -0,0 +1,16 @@
"""Static enums for Open Energy ID."""

from enum import Enum


class Granularity(Enum):
    """Granularity of a time series."""

    P1Y = "P1Y"  # 1 year
    P1M = "P1M"  # 1 month
    P7D = "P7D"  # 7 days
    P1D = "P1D"  # 1 day
    PT1H = "PT1H"  # 1 hour
    PT15M = "PT15M"  # 15 minutes
    PT5M = "PT5M"  # 5 minutes
    PT1M = "PT1M"  # 1 minute
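
The enum values are ISO 8601 duration strings, so a granularity coming off an API payload can be looked up directly; a one-line sketch:

from openenergyid.enums import Granularity

assert Granularity("PT15M") is Granularity.PT15M  # lookup by ISO 8601 duration value
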
openenergyid/models.py
ADDED
@@ -0,0 +1,174 @@
"""Data models for the Open Energy ID."""

import datetime as dt
from typing import Self, overload

import pandas as pd
import polars as pl
from pydantic import BaseModel


class TimeSeriesBase(BaseModel):
    """Pydantic base model for time series data."""

    index: list[dt.datetime]

    @classmethod
    def from_pandas(cls, data: pd.Series | pd.DataFrame) -> Self:
        """Create from a Pandas object."""
        raise NotImplementedError

    def to_pandas(self, timezone: str = "UTC") -> pd.Series | pd.DataFrame:
        """Convert to a Pandas object."""
        raise NotImplementedError

    @overload
    def to_json(self, path: None = None, **kwargs) -> str:
        """Dump to a JSON string."""

    @overload
    def to_json(self, path: str, **kwargs) -> None:
        """Dump to a JSON file."""

    def to_json(self, path: str | None = None, **kwargs) -> str | None:
        """Dump to a JSON string or file."""
        if path is None:
            return self.model_dump_json(**kwargs)
        encoding = kwargs.pop("encoding", "UTF-8")
        with open(path, "w", encoding=encoding) as file:
            file.write(self.model_dump_json(**kwargs))
        return None

    @overload
    @classmethod
    def from_json(cls, string: str, **kwargs) -> Self:
        """Load from a JSON string."""

    @overload
    @classmethod
    def from_json(cls, *, path: str, **kwargs) -> Self:
        """Load from a JSON file."""

    @classmethod
    def from_json(cls, string: str | None = None, path: str | None = None, **kwargs) -> Self:
        """Load from a JSON string or file."""
        if string:
            return cls.model_validate_json(string, **kwargs)
        if path:
            encoding = kwargs.pop("encoding", "UTF-8")
            with open(path, encoding=encoding) as file:
                return cls.model_validate_json(file.read(), **kwargs)
        raise ValueError("Either string or path must be provided.")

    def first_timestamp(self) -> dt.datetime:
        """Get the first timestamp in the index."""
        return min(self.index)

    def last_timestamp(self) -> dt.datetime:
        """Get the last timestamp in the index."""
        return max(self.index)


class TimeSeries(TimeSeriesBase):
    """
    Represents time series data.

    Attributes:
        name (str | None): The name of the time series.
        data (list[float | None]): The data points of the time series.

    Methods:
        from_pandas(cls, data: pd.Series) -> Self:
            Create a TimeSeries object from a Pandas Series.
        to_pandas(self, timezone: str = "UTC") -> pd.Series:
            Convert the TimeSeries object to a Pandas Series.
        from_polars(cls, data: pl.DataFrame | pl.LazyFrame) -> Self:
            Create a TimeSeries object from Polars data.
        to_polars(self, timezone: str = "UTC") -> pl.LazyFrame:
            Convert the TimeSeries object to a Polars LazyFrame.
    """

    name: str | None = None
    data: list[float | None]

    @classmethod
    def from_pandas(cls, data: pd.Series) -> Self:
        """Create from a Pandas Series."""
        if not hasattr(data, "name") or data.name is None:
            name = None
        else:
            name = str(data.name)
        return cls(name=name, data=data.tolist(), index=data.index.tolist())

    def to_pandas(self, timezone: str = "UTC") -> pd.Series:
        """Convert to a Pandas Series."""
        series = pd.Series(self.data, name=self.name, index=self.index)
        series.index = pd.to_datetime(series.index, utc=True)
        return series.tz_convert(timezone)

    @classmethod
    def from_polars(cls, data: pl.DataFrame | pl.LazyFrame) -> Self:
        """Create from Polars data."""
        # Always work with a DataFrame
        df = data.collect() if isinstance(data, pl.LazyFrame) else data

        if len(df.columns) != 2:
            raise ValueError("Must contain exactly two columns: timestamp and value")

        value_col = [col for col in df.columns if col != "timestamp"][0]
        return cls(
            name=value_col,
            data=df[value_col].cast(pl.Float64).to_list(),  # Ensure float type
            index=df["timestamp"].cast(pl.Datetime).dt.convert_time_zone("UTC").to_list(),
        )

    def to_polars(self, timezone: str = "UTC") -> pl.LazyFrame:
        """Convert to a Polars LazyFrame."""
        # Always return a LazyFrame, as the return type specifies
        df = pl.DataFrame(
            {
                "timestamp": pl.Series(self.index, dtype=pl.Datetime).dt.convert_time_zone(
                    timezone
                ),
                "total" if self.name is None else self.name: pl.Series(self.data, dtype=pl.Float64),
            }
        )
        return df.lazy()


class TimeDataFrame(TimeSeriesBase):
    """Time series data with multiple columns."""

    columns: list[str]
    data: list[list[float | None]]

    @classmethod
    def from_pandas(cls, data: pd.DataFrame) -> Self:
        """Create from a Pandas DataFrame."""
        # Cast values to float | None
        values = [
            [float(x) if pd.notnull(x) else None for x in row] for row in data.values.tolist()
        ]
        return cls(columns=data.columns.tolist(), data=values, index=data.index.tolist())

    def to_pandas(self, timezone: str = "UTC") -> pd.DataFrame:
        """Convert to a Pandas DataFrame."""
        frame = pd.DataFrame(self.data, columns=self.columns, index=self.index)
        frame.index = pd.to_datetime(frame.index, utc=True)
        return frame.tz_convert(timezone)

    @classmethod
    def from_timeseries(cls, data: list[TimeSeries]) -> Self:
        """Create from a list of TimeSeries objects."""
        return cls(
            columns=[series.name or "" for series in data],  # Handle None names
            # Transpose the per-series value lists into row-major data,
            # matching the layout that to_pandas() and to_timeseries() expect
            data=[list(row) for row in zip(*(series.data for series in data))],
            index=data[0].index,
        )

    def to_timeseries(self) -> list[TimeSeries]:
        """Convert to a list of TimeSeries objects."""
        return [
            TimeSeries(name=col, data=[row[i] for row in self.data], index=self.index)
            for i, col in enumerate(self.columns)
        ]
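
A round-trip sketch for these models (illustrative; the column names and the Europe/Brussels timezone are arbitrary choices):

import pandas as pd

from openenergyid.models import TimeDataFrame, TimeSeries

idx = pd.date_range("2024-01-01", periods=3, freq="h", tz="UTC")

ts = TimeSeries.from_pandas(pd.Series([1.0, 2.0, 3.0], index=idx, name="offtake"))
restored = TimeSeries.from_json(ts.to_json()).to_pandas(timezone="Europe/Brussels")

frame = TimeDataFrame.from_pandas(
    pd.DataFrame({"offtake": [1.0, 2.0, 3.0], "injection": [0.5, 0.0, 0.2]}, index=idx)
)
print(frame.to_timeseries()[0].name)                     # "offtake"
print(frame.first_timestamp(), frame.last_timestamp())   # index bounds
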
openenergyid/mvlr/__init__.py
ADDED
@@ -0,0 +1,19 @@
"""Multi-variable linear regression (MVLR) module."""

from .main import find_best_mvlr
from .models import (
    IndependentVariableInput,
    IndependentVariableResult,
    MultiVariableRegressionInput,
    MultiVariableRegressionResult,
    ValidationParameters,
)

__all__ = [
    "find_best_mvlr",
    "IndependentVariableInput",
    "IndependentVariableResult",
    "MultiVariableRegressionInput",
    "MultiVariableRegressionResult",
    "ValidationParameters",
]
openenergyid/mvlr/helpers.py
ADDED
@@ -0,0 +1,30 @@
"""Miscellaneous helper functions for the MVLR app."""

import pandas as pd

from openenergyid.enums import Granularity

pandas_granularity_map = {Granularity.P7D: "W-MON", Granularity.P1M: "MS", Granularity.P1D: "D"}


def resample_input_data(
    data: pd.DataFrame,
    granularity: Granularity,
    aggregation_methods: dict | None = None,
) -> pd.DataFrame:
    """Resample input data to the given granularity.

    By default, the data is summed up for each column.
    Provide a dictionary of aggregation methods to override this behaviour.
    """
    if granularity not in pandas_granularity_map:
        raise NotImplementedError("Granularity not implemented.")
    aggregation_methods = aggregation_methods.copy() if aggregation_methods else {}

    for column in data.columns:
        if column not in aggregation_methods:
            aggregation_methods[column] = "sum"

    return data.resample(rule=pandas_granularity_map[granularity]).agg(
        aggregation_methods,
    )
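
A usage sketch (illustrative; the column names are arbitrary). Columns not listed in aggregation_methods fall back to "sum", and granularities outside the map above raise NotImplementedError:

import pandas as pd

from openenergyid.enums import Granularity
from openenergyid.mvlr.helpers import resample_input_data

idx = pd.date_range("2024-01-01", periods=96, freq="15min", tz="UTC")
df = pd.DataFrame({"energy": 1.0, "temperature": 10.0}, index=idx)

daily = resample_input_data(
    data=df,
    granularity=Granularity.P1D,
    aggregation_methods={"temperature": "mean"},  # "energy" defaults to "sum"
)
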
openenergyid/mvlr/main.py
ADDED
@@ -0,0 +1,34 @@
"""Main module for the MultiVariableLinearRegression class."""

from .helpers import resample_input_data
from .models import MultiVariableRegressionInput, MultiVariableRegressionResult
from .mvlr import MultiVariableLinearRegression


def find_best_mvlr(
    data: MultiVariableRegressionInput,
) -> MultiVariableRegressionResult:
    """Cycle through multiple granularities and return the first model that passes validation."""
    best_rsquared = 0
    for granularity in data.granularities:
        frame = data.data_frame()
        frame = resample_input_data(data=frame, granularity=granularity)
        mvlr = MultiVariableLinearRegression(
            data=frame,
            y=data.dependent_variable,
            granularity=granularity,
            allow_negative_predictions=data.allow_negative_predictions,
            single_use_exog_prefixes=data.single_use_exog_prefixes or [],
            exogs__disallow_negative_coefficient=data.get_disallowed_negative_coefficients(),
        )
        mvlr.do_analysis()
        if mvlr.validate(
            min_rsquared=data.validation_parameters.rsquared,
            max_f_pvalue=data.validation_parameters.f_pvalue,
            max_pvalues=data.validation_parameters.pvalues,
        ):
            return MultiVariableRegressionResult.from_mvlr(mvlr)
        best_rsquared = max(best_rsquared, mvlr.fit.rsquared_adj)
    raise ValueError(
        f"No valid model found. Best R²: {best_rsquared:.3f} (need ≥{data.validation_parameters.rsquared})"
    )
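
A hedged sketch of calling this entry point. The constructor fields of MultiVariableRegressionInput live in mvlr/models.py (227 lines, not reproduced in this excerpt), so the input is loaded from a hypothetical JSON payload rather than built by hand, assuming the Pydantic API used throughout this codebase:

from pathlib import Path

from openenergyid.mvlr import MultiVariableRegressionInput, find_best_mvlr

# "mvlr_input.json" is a hypothetical payload matching the schema in mvlr/models.py
mvlr_input = MultiVariableRegressionInput.model_validate_json(Path("mvlr_input.json").read_text())

try:
    result = find_best_mvlr(mvlr_input)
except ValueError as err:
    print(f"No granularity produced a valid model: {err}")
else:
    print(result.model_dump_json(by_alias=True))  # assuming a Pydantic result model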