likelihood 2.2.0.dev1__cp311-cp311-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- likelihood/VERSION +1 -0
- likelihood/__init__.py +20 -0
- likelihood/graph/__init__.py +9 -0
- likelihood/graph/_nn.py +283 -0
- likelihood/graph/graph.py +86 -0
- likelihood/graph/nn.py +329 -0
- likelihood/main.py +273 -0
- likelihood/models/__init__.py +3 -0
- likelihood/models/deep/__init__.py +13 -0
- likelihood/models/deep/_autoencoders.py +896 -0
- likelihood/models/deep/_predictor.py +809 -0
- likelihood/models/deep/autoencoders.py +903 -0
- likelihood/models/deep/bandit.py +97 -0
- likelihood/models/deep/gan.py +313 -0
- likelihood/models/deep/predictor.py +805 -0
- likelihood/models/deep/rl.py +345 -0
- likelihood/models/environments.py +202 -0
- likelihood/models/hmm.py +163 -0
- likelihood/models/regression.py +451 -0
- likelihood/models/simulation.py +213 -0
- likelihood/models/utils.py +87 -0
- likelihood/pipes.py +382 -0
- likelihood/rust_py_integration.cpython-311-x86_64-linux-gnu.so +0 -0
- likelihood/tools/__init__.py +4 -0
- likelihood/tools/cat_embed.py +212 -0
- likelihood/tools/figures.py +348 -0
- likelihood/tools/impute.py +278 -0
- likelihood/tools/models_tools.py +866 -0
- likelihood/tools/numeric_tools.py +390 -0
- likelihood/tools/reports.py +375 -0
- likelihood/tools/tools.py +1336 -0
- likelihood-2.2.0.dev1.dist-info/METADATA +68 -0
- likelihood-2.2.0.dev1.dist-info/RECORD +37 -0
- likelihood-2.2.0.dev1.dist-info/WHEEL +5 -0
- likelihood-2.2.0.dev1.dist-info/licenses/LICENSE +21 -0
- likelihood-2.2.0.dev1.dist-info/top_level.txt +7 -0
- src/lib.rs +12 -0
likelihood/models/hmm.py
ADDED
@@ -0,0 +1,163 @@
+import logging
+import os
+import pickle
+from typing import List
+
+import numpy as np
+from IPython.display import clear_output
+
+
+class HMM:
+    def __init__(self, n_states: int, n_observations: int):
+        self.n_states = n_states
+        self.n_observations = n_observations
+
+        # Initialize parameters with random values
+        self.pi = np.random.dirichlet(np.ones(n_states), size=1)[0]
+        self.A = np.random.dirichlet(np.ones(n_states), size=n_states)
+        self.B = np.random.dirichlet(np.ones(n_observations), size=n_states)
+
+    def save_model(self, filename: str = "./hmm") -> None:
+        filename = filename if filename.endswith(".pkl") else filename + ".pkl"
+        with open(filename, "wb") as f:
+            pickle.dump(self, f)
+
+    @staticmethod
+    def load_model(filename: str = "./hmm") -> "HMM":
+        filename = filename + ".pkl" if not filename.endswith(".pkl") else filename
+        with open(filename, "rb") as f:
+            return pickle.load(f)
+
+    def forward(self, sequence: List[int]) -> np.ndarray:
+        T = len(sequence)
+        alpha = np.zeros((T, self.n_states))
+
+        # Add a small constant (smoothing) to avoid log(0)
+        epsilon = 1e-10  # Small value to avoid taking log(0)
+
+        # Initialization (log-space)
+        alpha[0] = np.log(self.pi + epsilon) + np.log(self.B[:, sequence[0]] + epsilon)
+        alpha[0] -= np.log(np.sum(np.exp(alpha[0])))  # Normalization (log-space)
+
+        # Recursion (log-space)
+        for t in range(1, T):
+            for i in range(self.n_states):
+                alpha[t, i] = np.log(
+                    np.sum(np.exp(alpha[t - 1] + np.log(self.A[:, i] + epsilon)))
+                ) + np.log(self.B[i, sequence[t]] + epsilon)
+            alpha[t] -= np.log(np.sum(np.exp(alpha[t])))  # Normalization
+
+        return alpha
+
+    def backward(self, sequence: List[int]) -> np.ndarray:
+        T = len(sequence)
+        beta = np.ones((T, self.n_states))
+
+        # Backward recursion
+        for t in range(T - 2, -1, -1):
+            for i in range(self.n_states):
+                beta[t, i] = np.sum(self.A[i] * self.B[:, sequence[t + 1]] * beta[t + 1])
+
+        return beta
+
+    def viterbi(self, sequence: List[int]) -> np.ndarray:
+        T = len(sequence)
+        delta = np.zeros((T, self.n_states))
+        psi = np.zeros((T, self.n_states), dtype=int)
+
+        # Initialization
+        delta[0] = self.pi * self.B[:, sequence[0]]
+
+        # Recursion
+        for t in range(1, T):
+            for i in range(self.n_states):
+                delta[t, i] = np.max(delta[t - 1] * self.A[:, i]) * self.B[i, sequence[t]]
+                psi[t, i] = np.argmax(delta[t - 1] * self.A[:, i])
+
+        # Reconstruct the most probable path
+        state_sequence = np.zeros(T, dtype=int)
+        state_sequence[T - 1] = np.argmax(delta[T - 1])
+        for t in range(T - 2, -1, -1):
+            state_sequence[t] = psi[t + 1, state_sequence[t + 1]]
+
+        return state_sequence
+
+    def baum_welch(
+        self, sequences: List[List[int]], n_iterations: int, verbose: bool = False
+    ) -> None:
+        for iteration in range(n_iterations):
+            # Initialize accumulators
+            A_num = np.zeros((self.n_states, self.n_states))
+            B_num = np.zeros((self.n_states, self.n_observations))
+            pi_num = np.zeros(self.n_states)
+
+            for sequence in sequences:
+                T = len(sequence)
+                alpha = self.forward(sequence)
+                beta = self.backward(sequence)
+
+                # Update pi
+                gamma = (alpha * beta) / np.sum(alpha * beta, axis=1, keepdims=True)
+                pi_num += gamma[0]
+
+                # Update A and B
+                for t in range(T - 1):
+                    xi = np.zeros((self.n_states, self.n_states))
+                    denom = np.sum(alpha[t] * self.A * self.B[:, sequence[t + 1]] * beta[t + 1])
+
+                    for i in range(self.n_states):
+                        for j in range(self.n_states):
+                            xi[i, j] = (
+                                alpha[t, i]
+                                * self.A[i, j]
+                                * self.B[j, sequence[t + 1]]
+                                * beta[t + 1, j]
+                            ) / denom
+                        A_num[i] += xi[i]
+
+                    B_num[:, sequence[t]] += gamma[t]
+
+                # For the last step of the sequence
+                B_num[:, sequence[-1]] += gamma[-1]
+
+            # Normalize and update parameters
+            self.pi = pi_num / len(sequences)
+            self.A = A_num / np.sum(A_num, axis=1, keepdims=True)
+            self.B = B_num / np.sum(B_num, axis=1, keepdims=True)
+
+            # Logging parameters every 10 iterations
+            if iteration % 10 == 0 and verbose:
+                os.system("cls" if os.name == "nt" else "clear")
+                clear_output(wait=True)
+                logging.info(f"Iteration {iteration}:")
+                logging.info("Pi: %s", self.pi)
+                logging.info("A:\n%s", self.A)
+                logging.info("B:\n%s", self.B)
+
+    def decoding_accuracy(self, sequences: List[List[int]], true_states: List[List[int]]) -> float:
+        correct_predictions = 0
+        total_predictions = 0
+
+        for sequence, true_state in zip(sequences, true_states):
+            predicted_states = self.viterbi(sequence)
+            correct_predictions += np.sum(predicted_states == true_state)
+            total_predictions += len(sequence)
+
+        accuracy = (correct_predictions / total_predictions) * 100
+        return accuracy
+
+    def state_probabilities(self, sequence: List[int]) -> np.ndarray:
+        """
+        Returns the smoothed probabilities of the hidden states at each time step.
+        This is done by using both forward and backward probabilities.
+        """
+        alpha = self.forward(sequence)
+        beta = self.backward(sequence)
+
+        # Compute smoothed probabilities (gamma)
+        smoothed_probs = (alpha * beta) / np.sum(alpha * beta, axis=1, keepdims=True)
+
+        return smoothed_probs
+
+    def sequence_probability(self, sequence: List[int]) -> np.ndarray:
+        return self.state_probabilities(sequence)[-1]
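For orientation, here is a minimal usage sketch of the `HMM` class added above. Only the class itself comes from the package; the observation sequences and hyperparameters below are made up for illustration.

```python
# Hypothetical usage of likelihood.models.hmm.HMM (illustrative data only).
import numpy as np
from likelihood.models.hmm import HMM

np.random.seed(0)
hmm = HMM(n_states=2, n_observations=3)

# Observations are integer symbols in [0, n_observations).
sequences = [[0, 1, 2, 1, 0], [2, 2, 1, 0, 1]]
hmm.baum_welch(sequences, n_iterations=20)

path = hmm.viterbi([0, 1, 2, 2])            # most probable hidden-state path
gamma = hmm.state_probabilities([0, 1, 2])  # smoothed posteriors per step
hmm.save_model("./my_hmm")                  # writes ./my_hmm.pkl
```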
likelihood/models/regression.py
ADDED
@@ -0,0 +1,451 @@
+import pickle
+
+import matplotlib.pyplot as plt
+import numpy as np
+import seaborn as sns
+
+from likelihood.main import *
+from likelihood.models.utils import FeaturesArima
+from likelihood.tools import *
+
+# -------------------------------------------------------------------------
+
+
+class AbstractArima(FeaturesArima):
+    """A class that implements the auto-regressive ARIMA(1, 0, 0) model.
+
+    Parameters
+    ----------
+    datapoints : `np.ndarray`
+        The input data points for training.
+    noise : `float`, optional
+        Noise level for the model, by default 0
+    tol : `float`, optional
+        Tolerance for convergence, by default 1e-4
+
+    Attributes
+    ----------
+    datapoints : `np.ndarray`
+        The input data points for training.
+    n_steps : `int`
+        Number of steps to predict.
+    noise : `float`
+        Noise level for the model.
+    p : `int`
+        Order of the autoregressive part.
+    q : `int`
+        Order of the moving-average part.
+    tol : `float`
+        Tolerance for convergence.
+    nwalkers : `int`
+        Number of walkers for sampling.
+    mov : `int`
+        Maximum number of iterations.
+    theta_trained : `np.ndarray`
+        Trained parameters of the model.
+    """
+
+    __slots__ = [
+        "datapoints",
+        "n_steps",
+        "noise",
+        "p",
+        "q",
+        "tol",
+        "nwalkers",
+        "mov",
+        "theta_trained",
+    ]
+
+    def __init__(self, datapoints: np.ndarray, noise: float = 0, tol: float = 1e-4):
+        """Initialize the ARIMA model.
+
+        Parameters
+        ----------
+        datapoints : `np.ndarray`
+            The input data points for training.
+        noise : `float`, optional
+            Noise level for the model, by default 0
+        tol : `float`, optional
+            Tolerance for convergence, by default 1e-4
+        """
+        self.datapoints = datapoints
+        self.noise = noise
+        self.p = datapoints.shape[0]
+        self.q = 0
+        self.tol = tol
+        self.n_steps = 0
+
+    def model(self, datapoints: np.ndarray, theta: list, mode=True):
+        """Compute the model forward pass.
+
+        Parameters
+        ----------
+        datapoints : `np.ndarray`
+            The input data points.
+        theta : `list`
+            Model parameters.
+        mode : `bool`, optional
+            Forward pass mode, by default True
+
+        Returns
+        -------
+        `np.ndarray`
+            Model output.
+        """
+        datapoints = self.datapoints
+        noise = self.noise
+        self.theta_trained = theta
+
+        return super().forward(datapoints, theta, mode, noise)
+
+    def xvec(self, datapoints: np.ndarray, n_steps: int = 0):
+        """Extract a vector of data points.
+
+        Parameters
+        ----------
+        datapoints : `np.ndarray`
+            The input data points.
+        n_steps : `int`, optional
+            Number of steps to consider, by default 0
+
+        Returns
+        -------
+        `np.ndarray`
+            Extracted data points vector.
+        """
+        datapoints = self.datapoints
+        self.n_steps = n_steps
+
+        return datapoints[n_steps:]
+
+    def train(self, nwalkers: int = 10, mov: int = 200, weights: bool = False):
+        """Train the model using the sampling method.
+
+        Parameters
+        ----------
+        nwalkers : `int`, optional
+            Number of walkers for sampling, by default 10
+        mov : `int`, optional
+            Maximum number of iterations, by default 200
+        weights : `bool`, optional
+            Whether to use weights in sampling, by default False
+        """
+        datapoints = self.datapoints
+        xvec = self.xvec
+        self.nwalkers = nwalkers
+        self.mov = mov
+
+        assert self.nwalkers <= self.mov, "n_walkers must be less than or equal to mov"
+        model = self.model
+        n = self.p + self.q
+        theta = np.random.rand(n)
+        x_vec = xvec(datapoints)
+
+        if weights:
+            par, error = walkers(
+                nwalkers,
+                x_vec,
+                datapoints,
+                model,
+                theta=self.theta_trained,
+                mov=mov,
+                tol=self.tol,
+                figname=None,
+            )
+        else:
+            par, error = walkers(
+                nwalkers, x_vec, datapoints, model, theta, mov=mov, tol=self.tol, figname=None
+            )
+
+        index = np.where(error == np.min(error))[0][0]
+        trained = np.array(par[index])
+
+        self.theta_trained = trained
+
+    def predict(self, n_steps: int = 0):
+        """Make predictions for future steps.
+
+        Parameters
+        ----------
+        n_steps : `int`, optional
+            Number of steps to predict, by default 0
+
+        Returns
+        -------
+        `np.ndarray`
+            Predicted values.
+        """
+        self.n_steps = n_steps
+        datapoints = self.datapoints
+        model = self.model
+        theta_trained = self.theta_trained
+        y_pred = model(datapoints, theta_trained)
+
+        for i in range(n_steps):
+            self.datapoints = y_pred[i:]
+            y_new = model(datapoints, theta_trained, mode=False)
+            y_pred = y_pred.tolist()
+            y_pred.append(y_new)
+            y_pred = np.array(y_pred)
+
+        return np.array(y_pred)
+
+    def save_model(self, name: str = "model"):
+        with open(name + ".pkl", "wb") as file:
+            pickle.dump(self.theta_trained, file)
+
+    def load_model(self, name: str = "model"):
+        with open(name + ".pkl", "rb") as file:
+            self.theta_trained = pickle.load(file)
+
+    def eval(self, y_val: np.ndarray, y_pred: np.ndarray):
+        rmse = np.sqrt(np.mean((y_pred - y_val) ** 2))
+        square_error = np.sqrt((y_pred - y_val) ** 2)
+        accuracy = np.sum(square_error[np.where(square_error < rmse)])
+        accuracy /= np.sum(square_error)
+        print("Accuracy: {:.4f}".format(accuracy))
+        print("RMSE: {:.4f}".format(rmse))
+
+    def plot_pred(
+        self, y_real: np.ndarray, y_pred: np.ndarray, ci: float = 0.90, mode: bool = True
+    ):
+        sns.set_theme(style="whitegrid")
+        plt.figure(figsize=(5, 3))
+        n = self.n_steps
+        y_mean = np.mean(y_pred, axis=0)
+        y_std = np.std(y_pred, axis=0)
+        if ci < 0.95:
+            Z = (ci / 0.90) * 1.64
+        else:
+            Z = (ci / 0.95) * 1.96
+        plt.plot(y_pred, label="Predicted", linewidth=2, color=sns.color_palette("deep")[1])
+        plt.plot(
+            y_real, ".--", label="Real", color=sns.color_palette("deep")[0], alpha=0.6, markersize=6
+        )
+        plt.fill_between(
+            range(y_pred.shape[0])[-n:],
+            (y_pred - Z * y_std)[-n:],
+            (y_pred + Z * y_std)[-n:],
+            alpha=0.2,
+            color=sns.color_palette("deep")[1],
+        )
+        plt.title("Predicted vs Real Values with Confidence Interval", fontsize=12)
+        plt.xlabel("Time Steps", fontsize=12)
+        plt.ylabel("y", fontsize=12)
+        plt.grid(True, linestyle="--", alpha=0.7)
+        plt.xticks(fontsize=10)
+        plt.yticks(fontsize=10)
+        print(f"Confidence Interval: ±{Z * y_std:.4f}")
+        plt.legend(loc="upper left", fontsize=9)
+        if mode:
+            plt.savefig(f"pred_{n}.png", dpi=300)
+        plt.tight_layout()
+        plt.show()
+
+    def summary(self):
+        print("\nSummary:")
+        print("-----------------------")
+        print("Length of theta: {}".format(len(self.theta_trained)))
+        print("Mean of theta: {:.4f}".format(np.mean(self.theta_trained)))
+        print("-----------------------")
+
+
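A minimal sketch of driving `AbstractArima` directly on a 1-D series (the `walkers` optimizer comes into this module via the star import from `likelihood.main`). The series and hyperparameters below are made up for illustration:

```python
# Hypothetical usage of AbstractArima (illustrative 1-D series).
import numpy as np
from likelihood.models.regression import AbstractArima

series = np.sin(np.linspace(0, 6, 60))  # made-up signal
model = AbstractArima(series, noise=0, tol=1e-4)
model.train(nwalkers=10, mov=200)       # walker-based search for theta
y_pred = model.predict(n_steps=5)       # model output plus 5 extrapolated points
model.summary()
model.save_model("abstract_arima")      # pickles theta_trained
```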
+class FourierRegression(AbstractArima):
+    """A class that implements the ARIMA model with FFT noise filtering.
+
+    Parameters
+    ----------
+    datapoints : np.ndarray
+        A set of points to train the ARIMA model, one series per row.
+
+    Returns
+    -------
+    new_datapoints : np.ndarray
+        The predicted data points. `fit()` must be called before
+        `predict(n_steps)`.
+    """
+
+    __slots__ = ["datapoints_", "sigma", "mode", "mov", "n_walkers", "name"]
+
+    def __init__(self, datapoints: np.ndarray):
+        self.datapoints_ = datapoints
+
+    def fit(self, sigma: int = 0, mov: int = 200, mode: bool = False):
+        self.sigma = sigma
+        self.mode = mode
+        self.mov = mov
+
+        datapoints = self.datapoints_
+        self.datapoints_, _ = fft_denoise(datapoints, sigma, mode)
+
+    def predict(
+        self, n_steps: int, n_walkers: int = 1, name: str = "fourier_model", save: bool = True
+    ):
+        self.n_walkers = n_walkers
+        self.name = name
+        mov = self.mov
+
+        assert self.n_walkers <= mov, "n_walkers must be less than or equal to mov"
+
+        new_datapoints = []
+        for i in range(self.datapoints_.shape[0]):
+            super().__init__(self.datapoints_[i, :])
+            super().train(n_walkers, mov)
+            if save:
+                super().save_model(str(i) + "_" + name)
+            y_pred_ = super().predict(n_steps)
+            new_datapoints.append(y_pred_)
+
+        new_datapoints = np.array(new_datapoints)
+        new_datapoints = np.reshape(new_datapoints, (len(new_datapoints), -1))
+
+        return new_datapoints
+
+    def load_predict(self, n_steps: int, name: str = "fourier_model"):
+        new_datapoints = []
+
+        for i in range(self.datapoints_.shape[0]):
+            super().__init__(self.datapoints_[i, :])
+            super().load_model(str(i) + "_" + name)
+            y_pred_ = super().predict(n_steps)
+            new_datapoints.append(y_pred_)
+
+        new_datapoints = np.array(new_datapoints)
+        new_datapoints = np.reshape(new_datapoints, (len(new_datapoints), -1))
+
+        return new_datapoints
+
+
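A sketch of the intended `FourierRegression` workflow implied by the loop over rows above: `fit()` denoises each row with `fft_denoise` and sets `mov`, so it must precede `predict()`. The 2-D data below (one series per row) is made up for illustration:

```python
# Hypothetical usage of FourierRegression (rows are independent series).
import numpy as np
from likelihood.models.regression import FourierRegression

rng = np.random.default_rng(0)
data = np.sin(np.linspace(0, 6, 50)) + 0.1 * rng.standard_normal((3, 50))

fr = FourierRegression(data)
fr.fit(sigma=0, mov=200)                    # FFT denoising; also sets self.mov
preds = fr.predict(n_steps=5, n_walkers=1)  # trains and saves 0_fourier_model.pkl, ...
reloaded = fr.load_predict(n_steps=5)       # reuses the saved per-row models
```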
+class Arima(AbstractArima):
+    """A class that implements the (p, d, q) ARIMA model.
+
+    Parameters
+    ----------
+    datapoints : np.ndarray
+        A set of points to train the ARIMA model.
+    p : float
+        Number of auto-regressive terms (ratio). By default it is set to `1`.
+    d : int
+        Degree of differencing. By default it is set to `0`.
+    q : float
+        Number of forecast errors in the model (ratio). By default it is set to `0`.
+    n_steps : int
+        Number of steps to predict ahead.
+    noise : float
+        Amount of noise added during training.
+    tol : float
+        Tolerance for convergence checks.
+
+    Returns
+    -------
+    None
+
+    Notes
+    -----
+    The values of `p`, `q` are scaled based on the length of `datapoints`.
+    """
+
+    __slots__ = ["datapoints", "noise", "p", "d", "q", "tol", "theta_trained"]
+
+    def __init__(
+        self,
+        datapoints: np.ndarray,
+        p: float = 1,
+        d: int = 0,
+        q: float = 0,
+        noise: float = 0,
+        tol: float = 1e-5,
+    ):
+        """Initializes the ARIMA model with the given parameters.
+
+        Parameters
+        ----------
+        datapoints : np.ndarray
+            A set of points to train the ARIMA model.
+        p : float
+            Auto-regressive term (scaled by the length of the data).
+        d : int
+            Degree of differencing.
+        q : float
+            Moving-average term (scaled by the length of the data).
+        noise : float
+            Noise level for training.
+        tol : float
+            Tolerance for numerical convergence.
+
+        Returns
+        -------
+        None
+        """
+        self.datapoints = datapoints
+        self.noise = noise
+        assert p > 0 and p <= 1, "p must be less than or equal to 1 but greater than 0"
+        self.p = int(p * len(datapoints))
+        assert d >= 0 and d <= 1, "d must be less than or equal to 1 but greater than or equal to 0"
+        self.d = d
+        self.q = int(q * len(datapoints))
+        self.tol = tol
+
+    def model(self, datapoints: np.ndarray, theta: list, mode: bool = True):
+        """Computes the prior probability or prediction based on the ARIMA model.
+
+        Parameters
+        ----------
+        datapoints : np.ndarray
+            The input data used for modeling.
+        theta : list
+            Model parameters.
+        mode : bool
+            If True, computes in forward mode; otherwise in backward mode.
+
+        Returns
+        -------
+        y_vec : np.ndarray
+            Predicted values according to the ARIMA model.
+        """
+        datapoints = self.datapoints
+        noise = self.noise
+        self.theta_trained = theta
+
+        assert type(self.d) == int, "d must be 0 or 1"
+
+        if self.d != 0 or self.q != 0:
+            if self.d != 0:
+                y_sum = super().integrated(datapoints)
+                norm_datapoints = np.linalg.norm(datapoints)
+                norm_y_sum = np.linalg.norm(y_sum)
+                if norm_y_sum != 0 and norm_datapoints != 0:
+                    y_sum = cal_average(
+                        np.abs(y_sum * (norm_datapoints / norm_y_sum)) * np.sign(datapoints), 0.05
+                    )
+            else:
+                y_sum = datapoints.copy()
+
+            y_sum_regr = y_sum[-self.p :]
+            y_regr_vec = super().forward(y_sum_regr, theta[0 : self.p], mode, 0)
+            if self.q != 0:
+                y_sum_average = super().average(y_sum[-self.q :])
+                y_vec_magnitude = np.linalg.norm(y_regr_vec.copy())
+                y_sum_average_magnitude = np.linalg.norm(y_sum_average)
+
+                if y_sum_average_magnitude > y_vec_magnitude:
+                    scaling_factor = y_vec_magnitude / y_sum_average_magnitude
+                    y_sum_average = y_sum_average * scaling_factor
+                theta_mean = np.mean(theta[-self.q :])
+                if abs(theta_mean) > 1:
+                    additional_scaling_factor = 1.0 - abs(theta_mean)
+                    y_sum_average = y_sum_average * additional_scaling_factor
+                y_average_vec = super().forward(y_sum_average, theta[-self.q :], mode, 0)
+                if mode:
+                    y_vec = y_regr_vec.copy()
+                    for i in reversed(range(y_average_vec.shape[0])):
+                        y_vec[i] += y_average_vec[i]
+                else:
+                    y_vec = y_regr_vec + y_average_vec
+            else:
+                y_vec = y_regr_vec
+            return y_vec
+        else:
+            return super().forward(datapoints, theta, mode, noise)
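Finally, a sketch of the `Arima` class with differencing and a moving-average component. As the Notes section above states, `p` and `q` are ratios of the series length; the series and settings below are made up for illustration:

```python
# Hypothetical usage of Arima (p and q are ratios, scaled by len(datapoints)).
import numpy as np
from likelihood.models.regression import Arima

series = np.cos(np.linspace(0, 8, 80))  # made-up series
model = Arima(series, p=0.5, d=1, q=0.1, noise=0, tol=1e-5)
model.train(nwalkers=10, mov=200)       # inherited walker-based training
forecast = model.predict(n_steps=10)    # autoregressive extrapolation
model.summary()                         # prints basic stats of theta_trained
```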