likelihood 2.2.0.dev1__cp312-cp312-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,163 @@
+ import logging
+ import os
+ import pickle
+ from typing import List
+
+ import numpy as np
+ from IPython.display import clear_output
+
+
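+ # A discrete hidden Markov model parameterized by pi (initial state
+ # distribution), A (state-transition matrix), and B (emission matrix),
+ # trained with the Baum-Welch (EM) procedure below.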
+ class HMM:
+     def __init__(self, n_states: int, n_observations: int):
+         self.n_states = n_states
+         self.n_observations = n_observations
+
+         # Initialize parameters with random values; Dirichlet draws make pi,
+         # each row of A, and each row of B valid probability distributions
+         self.pi = np.random.dirichlet(np.ones(n_states), size=1)[0]
+         self.A = np.random.dirichlet(np.ones(n_states), size=n_states)
+         self.B = np.random.dirichlet(np.ones(n_observations), size=n_states)
+
+     def save_model(self, filename: str = "./hmm") -> None:
+         filename = filename if filename.endswith(".pkl") else filename + ".pkl"
+         with open(filename, "wb") as f:
+             pickle.dump(self, f)
+
+     @staticmethod
+     def load_model(filename: str = "./hmm") -> "HMM":
+         filename = filename + ".pkl" if not filename.endswith(".pkl") else filename
+         with open(filename, "rb") as f:
+             return pickle.load(f)
+
+     def forward(self, sequence: List[int]) -> np.ndarray:
+         """Forward pass computed in log-space to avoid numerical underflow;
+         returns the (T, n_states) matrix of per-step normalized
+         log-probabilities."""
+         T = len(sequence)
+         alpha = np.zeros((T, self.n_states))
+
+         # Add a small constant (smoothing) to avoid taking log(0)
+         epsilon = 1e-10
+
+         # Initialization (log-space)
+         alpha[0] = np.log(self.pi + epsilon) + np.log(self.B[:, sequence[0]] + epsilon)
+         alpha[0] -= np.log(np.sum(np.exp(alpha[0])))  # Normalization (log-space)
+
+         # Recursion (log-space)
+         for t in range(1, T):
+             for i in range(self.n_states):
+                 alpha[t, i] = np.log(
+                     np.sum(np.exp(alpha[t - 1] + np.log(self.A[:, i] + epsilon)))
+                 ) + np.log(self.B[i, sequence[t]] + epsilon)
+             alpha[t] -= np.log(np.sum(np.exp(alpha[t])))  # Normalization
+
+         return alpha
+
+     def backward(self, sequence: List[int]) -> np.ndarray:
+         T = len(sequence)
+         beta = np.ones((T, self.n_states))
+
+         # Backward recursion
+         for t in range(T - 2, -1, -1):
+             for i in range(self.n_states):
+                 beta[t, i] = np.sum(self.A[i] * self.B[:, sequence[t + 1]] * beta[t + 1])
+
+         return beta
+
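+     # Viterbi decoding: delta[t, i] tracks the probability of the best path
+     # ending in state i at time t (in linear space, so very long sequences
+     # may underflow), and psi stores the argmax predecessors used to
+     # backtrack the most probable state sequence.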
+     def viterbi(self, sequence: List[int]) -> np.ndarray:
+         T = len(sequence)
+         delta = np.zeros((T, self.n_states))
+         psi = np.zeros((T, self.n_states), dtype=int)
+
+         # Initialization
+         delta[0] = self.pi * self.B[:, sequence[0]]
+
+         # Recursion
+         for t in range(1, T):
+             for i in range(self.n_states):
+                 delta[t, i] = np.max(delta[t - 1] * self.A[:, i]) * self.B[i, sequence[t]]
+                 psi[t, i] = np.argmax(delta[t - 1] * self.A[:, i])
+
+         # Reconstruct the most probable path
+         state_sequence = np.zeros(T, dtype=int)
+         state_sequence[T - 1] = np.argmax(delta[T - 1])
+         for t in range(T - 2, -1, -1):
+             state_sequence[t] = psi[t + 1, state_sequence[t + 1]]
+
+         return state_sequence
+
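+     # Baum-Welch is expectation-maximization: the E-step computes the state
+     # posteriors gamma and the transition posteriors xi from the forward and
+     # backward passes, and the M-step re-normalizes the accumulated counts
+     # into updated pi, A, and B.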
+     def baum_welch(
+         self, sequences: List[List[int]], n_iterations: int, verbose: bool = False
+     ) -> None:
+         for iteration in range(n_iterations):
+             # Initialize accumulators
+             A_num = np.zeros((self.n_states, self.n_states))
+             B_num = np.zeros((self.n_states, self.n_observations))
+             pi_num = np.zeros(self.n_states)
+
+             for sequence in sequences:
+                 T = len(sequence)
+                 # forward() returns log-probabilities; convert back to linear
+                 # space before combining with the linear-space beta
+                 alpha = np.exp(self.forward(sequence))
+                 beta = self.backward(sequence)
+
+                 # Update pi
+                 gamma = (alpha * beta) / np.sum(alpha * beta, axis=1, keepdims=True)
+                 pi_num += gamma[0]
+
+                 # Update A and B
+                 for t in range(T - 1):
+                     xi = np.zeros((self.n_states, self.n_states))
+                     # Broadcast alpha over the source-state axis (rows), not
+                     # the destination-state axis
+                     denom = np.sum(
+                         alpha[t][:, None] * self.A * self.B[:, sequence[t + 1]] * beta[t + 1]
+                     )
+
+                     for i in range(self.n_states):
+                         for j in range(self.n_states):
+                             xi[i, j] = (
+                                 alpha[t, i]
+                                 * self.A[i, j]
+                                 * self.B[j, sequence[t + 1]]
+                                 * beta[t + 1, j]
+                             ) / denom
+                         A_num[i] += xi[i]
+
+                     B_num[:, sequence[t]] += gamma[t]
+
+                 # For the last step of the sequence
+                 B_num[:, sequence[-1]] += gamma[-1]
+
+             # Normalize and update parameters
+             self.pi = pi_num / len(sequences)
+             self.A = A_num / np.sum(A_num, axis=1, keepdims=True)
+             self.B = B_num / np.sum(B_num, axis=1, keepdims=True)
+
+             # Log parameters every 10 iterations (verbose mode only)
+             if iteration % 10 == 0 and verbose:
+                 os.system("cls" if os.name == "nt" else "clear")
+                 clear_output(wait=True)
+                 logging.info("Iteration %d:", iteration)
+                 logging.info("Pi: %s", self.pi)
+                 logging.info("A:\n%s", self.A)
+                 logging.info("B:\n%s", self.B)
+
+     def decoding_accuracy(self, sequences: List[List[int]], true_states: List[List[int]]) -> float:
+         correct_predictions = 0
+         total_predictions = 0
+
+         for sequence, true_state in zip(sequences, true_states):
+             predicted_states = self.viterbi(sequence)
+             correct_predictions += np.sum(predicted_states == true_state)
+             total_predictions += len(sequence)
+
+         accuracy = (correct_predictions / total_predictions) * 100
+         return accuracy
+
+     def state_probabilities(self, sequence: List[int]) -> np.ndarray:
+         """
+         Returns the smoothed probabilities of the hidden states at each time step.
+         This is done by using both forward and backward probabilities.
+         """
+         # forward() returns log-probabilities; convert back to linear space
+         alpha = np.exp(self.forward(sequence))
+         beta = self.backward(sequence)
+
+         # Compute smoothed probabilities (gamma)
+         smoothed_probs = (alpha * beta) / np.sum(alpha * beta, axis=1, keepdims=True)
+
+         return smoothed_probs
+
+     def sequence_probability(self, sequence: List[int]) -> np.ndarray:
+         """Returns the smoothed state distribution at the final time step."""
+         return self.state_probabilities(sequence)[-1]
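+
+
+ if __name__ == "__main__":
+     # Minimal usage sketch (illustrative only): a 2-state, 3-symbol HMM
+     # trained on a few made-up observation sequences.
+     np.random.seed(0)
+     hmm = HMM(n_states=2, n_observations=3)
+     train_seqs = [[0, 1, 2, 1, 0], [2, 2, 1, 0, 0], [0, 0, 1, 2, 2]]
+     hmm.baum_welch(train_seqs, n_iterations=20)
+     print(hmm.viterbi([0, 1, 2]))  # most probable hidden-state path
+     print(hmm.state_probabilities([0, 1, 2]))  # smoothed state posteriors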
@@ -0,0 +1,451 @@
+ import pickle
+
+ import matplotlib.pyplot as plt
+ import numpy as np
+ import seaborn as sns
+
+ from likelihood.main import *
+ from likelihood.models.utils import FeaturesArima
+ from likelihood.tools import *
+
+ # -------------------------------------------------------------------------
+
+
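+ # The models below fit their parameters with the `walkers` sampler from
+ # likelihood.main (imported via the star import above): train() keeps the
+ # walker whose error is smallest.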
+ class AbstractArima(FeaturesArima):
+     """A class that implements the autoregressive ARIMA(1, 0, 0) model.
+
+     Parameters
+     ----------
+     datapoints : `np.ndarray`
+         The input data points for training.
+     noise : `float`, optional
+         Noise level for the model, by default 0
+     tol : `float`, optional
+         Tolerance for convergence, by default 1e-4
+
+     Attributes
+     ----------
+     datapoints : `np.ndarray`
+         The input data points for training.
+     n_steps : `int`
+         Number of steps to predict.
+     noise : `float`
+         Noise level for the model.
+     p : `int`
+         Order of the autoregressive part.
+     q : `int`
+         Order of the moving-average part.
+     tol : `float`
+         Tolerance for convergence.
+     nwalkers : `int`
+         Number of walkers for sampling.
+     mov : `int`
+         Maximum number of iterations.
+     theta_trained : `np.ndarray`
+         Trained parameters of the model.
+     """
+
+     __slots__ = [
+         "datapoints",
+         "n_steps",
+         "noise",
+         "p",
+         "q",
+         "tol",
+         "nwalkers",
+         "mov",
+         "theta_trained",
+     ]
+
+     def __init__(self, datapoints: np.ndarray, noise: float = 0, tol: float = 1e-4):
+         """Initialize the ARIMA model.
+
+         Parameters
+         ----------
+         datapoints : `np.ndarray`
+             The input data points for training.
+         noise : `float`, optional
+             Noise level for the model, by default 0
+         tol : `float`, optional
+             Tolerance for convergence, by default 1e-4
+         """
+         self.datapoints = datapoints
+         self.noise = noise
+         self.p = datapoints.shape[0]
+         self.q = 0
+         self.tol = tol
+         self.n_steps = 0
+
+     def model(self, datapoints: np.ndarray, theta: list, mode=True):
+         """Compute the model forward pass.
+
+         Parameters
+         ----------
+         datapoints : `np.ndarray`
+             The input data points.
+         theta : `list`
+             Model parameters.
+         mode : `bool`, optional
+             Forward pass mode, by default True
+
+         Returns
+         -------
+         `np.ndarray`
+             Model output.
+         """
+         # The instance's own data points take precedence over the argument
+         datapoints = self.datapoints
+         noise = self.noise
+         self.theta_trained = theta
+
+         return super().forward(datapoints, theta, mode, noise)
+
+     def xvec(self, datapoints: np.ndarray, n_steps: int = 0):
+         """Extract vector of data points.
+
+         Parameters
+         ----------
+         datapoints : `np.ndarray`
+             The input data points.
+         n_steps : `int`, optional
+             Number of steps to consider, by default 0
+
+         Returns
+         -------
+         `np.ndarray`
+             Extracted data points vector.
+         """
+         datapoints = self.datapoints
+         self.n_steps = n_steps
+
+         return datapoints[n_steps:]
+
+     def train(self, nwalkers: int = 10, mov: int = 200, weights: bool = False):
+         """Train the model using a sampling method.
+
+         Parameters
+         ----------
+         nwalkers : `int`, optional
+             Number of walkers for sampling, by default 10
+         mov : `int`, optional
+             Maximum number of iterations, by default 200
+         weights : `bool`, optional
+             Whether to warm-start from the previously trained parameters, by default False
+         """
+         datapoints = self.datapoints
+         xvec = self.xvec
+         self.nwalkers = nwalkers
+         self.mov = mov
+
+         assert self.nwalkers <= self.mov, "nwalkers must be less than or equal to mov"
+         model = self.model
+         n = self.p + self.q
+         theta = np.random.rand(n)
+         x_vec = xvec(datapoints)
+
+         if weights:
+             par, error = walkers(
+                 nwalkers,
+                 x_vec,
+                 datapoints,
+                 model,
+                 theta=self.theta_trained,
+                 mov=mov,
+                 tol=self.tol,
+                 figname=None,
+             )
+         else:
+             par, error = walkers(
+                 nwalkers, x_vec, datapoints, model, theta, mov=mov, tol=self.tol, figname=None
+             )
+
+         # Keep the parameter set with the smallest error
+         index = np.where(error == np.min(error))[0][0]
+         trained = np.array(par[index])
+
+         self.theta_trained = trained
+
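+     # predict() first reproduces the training window with the trained theta,
+     # then extends it recursively: each new point is generated from a window
+     # that includes the previous predictions.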
+     def predict(self, n_steps: int = 0):
+         """Make predictions for future steps.
+
+         Parameters
+         ----------
+         n_steps : `int`, optional
+             Number of steps to predict, by default 0
+
+         Returns
+         -------
+         `np.ndarray`
+             Predicted values.
+         """
+         self.n_steps = n_steps
+         datapoints = self.datapoints
+         model = self.model
+         theta_trained = self.theta_trained
+         y_pred = model(datapoints, theta_trained)
+
+         for i in range(n_steps):
+             self.datapoints = y_pred[i:]
+             y_new = model(datapoints, theta_trained, mode=False)
+             y_pred = y_pred.tolist()
+             y_pred.append(y_new)
+             y_pred = np.array(y_pred)
+
+         return np.array(y_pred)
+
+     def save_model(self, name: str = "model"):
+         with open(name + ".pkl", "wb") as file:
+             pickle.dump(self.theta_trained, file)
+
+     def load_model(self, name: str = "model"):
+         with open(name + ".pkl", "rb") as file:
+             self.theta_trained = pickle.load(file)
+
+     def eval(self, y_val: np.ndarray, y_pred: np.ndarray):
+         rmse = np.sqrt(np.mean((y_pred - y_val) ** 2))
+         square_error = np.sqrt((y_pred - y_val) ** 2)  # per-point absolute errors
+         # Fraction of the total error contributed by points with below-RMSE error
+         accuracy = np.sum(square_error[np.where(square_error < rmse)])
+         accuracy /= np.sum(square_error)
+         print("Accuracy: {:.4f}".format(accuracy))
+         print("RMSE: {:.4f}".format(rmse))
+
+     def plot_pred(
+         self, y_real: np.ndarray, y_pred: np.ndarray, ci: float = 0.90, mode: bool = True
+     ):
+         sns.set_theme(style="whitegrid")
+         plt.figure(figsize=(5, 3))
+         n = self.n_steps
+         y_mean = np.mean(y_pred, axis=0)
+         y_std = np.std(y_pred, axis=0)
+         # Rough linear scaling of the normal quantiles z = 1.64 (90%) and
+         # z = 1.96 (95%) to the requested confidence level
+         if ci < 0.95:
+             Z = (ci / 0.90) * 1.64
+         else:
+             Z = (ci / 0.95) * 1.96
+         plt.plot(y_pred, label="Predicted", linewidth=2, color=sns.color_palette("deep")[1])
+         plt.plot(
+             y_real, ".--", label="Real", color=sns.color_palette("deep")[0], alpha=0.6, markersize=6
+         )
+         plt.fill_between(
+             range(y_pred.shape[0])[-n:],
+             (y_pred - Z * y_std)[-n:],
+             (y_pred + Z * y_std)[-n:],
+             alpha=0.2,
+             color=sns.color_palette("deep")[1],
+         )
+         plt.title("Predicted vs Real Values with Confidence Interval", fontsize=12)
+         plt.xlabel("Time Steps", fontsize=12)
+         plt.ylabel("y", fontsize=12)
+         plt.grid(True, linestyle="--", alpha=0.7)
+         plt.xticks(fontsize=10)
+         plt.yticks(fontsize=10)
+         print(f"Confidence Interval: ±{Z * y_std:.4f}")
+         plt.legend(loc="upper left", fontsize=9)
+         if mode:
+             plt.savefig(f"pred_{n}.png", dpi=300)
+         plt.tight_layout()
+         plt.show()
+
+     def summary(self):
+         print("\nSummary:")
+         print("-----------------------")
+         print("Length of theta: {}".format(len(self.theta_trained)))
+         print("Mean of theta: {:.4f}".format(np.mean(self.theta_trained)))
+         print("-----------------------")
+
+
+ class FourierRegression(AbstractArima):
+     """A class that implements the ARIMA model with FFT noise filtering.
+
+     Parameters
+     ----------
+     datapoints : np.ndarray
+         A set of points to train the ARIMA model.
+
+     Returns
+     -------
+     new_datapoints : np.ndarray
+         The array of predicted points. It is necessary to
+         apply fit() followed by predict(n_steps).
+     """
+
+     __slots__ = ["datapoints_", "sigma", "mode", "mov", "n_walkers", "name"]
+
+     def __init__(self, datapoints: np.ndarray):
+         self.datapoints_ = datapoints
+
+     def fit(self, sigma: int = 0, mov: int = 200, mode: bool = False):
+         self.sigma = sigma
+         self.mode = mode
+         self.mov = mov
+
+         # Denoise the series with an FFT filter before training
+         datapoints = self.datapoints_
+         self.datapoints_, _ = fft_denoise(datapoints, sigma, mode)
+
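+     # predict() trains one ARIMA model per row of the denoised matrix and
+     # stacks the per-row forecasts; load_predict() reuses the models saved
+     # here instead of retraining.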
+     def predict(
+         self, n_steps: int, n_walkers: int = 1, name: str = "fourier_model", save: bool = True
+     ):
+         self.n_walkers = n_walkers
+         self.name = name
+         mov = self.mov
+
+         assert self.n_walkers <= mov, "n_walkers must be less than or equal to mov"
+
+         new_datapoints = []
+         for i in range(self.datapoints_.shape[0]):
+             super().__init__(self.datapoints_[i, :])
+             super().train(n_walkers, mov)
+             if save:
+                 super().save_model(str(i) + "_" + name)
+             y_pred_ = super().predict(n_steps)
+             new_datapoints.append(y_pred_)
+
+         new_datapoints = np.array(new_datapoints)
+         new_datapoints = np.reshape(new_datapoints, (len(new_datapoints), -1))
+
+         return new_datapoints
+
+     def load_predict(self, n_steps: int, name: str = "fourier_model"):
+         new_datapoints = []
+
+         for i in range(self.datapoints_.shape[0]):
+             super().__init__(self.datapoints_[i, :])
+             super().load_model(str(i) + "_" + name)
+             y_pred_ = super().predict(n_steps)
+             new_datapoints.append(y_pred_)
+
+         new_datapoints = np.array(new_datapoints)
+         new_datapoints = np.reshape(new_datapoints, (len(new_datapoints), -1))
+
+         return new_datapoints
+
+
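+ # In Arima below, p and q are given as ratios of the series length: for
+ # example, with 100 data points, p=0.1 yields int(0.1 * 100) = 10
+ # auto-regressive terms.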
+ class Arima(AbstractArima):
+     """A class that implements the (p, d, q) ARIMA model.
+
+     Parameters
+     ----------
+     datapoints : np.ndarray
+         A set of points to train the ARIMA model.
+     p : float
+         Number of auto-regressive terms (ratio). By default it is set to `1`.
+     d : int
+         Degree of differencing. By default it is set to `0`.
+     q : float
+         Number of forecast errors in the model (ratio). By default it is set to `0`.
+     noise : float
+         Amount of noise added during training.
+     tol : float
+         Tolerance for convergence checks.
+
+     Notes
+     -----
+     The values of `p` and `q` are scaled based on the length of `datapoints`;
+     `n_steps` (the number of steps to predict ahead) is set later by `predict`.
+     """
+
+     __slots__ = ["datapoints", "noise", "p", "d", "q", "tol", "theta_trained"]
+
+     def __init__(
+         self,
+         datapoints: np.ndarray,
+         p: float = 1,
+         d: int = 0,
+         q: float = 0,
+         noise: float = 0,
+         tol: float = 1e-5,
+     ):
+         """Initializes the ARIMA model with the given parameters.
+
+         Parameters
+         ----------
+         datapoints : np.ndarray
+             A set of points to train the ARIMA model.
+         p : float
+             Auto-regressive term (scaled by the length of the data).
+         d : int
+             Degree of differencing.
+         q : float
+             Moving-average term (scaled by the length of the data).
+         noise : float
+             Noise level for training.
+         tol : float
+             Tolerance for numerical convergence.
+         """
+         self.datapoints = datapoints
+         self.noise = noise
+         assert p > 0 and p <= 1, "p must be greater than 0 and less than or equal to 1"
+         self.p = int(p * len(datapoints))
+         assert d >= 0 and d <= 1, "d must be 0 or 1"
+         self.d = d
+         self.q = int(q * len(datapoints))
+         self.tol = tol
+
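+     # model() dispatches on (d, q): with d == 0 and q == 0 it reduces to the
+     # pure auto-regressive forward pass; otherwise it integrates the series
+     # (when d = 1) and/or adds a rescaled moving-average contribution.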
+     def model(self, datapoints: np.ndarray, theta: list, mode: bool = True):
+         """Computes the prior probability or prediction based on the ARIMA model.
+
+         Parameters
+         ----------
+         datapoints : np.ndarray
+             The input data used for modeling.
+         theta : list
+             Model parameters.
+         mode : bool
+             If True, computes in forward mode; otherwise in backward mode.
+
+         Returns
+         -------
+         y_vec : np.ndarray
+             Predicted values according to the ARIMA model.
+         """
+         datapoints = self.datapoints
+         noise = self.noise
+         self.theta_trained = theta
+
+         assert isinstance(self.d, int), "d must be 0 or 1"
+
+         if self.d != 0 or self.q != 0:
+             if self.d != 0:
+                 # Integrate (undo differencing) and rescale to the magnitude
+                 # of the original series
+                 y_sum = super().integrated(datapoints)
+                 norm_datapoints = np.linalg.norm(datapoints)
+                 norm_y_sum = np.linalg.norm(y_sum)
+                 if norm_y_sum != 0 and norm_datapoints != 0:
+                     y_sum = cal_average(
+                         np.abs(y_sum * (norm_datapoints / norm_y_sum)) * np.sign(datapoints), 0.05
+                     )
+             else:
+                 y_sum = datapoints.copy()
+
+             # Auto-regressive part over the last p points
+             y_sum_regr = y_sum[-self.p :]
+             y_regr_vec = super().forward(y_sum_regr, theta[0 : self.p], mode, 0)
+             if self.q != 0:
+                 # Moving-average part over the last q points, rescaled so it
+                 # cannot dominate the auto-regressive contribution
+                 y_sum_average = super().average(y_sum[-self.q :])
+                 y_vec_magnitude = np.linalg.norm(y_regr_vec.copy())
+                 y_sum_average_magnitude = np.linalg.norm(y_sum_average)
+
+                 if y_sum_average_magnitude > y_vec_magnitude:
+                     scaling_factor = y_vec_magnitude / y_sum_average_magnitude
+                     y_sum_average = y_sum_average * scaling_factor
+                 theta_mean = np.mean(theta[-self.q :])
+                 if abs(theta_mean) > 1:
+                     additional_scaling_factor = 1.0 - abs(theta_mean)
+                     y_sum_average = y_sum_average * additional_scaling_factor
+                 y_average_vec = super().forward(y_sum_average, theta[-self.q :], mode, 0)
+                 if mode:
+                     y_vec = y_regr_vec.copy()
+                     for i in reversed(range(y_average_vec.shape[0])):
+                         y_vec[i] += y_average_vec[i]
+                 else:
+                     y_vec = y_regr_vec + y_average_vec
+             else:
+                 y_vec = y_regr_vec
+             return y_vec
+         else:
+             return super().forward(datapoints, theta, mode, noise)
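+
+
+ if __name__ == "__main__":
+     # Minimal usage sketch (illustrative only): the series below is made up,
+     # and the walker settings are arbitrary.
+     t = np.linspace(0, 4 * np.pi, 100)
+     series = np.sin(t) + 0.1 * np.random.randn(100)
+
+     # Pure auto-regressive fit: p is a ratio, so p=0.1 uses 10 AR terms
+     arima = Arima(series, p=0.1, d=0, q=0)
+     arima.train(nwalkers=5, mov=100)
+     forecast = arima.predict(n_steps=5)
+     arima.summary()
+
+     # FourierRegression expects a 2-D array with one series per row
+     fr = FourierRegression(series.reshape(1, -1))
+     fr.fit(sigma=0, mov=100)
+     denoised_forecast = fr.predict(n_steps=5, n_walkers=5)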