likelihood 1.2.23__tar.gz → 1.2.24__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. {likelihood-1.2.23 → likelihood-1.2.24}/PKG-INFO +1 -1
  2. {likelihood-1.2.23 → likelihood-1.2.24}/likelihood/graph/nn.py +6 -5
  3. likelihood-1.2.24/likelihood/models/hmm.py +163 -0
  4. {likelihood-1.2.23 → likelihood-1.2.24}/likelihood/models/simulation.py +5 -6
  5. {likelihood-1.2.23 → likelihood-1.2.24}/likelihood.egg-info/PKG-INFO +1 -1
  6. {likelihood-1.2.23 → likelihood-1.2.24}/likelihood.egg-info/SOURCES.txt +1 -0
  7. {likelihood-1.2.23 → likelihood-1.2.24}/LICENSE +0 -0
  8. {likelihood-1.2.23 → likelihood-1.2.24}/README.md +0 -0
  9. {likelihood-1.2.23 → likelihood-1.2.24}/likelihood/__init__.py +0 -0
  10. {likelihood-1.2.23 → likelihood-1.2.24}/likelihood/graph/__init__.py +0 -0
  11. {likelihood-1.2.23 → likelihood-1.2.24}/likelihood/graph/graph.py +0 -0
  12. {likelihood-1.2.23 → likelihood-1.2.24}/likelihood/main.py +0 -0
  13. {likelihood-1.2.23 → likelihood-1.2.24}/likelihood/models/__init__.py +0 -0
  14. {likelihood-1.2.23 → likelihood-1.2.24}/likelihood/models/deep/__init__.py +0 -0
  15. {likelihood-1.2.23 → likelihood-1.2.24}/likelihood/models/deep/autoencoders.py +0 -0
  16. {likelihood-1.2.23 → likelihood-1.2.24}/likelihood/models/regression.py +0 -0
  17. {likelihood-1.2.23 → likelihood-1.2.24}/likelihood/models/utils.py +0 -0
  18. {likelihood-1.2.23 → likelihood-1.2.24}/likelihood/tools/__init__.py +0 -0
  19. {likelihood-1.2.23 → likelihood-1.2.24}/likelihood/tools/numeric_tools.py +0 -0
  20. {likelihood-1.2.23 → likelihood-1.2.24}/likelihood/tools/tools.py +0 -0
  21. {likelihood-1.2.23 → likelihood-1.2.24}/likelihood.egg-info/dependency_links.txt +0 -0
  22. {likelihood-1.2.23 → likelihood-1.2.24}/likelihood.egg-info/requires.txt +0 -0
  23. {likelihood-1.2.23 → likelihood-1.2.24}/likelihood.egg-info/top_level.txt +0 -0
  24. {likelihood-1.2.23 → likelihood-1.2.24}/setup.cfg +0 -0
  25. {likelihood-1.2.23 → likelihood-1.2.24}/setup.py +0 -0
{likelihood-1.2.23 → likelihood-1.2.24}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: likelihood
- Version: 1.2.23
+ Version: 1.2.24
  Summary: A package that performs the maximum likelihood algorithm.
  Home-page: https://github.com/jzsmoreno/likelihood/
  Author: J. A. Moreno-Guerra
{likelihood-1.2.23 → likelihood-1.2.24}/likelihood/graph/nn.py
@@ -1,6 +1,8 @@
  import os

  os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
+ # Suppress TensorFlow INFO logs
+ os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
  import logging
  import warnings
  from typing import List, Tuple
@@ -9,7 +11,6 @@ import numpy as np
  import pandas as pd
  import tensorflow as tf
  from IPython.display import clear_output
- from numpy import ndarray
  from pandas.core.frame import DataFrame
  from sklearn.metrics import f1_score
  from sklearn.model_selection import train_test_split
@@ -21,7 +22,7 @@ logging.getLogger("tensorflow").setLevel(logging.ERROR)
  tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)


- def compare_similarity(arr1: ndarray, arr2: ndarray) -> int:
+ def compare_similarity(arr1: np.ndarray, arr2: np.ndarray) -> int:
      """Compares the similarity between two arrays of categories.

      Parameters
@@ -44,9 +45,9 @@ def compare_similarity(arr1: ndarray, arr2: ndarray) -> int:
      return count


- def cal_adjency_matrix(
+ def cal_adjacency_matrix(
      df: DataFrame, exclude_subset: List[str] = [], sparse: bool = True, **kwargs
- ) -> Tuple[dict, ndarray]:
+ ) -> Tuple[dict, np.ndarray]:
      """Calculates the adjacency matrix for a given DataFrame.
      The adjacency matrix is a matrix that represents the similarity between each pair of categories.
      The similarity is calculated using the `compare_similarity` function.
@@ -133,7 +134,7 @@ class Data:
          target: str | None = None,
          exclude_subset: List[str] = [],
      ):
-         _, adjacency = cal_adjency_matrix(df, exclude_subset=exclude_subset, sparse=True)
+         _, adjacency = cal_adjacency_matrix(df, exclude_subset=exclude_subset, sparse=True)
          if target is not None:
              X = df.drop(columns=[target] + exclude_subset)
          else:
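
Aside from quieter TensorFlow logging and the switch to `np.ndarray` annotations, the notable change in `nn.py` is the rename of `cal_adjency_matrix` to `cal_adjacency_matrix`, which fixes the spelling but breaks imports of the old name. A minimal, hypothetical compatibility shim for downstream code that must run against both versions, assuming only what this diff shows (old name in <= 1.2.23, corrected name in 1.2.24):

try:
    # likelihood >= 1.2.24 exposes the corrected name
    from likelihood.graph.nn import cal_adjacency_matrix
except ImportError:
    # likelihood <= 1.2.23 only ships the misspelled name
    from likelihood.graph.nn import cal_adjency_matrix as cal_adjacency_matrix
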
likelihood-1.2.24/likelihood/models/hmm.py
@@ -0,0 +1,163 @@
+ import logging
+ import os
+ import pickle
+ from typing import List, Tuple
+
+ import numpy as np
+ from IPython.display import clear_output
+
+
+ class HMM:
+     def __init__(self, n_states: int, n_observations: int):
+         self.n_states = n_states
+         self.n_observations = n_observations
+
+         # Initialize parameters with random values
+         self.pi = np.random.dirichlet(np.ones(n_states), size=1)[0]
+         self.A = np.random.dirichlet(np.ones(n_states), size=n_states)
+         self.B = np.random.dirichlet(np.ones(n_observations), size=n_states)
+
+     def save_model(self, filename: str = "./hmm") -> None:
+         filename = filename if filename.endswith(".pkl") else filename + ".pkl"
+         with open(filename, "wb") as f:
+             pickle.dump(self, f)
+
+     @staticmethod
+     def load_model(filename: str = "./hmm") -> "HMM":
+         filename = filename + ".pkl" if not filename.endswith(".pkl") else filename
+         with open(filename, "rb") as f:
+             return pickle.load(f)
+
+     def forward(self, sequence: List[int]) -> np.ndarray:
+         T = len(sequence)
+         alpha = np.zeros((T, self.n_states))
+
+         # Add a small constant (smoothing) to avoid log(0)
+         epsilon = 1e-10  # Small value to avoid taking log(0)
+
+         # Initialization (log-space)
+         alpha[0] = np.log(self.pi + epsilon) + np.log(self.B[:, sequence[0]] + epsilon)
+         alpha[0] -= np.log(np.sum(np.exp(alpha[0])))  # Normalization (log-space)
+
+         # Recursion (log-space)
+         for t in range(1, T):
+             for i in range(self.n_states):
+                 alpha[t, i] = np.log(
+                     np.sum(np.exp(alpha[t - 1] + np.log(self.A[:, i] + epsilon)))
+                 ) + np.log(self.B[i, sequence[t]] + epsilon)
+             alpha[t] -= np.log(np.sum(np.exp(alpha[t])))  # Normalization
+
+         return alpha
+
+     def backward(self, sequence: List[int]) -> np.ndarray:
+         T = len(sequence)
+         beta = np.ones((T, self.n_states))
+
+         # Backward recursion
+         for t in range(T - 2, -1, -1):
+             for i in range(self.n_states):
+                 beta[t, i] = np.sum(self.A[i] * self.B[:, sequence[t + 1]] * beta[t + 1])
+
+         return beta
+
+     def viterbi(self, sequence: List[int]) -> np.ndarray:
+         T = len(sequence)
+         delta = np.zeros((T, self.n_states))
+         psi = np.zeros((T, self.n_states), dtype=int)
+
+         # Initialization
+         delta[0] = self.pi * self.B[:, sequence[0]]
+
+         # Recursion
+         for t in range(1, T):
+             for i in range(self.n_states):
+                 delta[t, i] = np.max(delta[t - 1] * self.A[:, i]) * self.B[i, sequence[t]]
+                 psi[t, i] = np.argmax(delta[t - 1] * self.A[:, i])
+
+         # Reconstruct the most probable path
+         state_sequence = np.zeros(T, dtype=int)
+         state_sequence[T - 1] = np.argmax(delta[T - 1])
+         for t in range(T - 2, -1, -1):
+             state_sequence[t] = psi[t + 1, state_sequence[t + 1]]
+
+         return state_sequence
+
+     def baum_welch(
+         self, sequences: List[List[int]], n_iterations: int, verbose: bool = False
+     ) -> None:
+         for iteration in range(n_iterations):
+             # Initialize accumulators
+             A_num = np.zeros((self.n_states, self.n_states))
+             B_num = np.zeros((self.n_states, self.n_observations))
+             pi_num = np.zeros(self.n_states)
+
+             for sequence in sequences:
+                 T = len(sequence)
+                 alpha = self.forward(sequence)
+                 beta = self.backward(sequence)
+
+                 # Update pi
+                 gamma = (alpha * beta) / np.sum(alpha * beta, axis=1, keepdims=True)
+                 pi_num += gamma[0]
+
+                 # Update A and B
+                 for t in range(T - 1):
+                     xi = np.zeros((self.n_states, self.n_states))
+                     denom = np.sum(alpha[t] * self.A * self.B[:, sequence[t + 1]] * beta[t + 1])
+
+                     for i in range(self.n_states):
+                         for j in range(self.n_states):
+                             xi[i, j] = (
+                                 alpha[t, i]
+                                 * self.A[i, j]
+                                 * self.B[j, sequence[t + 1]]
+                                 * beta[t + 1, j]
+                             ) / denom
+                         A_num[i] += xi[i]
+
+                     B_num[:, sequence[t]] += gamma[t]
+
+                 # For the last step of the sequence
+                 B_num[:, sequence[-1]] += gamma[-1]
+
+             # Normalize and update parameters
+             self.pi = pi_num / len(sequences)
+             self.A = A_num / np.sum(A_num, axis=1, keepdims=True)
+             self.B = B_num / np.sum(B_num, axis=1, keepdims=True)
+
+             # Logging parameters every 10 iterations
+             if iteration % 10 == 0 and verbose:
+                 os.system("cls" if os.name == "nt" else "clear")
+                 clear_output(wait=True)
+                 logging.info(f"Iteration {iteration}:")
+                 logging.info("Pi: %s", self.pi)
+                 logging.info("A:\n%s", self.A)
+                 logging.info("B:\n%s", self.B)
+
+     def decoding_accuracy(self, sequences: List[List[int]], true_states: List[List[int]]) -> float:
+         correct_predictions = 0
+         total_predictions = 0
+
+         for sequence, true_state in zip(sequences, true_states):
+             predicted_states = self.viterbi(sequence)
+             correct_predictions += np.sum(predicted_states == true_state)
+             total_predictions += len(sequence)
+
+         accuracy = (correct_predictions / total_predictions) * 100
+         return accuracy
+
+     def state_probabilities(self, sequence: List[int]) -> np.ndarray:
+         """
+         Returns the smoothed probabilities of the hidden states at each time step.
+         This is done by using both forward and backward probabilities.
+         """
+         alpha = self.forward(sequence)
+         beta = self.backward(sequence)
+
+         # Compute smoothed probabilities (gamma)
+         smoothed_probs = (alpha * beta) / np.sum(alpha * beta, axis=1, keepdims=True)
+
+         return smoothed_probs
+
+     def sequence_probability(self, sequence: List[int]) -> np.ndarray:
+         return self.state_probabilities(sequence)[-1]
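
The new `hmm.py` module adds a self-contained discrete hidden Markov model: forward/backward passes, Viterbi decoding, Baum-Welch training, and pickle-based persistence. A minimal usage sketch of the API exactly as added above; the state/observation counts and sequences are made up for illustration (note also that `forward` returns log-space values while `backward` stays in linear space, so quantities built from their product are on mixed scales):

from likelihood.models.hmm import HMM

# Toy setup: 2 hidden states, 3 observation symbols (illustrative values only)
sequences = [[0, 1, 2, 1, 0], [2, 2, 1, 0, 0]]

model = HMM(n_states=2, n_observations=3)
model.baum_welch(sequences, n_iterations=50, verbose=False)

path = model.viterbi(sequences[0])  # most probable hidden-state path
probs = model.state_probabilities(sequences[0])  # smoothed per-step probabilities

model.save_model("./my_hmm")  # writes ./my_hmm.pkl
restored = HMM.load_model("./my_hmm")
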
{likelihood-1.2.23 → likelihood-1.2.24}/likelihood/models/simulation.py
@@ -5,7 +5,6 @@ from typing import List, Tuple, Union
  import matplotlib.pyplot as plt
  import numpy as np
  import pandas as pd
- from numpy import ndarray
  from pandas.core.frame import DataFrame

  from likelihood.tools import DataScaler, FeatureSelection, OneHotEncoder, cdf, check_nan_inf
@@ -66,12 +65,12 @@ class SimulationEngine(FeatureSelection):

          super().__init__(**kwargs)

-     def predict(self, df: DataFrame, column: str) -> ndarray | list:
+     def predict(self, df: DataFrame, column: str) -> np.ndarray | list:
          # Let us assign the dictionary entries corresponding to the column
          w, quick_encoder, names_cols, dfe, numeric_dict = self.w_dict[column]

          df = df[names_cols].copy()
-         # Change the scale of the dataframe
+         # Change the scale of the DataFrame
          dataset = self.df.copy()
          dataset.drop(columns=column, inplace=True)
          numeric_df = dataset.select_dtypes(include="number")
@@ -85,7 +84,7 @@ class SimulationEngine(FeatureSelection):
          for col in numeric_df.columns:
              df[col] = numeric_df[col].values

-         # Encoding the datadrame
+         # Encoding the DataFrame
          for num, colname in enumerate(dfe._encode_columns):
              if df[colname].dtype == "object":
                  encode_dict = dfe.encoding_list[num]
@@ -93,7 +92,7 @@ class SimulationEngine(FeatureSelection):
                      dfe._code_transformation_to, dictionary_list=encode_dict
                  )

-         # PREDICTION
+         # Prediction
          y = df.to_numpy() @ w

          # Categorical column
@@ -113,7 +112,7 @@

          return y[:]

-     def _encode(self, df: DataFrame) -> ndarray | list:
+     def _encode(self, df: DataFrame) -> np.ndarray | list:
          df = df.copy()
          column = df.columns[0]
          frec = df[column].value_counts() / len(df)
{likelihood-1.2.23 → likelihood-1.2.24}/likelihood.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: likelihood
- Version: 1.2.23
+ Version: 1.2.24
  Summary: A package that performs the maximum likelihood algorithm.
  Home-page: https://github.com/jzsmoreno/likelihood/
  Author: J. A. Moreno-Guerra
{likelihood-1.2.23 → likelihood-1.2.24}/likelihood.egg-info/SOURCES.txt
@@ -12,6 +12,7 @@ likelihood/graph/__init__.py
  likelihood/graph/graph.py
  likelihood/graph/nn.py
  likelihood/models/__init__.py
+ likelihood/models/hmm.py
  likelihood/models/regression.py
  likelihood/models/simulation.py
  likelihood/models/utils.py