likelihood-1.4.1-py3-none-any.whl → likelihood-1.5.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
likelihood/tools/tools.py CHANGED
@@ -169,7 +169,6 @@ def generate_feature_yaml(
     return feature_info


-# a function that calculates the percentage of missing values per column is defined
 def cal_missing_values(df: DataFrame) -> None:
     """Calculate the percentage of missing (`NaN`/`NaT`) values per column in a dataframe.

@@ -180,8 +179,7 @@ def cal_missing_values(df: DataFrame) -> None:

     Returns
     -------
-    `None`
-        Prints out a table with columns as index and percentages of missing values as data.
+    `None` : Prints out a table with columns as index and percentages of missing values as data.
     """

     col = df.columns
@@ -226,7 +224,6 @@ def cdf(
     cdf_values = np.cumsum(x) / np.sum(x)
     sorted_x = np.sort(x)

-    # Calculate the CDF or inverse CDF (quantile function)
     probabilities = np.linspace(0, 1, len(sorted_x))

     if inv:
@@ -281,7 +278,6 @@ def calculate_probability(x: np.ndarray, points: int = 1, cond: bool = True) ->
     fit, _, sorted_x = cdf(x)
     p = fit(x)

-    # Validate probability values
     if cond:
         prob_value = np.prod(p[-points])
         message = "product"
@@ -304,7 +300,7 @@ class CorrelationBase:

     def __init__(self, x: np.ndarray, y: Union[np.ndarray, None] = None):
         self.x = x
-        self.y = y if y is not None else x  # Default to autocorrelation if y is not provided
+        self.y = y if y is not None else x
         self._compute_correlation()
         self.z = self.result[self.result.size // 2 :]
         self.z /= np.abs(self.z).max()
@@ -395,7 +391,6 @@ def fft_denoise(
     denoised_dataset = np.zeros_like(dataset)
     periods = np.zeros(num_samples)

-    # Precompute values that do not change within the loop
     freq = (1 / n_points) * np.arange(n_points)
     L = np.arange(1, np.floor(n_points / 2), dtype=int)

@@ -405,15 +400,12 @@ def fft_denoise(
         threshold = np.mean(PSD) + sigma * np.std(PSD)
         indices = PSD > threshold

-        # Zero out all others in frequency domain
         PSDclean = PSD * indices
         fhat_cleaned = fhat * indices

-        # Inverse FFT for filtered time signal
         denoised_signal = np.fft.ifft(fhat_cleaned).real
         denoised_dataset[i, :] = denoised_signal

-        # Calculate the period of the signal
         peak_index = L[np.argmax(np.abs(fhat[L]))]
         periods[i] = 1 / (2 * freq[peak_index])

@@ -430,33 +422,27 @@ def get_period(dataset: np.ndarray) -> float:
     Parameters
     ----------
     dataset : `ndarray`
-        the `dataset` describing the function over which the period is calculated
+        the `dataset` describing the function over which the period is calculated.

     Returns
     -------
     period : `float`
-        period of the function described by the `dataset`
+        period of the function described by the `dataset`.
     """
     n = dataset.size

-    # Ensure there are enough points for FFT analysis
     if n < 2:
         raise ValueError("Dataset must contain at least two points.")

-    # Compute the FFT and PSD
-    fhat = np.fft.rfft(dataset)  # Use rfft for real-valued input to save computation
-    freqs = np.fft.rfftfreq(n)  # Get only positive frequencies
+    fhat = np.fft.rfft(dataset)
+    freqs = np.fft.rfftfreq(n)

-    # Calculate the Power Spectral Density (PSD)
     PSD = np.abs(fhat) ** 2 / n

-    # Remove the first frequency component (DC component)
     PSD[0] = 0

-    # Find the index of the maximum PSD value, excluding the DC component
     max_psd_index = np.argmax(PSD)

-    # Calculate the period based on the corresponding frequency
     dominant_freq = freqs[max_psd_index]
     if dominant_freq == 0:
         raise ValueError("No significant periodic component found in the dataset.")
@@ -472,12 +458,12 @@ def sigmoide_inv(y: float) -> float:
     Parameters
     ----------
     y : `float`
-        the number to evaluate the function
+        the number to evaluate the function.

     Returns
     -------
     `float`
-        value of evaluated function
+        value of evaluated function.
     """

     return math.log(y / (1 - y))
@@ -540,6 +526,10 @@ class LogisticRegression:
         datapoints : `np.array`
            An array containing the values of the independent variable.

+        Returns
+        -------
+        `np.array`
+
         """
         sig = np.vectorize(sigmoide)

@@ -558,8 +548,6 @@ class LogisticRegression:
         -------
         importance : `np.array`
             An array containing the importance of each feature.
-
-
         """
         if print_important_features:
             for i, a in enumerate(self.importance):
@@ -589,9 +577,7 @@ class LinearRegression:

         Returns
         -------
-        importance : `np.array`
-            An array containing the importance of each feature.
-
+        `None` : The function doesn't return anything.
         """

         self.X = dataset
@@ -635,8 +621,6 @@ class LinearRegression:
         -------
         importance : `np.array`
             An array containing the importance of each feature.
-
-
         """
         if print_important_features:
             for i, a in enumerate(self.importance):
@@ -658,7 +642,6 @@ def cal_average(y: np.ndarray, alpha: float = 1):
     -------
     average : `float`
         The average of the data.
-
     """

     n = int(alpha * len(y))
@@ -799,7 +782,6 @@ def mean_square_error(y_true: np.ndarray, y_pred: np.ndarray, print_error: bool
     -------
     RMSE : `float`
         The Root Mean Squared Error.
-
     """
     if print_error:
         print(f"The RMSE is {np.sqrt(np.mean((y_true - y_pred)**2))}")
@@ -975,7 +957,6 @@ class PerformanceMeasures:
     def __init__(self) -> None:
         pass

-    # Performance measure Res_T
     def f_mean(self, y_true: np.ndarray, y_pred: np.ndarray, labels: List[int]) -> float:
         F_vec = self._f1_score(y_true, y_pred, labels)
         mean_f_measure = np.mean(F_vec)
@@ -988,7 +969,6 @@ class PerformanceMeasures:

         return mean_f_measure

-    # Performance measure Res_P
     def resp(self, y_true: np.ndarray, y_pred: np.ndarray, labels: List[int]) -> float:
         T_C = len(y_true)
         sum1, sum2 = 0.0, 0.0
@@ -999,7 +979,7 @@ class PerformanceMeasures:
             sum1 += (1 - class_instances) * F_vec[label_idx]
             sum2 += 1 - class_instances

-        res_p = sum1 / sum2 if sum2 != 0 else 0.0  # Avoid division by zero
+        res_p = sum1 / sum2 if sum2 != 0 else 0.0
         print(f"Metric Res_p -> {res_p}")

         return res_p
@@ -1016,7 +996,6 @@ class PerformanceMeasures:
         sum_cols = np.sum(count_mat, axis=0)
         sum_rows = np.sum(count_mat, axis=1)

-        # Avoid division by zero
         precision = np.divide(
             count_mat.diagonal(), sum_cols, out=np.zeros_like(sum_cols), where=sum_cols != 0
         )
@@ -1028,7 +1007,6 @@ class PerformanceMeasures:

         return f1_vec

-    # Returns confusion matrix of predictions
     def _confu_mat(self, y_true: np.ndarray, y_pred: np.ndarray, labels: List[int]) -> np.ndarray:
         num_classes = len(labels)
         label_mapping = {label: idx for idx, label in enumerate(labels)}
@@ -1056,21 +1034,18 @@ class OneHotEncoder:
         self.x = x

         if not isinstance(self.x, np.ndarray):
-            self.x = np.array(self.x)  # If not numpy array then convert it
+            self.x = np.array(self.x)

-        y = np.zeros(
-            (self.x.size, self.x.max() + 1)
-        )  # Build matrix of (size num of entries) x (max value + 1)
+        y = np.zeros((self.x.size, self.x.max() + 1))

-        y[np.arange(self.x.size), self.x] = 1  # Label with ones
+        y[np.arange(self.x.size), self.x] = 1

         return y

     def decode(self, x: np.ndarray | list) -> np.ndarray:
         if not isinstance(x, np.ndarray):
-            x = np.array(x)  # If not numpy array then convert it
+            x = np.array(x)

-        # We return the max values of each row
         y = np.argmax(x, axis=1)

         return y
@@ -1107,13 +1082,11 @@ class FeatureSelection:
         `str`
             A string representation of the directed graph.
         """
-        # Assign and clean dataset
         self._load_data(dataset)

         curr_dataset = self.X
         columns = list(curr_dataset.columns)

-        # We construct string from causal_graph
         feature_string = " digraph { "
         for column in columns:
             feature_string += column + "; "
@@ -1125,85 +1098,53 @@ class FeatureSelection:
         numeric_df = pd.DataFrame(numeric_scaled.T, columns=numeric_df.columns)
         curr_dataset[numeric_df.columns] = numeric_df

-        # We construct dictionary to save index for scaling
         numeric_dict = dict(zip(list(numeric_df.columns), range(len(list(numeric_df.columns)))))

-        # Iterate over all the columns to obtain their importances.
         for index_column, column in enumerate(columns):
-
-            # Variable to predict
             Y = curr_dataset[column]
-
-            # We check whether it is numerical or categorical.
             column_type = Y.dtype
             if column_type != "object":
-                # Linear regression model
                 Model = LinearRegression()
-
-                # Auxiliary dataset without the column in question
                 X_aux = curr_dataset.drop([column], axis=1)
-
-                # We encode
                 dfe = DataFrameEncoder(X_aux)
                 encoded_df = dfe.encode(save_mode=False)
-                # We train
                 Model.fit(encoded_df.to_numpy().T, Y.to_numpy().T)
-                # We obtain importance
                 importance = Model.get_importances()
                 w = Model.w
             else:
                 Model = LogisticRegression()
                 num_unique_entries = curr_dataset[column].nunique()
-
                 quick_encoder = DataFrameEncoder(Y.to_frame())
                 encoded_Y = quick_encoder.encode(save_mode=False)
-
-                # Mapping to one-hot
                 one_hot = OneHotEncoder()
                 train_y = one_hot.encode(encoded_Y[column])
-                # PASSING 0 -> 0.5 and 1 -> 0.73105
                 for i in range(len(train_y)):
                     for j in range(num_unique_entries):
                         if train_y[i][j] == 1.0:
                             train_y[i][j] = 0.73105
                         else:
                             train_y[i][j] = 0.5
-
-                # Delete the column in question
                 X_aux = curr_dataset.drop([column], axis=1)
-
-                # We encode
                 dfe = DataFrameEncoder(X_aux)
                 encoded_df = dfe.encode(save_mode=False)
-
-                # We train
                 Model.fit(encoded_df.to_numpy().T, train_y)
-
-                # We obtain importance
                 importance = Model.get_importances()
                 w = Model.w
-
-            # We obtain the $n$ most important ones
             top_n_indexes = sorted(
                 range(len(importance)), key=lambda i: importance[i], reverse=True
             )[:n_importances]

-            # We build the string for the column in question
             names_cols = list(X_aux.columns)
-            # We store the indices, values and column names in a list of tuples.
             features_imp_node = [
                 (names_cols[top_n_indexes[i]], importance[top_n_indexes[i]])
                 for i in range(n_importances)
             ]
-            # We store w's for predictions

             if column_type != "object":
                 self.w_dict[column] = (w, None, names_cols, dfe, numeric_dict)
             else:
                 self.w_dict[column] = (w, quick_encoder, names_cols, dfe, numeric_dict)
-            # Add to general list
             self.all_features_imp_graph.append((column, features_imp_node))
-            # We format it
             for i in top_n_indexes:
                 feature_string += names_cols[i] + " -> "

@@ -1212,10 +1153,8 @@ class FeatureSelection:
         return feature_string + "} "

     def _load_data(self, dataset: DataFrame):
-        # Assign data and clean dataset of unneeded columns

         if len(self.not_features) > 0:
-            # We remove unnecessary columns
             self.X = dataset.drop(columns=self.not_features)

         else:
@@ -1232,11 +1171,15 @@ def check_nan_inf(df: DataFrame) -> DataFrame:
    """
    Checks for NaN and Inf values in the DataFrame. If any are found, they will be removed.

-    Parameters:
-        df (DataFrame): The input DataFrame to be checked.
+    Parameters
+    ----------
+    df : DataFrame
+        The input DataFrame to be checked.

-    Returns:
-        DataFrame: A new DataFrame with NaN and Inf values removed.
+    Returns
+    ----------
+    DataFrame
+        A new DataFrame with NaN and Inf values removed.
    """

    nan_values = df.isnull().values.any()
@@ -1272,7 +1215,6 @@ if __name__ == "__main__":
     print(helper.f_mean(y_true, y_pred, labels))

     # Use DataFrameEncoder
-    # Create a DataFrame
     data = {"Name": ["John", "Alice", "Bob", "Jafet", "Beto"], "Age": [25, 30, 35, 21, 28]}
     import pandas as pd

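For orientation only, here is a minimal usage sketch (not part of the diff) exercising two of the helpers touched above, cal_missing_values and get_period. The import path likelihood.tools.tools is assumed from the RECORD listing further below, and the example values are hypothetical.

import numpy as np
import pandas as pd

# Assumed import path; the RECORD section below lists likelihood/tools/tools.py.
from likelihood.tools.tools import cal_missing_values, get_period

# cal_missing_values prints the percentage of NaN/NaT values per column and returns None.
df = pd.DataFrame({"age": [25.0, None, 35.0], "income": [50000.0, 62000.0, None]})
cal_missing_values(df)

# get_period estimates the dominant period of a sampled signal from its rfft-based
# power spectrum (see the hunk above); it raises ValueError for fewer than two points
# or when no significant periodic component is found.
signal = np.sin(2 * np.pi * np.arange(1000) / 50.0)  # sinusoid with a 50-sample period
print(get_period(signal))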
likelihood-1.4.1.dist-info/METADATA → likelihood-1.5.0.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: likelihood
-Version: 1.4.1
+Version: 1.5.0
 Summary: A package that performs the maximum likelihood algorithm.
 Home-page: https://github.com/jzsmoreno/likelihood/
 Author: J. A. Moreno-Guerra
likelihood-1.4.1.dist-info/RECORD → likelihood-1.5.0.dist-info/RECORD
@@ -2,20 +2,21 @@ likelihood/__init__.py,sha256=5C0hapdsk85XZhN_rssRAEFpkRRuKNtj6cyRbqD2_gM,994
 likelihood/main.py,sha256=fcCkGOOWKjfvw2tLVqjuKPV8t0rVCIT9FlbYcOv4EYo,7974
 likelihood/graph/__init__.py,sha256=6TuFDfmXTwpLyHl7_KqBfdzW6zqHjGzIFvymjFPlvjI,21
 likelihood/graph/graph.py,sha256=bLrNMvIh7GOTdPTwnNss8oPZ7cbSHQScAsH_ttmVUK0,3294
-likelihood/graph/nn.py,sha256=MD2M-KgQnrlHg3iS42vrdOnD51-GRk3CJ5CCMQ0DNWI,10763
+likelihood/graph/nn.py,sha256=EaMmboKriCFnkP48_HLGRAsOZSWxwUlMG0WDGZ4ey1o,11035
 likelihood/models/__init__.py,sha256=e6nB4w47w0Q9DrAFeP3OcUgcoHOtf7Il4mBhgf4AARg,52
 likelihood/models/hmm.py,sha256=0s0gFySH1u4NjRaZDxiZ8oeTaFhFrw1x0GJxwy3dFrA,6253
 likelihood/models/regression.py,sha256=9cakyGlJCEO6WfpoKLh3GxdXQeQp7cUvJIkQ5odT0TA,9404
 likelihood/models/simulation.py,sha256=LFyE_szo7sDukviMLeg_6RoyAaI7yMXUy8f4mDOrGoc,8460
 likelihood/models/utils.py,sha256=dvigPi_hxcs5ntfHr7Y1JvP5ULtMW3kkN0nJpS4orE8,1319
 likelihood/models/deep/__init__.py,sha256=-KIPippVaMqgG8mEgYjNxYQdqOUcFhUuKhbVe8TTCfo,28
-likelihood/models/deep/autoencoders.py,sha256=O-H5KLmJvYjuE-b6l97esruihK6djocgxbkO2N1X2RM,39306
+likelihood/models/deep/autoencoders.py,sha256=0EIZwDNlZ9NCfQbhQ_KdXkkRwIjUEU-jk0l0u-J1wmA,44212
 likelihood/tools/__init__.py,sha256=N1IhMDzacsGQT2MIYBMBC0zTxes78vC_0gGrwkuPgmg,78
-likelihood/tools/models_tools.py,sha256=bjwoBlDeW1fUi58yJsuKcaTUTgWhOCNsc24_ESYI3BI,3502
+likelihood/tools/figures.py,sha256=waF0NHIMrctCmaLhcuz5DMcXyRKynmn6aG0XITYCTLc,10940
+likelihood/tools/models_tools.py,sha256=c3-vac-1MYSarYDtfR6XfVC7X_WY9auS7y2_3Z973IQ,8875
 likelihood/tools/numeric_tools.py,sha256=FA44kbiAcxcquz1el_g3Pqsp5ii8XFkAIrsMs5bGkj0,11445
-likelihood/tools/tools.py,sha256=6JLZBHxc4f1lJfw4aBwdS2s16EpydFNqLZF73I7wddQ,44412
-likelihood-1.4.1.dist-info/LICENSE,sha256=XWHWt9egYEUHGPTnlcZfJKLPmysacOwdiLj_-J7Z9ew,1066
-likelihood-1.4.1.dist-info/METADATA,sha256=6otKXhthH5ZSUvYfcghD6CaC1skWZ0FBouXsGXuJfZw,2822
-likelihood-1.4.1.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
-likelihood-1.4.1.dist-info/top_level.txt,sha256=KDiBLr870YTxqLFqObTOSrTK10uw8dFsITSNLlte3PA,11
-likelihood-1.4.1.dist-info/RECORD,,
+likelihood/tools/tools.py,sha256=SePaBg-gP29rt5SR2xhqNNQLu7_m0Wner5y_XzdSdpc,42031
+likelihood-1.5.0.dist-info/LICENSE,sha256=XWHWt9egYEUHGPTnlcZfJKLPmysacOwdiLj_-J7Z9ew,1066
+likelihood-1.5.0.dist-info/METADATA,sha256=zTpqZ3w7y_vWY2dqQH7JSfROIkC8dbRcLn2LSCAQGc4,2822
+likelihood-1.5.0.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
+likelihood-1.5.0.dist-info/top_level.txt,sha256=KDiBLr870YTxqLFqObTOSrTK10uw8dFsITSNLlte3PA,11
+likelihood-1.5.0.dist-info/RECORD,,
likelihood-1.4.1.dist-info/WHEEL → likelihood-1.5.0.dist-info/WHEEL
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (75.8.2)
+Generator: setuptools (76.0.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
