likelihood-1.4.1-py3-none-any.whl → likelihood-1.5.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,11 +3,148 @@ import os
 
  import networkx as nx
  import pandas as pd
+ from pandas.core.frame import DataFrame
 
  os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
  logging.getLogger("tensorflow").setLevel(logging.ERROR)
 
+ import sys
+ import warnings
+ from functools import wraps
+ from typing import Dict
+
+ import numpy as np
  import tensorflow as tf
+ from pandas.core.frame import DataFrame
+
+ from .figures import *
+
+
+ class suppress_prints:
+     def __enter__(self):
+         self.original_stdout = sys.stdout
+         sys.stdout = open(os.devnull, "w")
+
+     def __exit__(self, exc_type, exc_value, traceback):
+         sys.stdout.close()
+         sys.stdout = self.original_stdout
+
+
+ def suppress_warnings(func):
+     @wraps(func)
+     def wrapper(*args, **kwargs):
+         with warnings.catch_warnings():
+             warnings.simplefilter("ignore")
+             return func(*args, **kwargs)
+
+     return wrapper
+
+
+ def remove_collinearity(df: DataFrame, threshold: float = 0.9):
+     """
+     Removes highly collinear features from the DataFrame based on a correlation threshold.
+
+     This function calculates the correlation matrix of the DataFrame and removes columns
+     that are highly correlated with any other column in the DataFrame. It uses an absolute
+     correlation value greater than the specified threshold to identify which columns to drop.
+
+     Parameters
+     ----------
+     df : `DataFrame`
+         The input DataFrame containing numerical data.
+     threshold : `float`
+         The correlation threshold above which features will be removed. Default is `0.9`.
+
+     Returns
+     ----------
+     DataFrame: A DataFrame with highly collinear features removed.
+     """
+     corr_matrix = df.corr().abs()
+     upper_triangle = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
+     to_drop = [
+         column for column in upper_triangle.columns if any(upper_triangle[column] > threshold)
+     ]
+     df_reduced = df.drop(columns=to_drop)
+
+     return df_reduced
+
+
+ def train_and_insights(
+     x_data: np.ndarray,
+     y_act: np.ndarray,
+     model: tf.keras.Model,
+     patience: int = 3,
+     reg: bool = False,
+     frac: float = 1.0,
+     **kwargs: Optional[Dict],
+ ) -> tf.keras.Model:
+     """
+     Train a Keras model and provide insights on the training and validation metrics.
+
+     Parameters
+     ----------
+     x_data : `np.ndarray`
+         Input data for training the model.
+     y_act : `np.ndarray`
+         Actual labels corresponding to x_data.
+     model : `tf.keras.Model`
+         The Keras model to train.
+     patience : `int`
+         The patience parameter for early stopping callback (default is 3).
+     reg : `bool`
+         Flag to determine if residual analysis should be performed (default is `False`).
+     frac : `float`
+         Fraction of data to use (default is 1.0).
+
+     Keyword Arguments:
+     ----------
+         Additional keyword arguments passed to the `model.fit` function, such as validation split and callbacks.
+
+     Returns
+     ----------
+     `tf.keras.Model`
+         The trained model after fitting.
+     """
+
+     validation_split = kwargs.get("validation_split", 0.2)
+     callback = kwargs.get(
+         "callback", [tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=patience)]
+     )
+
+     for key in ["validation_split", "callback"]:
+         if key in kwargs:
+             del kwargs[key]
+
+     history = model.fit(
+         x_data,
+         y_act,
+         validation_split=validation_split,
+         verbose=False,
+         callbacks=callback,
+         **kwargs,
+     )
+
+     hist = pd.DataFrame(history.history)
+     hist["epoch"] = history.epoch
+
+     columns = hist.columns
+     train_err, train_metric = columns[0], columns[1]
+     val_err, val_metric = columns[2], columns[3]
+     train_err, val_err = hist[train_err].values, hist[val_err].values
+
+     with suppress_prints():
+         n = int(len(x_data) * frac)
+         y_pred = model.predict(x_data[:n])
+         y_act = y_act[:n]
+
+     if reg:
+         residual(y_act, y_pred)
+         residual_hist(y_act, y_pred)
+         act_pred(y_act, y_pred)
+
+     loss_curve(hist["epoch"].values, train_err, val_err)
+
+     return model
 
 
  @tf.keras.utils.register_keras_serializable(package="Custom", name="LoRALayer")
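For orientation, here is a minimal, hypothetical usage sketch of the newly added remove_collinearity helper (not part of the diff). The DataFrame below is invented, and the function is assumed to be importable from the module shown in this hunk.

# Hypothetical usage sketch of remove_collinearity; the data is invented.
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
a = rng.normal(size=100)
df = pd.DataFrame(
    {
        "a": a,
        "b": 2 * a + rng.normal(scale=0.01, size=100),  # near-duplicate of "a"
        "c": rng.normal(size=100),
    }
)

reduced = remove_collinearity(df, threshold=0.9)
print(reduced.columns.tolist())  # "b" correlates with "a" above 0.9 and is dropped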
@@ -58,16 +195,31 @@ def apply_lora(model, rank=4):
      return new_model
 
 
- def graph_metrics(adj_matrix, eigenvector_threshold=1e-6):
+ def graph_metrics(adj_matrix: np.ndarray, eigenvector_threshold: float = 1e-6) -> DataFrame:
      """
-     This function calculates the following graph metrics using the adjacency matrix:
-     1. Degree Centrality
-     2. Clustering Coefficient
-     3. Eigenvector Centrality
-     4. Degree
-     5. Betweenness Centrality
-     6. Closeness Centrality
-     7. Assortativity
+     Calculate various graph metrics based on the given adjacency matrix and return them in a single DataFrame.
+
+     Parameters
+     ----------
+     adj_matrix : `np.ndarray`
+         The adjacency matrix representing the graph, where each element denotes the presence/weight of an edge between nodes.
+     eigenvector_threshold : `float`
+         A threshold for the eigenvector centrality calculation, used to determine the cutoff for small eigenvalues. Default is `1e-6`.
+
+     Returns
+     ----------
+     DataFrame : A DataFrame containing the following graph metrics as columns.
+         - `Degree Centrality`: Degree centrality values for each node, indicating the number of direct connections each node has.
+         - `Clustering Coefficient`: Clustering coefficient values for each node, representing the degree to which nodes cluster together.
+         - `Eigenvector Centrality`: Eigenvector centrality values, indicating the influence of a node in the graph based on the eigenvectors of the adjacency matrix.
+         - `Degree`: The degree of each node, representing the number of edges connected to each node.
+         - `Betweenness Centrality`: Betweenness centrality values, representing the extent to which a node lies on the shortest paths between other nodes.
+         - `Closeness Centrality`: Closeness centrality values, indicating the inverse of the average shortest path distance from a node to all other nodes in the graph.
+         - `Assortativity`: The assortativity coefficient of the graph, measuring the tendency of nodes to connect to similar nodes.
+
+     Notes
+     ----------
+     The returned DataFrame will have one row for each node and one column for each of the computed metrics.
      """
      adj_matrix = adj_matrix.astype(int)
      G = nx.from_numpy_array(adj_matrix)
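The updated graph_metrics signature can be exercised as in the sketch below (not part of the diff); the adjacency matrix is invented, and only the call pattern and return type are taken from the docstring above.

# Hypothetical usage sketch of graph_metrics; the adjacency matrix is invented.
import numpy as np

# A triangle (nodes 0-2) plus one pendant node (3), as a symmetric 0/1 adjacency matrix.
adj = np.array(
    [
        [0, 1, 1, 0],
        [1, 0, 1, 0],
        [1, 1, 0, 1],
        [0, 0, 1, 0],
    ]
)

metrics_df = graph_metrics(adj)  # one row per node, one column per metric listed above
print(metrics_df.head())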
@@ -345,6 +345,27 @@ def gauss_elimination(A: ndarray | list, pr: int = 2) -> ndarray:
      return X
 
 
+ def find_multiples(target: int) -> tuple[int, int] | None:
+     """Find two factors of a given target number.
+
+     Parameters
+     ----------
+     target : int
+         The target number to find factors for.
+
+     Returns
+     -------
+     tuple[int, int] | None
+         A tuple containing two factors of the target number.
+         Returns None if no factors are found.
+     """
+     for i in range(2, target + 1):
+         if target % i == 0:
+             factor = target // i
+             return i, factor
+     return None
+
+
  # Example usage:
  if __name__ == "__main__":
      import pandas as pd
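A quick sanity check of the new find_multiples helper, traced against the loop above (illustrative only, not part of the diff):

# Hypothetical usage sketch of find_multiples.
print(find_multiples(12))  # (2, 6): 2 is the smallest divisor >= 2, and 12 // 2 == 6
print(find_multiples(7))   # (7, 1): for a prime the loop only succeeds at i == target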
likelihood/tools/tools.py CHANGED
@@ -169,7 +169,6 @@ def generate_feature_yaml(
      return feature_info
 
 
- # a function that calculates the percentage of missing values per column is defined
  def cal_missing_values(df: DataFrame) -> None:
      """Calculate the percentage of missing (`NaN`/`NaT`) values per column in a dataframe.
 
@@ -180,8 +179,7 @@ def cal_missing_values(df: DataFrame) -> None:
 
      Returns
      -------
-     `None`
-         Prints out a table with columns as index and percentages of missing values as data.
+     `None` : Prints out a table with columns as index and percentages of missing values as data.
      """
 
      col = df.columns
@@ -226,7 +224,6 @@ def cdf(
      cdf_values = np.cumsum(x) / np.sum(x)
      sorted_x = np.sort(x)
 
-     # Calculate the CDF or inverse CDF (quantile function)
      probabilities = np.linspace(0, 1, len(sorted_x))
 
      if inv:
@@ -281,7 +278,6 @@ def calculate_probability(x: np.ndarray, points: int = 1, cond: bool = True) ->
      fit, _, sorted_x = cdf(x)
      p = fit(x)
 
-     # Validate probability values
      if cond:
          prob_value = np.prod(p[-points])
          message = "product"
@@ -304,7 +300,7 @@ class CorrelationBase:
 
      def __init__(self, x: np.ndarray, y: Union[np.ndarray, None] = None):
          self.x = x
-         self.y = y if y is not None else x  # Default to autocorrelation if y is not provided
+         self.y = y if y is not None else x
          self._compute_correlation()
          self.z = self.result[self.result.size // 2 :]
          self.z /= np.abs(self.z).max()
@@ -395,7 +391,6 @@ def fft_denoise(
      denoised_dataset = np.zeros_like(dataset)
      periods = np.zeros(num_samples)
 
-     # Precompute values that do not change within the loop
      freq = (1 / n_points) * np.arange(n_points)
      L = np.arange(1, np.floor(n_points / 2), dtype=int)
 
@@ -405,15 +400,12 @@
          threshold = np.mean(PSD) + sigma * np.std(PSD)
          indices = PSD > threshold
 
-         # Zero out all others in frequency domain
          PSDclean = PSD * indices
          fhat_cleaned = fhat * indices
 
-         # Inverse FFT for filtered time signal
          denoised_signal = np.fft.ifft(fhat_cleaned).real
          denoised_dataset[i, :] = denoised_signal
 
-         # Calculate the period of the signal
          peak_index = L[np.argmax(np.abs(fhat[L]))]
          periods[i] = 1 / (2 * freq[peak_index])
 
@@ -430,33 +422,27 @@ def get_period(dataset: np.ndarray) -> float:
      Parameters
      ----------
      dataset : `ndarray`
-         the `dataset` describing the function over which the period is calculated
+         the `dataset` describing the function over which the period is calculated.
 
      Returns
      -------
      period : `float`
-         period of the function described by the `dataset`
+         period of the function described by the `dataset`.
      """
      n = dataset.size
 
-     # Ensure there are enough points for FFT analysis
      if n < 2:
          raise ValueError("Dataset must contain at least two points.")
 
-     # Compute the FFT and PSD
-     fhat = np.fft.rfft(dataset)  # Use rfft for real-valued input to save computation
-     freqs = np.fft.rfftfreq(n)  # Get only positive frequencies
+     fhat = np.fft.rfft(dataset)
+     freqs = np.fft.rfftfreq(n)
 
-     # Calculate the Power Spectral Density (PSD)
      PSD = np.abs(fhat) ** 2 / n
 
-     # Remove the first frequency component (DC component)
      PSD[0] = 0
 
-     # Find the index of the maximum PSD value, excluding the DC component
      max_psd_index = np.argmax(PSD)
 
-     # Calculate the period based on the corresponding frequency
      dominant_freq = freqs[max_psd_index]
      if dominant_freq == 0:
          raise ValueError("No significant periodic component found in the dataset.")
@@ -472,12 +458,12 @@ def sigmoide_inv(y: float) -> float:
      Parameters
      ----------
      y : `float`
-         the number to evaluate the function
+         the number to evaluate the function.
 
      Returns
      -------
      `float`
-         value of evaluated function
+         value of evaluated function.
      """
 
      return math.log(y / (1 - y))
@@ -540,6 +526,10 @@ class LogisticRegression:
          datapoints : `np.array`
              An array containing the values of the independent variable.
 
+         Returns
+         -------
+         `np.array`
+
          """
          sig = np.vectorize(sigmoide)
 
@@ -558,8 +548,6 @@ class LogisticRegression:
          -------
          importance : `np.array`
              An array containing the importance of each feature.
-
-
          """
          if print_important_features:
              for i, a in enumerate(self.importance):
@@ -589,9 +577,7 @@ class LinearRegression:
 
          Returns
          -------
-         importance : `np.array`
-             An array containing the importance of each feature.
-
+         `None` : The function doesn't return anything.
          """
 
          self.X = dataset
@@ -635,8 +621,6 @@ class LinearRegression:
          -------
          importance : `np.array`
              An array containing the importance of each feature.
-
-
          """
          if print_important_features:
              for i, a in enumerate(self.importance):
@@ -658,7 +642,6 @@ def cal_average(y: np.ndarray, alpha: float = 1):
      -------
      average : `float`
          The average of the data.
-
      """
 
      n = int(alpha * len(y))
@@ -799,7 +782,6 @@ def mean_square_error(y_true: np.ndarray, y_pred: np.ndarray, print_error: bool
      -------
      RMSE : `float`
          The Root Mean Squared Error.
-
      """
      if print_error:
          print(f"The RMSE is {np.sqrt(np.mean((y_true - y_pred)**2))}")
@@ -975,7 +957,6 @@ class PerformanceMeasures:
      def __init__(self) -> None:
          pass
 
-     # Performance measure Res_T
      def f_mean(self, y_true: np.ndarray, y_pred: np.ndarray, labels: List[int]) -> float:
          F_vec = self._f1_score(y_true, y_pred, labels)
          mean_f_measure = np.mean(F_vec)
@@ -988,7 +969,6 @@ class PerformanceMeasures:
 
          return mean_f_measure
 
-     # Performance measure Res_P
      def resp(self, y_true: np.ndarray, y_pred: np.ndarray, labels: List[int]) -> float:
          T_C = len(y_true)
          sum1, sum2 = 0.0, 0.0
@@ -999,7 +979,7 @@ class PerformanceMeasures:
              sum1 += (1 - class_instances) * F_vec[label_idx]
              sum2 += 1 - class_instances
 
-         res_p = sum1 / sum2 if sum2 != 0 else 0.0  # Avoid division by zero
+         res_p = sum1 / sum2 if sum2 != 0 else 0.0
          print(f"Metric Res_p -> {res_p}")
 
          return res_p
@@ -1016,7 +996,6 @@ class PerformanceMeasures:
          sum_cols = np.sum(count_mat, axis=0)
          sum_rows = np.sum(count_mat, axis=1)
 
-         # Avoid division by zero
          precision = np.divide(
              count_mat.diagonal(), sum_cols, out=np.zeros_like(sum_cols), where=sum_cols != 0
          )
@@ -1028,7 +1007,6 @@ class PerformanceMeasures:
 
          return f1_vec
 
-     # Returns confusion matrix of predictions
      def _confu_mat(self, y_true: np.ndarray, y_pred: np.ndarray, labels: List[int]) -> np.ndarray:
          num_classes = len(labels)
          label_mapping = {label: idx for idx, label in enumerate(labels)}
@@ -1056,21 +1034,18 @@ class OneHotEncoder:
          self.x = x
 
          if not isinstance(self.x, np.ndarray):
-             self.x = np.array(self.x)  # If not numpy array then convert it
+             self.x = np.array(self.x)
 
-         y = np.zeros(
-             (self.x.size, self.x.max() + 1)
-         )  # Build matrix of (size num of entries) x (max value + 1)
+         y = np.zeros((self.x.size, self.x.max() + 1))
 
-         y[np.arange(self.x.size), self.x] = 1  # Label with ones
+         y[np.arange(self.x.size), self.x] = 1
 
          return y
 
      def decode(self, x: np.ndarray | list) -> np.ndarray:
          if not isinstance(x, np.ndarray):
-             x = np.array(x)  # If not numpy array then convert it
+             x = np.array(x)
 
-         # We return the max values of each row
          y = np.argmax(x, axis=1)
 
          return y
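For context, a minimal round trip through OneHotEncoder.encode and decode (hypothetical, not part of the diff); the call pattern mirrors the FeatureSelection hunk further below, and the label values are invented.

# Hypothetical encode/decode round trip; the label values are invented.
enc = OneHotEncoder()
labels = [0, 2, 1, 2]
one_hot = enc.encode(labels)   # shape (4, 3): one column per integer value up to max(labels)
print(one_hot)
print(enc.decode(one_hot))     # [0 2 1 2]: argmax per row recovers the original labels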
@@ -1107,13 +1082,11 @@ class FeatureSelection:
          `str`
              A string representation of the directed graph.
          """
-         # Assign and clean dataset
          self._load_data(dataset)
 
          curr_dataset = self.X
          columns = list(curr_dataset.columns)
 
-         # We construct string from causal_graph
          feature_string = " digraph { "
          for column in columns:
              feature_string += column + "; "
@@ -1125,85 +1098,53 @@ class FeatureSelection:
          numeric_df = pd.DataFrame(numeric_scaled.T, columns=numeric_df.columns)
          curr_dataset[numeric_df.columns] = numeric_df
 
-         # We construct dictionary to save index for scaling
          numeric_dict = dict(zip(list(numeric_df.columns), range(len(list(numeric_df.columns)))))
 
-         # Iterate over all the columns to obtain their importances.
          for index_column, column in enumerate(columns):
-
-             # Variable to predict
              Y = curr_dataset[column]
-
-             # We check whether it is numerical or categorical.
              column_type = Y.dtype
              if column_type != "object":
-                 # Linear regression model
                  Model = LinearRegression()
-
-                 # Auxiliary dataset without the column in question
                  X_aux = curr_dataset.drop([column], axis=1)
-
-                 # We encode
                  dfe = DataFrameEncoder(X_aux)
                  encoded_df = dfe.encode(save_mode=False)
-                 # We train
                  Model.fit(encoded_df.to_numpy().T, Y.to_numpy().T)
-                 # We obtain importance
                  importance = Model.get_importances()
                  w = Model.w
              else:
                  Model = LogisticRegression()
                  num_unique_entries = curr_dataset[column].nunique()
-
                  quick_encoder = DataFrameEncoder(Y.to_frame())
                  encoded_Y = quick_encoder.encode(save_mode=False)
-
-                 # Mapping to one-hot
                  one_hot = OneHotEncoder()
                  train_y = one_hot.encode(encoded_Y[column])
-                 # PASSING 0 -> 0.5 and 1 -> 0.73105
                  for i in range(len(train_y)):
                      for j in range(num_unique_entries):
                          if train_y[i][j] == 1.0:
                              train_y[i][j] = 0.73105
                          else:
                              train_y[i][j] = 0.5
-
-                 # Delete the column in question
                  X_aux = curr_dataset.drop([column], axis=1)
-
-                 # We encode
                  dfe = DataFrameEncoder(X_aux)
                  encoded_df = dfe.encode(save_mode=False)
-
-                 # We train
                  Model.fit(encoded_df.to_numpy().T, train_y)
-
-                 # We obtain importance
                  importance = Model.get_importances()
                  w = Model.w
-
-             # We obtain the $n$ most important ones
              top_n_indexes = sorted(
                  range(len(importance)), key=lambda i: importance[i], reverse=True
              )[:n_importances]
 
-             # We build the string for the column in question
              names_cols = list(X_aux.columns)
-             # We store the indices, values and column names in a list of tuples.
              features_imp_node = [
                  (names_cols[top_n_indexes[i]], importance[top_n_indexes[i]])
                  for i in range(n_importances)
              ]
-             # We store w's for predictions
 
              if column_type != "object":
                  self.w_dict[column] = (w, None, names_cols, dfe, numeric_dict)
              else:
                  self.w_dict[column] = (w, quick_encoder, names_cols, dfe, numeric_dict)
-             # Add to general list
              self.all_features_imp_graph.append((column, features_imp_node))
-             # We format it
              for i in top_n_indexes:
                  feature_string += names_cols[i] + " -> "
 
@@ -1212,10 +1153,8 @@ class FeatureSelection:
          return feature_string + "} "
 
      def _load_data(self, dataset: DataFrame):
-         # Assign data and clean dataset of unneeded columns
 
          if len(self.not_features) > 0:
-             # We remove unnecessary columns
              self.X = dataset.drop(columns=self.not_features)
 
          else:
@@ -1228,34 +1167,50 @@ class FeatureSelection:
              self.X = self.X.drop(columns=["index"])
 
 
- def check_nan_inf(df: DataFrame) -> DataFrame:
+ def check_nan_inf(df: DataFrame, verbose: bool = False) -> DataFrame:
      """
      Checks for NaN and Inf values in the DataFrame. If any are found, they will be removed.
 
-     Parameters:
-     df (DataFrame): The input DataFrame to be checked.
+     Parameters
+     ----------
+     df : DataFrame
+         The input DataFrame to be checked.
 
-     Returns:
-     DataFrame: A new DataFrame with NaN and Inf values removed.
+     Returns
+     ----------
+     DataFrame
+         A new DataFrame with NaN and Inf values removed.
      """
 
      nan_values = df.isnull().values.any()
      inf_values = np.isinf(df.select_dtypes(include="number")).values.any()
 
+     nan_count = df.isnull().values.sum()
+     inf_count = np.isinf(df.select_dtypes(include="number")).values.sum()
+
      if nan_values:
-         print("UserWarning: Some rows may have been deleted due to the existence of NaN values.")
+         (
+             print(
+                 "UserWarning: Some rows may have been deleted due to the existence of NaN values."
+             )
+             if verbose
+             else None
+         )
          df.dropna(inplace=True)
 
      if inf_values:
-         print("UserWarning: Some rows may have been deleted due to the existence of Inf values.")
+         (
+             print(
+                 "UserWarning: Some rows may have been deleted due to the existence of Inf values."
+             )
+             if verbose
+             else None
+         )
          df.replace([np.inf, -np.inf], np.nan, inplace=True)
          df.dropna(inplace=True)
 
-     nan_count = df.isnull().values.sum()
-     inf_count = np.isinf(df.select_dtypes(include="number")).values.sum()
-
-     print(f"NaN values removed: {nan_count}")
-     print(f"Infinite values removed: {inf_count}")
+     print(f"NaN values removed: ", "{:,}".format(nan_count))
+     print(f"Infinite values removed: ", "{:,}".format(inf_count))
 
      return df
 
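A short, hypothetical sketch of the new verbose flag on check_nan_inf (not part of the diff); the DataFrame is invented. Note that the function drops the offending rows in place and also returns the DataFrame.

# Hypothetical usage sketch of check_nan_inf with the new verbose flag.
import numpy as np
import pandas as pd

df = pd.DataFrame({"x": [1.0, np.nan, 3.0], "y": [np.inf, 5.0, 6.0]})

clean = check_nan_inf(df, verbose=True)  # verbose=True also prints the UserWarning messages
print(len(clean))                        # rows containing NaN or Inf have been dropped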
@@ -1272,7 +1227,6 @@ if __name__ == "__main__":
      print(helper.f_mean(y_true, y_pred, labels))
 
      # Use DataFrameEncoder
-     # Create a DataFrame
      data = {"Name": ["John", "Alice", "Bob", "Jafet", "Beto"], "Age": [25, 30, 35, 21, 28]}
      import pandas as pd
 
@@ -1,6 +1,6 @@
- Metadata-Version: 2.2
+ Metadata-Version: 2.4
  Name: likelihood
- Version: 1.4.1
+ Version: 1.5.1
  Summary: A package that performs the maximum likelihood algorithm.
  Home-page: https://github.com/jzsmoreno/likelihood/
  Author: J. A. Moreno-Guerra
@@ -39,6 +39,7 @@ Dynamic: classifier
  Dynamic: description
  Dynamic: description-content-type
  Dynamic: home-page
+ Dynamic: license-file
  Dynamic: maintainer
  Dynamic: maintainer-email
  Dynamic: provides-extra
@@ -0,0 +1,23 @@
+ likelihood/__init__.py,sha256=5C0hapdsk85XZhN_rssRAEFpkRRuKNtj6cyRbqD2_gM,994
+ likelihood/main.py,sha256=fcCkGOOWKjfvw2tLVqjuKPV8t0rVCIT9FlbYcOv4EYo,7974
+ likelihood/graph/__init__.py,sha256=6TuFDfmXTwpLyHl7_KqBfdzW6zqHjGzIFvymjFPlvjI,21
+ likelihood/graph/graph.py,sha256=bLrNMvIh7GOTdPTwnNss8oPZ7cbSHQScAsH_ttmVUK0,3294
+ likelihood/graph/nn.py,sha256=EaMmboKriCFnkP48_HLGRAsOZSWxwUlMG0WDGZ4ey1o,11035
+ likelihood/models/__init__.py,sha256=e6nB4w47w0Q9DrAFeP3OcUgcoHOtf7Il4mBhgf4AARg,52
+ likelihood/models/hmm.py,sha256=0s0gFySH1u4NjRaZDxiZ8oeTaFhFrw1x0GJxwy3dFrA,6253
+ likelihood/models/regression.py,sha256=9cakyGlJCEO6WfpoKLh3GxdXQeQp7cUvJIkQ5odT0TA,9404
+ likelihood/models/simulation.py,sha256=IkYGA6-L1LvSnIlyrVWTzQQu-JnfXml5Tewt-GC05PY,8446
+ likelihood/models/utils.py,sha256=dvigPi_hxcs5ntfHr7Y1JvP5ULtMW3kkN0nJpS4orE8,1319
+ likelihood/models/deep/__init__.py,sha256=-KIPippVaMqgG8mEgYjNxYQdqOUcFhUuKhbVe8TTCfo,28
+ likelihood/models/deep/autoencoders.py,sha256=0EIZwDNlZ9NCfQbhQ_KdXkkRwIjUEU-jk0l0u-J1wmA,44212
+ likelihood/tools/__init__.py,sha256=N1IhMDzacsGQT2MIYBMBC0zTxes78vC_0gGrwkuPgmg,78
+ likelihood/tools/figures.py,sha256=waF0NHIMrctCmaLhcuz5DMcXyRKynmn6aG0XITYCTLc,10940
+ likelihood/tools/impute.py,sha256=BwBVFSQkG3uWsZEk1THTmqZc3YhHlDhMXgKIV3sx5Lg,9486
+ likelihood/tools/models_tools.py,sha256=c3-vac-1MYSarYDtfR6XfVC7X_WY9auS7y2_3Z973IQ,8875
+ likelihood/tools/numeric_tools.py,sha256=OelCF45QO-zhanX3GmfcdYMfUZxYt353oJ8_gPEdWss,11959
+ likelihood/tools/tools.py,sha256=vlQ-peK_z5-MLVnStxlBdl-NfmF6ILxZ6LhBd4K77JI,42282
+ likelihood-1.5.1.dist-info/licenses/LICENSE,sha256=XWHWt9egYEUHGPTnlcZfJKLPmysacOwdiLj_-J7Z9ew,1066
+ likelihood-1.5.1.dist-info/METADATA,sha256=s__LhxtBZXbQHaU-WQtpRvOmfnP7zZ1nqhI6I9IRNFA,2844
+ likelihood-1.5.1.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+ likelihood-1.5.1.dist-info/top_level.txt,sha256=KDiBLr870YTxqLFqObTOSrTK10uw8dFsITSNLlte3PA,11
+ likelihood-1.5.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (75.8.2)
+ Generator: setuptools (78.1.0)
  Root-Is-Purelib: true
  Tag: py3-none-any