likelihood 2.0.0__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -27,12 +27,12 @@ class Env:
27
27
 
28
28
  Parameters
29
29
  ----------
30
- model : Any
31
- Model with `.predict()` method (e.g., Keras model).
32
- maxlen : int
33
- Maximum length of deque. By default it is set to `100`.
34
- name : str
35
- The name of the environment. By default it is set to `likenasium`.
30
+ model : Any
31
+ Model with `.predict()` method (e.g., Keras model).
32
+ maxlen : int
33
+ Maximum length of deque. By default it is set to `100`.
34
+ name : str
35
+ The name of the environment. By default it is set to `likenasium`.
36
36
  """
37
37
  self.model = model
38
38
  self.maxlen = maxlen
@@ -49,14 +49,14 @@ class Env:
49
49
 
50
50
  Parameters
51
51
  ----------
52
- state : `np.ndarray`
53
- Current state to process (input to the model).
54
- action : int
55
- Expected action to process.
52
+ state : `np.ndarray`
53
+ Current state to process (input to the model).
54
+ action : `int`
55
+ Expected action to process.
56
56
 
57
57
  Returns
58
58
  -------
59
- tuple: (current_state, action_pred, reward, next_action, done)
59
+ `tuple` : (current_state, action_pred, reward, next_action, done)
60
60
  """
61
61
  if self.done:
62
62
  return None, None, 0, None, True
@@ -120,9 +120,9 @@ class AutoQL:
120
120
 
121
121
  Parameters
122
122
  ----------
123
- env : Any
123
+ env : `Any`
124
124
  The environment to interact with
125
- model : tf.keras.Model
125
+ model : `tf.keras.Model`
126
126
  The Q-network model
127
127
  """
128
128
 
@@ -137,16 +137,16 @@ class AutoQL:
137
137
 
138
138
  Parameters
139
139
  ----------
140
- state : `np.ndarray`
141
- Current state.
142
- action : int
143
- Expected action to process.
144
- epsilon : float
145
- Exploration probability. By default it is set to `0`
140
+ state : `np.ndarray`
141
+ Current state.
142
+ action : `int`
143
+ Expected action to process.
144
+ epsilon : `float`
145
+ Exploration probability. By default it is set to `0`
146
146
 
147
147
  Returns
148
148
  -------
149
- tuple: (state, action, reward, next_action, done)
149
+ `tuple` : (state, action, reward, next_action, done)
150
150
  """
151
151
  current_state, value, reward, next_action, done = self.env.step(state, action)
152
152
 
@@ -164,17 +164,17 @@ class AutoQL:
164
164
 
165
165
  Parameters
166
166
  ----------
167
- state : `np.ndarray`
168
- Current state
169
- action : int
170
- Expected action to process.
167
+ state : `np.ndarray`
168
+ Current state
169
+ action : `int`
170
+ Expected action to process.
171
171
 
172
- epsilon : float
173
- Exploration probability.
172
+ epsilon : `float`
173
+ Exploration probability.
174
174
 
175
175
  Returns
176
176
  -------
177
- tuple: (state, action, reward, next_action, done)
177
+ `tuple` : (state, action, reward, next_action, done)
178
178
  """
179
179
  current_state, greedy_action, reward, next_action, done = self.epsilon_greedy_policy(
180
180
  state, action, epsilon
@@ -202,7 +202,7 @@ class AutoQL:
202
202
 
203
203
  Returns
204
204
  -------
205
- float: Training loss
205
+ `float` : Training loss
206
206
  """
207
207
 
208
208
  batch_ = random.sample(self.replay_buffer, self.batch_size)
@@ -250,21 +250,21 @@ class AutoQL:
250
250
 
251
251
  Parameters
252
252
  ----------
253
- optimizer : str
253
+ optimizer : `str`
254
254
  The optimizer for training (e.g., `sgd`). By default it is set to `adam`.
255
- loss_fn : str
255
+ loss_fn : `str`
256
256
  The loss function. By default it is set to `mse`.
257
- num_episodes : int
257
+ num_episodes : `int`
258
258
  Total number of episodes to train. By default it is set to `50`.
259
- num_steps : int
259
+ num_steps : `int`
260
260
  Steps per episode. By default it is set to `100`. If `num_steps` is less than `self.env.maxlen`, `self.env.maxlen` is used instead.
261
- gamma : float
261
+ gamma : `float`
262
262
  Discount factor. By default it is set to `0.7`.
263
- batch_size : int
263
+ batch_size : `int`
264
264
  Size of training batches. By default it is set to `32`.
265
- patience : int
265
+ patience : `int`
266
266
  How many episodes to wait for improvement.
267
- alpha : float
267
+ alpha : `float`
268
268
  Trade-off factor between loss and reward.
269
269
  """
270
270
  rewards = []
@@ -11,7 +11,7 @@ logging.getLogger("tensorflow").setLevel(logging.ERROR)
11
11
  import sys
12
12
  import warnings
13
13
  from functools import wraps
14
- from typing import Dict
14
+ from typing import Dict, List, Optional, Tuple, Union
15
15
 
16
16
  import numpy as np
17
17
  import tensorflow as tf
@@ -40,6 +40,189 @@ def suppress_warnings(func):
40
40
  return wrapper
41
41
 
42
42
 
43
+ class TransformRange:
44
+ """
45
+ Generates a new DataFrame with ranges represented as strings.
46
+
47
+ Transforms numerical columns into categorical range bins with descriptive labels.
48
+ """
49
+
50
+ def __init__(self, df: pd.DataFrame) -> None:
51
+ """Initializes the class with the original DataFrame.
52
+
53
+ Parameters
54
+ ----------
55
+ df : `pd.DataFrame`
56
+ The original DataFrame to transform.
57
+
58
+ Raises
59
+ ------
60
+ TypeError
61
+ If df is not a pandas DataFrame.
62
+ """
63
+ if not isinstance(df, pd.DataFrame):
64
+ raise TypeError("df must be a pandas DataFrame")
65
+ self.df = df.copy() # Create a copy to avoid modifying the original
66
+
67
+ def _create_bins_and_labels(
68
+ self, min_val: Union[int, float], max_val: Union[int, float], bin_size: int
69
+ ) -> Tuple[np.ndarray, List[str]]:
70
+ """
71
+ Creates the bin edges and their labels.
72
+
73
+ Parameters
74
+ ----------
75
+ min_val : `int` or `float`
76
+ The minimum value for the range.
77
+ max_val : `int` or `float`
78
+ The maximum value for the range.
79
+ bin_size : `int`
80
+ The size of each bin.
81
+
82
+ Returns
83
+ -------
84
+ bins : `np.ndarray`
85
+ The bin edges.
86
+ labels : `list`
87
+ The labels for the bins.
88
+
89
+ Raises
90
+ ------
91
+ ValueError
92
+ If bin_size is not positive or if min_val >= max_val.
93
+ """
94
+ if bin_size <= 0:
95
+ raise ValueError("bin_size must be positive")
96
+ if min_val >= max_val:
97
+ raise ValueError("min_val must be less than max_val")
98
+
99
+ start = int(min_val)
100
+ end = int(max_val) + bin_size
101
+
102
+ bins = np.arange(start, end + 1, bin_size)
103
+
104
+ if bins[-1] <= max_val:
105
+ bins = np.append(bins, max_val + 1)
106
+
107
+ labels = [f"{int(bins[i])}-{int(bins[i+1] - 1)}" for i in range(len(bins) - 1)]
108
+ return bins, labels
109
+
110
+ def _transform_column_to_ranges(self, column: str, bin_size: int) -> pd.Series:
111
+ """
112
+ Transforms a column in the DataFrame into range bins.
113
+
114
+ Parameters
115
+ ----------
116
+ column : `str`
117
+ The name of the column to transform.
118
+ bin_size : `int`
119
+ The size of each bin.
120
+
121
+ Returns
122
+ -------
123
+ `pd.Series`
124
+ A Series with the range labels.
125
+
126
+ Raises
127
+ ------
128
+ KeyError
129
+ If column is not found in the DataFrame.
130
+ ValueError
131
+ If bin_size is not positive or if the column contains no valid numeric data.
132
+ """
133
+ if column not in self.df.columns:
134
+ raise KeyError(f"Column '{column}' not found in DataFrame")
135
+
136
+ if bin_size <= 0:
137
+ raise ValueError("bin_size must be positive")
138
+
139
+ numeric_series = pd.to_numeric(self.df[column], errors="coerce")
140
+ if numeric_series.isna().all():
141
+ raise ValueError(f"Column '{column}' contains no valid numeric data")
142
+
143
+ min_val = numeric_series.min()
144
+ max_val = numeric_series.max()
145
+
146
+ if min_val == max_val:
147
+ return pd.Series(
148
+ [f"{int(min_val)}-{int(max_val)}"] * len(self.df), name=f"{column}_range"
149
+ )
150
+
151
+ bins, labels = self._create_bins_and_labels(min_val, max_val, bin_size)
152
+
153
+ return pd.cut(numeric_series, bins=bins, labels=labels, right=False, include_lowest=True)
154
+
155
+ def transform_dataframe(
156
+ self, columns_bin_sizes: Dict[str, int], drop_original: bool = False
157
+ ) -> pd.DataFrame:
158
+ """
159
+ Creates a new DataFrame with range columns.
160
+
161
+ Parameters
162
+ ----------
163
+ columns_bin_sizes : `dict`
164
+ A dictionary where the keys are column names and the values are the bin sizes.
165
+ drop_original : `bool`, optional
166
+ If True, only the new range columns are returned; if False, the columns not listed in `columns_bin_sizes` are kept alongside them. By default False.
167
+
168
+ Returns
169
+ -------
170
+ `pd.DataFrame`
171
+ A DataFrame with the transformed range columns.
172
+
173
+ Raises
174
+ ------
175
+ TypeError
176
+ If columns_bin_sizes is not a dictionary.
177
+ """
178
+ if not isinstance(columns_bin_sizes, dict):
179
+ raise TypeError("columns_bin_sizes must be a dictionary")
180
+
181
+ if not columns_bin_sizes:
182
+ return pd.DataFrame()
183
+
184
+ range_columns = {}
185
+ for column, bin_size in columns_bin_sizes.items():
186
+ range_columns[f"{column}_range"] = self._transform_column_to_ranges(column, bin_size)
187
+
188
+ result_df = pd.DataFrame(range_columns)
189
+
190
+ if not drop_original:
191
+ original_cols = [col for col in self.df.columns if col not in columns_bin_sizes]
192
+ if original_cols:
193
+ result_df = pd.concat([self.df[original_cols], result_df], axis=1)
194
+
195
+ return result_df
196
+
197
+ def get_range_info(self, column: str) -> Dict[str, Union[int, float, List[str]]]:
198
+ """
199
+ Get information about the range transformation for a specific column.
200
+
201
+ Parameters
202
+ ----------
203
+ column : `str`
204
+ The name of the column to analyze.
205
+
206
+ Returns
207
+ -------
208
+ `dict`
209
+ Dictionary containing `min_value`, `max_value`, `range`, and `column`.
210
+ """
211
+ if column not in self.df.columns:
212
+ raise KeyError(f"Column '{column}' not found in DataFrame")
213
+
214
+ numeric_series = pd.to_numeric(self.df[column], errors="coerce")
215
+ min_val = numeric_series.min()
216
+ max_val = numeric_series.max()
217
+
218
+ return {
219
+ "min_value": min_val,
220
+ "max_value": max_val,
221
+ "range": max_val - min_val,
222
+ "column": column,
223
+ }
224
+
225
+
43
226
  def remove_collinearity(df: DataFrame, threshold: float = 0.9):
44
227
  """
45
228
  Removes highly collinear features from the DataFrame based on a correlation threshold.
@@ -56,8 +239,8 @@ def remove_collinearity(df: DataFrame, threshold: float = 0.9):
56
239
  The correlation threshold above which features will be removed. Default is `0.9`.
57
240
 
58
241
  Returns
59
- ----------
60
- DataFrame: A DataFrame with highly collinear features removed.
242
+ -------
243
+ DataFrame : A DataFrame with highly collinear features removed.
61
244
  """
62
245
  corr_matrix = df.corr().abs()
63
246
  upper_triangle = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
@@ -97,11 +280,11 @@ def train_and_insights(
97
280
  Fraction of data to use (default is 1.0).
98
281
 
99
282
  Keyword Arguments:
100
- ----------
283
+ ------------------
101
284
  Additional keyword arguments passed to the `model.fit` function, such as validation split and callbacks.
102
285
 
103
286
  Returns
104
- ----------
287
+ -------
105
288
  `tf.keras.Model`
106
289
  The trained model after fitting.
107
290
  """
@@ -207,7 +390,7 @@ def graph_metrics(adj_matrix: np.ndarray, eigenvector_threshold: float = 1e-6) -
207
390
  A threshold for the eigenvector centrality calculation, used to determine the cutoff for small eigenvalues. Default is `1e-6`.
208
391
 
209
392
  Returns
210
- ----------
393
+ -------
211
394
  DataFrame : A DataFrame containing the following graph metrics as columns.
212
395
  - `Degree Centrality`: Degree centrality values for each node, indicating the number of direct connections each node has.
213
396
  - `Clustering Coefficient`: Clustering coefficient values for each node, representing the degree to which nodes cluster together.
@@ -218,7 +401,7 @@ def graph_metrics(adj_matrix: np.ndarray, eigenvector_threshold: float = 1e-6) -
218
401
  - `Assortativity`: The assortativity coefficient of the graph, measuring the tendency of nodes to connect to similar nodes.
219
402
 
220
403
  Notes
221
- ----------
404
+ -----
222
405
  The returned DataFrame will have one row for each node and one column for each of the computed metrics.
223
406
  """
224
407
  adj_matrix = adj_matrix.astype(int)
@@ -251,3 +434,7 @@ def graph_metrics(adj_matrix: np.ndarray, eigenvector_threshold: float = 1e-6) -
251
434
  metrics_df["Assortativity"] = assortativity
252
435
 
253
436
  return metrics_df
437
+
438
+
439
+ if __name__ == "__main__":
440
+ pass
@@ -154,7 +154,7 @@ def xicor(X: np.ndarray, Y: np.ndarray, ties: bool = True, random_seed: int = No
154
154
  The first variable to be correlated. Must have at least one dimension.
155
155
  Y : `np.ndarray`
156
156
  The second variable to be correlated. Must have at least one dimension.
157
- ties : bool
157
+ ties : `bool`
158
158
  Whether to handle ties using randomization.
159
159
  random_seed : int, optional
160
160
  Seed for the random number generator for reproducibility.
@@ -356,9 +356,9 @@ def find_multiples(target: int) -> tuple[int, int] | None:
356
356
  Returns
357
357
  -------
358
358
  tuple[int, int] | None
359
- If i and i+1 both divide target, returns (i, i+1).
360
- Otherwise, returns (i, target // i).
361
- Returns None if no factors are found.
359
+ If `i` and `i+1` both divide target, returns (i, i+1).
360
+ Otherwise, returns `(i, target // i)`.
361
+ Returns `None` if no factors are found.
362
362
  """
363
363
  for i in range(2, target + 1):
364
364
  if target % i == 0:
likelihood/tools/tools.py CHANGED
@@ -861,7 +861,7 @@ class DataFrameEncoder:
861
861
  """Encodes the `object` type columns of the dataframe
862
862
 
863
863
  Keyword Arguments:
864
- ----------
864
+ ------------------
865
865
  - save_mode (`bool`): An optional boolean parameter. By default it is set to `True`
866
866
  - dictionary_name (`str`): An optional string parameter. By default it is set to `labelencoder_dictionary`
867
867
  - norm_method (`str`): An optional string parameter to perform normalization. By default it is set to `None`
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: likelihood
3
- Version: 2.0.0
3
+ Version: 2.0.1
4
4
  Summary: A package that performs the maximum likelihood algorithm.
5
5
  Home-page: https://github.com/jzsmoreno/likelihood/
6
6
  Author: J. A. Moreno-Guerra
@@ -15,16 +15,16 @@ likelihood/models/deep/_predictor.py,sha256=XI4QfVM7PS_60zYtmi-V8UzNDrASFiDMVPmV
15
15
  likelihood/models/deep/autoencoders.py,sha256=muUBH9BclOK8ViI7PijyMOBBLVox6uwuIabyJvpU5qw,30729
16
16
  likelihood/models/deep/gan.py,sha256=rTnaLmIPjsKg6_0B8JZOVwPxdx59rHmqvzDitdJMCQ4,10924
17
17
  likelihood/models/deep/predictor.py,sha256=q5tPaAbF7s5XIcxVr6fyHTQdZa9tlixO9vb9a9Cw0wM,27831
18
- likelihood/models/deep/rl.py,sha256=9dhhnVTIETi9zvVeyOXYo1hl-LQJezmv0rgsUq11Qwc,11611
18
+ likelihood/models/deep/rl.py,sha256=VVuwHwK24d2fe3uNHliE1QJsKGZAPhx_pdgj3jqN5rQ,11565
19
19
  likelihood/tools/__init__.py,sha256=N1IhMDzacsGQT2MIYBMBC0zTxes78vC_0gGrwkuPgmg,78
20
20
  likelihood/tools/cat_embed.py,sha256=SJ7o1vbrNYp21fLLcjRnWpUDcz1nVSe8TmMvsLIz5CI,7346
21
21
  likelihood/tools/figures.py,sha256=waF0NHIMrctCmaLhcuz5DMcXyRKynmn6aG0XITYCTLc,10940
22
22
  likelihood/tools/impute.py,sha256=n87Tv-xLUAdPl7BQLFcLWSsXBZbXksahyCayJWMydXc,9485
23
- likelihood/tools/models_tools.py,sha256=c3-vac-1MYSarYDtfR6XfVC7X_WY9auS7y2_3Z973IQ,8875
24
- likelihood/tools/numeric_tools.py,sha256=Hwf-lbqROqPPZ9N7eVzKIDyZxFGQdP53isWxPqpG0eo,12254
25
- likelihood/tools/tools.py,sha256=GKZsqjyO5tGXWGSfn3jlQBTjRlmBv2byfvpu-QclUx0,42188
26
- likelihood-2.0.0.dist-info/licenses/LICENSE,sha256=XWHWt9egYEUHGPTnlcZfJKLPmysacOwdiLj_-J7Z9ew,1066
27
- likelihood-2.0.0.dist-info/METADATA,sha256=Ziysy1MQuW77OHHd1UzMtlfeUT9wsdgCl6rxW3uLBEE,2917
28
- likelihood-2.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
29
- likelihood-2.0.0.dist-info/top_level.txt,sha256=KDiBLr870YTxqLFqObTOSrTK10uw8dFsITSNLlte3PA,11
30
- likelihood-2.0.0.dist-info/RECORD,,
23
+ likelihood/tools/models_tools.py,sha256=-QAfvCy9mw-ZyeJHzJJ7O6eDfUXghtA7KfFtTc-Tp0A,14607
24
+ likelihood/tools/numeric_tools.py,sha256=JeLECoVS3ayFH53kUYkAMs0fzALZV1M22-tBLM-Q34g,12264
25
+ likelihood/tools/tools.py,sha256=5vPUHrm8D4ODsg-MP4uZ3NgXV9fNbs0Olx7RWtUdVDU,42196
26
+ likelihood-2.0.1.dist-info/licenses/LICENSE,sha256=XWHWt9egYEUHGPTnlcZfJKLPmysacOwdiLj_-J7Z9ew,1066
27
+ likelihood-2.0.1.dist-info/METADATA,sha256=3mLJAcVO4jzu4IoCVVaSBPMxBWV-xnHs_f_DvvN9G0c,2917
28
+ likelihood-2.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
29
+ likelihood-2.0.1.dist-info/top_level.txt,sha256=KDiBLr870YTxqLFqObTOSrTK10uw8dFsITSNLlte3PA,11
30
+ likelihood-2.0.1.dist-info/RECORD,,