dragon-ml-toolbox 1.4.0__py3-none-any.whl → 1.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dragon-ml-toolbox might be problematic.

ml_tools/particle_swarm_optimization.py CHANGED
@@ -5,18 +5,23 @@ import xgboost as xgb
 import lightgbm as lgb
 from sklearn.ensemble import HistGradientBoostingClassifier, HistGradientBoostingRegressor
 from sklearn.base import ClassifierMixin
-from sklearn.preprocessing import StandardScaler
-from typing import Literal, Union, Tuple, Dict
-from collections.abc import Sequence
+from typing import Literal, Union, Tuple, Dict, Optional
 import polars as pl
 from functools import partial
+from .utilities import sanitize_filename, _script_info, threshold_binary_values
+
+
+__all__ = [
+    "ObjectiveFunction",
+    "run_pso"
+]
 
 
 class ObjectiveFunction():
     """
-    Callable objective function designed for optimizing continuous outputs from regression models.
+    Callable objective function designed for optimizing continuous outputs from tree-based regression models.
 
-    The trained model must include a 'model' and a 'scaler'. Additionally 'feature_names' and 'target_name' will be parsed if present.
+    The target serialized file (joblib) must include a trained tree-based 'model'. Additionally 'feature_names' and 'target_name' will be parsed if present.
 
     Parameters
     ----------
@@ -29,15 +34,14 @@ class ObjectiveFunction():
     task : Literal, default 'maximization'
         Whether to maximize or minimize the target.
     """
-    def __init__(self, trained_model_path: str, add_noise: bool=True, task: Literal["maximization", "minimization"]="maximization", binary_features: int=0) -> None:
+    def __init__(self, trained_model_path: str, add_noise: bool, task: Literal["maximization", "minimization"], binary_features: int=0) -> None:
         self.binary_features = binary_features
         self.is_hybrid = False if binary_features <= 0 else True
         self.use_noise = add_noise
         self._artifact = joblib.load(trained_model_path)
         self.model = self._get_from_artifact('model')
-        self.scaler = self._get_from_artifact('scaler')
-        self.feature_names: list[str] = self._get_from_artifact('feature_names') # type: ignore
-        self.target_name: str = self._get_from_artifact('target_name') # type: ignore
+        self.feature_names: Optional[list[str]] = self._get_from_artifact('feature_names') # type: ignore
+        self.target_name: Optional[str] = self._get_from_artifact('target_name') # type: ignore
         self.task = task
         self.check_model() # check for classification models and None values
 
@@ -45,16 +49,15 @@ class ObjectiveFunction():
         if self.use_noise:
             features_array = self.add_noise(features_array)
         if self.is_hybrid:
-            features_array = self._handle_hybrid(features_array)
+            features_array = threshold_binary_values(input_array=features_array, binary_features=self.binary_features)
 
         if features_array.ndim == 1:
             features_array = features_array.reshape(1, -1)
 
-        # scale features as the model expects
-        features_array = self.scaler.transform(features_array) # type: ignore
-
         result = self.model.predict(features_array) # type: ignore
         scalar = result.item()
+        # print(f"[DEBUG] Model predicted: {scalar}")
+
         # pso minimizes by default, so we return the negative value to maximize
         if self.task == "maximization":
             return -scalar
@@ -62,23 +65,22 @@ class ObjectiveFunction():
             return scalar
 
     def add_noise(self, features_array):
-        noise_range = np.random.uniform(0.95, 1.05, size=features_array.shape)
-        new_feature_values = features_array * noise_range
-        return new_feature_values
-
-    def _handle_hybrid(self, features_array):
-        feat_continuous = features_array[:self.binary_features]
-        feat_binary = (features_array[self.binary_features:] > 0.5).astype(int) #threshold binary values
-        new_feature_values = np.concatenate([feat_continuous, feat_binary])
-        return new_feature_values
+        if self.binary_features > 0:
+            split_idx = -self.binary_features
+            cont_part = features_array[:split_idx]
+            bin_part = features_array[split_idx:]
+            noise = np.random.uniform(0.95, 1.05, size=cont_part.shape)
+            cont_noised = cont_part * noise
+            return np.concatenate([cont_noised, bin_part])
+        else:
+            noise = np.random.uniform(0.95, 1.05, size=features_array.shape)
+            return features_array * noise
 
     def check_model(self):
         if isinstance(self.model, ClassifierMixin) or isinstance(self.model, xgb.XGBClassifier) or isinstance(self.model, lgb.LGBMClassifier):
             raise ValueError(f"[Model Check Failed] ❌\nThe loaded model ({type(self.model).__name__}) is a Classifier.\nOptimization is not suitable for standard classification tasks.")
         if self.model is None:
             raise ValueError("Loaded model is None")
-        if self.scaler is None:
-            raise ValueError("Loaded scaler is None")
 
     def _get_from_artifact(self, key: str):
         val = self._artifact.get(key)
@@ -89,10 +91,10 @@ class ObjectiveFunction():
         return result
 
     def __repr__(self):
-        return (f"<ObjectiveFunction(model={type(self.model).__name__}, scaler={type(self.scaler).__name__}, use_noise={self.use_noise}, is_hybrid={self.is_hybrid}, task='{self.task}')>")
+        return (f"<ObjectiveFunction(model={type(self.model).__name__}, use_noise={self.use_noise}, is_hybrid={self.is_hybrid}, task='{self.task}')>")
 
 
-def _set_boundaries(lower_boundaries: Sequence[float], upper_boundaries: Sequence[float]):
+def _set_boundaries(lower_boundaries: list[float], upper_boundaries: list[float]):
     assert len(lower_boundaries) == len(upper_boundaries), "Lower and upper boundaries must have the same length."
     assert len(lower_boundaries) >= 1, "At least one boundary pair is required."
     lower = np.array(lower_boundaries)
@@ -112,31 +114,40 @@ def _save_results(*dicts, save_dir: str, target_name: str):
     combined_dict = dict()
     for single_dict in dicts:
         combined_dict.update(single_dict)
-
-    full_path = os.path.join(save_dir, f"results_{target_name}.csv")
+
+    sanitized_target_name = sanitize_filename(target_name)
+
+    full_path = os.path.join(save_dir, f"Optimization_{sanitized_target_name}.csv")
     pl.DataFrame(combined_dict).write_csv(full_path)
 
 
-def run_pso(lower_boundaries: Sequence[float], upper_boundaries: Sequence[float], objective_function: ObjectiveFunction,
-            save_results_dir: str,
+def run_pso(lower_boundaries: list[float],
+            upper_boundaries: list[float],
+            objective_function: ObjectiveFunction,
+            save_results_dir: str,
+            auto_binary_boundaries: bool=True,
             target_name: Union[str, None]=None,
             feature_names: Union[list[str], None]=None,
-            swarm_size: int=100, max_iterations: int=100,
+            swarm_size: int=200,
+            max_iterations: int=400,
             inequality_constrain_function=None,
-            post_hoc_analysis: Union[int, None]=None) -> Tuple[Dict[str, float | list[float]], Dict[str, float | list[float]]]:
+            post_hoc_analysis: Optional[int]=3,
+            workers: int=3) -> Tuple[Dict[str, float | list[float]], Dict[str, float | list[float]]]:
     """
-    Executes Particle Swarm Optimization (PSO) to optimize a given objective function and saves the results.
+    Executes Particle Swarm Optimization (PSO) to optimize a given objective function and saves the results as a CSV file.
 
     Parameters
     ----------
-    lower_boundaries : Sequence[float]
-        Lower bounds for each feature in the search space.
-    upper_boundaries : Sequence[float]
-        Upper bounds for each feature in the search space.
+    lower_boundaries : list[float]
+        Lower bounds for each feature in the search space (as many as features expected by the model).
+    upper_boundaries : list[float]
+        Upper bounds for each feature in the search space (as many as features expected by the model).
     objective_function : ObjectiveFunction
-        A callable object encapsulating a regression model and its scaler.
+        A callable object encapsulating a tree-based regression model.
     save_results_dir : str
        Directory path to save the results CSV file.
+    auto_binary_boundaries : bool
+        Use `ObjectiveFunction.binary_features` to append as many binary boundaries as needed to `lower_boundaries` and `upper_boundaries` automatically.
     target_name : str or None, optional
        Name of the target variable. If None, attempts to retrieve from the ObjectiveFunction object.
    feature_names : list[str] or None, optional
@@ -147,32 +158,39 @@ def run_pso(lower_boundaries: Sequence[float], upper_boundaries: Sequence[float]
         Maximum number of iterations for the optimization algorithm.
     inequality_constrain_function : callable or None, optional
         Optional function defining inequality constraints to be respected by the optimization.
-    post_hoc_analysis : int or None, optional
+    post_hoc_analysis : int or None
         If specified, runs the optimization multiple times to perform post hoc analysis. The value indicates the number of repetitions.
+    workers : int
+        Number of parallel processes to use.
 
     Returns
     -------
     Tuple[Dict[str, float | list[float]], Dict[str, float | list[float]]]
         If `post_hoc_analysis` is None, returns two dictionaries:
-        - best_features_named: Feature values (after inverse scaling) that yield the best result.
-        - best_target_named: Best result obtained for the target variable.
+        - feature_names: Feature values (after inverse scaling) that yield the best result.
+        - target_name: Best result obtained for the target variable.
 
         If `post_hoc_analysis` is an integer, returns two dictionaries:
-        - all_best_features_named: Lists of best feature values (after inverse scaling) for each repetition.
-        - all_best_targets_named: List of best target values across repetitions.
+        - feature_names: Lists of best feature values (after inverse scaling) for each repetition.
+        - target_name: List of best target values across repetitions.
 
     Notes
     -----
     - PSO minimizes the objective function by default; if maximization is desired, it should be handled inside the ObjectiveFunction.
-    - Feature values are scaled before being passed to the model and inverse-transformed before result saving.
     """
+    # Append binary boundaries
+    binary_number = objective_function.binary_features
+    if auto_binary_boundaries and binary_number > 0:
+        lower_boundaries.extend([0] * binary_number)
+        upper_boundaries.extend([1] * binary_number)
+
     lower, upper = _set_boundaries(lower_boundaries, upper_boundaries)
-
+
     # feature names
     if feature_names is None and objective_function.feature_names is not None:
         feature_names = objective_function.feature_names
     names = _set_feature_names(size=len(lower_boundaries), names=feature_names)
-
+
     # target name
     if target_name is None and objective_function.target_name is not None:
         target_name = objective_function.target_name
@@ -186,20 +204,25 @@ def run_pso(lower_boundaries: Sequence[float], upper_boundaries: Sequence[float]
         "f_ieqcons": inequality_constrain_function,
         "swarmsize": swarm_size,
         "maxiter": max_iterations,
-        "processes": 1,
-        "particle_output": True
+        "processes": workers,
+        "particle_output": False
     }
 
-    if post_hoc_analysis is None:
-        # best_features, best_target = pso(**arguments)
-        best_features, best_target, _particle_positions, _target_values_per_position = pso(**arguments)
+    os.makedirs(save_results_dir, exist_ok=True)
+
+    if post_hoc_analysis is None or post_hoc_analysis == 1:
+        best_features, best_target, *_ = _pso(**arguments)
+        # best_features, best_target, _particle_positions, _target_values_per_position = _pso(**arguments)
 
-        # inverse transformation
-        best_features = np.array(best_features).reshape(1, -1)
-        best_features_real = objective_function.scaler.inverse_transform(best_features).flatten() # type: ignore
+        # flip best_target if maximization was used
+        if objective_function.task == "maximization":
+            best_target = -best_target
+
+        # threshold binary features
+        best_features_threshold = threshold_binary_values(best_features, binary_number)
 
         # name features
-        best_features_named = {name: value for name, value in zip(names, best_features_real)}
+        best_features_named = {name: value for name, value in zip(names, best_features_threshold)}
         best_target_named = {target_name: best_target}
 
         # save results
@@ -209,15 +232,18 @@ def run_pso(lower_boundaries: Sequence[float], upper_boundaries: Sequence[float]
     else:
         all_best_targets = list()
         all_best_features = [[] for _ in range(len(lower_boundaries))]
-        for _ in range(post_hoc_analysis):
-            # best_features, best_target = pso(**arguments)
-            best_features, best_target, _particle_positions, _target_values_per_position = pso(**arguments)
+        for _ in range(post_hoc_analysis):
+            best_features, best_target, *_ = _pso(**arguments)
+            # best_features, best_target, _particle_positions, _target_values_per_position = _pso(**arguments)
+
+            # flip best_target if maximization was used
+            if objective_function.task == "maximization":
+                best_target = -best_target
 
-            # inverse transformation
-            best_features = np.array(best_features).reshape(1, -1)
-            best_features_real = objective_function.scaler.inverse_transform(best_features).flatten() # type: ignore
+            # threshold binary features
+            best_features_threshold = threshold_binary_values(best_features, binary_number)
 
-            for i, best_feature in enumerate(best_features_real):
+            for i, best_feature in enumerate(best_features_threshold):
                 all_best_features[i].append(best_feature)
             all_best_targets.append(best_target)
 
@@ -231,6 +257,8 @@ def run_pso(lower_boundaries: Sequence[float], upper_boundaries: Sequence[float]
         return all_best_features_named, all_best_targets_named # type: ignore
 
 
+def info():
+    _script_info(__all__)
 
 
 ### SOURCE CODE FOR PSO ###
@@ -249,7 +277,7 @@ def _cons_ieqcons_wrapper(ieqcons, args, kwargs, x):
 def _cons_f_ieqcons_wrapper(f_ieqcons, args, kwargs, x):
     return np.array(f_ieqcons(x, *args, **kwargs))
 
-def pso(func, lb, ub, ieqcons=[], f_ieqcons=None, args=(), kwargs={},
+def _pso(func, lb, ub, ieqcons=[], f_ieqcons=None, args=(), kwargs={},
         swarmsize=100, omega=0.5, phip=0.5, phig=0.5, maxiter=100,
         minstep=1e-8, minfunc=1e-8, debug=False, processes=1,
         particle_output=False):
@@ -377,7 +405,7 @@ def pso(func, lb, ub, ieqcons=[], f_ieqcons=None, args=(), kwargs={},
     for i in range(S):
         fx[i] = obj(x[i, :])
         fs[i] = is_feasible(x[i, :])
-
+
     # Store particle's best position (if constraints are satisfied)
     i_update = np.logical_and((fx < fp), fs)
     p[i_update, :] = x[i_update, :].copy()
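
For orientation, here is a minimal usage sketch of the 1.4.2 API shown above. The artifact path, feature count, and boundary values are hypothetical; what the diff does establish is that the joblib file must contain a tree-based 'model' (optionally 'feature_names' and 'target_name'), that `add_noise` and `task` no longer have defaults, and that with `auto_binary_boundaries=True` binary bounds are appended automatically from `ObjectiveFunction.binary_features`:

from ml_tools.particle_swarm_optimization import ObjectiveFunction, run_pso

# Hypothetical artifact: a joblib dict holding a trained tree-based regressor.
objective = ObjectiveFunction(
    trained_model_path="models/regressor.joblib",  # assumed path
    add_noise=True,                # required in 1.4.2 (default removed)
    task="maximization",           # required in 1.4.2 (default removed)
    binary_features=2,             # the last 2 features are binary flags
)

# Bounds are given for the continuous features only; run_pso appends
# [0, 1] bounds for the 2 binary features because auto_binary_boundaries=True.
best_features_named, best_target_named = run_pso(
    lower_boundaries=[0.0, 10.0, -5.0],  # illustrative values
    upper_boundaries=[1.0, 50.0, 5.0],
    objective_function=objective,
    save_results_dir="pso_results",
    post_hoc_analysis=None,  # single run; the new default is 3 repetitions
    workers=1,
)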
ml_tools/pytorch_models.py CHANGED
@@ -1,5 +1,12 @@
 import torch
 from torch import nn
+from .utilities import _script_info
+
+
+__all__ = [
+    "MyNeuralNetwork",
+    "MyLSTMNetwork"
+]
 
 
 class MyNeuralNetwork(nn.Module):
@@ -73,9 +80,11 @@ class MyNeuralNetwork(nn.Module):
         return X
 
 
-class MyConvolutionalNetwork(nn.Module):
+class _MyConvolutionalNetwork(nn.Module):
     def __init__(self, outputs: int, color_channels: int=3, img_size: int=256, drop_out: float=0.2):
         """
+        - EDUCATIONAL PURPOSES ONLY, not optimized and requires lots of memory.
+
         Create a basic Convolutional Neural Network with two convolution layers with a pooling layer after each convolution.
 
         Args:
@@ -225,3 +234,6 @@ class MyLSTMNetwork(nn.Module):
         else:
             return output
 
+
+def info():
+    _script_info(__all__)
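
A quick sketch of what the new export convention means for callers (the `ml_tools.pytorch_models` import path is taken from the RECORD listing at the end of this diff; `_script_info` is defined in ml_tools/utilities.py below):

from ml_tools import pytorch_models

# Prints the names registered in __all__, i.e.:
#   Available functions and objects:
#   1 - MyNeuralNetwork
#   2 - MyLSTMNetwork
pytorch_models.info()

# _MyConvolutionalNetwork remains importable explicitly, but the leading
# underscore and its absence from __all__ now mark it as private,
# educational-only code.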
ml_tools/trainer.py CHANGED
@@ -6,6 +6,12 @@ import matplotlib.pyplot as plt
 import torch
 from torch import nn
 from sklearn.metrics import mean_squared_error, classification_report, ConfusionMatrixDisplay, roc_curve, roc_auc_score, r2_score, median_absolute_error
+from .utilities import _script_info
+
+
+__all__ = [
+    "MyTrainer"
+]
 
 
 class MyTrainer():
@@ -288,36 +294,6 @@ class MyTrainer():
             print(f"Area under the curve score: {area_under_curve:4.2f}")
         else:
             print("Error encountered while retrieving 'model.kind' attribute.")
-
-
-    def forecast(self, samples_list: list[torch.Tensor], view_as: tuple[int,int]=(1,-1)):
-        """
-        DEPRECATED - Use `helpers.model_predict()` instead
-
-        Returns a list containing lists of predicted values, one for each sample.
-
-        Each sample must be a tensor and have the same shape and normalization expected by the model
-        (this method will add the batch dimension automatically).
-
-        Args:
-            `samples_list`: list of tensors.
-
-            `view_as`: reshape each output, default is (1,-1).
-
-        Returns: List of lists.
-        """
-        self.model.eval()
-        results = list()
-        with torch.no_grad():
-            for data_point in samples_list:
-                data_point = data_point.unsqueeze(0).to(self.device)
-                output = self.model(data_point)
-                if self.kind == "classification":
-                    results.append(output.argmax(dim=1).view(view_as).cpu().tolist())
-                else: #regression
-                    results.append(output.view(view_as).cpu().tolist())
-
-        return results
 
 
     def rnn_forecast(self, sequence: torch.Tensor, steps: int):
@@ -364,3 +340,7 @@ class MyTrainer():
         # Cast to array and return
         predictions = numpy.array(predictions)
         return predictions
+
+
+def info():
+    _script_info(__all__)
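
The removed `forecast` method directs users to `model_predict()`, which this release exposes from ml_tools/vision_helpers.py (see below). Its full signature is not visible in this diff, so the sketch here is an assumption: it keeps `model` and `kind` from the truncated signature and mirrors the removed method's `samples_list`/`view_as` pattern:

import torch
from ml_tools.vision_helpers import model_predict

model = torch.nn.Linear(4, 1)   # stand-in regression model
samples = [torch.randn(4)]      # one unbatched sample, as forecast expected

# Parameter names beyond `model` and `kind` are assumptions based on the
# removed MyTrainer.forecast.
predictions = model_predict(model=model, kind="regression",
                            samples_list=samples, view_as=(1, -1))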
ml_tools/utilities.py CHANGED
@@ -4,6 +4,20 @@ import pandas as pd
 import os
 from pathlib import Path
 import re
+from typing import Literal, Union, Sequence
+
+
+# Keep track of available tools
+__all__ = [
+    "list_csv_paths",
+    "load_dataframe",
+    "yield_dataframes_from_dir",
+    "merge_dataframes",
+    "save_dataframe",
+    "normalize_mixed_list",
+    "sanitize_filename",
+    "threshold_binary_values"
+]
 
 
 def list_csv_paths(directory: str) -> dict[str, str]:
@@ -76,11 +90,93 @@ def yield_dataframes_from_dir(datasets_dir: str):
     for df_name, df_path in list_csv_paths(datasets_dir).items():
         df, _ = load_dataframe(df_path)
         yield df, df_name
+
+
+def merge_dataframes(
+    *dfs: pd.DataFrame,
+    reset_index: bool = False,
+    direction: Literal["horizontal", "vertical"] = "horizontal"
+) -> pd.DataFrame:
+    """
+    Merges multiple DataFrames either horizontally or vertically.
+
+    Parameters:
+        *dfs (pd.DataFrame): Variable number of DataFrames to merge.
+        reset_index (bool): Whether to reset index in the final merged DataFrame.
+        direction (["horizontal" | "vertical"]):
+            - "horizontal": Merge on index, adding columns.
+            - "vertical": Append rows; all DataFrames must have identical columns.
+
+    Returns:
+        pd.DataFrame: A single merged DataFrame.
+
+    Raises:
+        ValueError:
+            - If fewer than 2 DataFrames are provided.
+            - If indexes do not match for horizontal merge.
+            - If column names or order differ for vertical merge.
+    """
+    if len(dfs) < 2:
+        raise ValueError("At least 2 DataFrames must be provided.")
+
+    for i, df in enumerate(dfs, start=1):
+        print(f"DataFrame {i} shape: {df.shape}")
+
+
+    if direction == "horizontal":
+        reference_index = dfs[0].index
+        for i, df in enumerate(dfs, start=1):
+            if not df.index.equals(reference_index):
+                raise ValueError(f"Indexes do not match: Dataset 1 and Dataset {i}.")
+        merged_df = pd.concat(dfs, axis=1)
+
+    elif direction == "vertical":
+        reference_columns = dfs[0].columns
+        for i, df in enumerate(dfs, start=1):
+            if not df.columns.equals(reference_columns):
+                raise ValueError(f"Column names/order do not match: Dataset 1 and Dataset {i}.")
+        merged_df = pd.concat(dfs, axis=0)
+
+    else:
+        raise ValueError(f"Invalid merge direction: {direction}")
+
+    if reset_index:
+        merged_df = merged_df.reset_index(drop=True)
+
+    print(f"Merged DataFrame shape: {merged_df.shape}")
+
+    return merged_df
+
+
+def save_dataframe(df: pd.DataFrame, save_dir: str, filename: str) -> None:
+    """
+    Save a pandas DataFrame to a CSV file.
+
+    Parameters:
+        df: pandas.DataFrame to save
+        save_dir: str, directory where the CSV file will be saved.
+        filename: str, CSV filename, extension will be added if missing.
+    """
+    if df.empty:
+        print(f"⚠️ Attempting to save an empty DataFrame: '{filename}'. Process Skipped.")
+        return
+
+    os.makedirs(save_dir, exist_ok=True)
+
+    filename = sanitize_filename(filename)
+
+    if not filename.endswith('.csv'):
+        filename += '.csv'
 
+    output_path = os.path.join(save_dir, filename)
 
+    df.to_csv(output_path, index=False, encoding='utf-8')
+    print(f"✅ Saved file: '{filename}'")
+
+
 def normalize_mixed_list(data: list, threshold: int = 2) -> list[float]:
     """
-    Normalize a mixed list of numeric values and strings so that the sum of the values equals 1.0,
+    Normalize a mixed list of numeric values and strings casted to floats so that the sum of the values equals 1.0,
     applying heuristic adjustments to correct for potential data entry scale mismatches.
 
     Parameters:
@@ -168,27 +264,46 @@ def sanitize_filename(filename: str) -> str:
     return sanitized
 
 
-def save_dataframe(df: pd.DataFrame, save_dir: str, filename: str) -> None:
+def threshold_binary_values(
+    input_array: Union[Sequence[float], np.ndarray],
+    binary_features: int
+) -> np.ndarray:
     """
-    Save a pandas DataFrame to a CSV file.
+    Thresholds binary features in a 1D numeric sequence. Binary features must be located at the end of the sequence.
+
+    Converts binary elements to values (0 or 1) using a threshold of 0.5. The rest of the array (assumed to be continuous features) is returned unchanged.
 
     Parameters:
-        df: pandas.DataFrame to save
-        save_dir: str, directory where the CSV file will be saved.
-        filename: str, CSV filename, extension will be added if missing.
+        input_array (Union[Sequence[float], np.ndarray]) : A one-dimensional collection of numeric values. The binary features must be located at the end of the array.
+
+        binary_features (int) : Number of binary features to threshold from the end of the array. Must be between 0 and the total number of elements.
+
+    Returns:
+        np.ndarray : A 1D NumPy array where the final `binary_features` values have been binarized.
     """
-    if df.empty:
-        print(f"⚠️ Attempting to save an empty DataFrame: '{filename}'. Process Skipped.")
-        return
+    array = np.asarray(input_array).flatten()
+    total = array.shape[0]
 
-    os.makedirs(save_dir, exist_ok=True)
+    if binary_features < 0 or binary_features > total:
+        raise ValueError("Binary features must be between 0 and the total number of features.")
 
-    filename = sanitize_filename(filename)
+    if binary_features == 0:
+        return array
+
+    cont_part = array[:-binary_features]
+    bin_part = (array[-binary_features:] > 0.5).astype(int)
 
-    if not filename.endswith('.csv'):
-        filename += '.csv'
-
-    output_path = os.path.join(save_dir, filename)
-
-    df.to_csv(output_path, index=False, encoding='utf-8')
-    print(f"✅ Saved file: '{filename}'")
+    return np.concatenate([cont_part, bin_part])
+
+
+def _script_info(all_data: list[str]):
+    """
+    List available names.
+    """
+    print("Available functions and objects:")
+    for i, name in enumerate(all_data, start=1):
+        print(f"{i} - {name}")
+
+
+def info():
+    _script_info(__all__)
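
A short, self-contained sketch exercising the utilities added above (values are illustrative):

import numpy as np
import pandas as pd
from ml_tools.utilities import threshold_binary_values, merge_dataframes, save_dataframe

# threshold_binary_values: the last 2 entries are treated as binary flags.
mixed = np.array([0.37, 12.5, 0.81, 0.49])
print(threshold_binary_values(mixed, binary_features=2))
# continuous part unchanged, binary part thresholded at 0.5: [0.37, 12.5, 1.0, 0.0]

# merge_dataframes: a horizontal merge requires matching indexes.
left = pd.DataFrame({"a": [1, 2]})
right = pd.DataFrame({"b": [3, 4]})
merged = merge_dataframes(left, right, direction="horizontal")

# save_dataframe sanitizes the filename and appends '.csv' if missing.
save_dataframe(merged, save_dir="output", filename="merged data")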
ml_tools/vision_helpers.py CHANGED
@@ -4,9 +4,18 @@ from PIL import Image, ImageOps
 from typing import Literal
 from torchvision import transforms
 import torch
+from .utilities import _script_info
+
+
+__all__ = [
+    "inspect_images",
+    "image_augmentation",
+    "ResizeAspectFill",
+    "is_image",
+    "model_predict"
+]
 
 
-# --- Helper Functions ---
 def inspect_images(path: str):
     """
     Prints out the types, sizes and channels of image files found in the directory and its subdirectories.
@@ -216,3 +225,7 @@ def model_predict(model: torch.nn.Module, kind: Literal["regression", "classific
             results.append(output.view(view_as).cpu().tolist())
 
     return results
+
+
+def info():
+    _script_info(__all__)
dragon_ml_toolbox-1.4.0.dist-info/RECORD DELETED
@@ -1,19 +0,0 @@
-dragon_ml_toolbox-1.4.0.dist-info/licenses/LICENSE,sha256=2uUFNy7D0TLgHim1K5s3DIJ4q_KvxEXVilnU20cWliY,1066
-dragon_ml_toolbox-1.4.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=e1Hg5ZtaBpDV7ZvxhLe1ac28l7nMjvi1MSE5YvB1s-o,1472
-ml_tools/MICE_imputation.py,sha256=4kqZiesk8vyh4MBLnNE9grflG4fDusqzuYBElsbk4LY,9484
-ml_tools/VIF_factor.py,sha256=rHSAxQcXLrG8dIjCXBAvETsSkCBfYus9NqimOnm2Bvk,9559
-ml_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ml_tools/data_exploration.py,sha256=qtkGumckC2PmTpj3brVFi072ewX0OI6dwUF4Or7Yikg,21341
-ml_tools/datasetmaster.py,sha256=VUneKshnmjOGbtqVVGTFcIMRKF3s6ZDYrosIYKDjD80,28956
-ml_tools/ensemble_learning.py,sha256=wK6mtOE4v9AWlxkcWhJj5XZjREChxb46kE0i2IxS-OE,28372
-ml_tools/handle_excel.py,sha256=IR0VQc3hYdmjwC31E5YxDnRcWig4jSIx7Y_7to-KZz4,11969
-ml_tools/logger.py,sha256=XwSpCUzw2Le24fJHyljBxNLgw63SwjZ0pMjTJqf0ylI,4622
-ml_tools/particle_swarm_optimization.py,sha256=jpkje4OETC9fyISxxUTx4XGrImSU6gDEcwz46ZDs2bQ,19250
-ml_tools/pytorch_models.py,sha256=Oykw02sOZLCjvSadQd64UGesBN7kq0x1EGXHusvYiQI,9908
-ml_tools/trainer.py,sha256=Zd7AaHeoNd8dEas2JChWoHaCUpWUVRDUMybuHaKJ0XY,16740
-ml_tools/utilities.py,sha256=gr1cyRUfZcRo9fjWpCaQkrvWY0-xJnDJdrE8JEsOi8o,6309
-ml_tools/vision_helpers.py,sha256=lBAW6dzAK-HOswAt1fU_tfP9hkNLY5D8c_I_7hhEXno,7528
-dragon_ml_toolbox-1.4.0.dist-info/METADATA,sha256=V7Y96iAbgX6Xl6RWzEt4nGfKMZe4cuLs0BrFQghXxX8,2335
-dragon_ml_toolbox-1.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-dragon_ml_toolbox-1.4.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
-dragon_ml_toolbox-1.4.0.dist-info/RECORD,,