dragon-ml-toolbox 4.3.0__tar.gz → 4.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of dragon-ml-toolbox might be problematic.

Files changed (36)
  1. {dragon_ml_toolbox-4.3.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-4.5.0}/PKG-INFO +1 -1
  2. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.5.0/dragon_ml_toolbox.egg-info}/PKG-INFO +1 -1
  3. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.5.0}/ml_tools/ETL_engineering.py +1 -1
  4. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.5.0}/ml_tools/PSO_optimization.py +14 -1
  5. dragon_ml_toolbox-4.5.0/ml_tools/__init__.py +1 -0
  6. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.5.0}/ml_tools/custom_logger.py +15 -5
  7. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.5.0}/ml_tools/data_exploration.py +12 -6
  8. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.5.0}/pyproject.toml +1 -1
  9. dragon_ml_toolbox-4.3.0/ml_tools/__init__.py +0 -0
  10. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.5.0}/LICENSE +0 -0
  11. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.5.0}/LICENSE-THIRD-PARTY.md +0 -0
  12. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.5.0}/README.md +0 -0
  13. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.5.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
  14. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.5.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
  15. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.5.0}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
  16. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.5.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
  17. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.5.0}/ml_tools/GUI_tools.py +0 -0
  18. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.5.0}/ml_tools/MICE_imputation.py +0 -0
  19. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.5.0}/ml_tools/ML_callbacks.py +0 -0
  20. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.5.0}/ml_tools/ML_evaluation.py +0 -0
  21. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.5.0}/ml_tools/ML_inference.py +0 -0
  22. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.5.0}/ml_tools/ML_trainer.py +0 -0
  23. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.5.0}/ml_tools/RNN_forecast.py +0 -0
  24. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.5.0}/ml_tools/SQL.py +0 -0
  25. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.5.0}/ml_tools/VIF_factor.py +0 -0
  26. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.5.0}/ml_tools/_logger.py +0 -0
  27. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.5.0}/ml_tools/_pytorch_models.py +0 -0
  28. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.5.0}/ml_tools/_script_info.py +0 -0
  29. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.5.0}/ml_tools/datasetmaster.py +0 -0
  30. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.5.0}/ml_tools/ensemble_inference.py +0 -0
  31. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.5.0}/ml_tools/ensemble_learning.py +0 -0
  32. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.5.0}/ml_tools/handle_excel.py +0 -0
  33. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.5.0}/ml_tools/keys.py +0 -0
  34. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.5.0}/ml_tools/path_manager.py +0 -0
  35. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.5.0}/ml_tools/utilities.py +0 -0
  36. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.5.0}/setup.cfg +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 4.3.0
+Version: 4.5.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: Karl Loza <luigiloza@gmail.com>
 License-Expression: MIT
dragon_ml_toolbox.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 4.3.0
+Version: 4.5.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: Karl Loza <luigiloza@gmail.com>
 License-Expression: MIT
ml_tools/ETL_engineering.py
@@ -569,7 +569,7 @@ class NumberExtractor:
         self,
         regex_pattern: str = r"(\d+\.?\d*)",
         dtype: Literal["float", "int"] = "float",
-        round_digits: Optional[int] = None,
+        round_digits: Optional[int] = 2,
     ):
         # --- Validation ---
         if not isinstance(regex_pattern, str):
ml_tools/PSO_optimization.py
@@ -2,7 +2,7 @@ import numpy as np
 from pathlib import Path
 import xgboost as xgb
 import lightgbm as lgb
-from typing import Literal, Union, Tuple, Dict, Optional
+from typing import Literal, Union, Tuple, Dict, Optional, Any
 import pandas as pd
 from copy import deepcopy
 from .utilities import (
@@ -25,6 +25,7 @@ from contextlib import nullcontext
 __all__ = [
     "ObjectiveFunction",
     "multiple_objective_functions_from_dir",
+    "parse_lower_upper_bounds",
     "run_pso",
     "plot_optimal_feature_distributions"
 ]
@@ -169,6 +170,18 @@ def multiple_objective_functions_from_dir(directory: Union[str,Path], add_noise:
     return objective_functions, objective_function_names


+def parse_lower_upper_bounds(source: dict[str,tuple[Any,Any]]):
+    """
+    Parse lower and upper boundaries, returning 2 lists:
+
+    `lower_bounds`, `upper_bounds`
+    """
+    lower = [low[0] for low in source.values()]
+    upper = [up[1] for up in source.values()]
+
+    return lower, upper
+
+
 def _set_boundaries(lower_boundaries: list[float], upper_boundaries: list[float]):
     assert len(lower_boundaries) == len(upper_boundaries), "Lower and upper boundaries must have the same length."
     assert len(lower_boundaries) >= 1, "At least one boundary pair is required."
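The new helper relies on dicts preserving insertion order (Python 3.7+), so the two lists line up with the feature order of the mapping and satisfy the equal-length check in _set_boundaries. A usage sketch:

```python
from ml_tools.PSO_optimization import parse_lower_upper_bounds

bounds = {
    "temperature": (20.0, 90.0),  # (lower, upper) per feature
    "pressure":    (1.0, 5.0),
    "time_min":    (10, 120),
}
lower, upper = parse_lower_upper_bounds(bounds)
print(lower)  # [20.0, 1.0, 10]
print(upper)  # [90.0, 5.0, 120]
```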
ml_tools/__init__.py (new file)
@@ -0,0 +1 @@
+from .custom_logger import custom_logger
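This one-line file makes custom_logger importable from the package root:

```python
from ml_tools import custom_logger  # previously: from ml_tools.custom_logger import custom_logger
```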
ml_tools/custom_logger.py
@@ -1,9 +1,9 @@
 from pathlib import Path
 from datetime import datetime
 from typing import Union, List, Dict, Any
-import pandas as pd
 import traceback
 import json
+import csv
 from .path_manager import sanitize_filename, make_fullpath
 from ._script_info import _script_info
 from ._logger import _LOGGER
@@ -18,7 +18,6 @@ def custom_logger(
     data: Union[
         List[Any],
         Dict[Any, Any],
-        pd.DataFrame,
         str,
         BaseException
     ],
@@ -75,7 +74,7 @@ def custom_logger(
         _log_exception_to_log(data, base_path.with_suffix(".log"))

     else:
-        raise ValueError("Unsupported data type. Must be list, dict, DataFrame, str, or BaseException.")
+        raise ValueError("Unsupported data type. Must be list, dict, str, or BaseException.")

     _LOGGER.info(f"🗄️ Log saved to: '{base_path}'")

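Callers that previously passed a DataFrame can convert to the still-supported dict-of-lists form. A migration sketch (the remaining custom_logger parameters are not shown in this diff, so the call is left elided):

```python
import pandas as pd

df = pd.DataFrame({"epoch": [1, 2, 3], "loss": [0.52, 0.31, 0.27]})
data = df.to_dict(orient="list")  # {'epoch': [1, 2, 3], 'loss': [0.52, 0.31, 0.27]}
# custom_logger(data, ...)        # remaining arguments omitted here
```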
@@ -106,8 +105,19 @@ def _log_dict_to_csv(data: Dict[Any, List[Any]], path: Path) -> None:
         padded_value = value + [None] * (max_length - len(value))
         sanitized_dict[sanitized_key] = padded_value

-    df = pd.DataFrame(sanitized_dict)
-    df.to_csv(path, index=False)
+    # The `newline=''` argument is important to prevent extra blank rows
+    with open(path, 'w', newline='', encoding='utf-8') as csv_file:
+        writer = csv.writer(csv_file)
+
+        # 1. Write the header row from the sanitized dictionary keys
+        header = list(sanitized_dict.keys())
+        writer.writerow(header)
+
+        # 2. Transpose columns to rows and write them
+        # zip(*sanitized_dict.values()) elegantly converts the column data
+        # (lists in the dict) into row-by-row tuples.
+        rows_to_write = zip(*sanitized_dict.values())
+        writer.writerows(rows_to_write)


 def _log_string_to_log(data: str, path: Path) -> None:
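The replacement drops the pandas dependency for this path; the key move is zip(*values), which transposes column lists into row tuples. In isolation:

```python
import csv
import io

columns = {"name": ["a", "b"], "score": [1, 2]}  # dict of equal-length column lists
buffer = io.StringIO()
writer = csv.writer(buffer)
writer.writerow(columns.keys())           # header: name,score
writer.writerows(zip(*columns.values()))  # rows: ('a', 1), ('b', 2)
print(buffer.getvalue())
# name,score
# a,1
# b,2
```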
ml_tools/data_exploration.py
@@ -126,7 +126,7 @@ def drop_rows_with_missing_data(df: pd.DataFrame, targets: Optional[list[str]],
     # Stage 1: Drop rows with all target columns missing
     if targets is not None:
         # validate targets
-        valid_targets = [target for target in targets if target in df_clean.columns]
+        valid_targets = _validate_columns(df_clean, targets)
         target_na = df_clean[valid_targets].isnull().all(axis=1)
         if target_na.any():
             print(f"🧹 Dropping {target_na.sum()} rows with all target columns missing.")
@@ -134,10 +134,10 @@ def drop_rows_with_missing_data(df: pd.DataFrame, targets: Optional[list[str]],
         else:
             print("✅ No rows with all targets missing.")
     else:
-        targets = []
+        valid_targets = []

     # Stage 2: Drop rows based on feature column missing values
-    feature_cols = [col for col in df_clean.columns if col not in targets]
+    feature_cols = [col for col in df_clean.columns if col not in valid_targets]
     if feature_cols:
         feature_na_frac = df_clean[feature_cols].isnull().mean(axis=1)
         rows_to_drop = feature_na_frac[feature_na_frac > threshold].index
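The stage-2 rule visible in the context lines drops a row once its fraction of missing feature values exceeds threshold. Isolated with a toy frame:

```python
import pandas as pd

df = pd.DataFrame({
    "f1": [1.0, None, None],
    "f2": [2.0, None, 5.0],
    "f3": [3.0, 4.0, 6.0],
})
threshold = 0.5
feature_na_frac = df.isnull().mean(axis=1)  # per-row fraction of missing values
rows_to_drop = feature_na_frac[feature_na_frac > threshold].index
print(df.drop(index=rows_to_drop))  # row 1 (2/3 missing) is dropped; rows 0 and 2 remain
```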
@@ -238,8 +238,9 @@ def split_features_targets(df: pd.DataFrame, targets: list[str]):
     - Shape of the features dataframe.
     - Shape of the targets dataframe.
     """
-    df_targets = df[targets]
-    df_features = df.drop(columns=targets)
+    valid_targets = _validate_columns(df, targets)
+    df_targets = df[valid_targets]
+    df_features = df.drop(columns=valid_targets)
     print(f"Original shape: {df.shape}\nFeatures shape: {df_features.shape}\nTargets shape: {df_targets.shape}")
     return df_features, df_targets

@@ -347,7 +348,7 @@ def plot_correlation_heatmap(df: pd.DataFrame,
         full_path = save_path / plot_title
         plt.savefig(full_path, bbox_inches="tight", format='svg')

-        print(f"Saved correlation heatmap: '{plot_title}.svg'")
+        print(f"Saved correlation heatmap: '{plot_title}'")

     plt.show()
     plt.close()
@@ -644,5 +645,10 @@ def standardize_percentages(
644
645
  return df_copy
645
646
 
646
647
 
648
+ def _validate_columns(df: pd.DataFrame, columns: list[str]):
649
+ valid_columns = [column for column in columns if column in df.columns]
650
+ return valid_columns
651
+
652
+
647
653
  def info():
648
654
  _script_info(__all__)
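The new private helper silently discards unknown column names, which is what now shields drop_rows_with_missing_data and split_features_targets from a KeyError. For example:

```python
import pandas as pd

def _validate_columns(df: pd.DataFrame, columns: list[str]):
    valid_columns = [column for column in columns if column in df.columns]
    return valid_columns

df = pd.DataFrame({"target_a": [1], "feature_1": [2]})
print(_validate_columns(df, ["target_a", "target_b"]))  # ['target_a']
# Before 4.5.0, df[["target_a", "target_b"]] in split_features_targets
# raised a KeyError for the unknown column name.
```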
pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dragon-ml-toolbox"
-version = "4.3.0"
+version = "4.5.0"
 description = "A collection of tools for data science and machine learning projects."
 authors = [
     { name = "Karl Loza", email = "luigiloza@gmail.com" }