dragon-ml-toolbox 4.3.0__tar.gz → 4.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dragon-ml-toolbox might be problematic.

Files changed (35)
  1. {dragon_ml_toolbox-4.3.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-4.4.0}/PKG-INFO +1 -1
  2. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.4.0/dragon_ml_toolbox.egg-info}/PKG-INFO +1 -1
  3. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.4.0}/ml_tools/ETL_engineering.py +1 -1
  4. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.4.0}/ml_tools/custom_logger.py +15 -5
  5. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.4.0}/ml_tools/data_exploration.py +11 -5
  6. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.4.0}/pyproject.toml +1 -1
  7. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.4.0}/LICENSE +0 -0
  8. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.4.0}/LICENSE-THIRD-PARTY.md +0 -0
  9. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.4.0}/README.md +0 -0
  10. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.4.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
  11. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.4.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
  12. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.4.0}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
  13. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.4.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
  14. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.4.0}/ml_tools/GUI_tools.py +0 -0
  15. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.4.0}/ml_tools/MICE_imputation.py +0 -0
  16. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.4.0}/ml_tools/ML_callbacks.py +0 -0
  17. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.4.0}/ml_tools/ML_evaluation.py +0 -0
  18. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.4.0}/ml_tools/ML_inference.py +0 -0
  19. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.4.0}/ml_tools/ML_trainer.py +0 -0
  20. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.4.0}/ml_tools/PSO_optimization.py +0 -0
  21. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.4.0}/ml_tools/RNN_forecast.py +0 -0
  22. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.4.0}/ml_tools/SQL.py +0 -0
  23. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.4.0}/ml_tools/VIF_factor.py +0 -0
  24. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.4.0}/ml_tools/__init__.py +0 -0
  25. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.4.0}/ml_tools/_logger.py +0 -0
  26. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.4.0}/ml_tools/_pytorch_models.py +0 -0
  27. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.4.0}/ml_tools/_script_info.py +0 -0
  28. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.4.0}/ml_tools/datasetmaster.py +0 -0
  29. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.4.0}/ml_tools/ensemble_inference.py +0 -0
  30. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.4.0}/ml_tools/ensemble_learning.py +0 -0
  31. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.4.0}/ml_tools/handle_excel.py +0 -0
  32. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.4.0}/ml_tools/keys.py +0 -0
  33. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.4.0}/ml_tools/path_manager.py +0 -0
  34. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.4.0}/ml_tools/utilities.py +0 -0
  35. {dragon_ml_toolbox-4.3.0 → dragon_ml_toolbox-4.4.0}/setup.cfg +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 4.3.0
+Version: 4.4.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: Karl Loza <luigiloza@gmail.com>
 License-Expression: MIT
dragon_ml_toolbox.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 4.3.0
+Version: 4.4.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: Karl Loza <luigiloza@gmail.com>
 License-Expression: MIT
ml_tools/ETL_engineering.py

@@ -569,7 +569,7 @@ class NumberExtractor:
         self,
         regex_pattern: str = r"(\d+\.?\d*)",
         dtype: Literal["float", "int"] = "float",
-        round_digits: Optional[int] = None,
+        round_digits: Optional[int] = 2,
     ):
         # --- Validation ---
         if not isinstance(regex_pattern, str):
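
The only functional change in ml_tools/ETL_engineering.py is the new default for NumberExtractor's round_digits, which goes from None (no rounding) to 2. As a rough illustration of what that default implies, here is a minimal, hypothetical helper (extract_number is not part of the toolbox; it only mimics the parameters shown in the hunk above):

```python
import re
from typing import Literal, Optional

def extract_number(text: str,
                   regex_pattern: str = r"(\d+\.?\d*)",
                   dtype: Literal["float", "int"] = "float",
                   round_digits: Optional[int] = 2):
    """Standalone sketch of regex-based number extraction with the 4.4.0 default."""
    match = re.search(regex_pattern, text)
    if match is None:
        return None
    value = float(match.group(1))
    if dtype == "int":
        return int(value)
    # round_digits now defaults to 2; None still means "no rounding"
    return round(value, round_digits) if round_digits is not None else value

print(extract_number("weight: 12.3456 kg"))  # 12.35 under the new default
```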
ml_tools/custom_logger.py

@@ -1,9 +1,9 @@
 from pathlib import Path
 from datetime import datetime
 from typing import Union, List, Dict, Any
-import pandas as pd
 import traceback
 import json
+import csv
 from .path_manager import sanitize_filename, make_fullpath
 from ._script_info import _script_info
 from ._logger import _LOGGER
@@ -18,7 +18,6 @@ def custom_logger(
     data: Union[
         List[Any],
         Dict[Any, Any],
-        pd.DataFrame,
         str,
         BaseException
     ],
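
Since pandas is no longer imported in custom_logger.py, DataFrame inputs are no longer accepted by custom_logger. A caller that previously logged a DataFrame can pass a plain dict of column lists instead, which the dict branch (handled by _log_dict_to_csv) still covers. A hypothetical caller-side conversion, assuming the rest of the custom_logger signature is unchanged:

```python
import pandas as pd

# Illustrative data; not taken from the package
df = pd.DataFrame({"epoch": [1, 2, 3], "loss": [0.912, 0.541, 0.337]})

# Dict of column lists -- the shape _log_dict_to_csv expects
data_for_logging = df.to_dict(orient="list")
# {'epoch': [1, 2, 3], 'loss': [0.912, 0.541, 0.337]}
```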
@@ -75,7 +74,7 @@ def custom_logger(
         _log_exception_to_log(data, base_path.with_suffix(".log"))
 
     else:
-        raise ValueError("Unsupported data type. Must be list, dict, DataFrame, str, or BaseException.")
+        raise ValueError("Unsupported data type. Must be list, dict, str, or BaseException.")
 
     _LOGGER.info(f"🗄️ Log saved to: '{base_path}'")
 
@@ -106,8 +105,19 @@ def _log_dict_to_csv(data: Dict[Any, List[Any]], path: Path) -> None:
         padded_value = value + [None] * (max_length - len(value))
         sanitized_dict[sanitized_key] = padded_value
 
-    df = pd.DataFrame(sanitized_dict)
-    df.to_csv(path, index=False)
+    # The `newline=''` argument is important to prevent extra blank rows
+    with open(path, 'w', newline='', encoding='utf-8') as csv_file:
+        writer = csv.writer(csv_file)
+
+        # 1. Write the header row from the sanitized dictionary keys
+        header = list(sanitized_dict.keys())
+        writer.writerow(header)
+
+        # 2. Transpose columns to rows and write them
+        # zip(*sanitized_dict.values()) elegantly converts the column data
+        # (lists in the dict) into row-by-row tuples.
+        rows_to_write = zip(*sanitized_dict.values())
+        writer.writerows(rows_to_write)
 
 
 def _log_string_to_log(data: str, path: Path) -> None:
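
The hunk above is the core of the 4.4.0 change to custom_logger.py: _log_dict_to_csv now writes the CSV with the standard-library csv module instead of building a pandas DataFrame. The padding of shorter columns (visible in the hunk's leading context) and the zip-based transpose are the two pieces that make this work. A minimal, self-contained sketch of the same pattern, with made-up data:

```python
import csv
from pathlib import Path

# Columns of unequal length, stored as a dict of lists (illustrative data)
columns = {"name": ["a", "b", "c"], "score": [1.0, 2.5]}

# Pad every column to the same length, mirroring the logic in _log_dict_to_csv
max_length = max(len(v) for v in columns.values())
padded = {k: v + [None] * (max_length - len(v)) for k, v in columns.items()}

path = Path("example_log.csv")
with open(path, "w", newline="", encoding="utf-8") as csv_file:  # newline='' avoids blank rows
    writer = csv.writer(csv_file)
    writer.writerow(list(padded.keys()))     # header row from the dict keys
    writer.writerows(zip(*padded.values()))  # transpose columns into rows
```

One behavioural note: csv.writer serialises None as an empty field, so the padded cells come out blank, much as missing values did with DataFrame.to_csv.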
ml_tools/data_exploration.py

@@ -126,7 +126,7 @@ def drop_rows_with_missing_data(df: pd.DataFrame, targets: Optional[list[str]],
     # Stage 1: Drop rows with all target columns missing
     if targets is not None:
         # validate targets
-        valid_targets = [target for target in targets if target in df_clean.columns]
+        valid_targets = _validate_columns(df_clean, targets)
         target_na = df_clean[valid_targets].isnull().all(axis=1)
         if target_na.any():
             print(f"🧹 Dropping {target_na.sum()} rows with all target columns missing.")
@@ -134,10 +134,10 @@ def drop_rows_with_missing_data(df: pd.DataFrame, targets: Optional[list[str]],
         else:
             print("✅ No rows with all targets missing.")
     else:
-        targets = []
+        valid_targets = []
 
     # Stage 2: Drop rows based on feature column missing values
-    feature_cols = [col for col in df_clean.columns if col not in targets]
+    feature_cols = [col for col in df_clean.columns if col not in valid_targets]
     if feature_cols:
         feature_na_frac = df_clean[feature_cols].isnull().mean(axis=1)
         rows_to_drop = feature_na_frac[feature_na_frac > threshold].index
@@ -238,8 +238,9 @@ def split_features_targets(df: pd.DataFrame, targets: list[str]):
     - Shape of the features dataframe.
     - Shape of the targets dataframe.
     """
-    df_targets = df[targets]
-    df_features = df.drop(columns=targets)
+    valid_targets = _validate_columns(df, targets)
+    df_targets = df[valid_targets]
+    df_features = df.drop(columns=valid_targets)
     print(f"Original shape: {df.shape}\nFeatures shape: {df_features.shape}\nTargets shape: {df_targets.shape}")
     return df_features, df_targets
 
@@ -644,5 +645,10 @@ def standardize_percentages(
     return df_copy
 
 
+def _validate_columns(df: pd.DataFrame, columns: list[str]):
+    valid_columns = [column for column in columns if column in df.columns]
+    return valid_columns
+
+
 def info():
     _script_info(__all__)
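
The data_exploration.py changes all funnel column validation through the new private helper _validate_columns, which keeps only the requested columns that actually exist in the DataFrame. As a consequence, drop_rows_with_missing_data and split_features_targets now silently ignore a target name that is missing from the frame rather than raising a KeyError at df[targets]. A small illustration of the helper's behaviour with made-up data:

```python
import pandas as pd

# Same filtering logic as the helper added in 4.4.0
def _validate_columns(df: pd.DataFrame, columns: list[str]) -> list[str]:
    return [column for column in columns if column in df.columns]

df = pd.DataFrame({"feature": [1, 2], "target_a": [3, 4]})  # illustrative frame

# "target_b" does not exist, so it is dropped from the result
print(_validate_columns(df, ["target_a", "target_b"]))  # ['target_a']
```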
pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "dragon-ml-toolbox"
-version = "4.3.0"
+version = "4.4.0"
 description = "A collection of tools for data science and machine learning projects."
 authors = [
     { name = "Karl Loza", email = "luigiloza@gmail.com" }