dragon-ml-toolbox 10.0.0__tar.gz → 10.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dragon-ml-toolbox might be problematic. Click here for more details.

Files changed (41)
  1. {dragon_ml_toolbox-10.0.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-10.1.0}/PKG-INFO +4 -2
  2. {dragon_ml_toolbox-10.0.0 → dragon_ml_toolbox-10.1.0}/README.md +3 -1
  3. {dragon_ml_toolbox-10.0.0 → dragon_ml_toolbox-10.1.0/dragon_ml_toolbox.egg-info}/PKG-INFO +4 -2
  4. {dragon_ml_toolbox-10.0.0 → dragon_ml_toolbox-10.1.0}/ml_tools/ETL_cleaning.py +29 -17
  5. {dragon_ml_toolbox-10.0.0 → dragon_ml_toolbox-10.1.0}/pyproject.toml +1 -1
  6. {dragon_ml_toolbox-10.0.0 → dragon_ml_toolbox-10.1.0}/LICENSE +0 -0
  7. {dragon_ml_toolbox-10.0.0 → dragon_ml_toolbox-10.1.0}/LICENSE-THIRD-PARTY.md +0 -0
  8. {dragon_ml_toolbox-10.0.0 → dragon_ml_toolbox-10.1.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
  9. {dragon_ml_toolbox-10.0.0 → dragon_ml_toolbox-10.1.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
  10. {dragon_ml_toolbox-10.0.0 → dragon_ml_toolbox-10.1.0}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
  11. {dragon_ml_toolbox-10.0.0 → dragon_ml_toolbox-10.1.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
  12. {dragon_ml_toolbox-10.0.0 → dragon_ml_toolbox-10.1.0}/ml_tools/ETL_engineering.py +0 -0
  13. {dragon_ml_toolbox-10.0.0 → dragon_ml_toolbox-10.1.0}/ml_tools/GUI_tools.py +0 -0
  14. {dragon_ml_toolbox-10.0.0 → dragon_ml_toolbox-10.1.0}/ml_tools/MICE_imputation.py +0 -0
  15. {dragon_ml_toolbox-10.0.0 → dragon_ml_toolbox-10.1.0}/ml_tools/ML_callbacks.py +0 -0
  16. {dragon_ml_toolbox-10.0.0 → dragon_ml_toolbox-10.1.0}/ml_tools/ML_datasetmaster.py +0 -0
  17. {dragon_ml_toolbox-10.0.0 → dragon_ml_toolbox-10.1.0}/ml_tools/ML_evaluation.py +0 -0
  18. {dragon_ml_toolbox-10.0.0 → dragon_ml_toolbox-10.1.0}/ml_tools/ML_evaluation_multi.py +0 -0
  19. {dragon_ml_toolbox-10.0.0 → dragon_ml_toolbox-10.1.0}/ml_tools/ML_inference.py +0 -0
  20. {dragon_ml_toolbox-10.0.0 → dragon_ml_toolbox-10.1.0}/ml_tools/ML_models.py +0 -0
  21. {dragon_ml_toolbox-10.0.0 → dragon_ml_toolbox-10.1.0}/ml_tools/ML_optimization.py +0 -0
  22. {dragon_ml_toolbox-10.0.0 → dragon_ml_toolbox-10.1.0}/ml_tools/ML_scaler.py +0 -0
  23. {dragon_ml_toolbox-10.0.0 → dragon_ml_toolbox-10.1.0}/ml_tools/ML_trainer.py +0 -0
  24. {dragon_ml_toolbox-10.0.0 → dragon_ml_toolbox-10.1.0}/ml_tools/PSO_optimization.py +0 -0
  25. {dragon_ml_toolbox-10.0.0 → dragon_ml_toolbox-10.1.0}/ml_tools/RNN_forecast.py +0 -0
  26. {dragon_ml_toolbox-10.0.0 → dragon_ml_toolbox-10.1.0}/ml_tools/SQL.py +0 -0
  27. {dragon_ml_toolbox-10.0.0 → dragon_ml_toolbox-10.1.0}/ml_tools/VIF_factor.py +0 -0
  28. {dragon_ml_toolbox-10.0.0 → dragon_ml_toolbox-10.1.0}/ml_tools/__init__.py +0 -0
  29. {dragon_ml_toolbox-10.0.0 → dragon_ml_toolbox-10.1.0}/ml_tools/_logger.py +0 -0
  30. {dragon_ml_toolbox-10.0.0 → dragon_ml_toolbox-10.1.0}/ml_tools/_script_info.py +0 -0
  31. {dragon_ml_toolbox-10.0.0 → dragon_ml_toolbox-10.1.0}/ml_tools/custom_logger.py +0 -0
  32. {dragon_ml_toolbox-10.0.0 → dragon_ml_toolbox-10.1.0}/ml_tools/data_exploration.py +0 -0
  33. {dragon_ml_toolbox-10.0.0 → dragon_ml_toolbox-10.1.0}/ml_tools/ensemble_evaluation.py +0 -0
  34. {dragon_ml_toolbox-10.0.0 → dragon_ml_toolbox-10.1.0}/ml_tools/ensemble_inference.py +0 -0
  35. {dragon_ml_toolbox-10.0.0 → dragon_ml_toolbox-10.1.0}/ml_tools/ensemble_learning.py +0 -0
  36. {dragon_ml_toolbox-10.0.0 → dragon_ml_toolbox-10.1.0}/ml_tools/handle_excel.py +0 -0
  37. {dragon_ml_toolbox-10.0.0 → dragon_ml_toolbox-10.1.0}/ml_tools/keys.py +0 -0
  38. {dragon_ml_toolbox-10.0.0 → dragon_ml_toolbox-10.1.0}/ml_tools/optimization_tools.py +0 -0
  39. {dragon_ml_toolbox-10.0.0 → dragon_ml_toolbox-10.1.0}/ml_tools/path_manager.py +0 -0
  40. {dragon_ml_toolbox-10.0.0 → dragon_ml_toolbox-10.1.0}/ml_tools/utilities.py +0 -0
  41. {dragon_ml_toolbox-10.0.0 → dragon_ml_toolbox-10.1.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 10.0.0
3
+ Version: 10.1.0
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: Karl Loza <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -148,6 +148,7 @@ data_exploration
148
148
  ensemble_evaluation
149
149
  ensemble_inference
150
150
  ensemble_learning
151
+ ETL_cleaning
151
152
  ETL_engineering
152
153
  ML_callbacks
153
154
  ML_datasetmaster
@@ -265,7 +266,8 @@ pip install "dragon-ml-toolbox[base]"
265
266
  #### Modules:
266
267
 
267
268
  ```Bash
268
- ETL_Engineering
269
+ ETL_cleaning
270
+ ETL_engineering
269
271
  custom_logger
270
272
  SQL
271
273
  utilities
@@ -63,6 +63,7 @@ data_exploration
63
63
  ensemble_evaluation
64
64
  ensemble_inference
65
65
  ensemble_learning
66
+ ETL_cleaning
66
67
  ETL_engineering
67
68
  ML_callbacks
68
69
  ML_datasetmaster
@@ -180,7 +181,8 @@ pip install "dragon-ml-toolbox[base]"
180
181
  #### Modules:
181
182
 
182
183
  ```Bash
183
- ETL_Engineering
184
+ ETL_cleaning
185
+ ETL_engineering
184
186
  custom_logger
185
187
  SQL
186
188
  utilities
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 10.0.0
3
+ Version: 10.1.0
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: Karl Loza <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -148,6 +148,7 @@ data_exploration
148
148
  ensemble_evaluation
149
149
  ensemble_inference
150
150
  ensemble_learning
151
+ ETL_cleaning
151
152
  ETL_engineering
152
153
  ML_callbacks
153
154
  ML_datasetmaster
@@ -265,7 +266,8 @@ pip install "dragon-ml-toolbox[base]"
265
266
  #### Modules:
266
267
 
267
268
  ```Bash
268
- ETL_Engineering
269
+ ETL_cleaning
270
+ ETL_engineering
269
271
  custom_logger
270
272
  SQL
271
273
  utilities
@@ -1,8 +1,7 @@
1
1
  import polars as pl
2
2
  import pandas as pd
3
- import re
4
3
  from pathlib import Path
5
- from typing import Literal, Union, Optional, Any, Callable, List, Dict, Tuple
4
+ from typing import Union, List, Dict
6
5
  from .path_manager import sanitize_filename, make_fullpath
7
6
  from .utilities import save_dataframe, load_dataframe
8
7
  from ._script_info import _script_info
@@ -131,16 +130,37 @@ def basic_clean(input_filepath: Union[str,Path], output_filepath: Union[str,Path
131
130
  r'\p{C}+': '',
132
131
 
133
132
  # Full-width to half-width
133
+ # Numbers
134
+ '０': '0', '１': '1', '２': '2', '３': '3', '４': '4',
135
+ '５': '5', '６': '6', '７': '7', '８': '8', '９': '9',
136
+ # Superscripts & Subscripts
137
+ '¹': '1', '²': '2', '³': '3', '⁴': '4', '⁵': '5',
138
+ '⁶': '6', '⁷': '7', '⁸': '8', '⁹': '9', '⁰': '0',
139
+ '₁': '1', '₂': '2', '₃': '3', '₄': '4', '₅': '5',
140
+ '₆': '6', '₇': '7', '₈': '8', '₉': '9', '₀': '0',
141
+ # Uppercase Alphabet
142
+ 'Ａ': 'A', 'Ｂ': 'B', 'Ｃ': 'C', 'Ｄ': 'D', 'Ｅ': 'E', 'Ｆ': 'F',
143
+ 'Ｇ': 'G', 'Ｈ': 'H', 'Ｉ': 'I', 'Ｊ': 'J', 'Ｋ': 'K', 'Ｌ': 'L',
144
+ 'Ｍ': 'M', 'Ｎ': 'N', 'Ｏ': 'O', 'Ｐ': 'P', 'Ｑ': 'Q', 'Ｒ': 'R',
145
+ 'Ｓ': 'S', 'Ｔ': 'T', 'Ｕ': 'U', 'Ｖ': 'V', 'Ｗ': 'W', 'Ｘ': 'X',
146
+ 'Ｙ': 'Y', 'Ｚ': 'Z',
147
+ # Lowercase Alphabet
148
+ 'ａ': 'a', 'ｂ': 'b', 'ｃ': 'c', 'ｄ': 'd', 'ｅ': 'e', 'ｆ': 'f',
149
+ 'ｇ': 'g', 'ｈ': 'h', 'ｉ': 'i', 'ｊ': 'j', 'ｋ': 'k', 'ｌ': 'l',
150
+ 'ｍ': 'm', 'ｎ': 'n', 'ｏ': 'o', 'ｐ': 'p', 'ｑ': 'q', 'ｒ': 'r',
151
+ 'ｓ': 's', 'ｔ': 't', 'ｕ': 'u', 'ｖ': 'v', 'ｗ': 'w', 'ｘ': 'x',
152
+ 'ｙ': 'y', 'ｚ': 'z',
153
+ # Punctuation
134
154
  '》': '>', '《': '<', '：': ':', '，': ',', '。': '.', '；': ';', '【': '[', '】': ']',
135
- '（': '(', '）': ')', '？': '?', '！': '!', '～': '~', '＠': '@', '＃': '#',
136
- '＄': '$', '％': '%', '＾': '^', '＆': '&', '＊': '*', '＼': '\\', '｜': '|',
155
+ '（': '(', '）': ')', '？': '?', '！': '!', '～': '~', '＠': '@', '＃': '#', '＋': '+', '－': '-',
156
+ '＄': '$', '％': '%', '＾': '^', '＆': '&', '＊': '*', '＼': '\\', '｜': '|', '、':',', '≈':'=',
137
157
 
138
158
  # Others
139
159
  '©': '',
140
160
  '®': '',
141
161
  '™': '',
142
162
 
143
- # Collapse repeating punctuation (explicit method)
163
+ # Collapse repeating punctuation
144
164
  r'\.{2,}': '.', # Replace two or more dots with a single dot
145
165
  r'\?{2,}': '?', # Replace two or more question marks with a single question mark
146
166
  r'!{2,}': '!', # Replace two or more exclamation marks with a single one
@@ -148,9 +168,9 @@ def basic_clean(input_filepath: Union[str,Path], output_filepath: Union[str,Path
148
168
  # Typographical standardization
149
169
  # Unify various dashes and hyphens to a standard hyphen-minus
150
170
  r'[—–―]': '-',
151
- # Unify various quote types to standard single quotes
171
+ # Unify various quote types to standard quotes
152
172
  r'[“”]': "'",
153
- r'[‘’]': "'",
173
+ r'[‘’′]': "'",
154
174
 
155
175
  # 2. Internal Whitespace Consolidation
156
176
  # Collapse any sequence of whitespace chars (including non-breaking spaces) to a single space
@@ -162,7 +182,7 @@ def basic_clean(input_filepath: Union[str,Path], output_filepath: Union[str,Path
162
182
 
163
183
  # 4. Textual Null Standardization (New Step)
164
184
  # Convert common null-like text to actual nulls. (?i) makes it case-insensitive.
165
- r'^(N/A|NA|NULL|NONE|NIL|)$': None,
185
+ r'^(N/A|无|NA|NULL|NONE|NIL|)$': None,
166
186
 
167
187
  # 5. Final Nullification of Empty Strings
168
188
  # After all cleaning, if a string is now empty, convert it to a null
@@ -192,7 +212,7 @@ def basic_clean(input_filepath: Union[str,Path], output_filepath: Union[str,Path
192
212
  # Save cleaned dataframe
193
213
  save_dataframe(df=df_final, save_dir=output_path.parent, filename=output_path.name)
194
214
 
195
- _LOGGER.info(f"Successfully cleaned and saved data to '{output_path.name}'.")
215
+ _LOGGER.info(f"Data successfully cleaned.")
196
216
 
197
217
 
198
218
  ########## EXTRACT and CLEAN ##########
@@ -238,14 +258,6 @@ class ColumnCleaner:
238
258
  _LOGGER.error("The 'rules' argument must be a dictionary.")
239
259
  raise TypeError()
240
260
 
241
- # Validate each regex pattern for correctness
242
- for pattern in rules.keys():
243
- try:
244
- re.compile(pattern)
245
- except re.error:
246
- _LOGGER.error(f"Invalid regex pattern '{pattern}'.")
247
- raise
248
-
249
261
  self.column_name = column_name
250
262
  self.rules = rules
251
263
  self.case_insensitive = case_insensitive
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "dragon-ml-toolbox"
3
- version = "10.0.0"
3
+ version = "10.1.0"
4
4
  description = "A collection of tools for data science and machine learning projects."
5
5
  authors = [
6
6
  { name = "Karl Loza", email = "luigiloza@gmail.com" }