dragon-ml-toolbox 10.0.1__tar.gz → 10.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dragon-ml-toolbox might be problematic. Click here for more details.
- {dragon_ml_toolbox-10.0.1/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-10.1.0}/PKG-INFO +1 -1
- {dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0/dragon_ml_toolbox.egg-info}/PKG-INFO +1 -1
- {dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/ml_tools/ETL_cleaning.py +27 -15
- {dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/pyproject.toml +1 -1
- {dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/LICENSE +0 -0
- {dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/README.md +0 -0
- {dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
- {dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
- {dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
- {dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
- {dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/ml_tools/ETL_engineering.py +0 -0
- {dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/ml_tools/GUI_tools.py +0 -0
- {dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/ml_tools/MICE_imputation.py +0 -0
- {dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/ml_tools/ML_callbacks.py +0 -0
- {dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/ml_tools/ML_datasetmaster.py +0 -0
- {dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/ml_tools/ML_evaluation.py +0 -0
- {dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/ml_tools/ML_evaluation_multi.py +0 -0
- {dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/ml_tools/ML_inference.py +0 -0
- {dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/ml_tools/ML_models.py +0 -0
- {dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/ml_tools/ML_optimization.py +0 -0
- {dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/ml_tools/ML_scaler.py +0 -0
- {dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/ml_tools/ML_trainer.py +0 -0
- {dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/ml_tools/PSO_optimization.py +0 -0
- {dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/ml_tools/RNN_forecast.py +0 -0
- {dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/ml_tools/SQL.py +0 -0
- {dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/ml_tools/VIF_factor.py +0 -0
- {dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/ml_tools/__init__.py +0 -0
- {dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/ml_tools/_logger.py +0 -0
- {dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/ml_tools/_script_info.py +0 -0
- {dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/ml_tools/custom_logger.py +0 -0
- {dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/ml_tools/data_exploration.py +0 -0
- {dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/ml_tools/ensemble_evaluation.py +0 -0
- {dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/ml_tools/ensemble_inference.py +0 -0
- {dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/ml_tools/ensemble_learning.py +0 -0
- {dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/ml_tools/handle_excel.py +0 -0
- {dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/ml_tools/keys.py +0 -0
- {dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/ml_tools/optimization_tools.py +0 -0
- {dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/ml_tools/path_manager.py +0 -0
- {dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/ml_tools/utilities.py +0 -0
- {dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/setup.cfg +0 -0
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
import polars as pl
|
|
2
2
|
import pandas as pd
|
|
3
|
-
import re
|
|
4
3
|
from pathlib import Path
|
|
5
|
-
from typing import
|
|
4
|
+
from typing import Union, List, Dict
|
|
6
5
|
from .path_manager import sanitize_filename, make_fullpath
|
|
7
6
|
from .utilities import save_dataframe, load_dataframe
|
|
8
7
|
from ._script_info import _script_info
|
|
@@ -131,9 +130,30 @@ def basic_clean(input_filepath: Union[str,Path], output_filepath: Union[str,Path
|
|
|
131
130
|
r'\p{C}+': '',
|
|
132
131
|
|
|
133
132
|
# Full-width to half-width
|
|
133
|
+
# Numbers
|
|
134
|
+
'０': '0', '１': '1', '２': '2', '３': '3', '４': '4',
|
|
135
|
+
'５': '5', '６': '6', '７': '7', '８': '8', '９': '9',
|
|
136
|
+
# Superscripts & Subscripts
|
|
137
|
+
'¹': '1', '²': '2', '³': '3', '⁴': '4', '⁵': '5',
|
|
138
|
+
'⁶': '6', '⁷': '7', '⁸': '8', '⁹': '9', '⁰': '0',
|
|
139
|
+
'₁': '1', '₂': '2', '₃': '3', '₄': '4', '₅': '5',
|
|
140
|
+
'₆': '6', '₇': '7', '₈': '8', '₉': '9', '₀': '0',
|
|
141
|
+
# Uppercase Alphabet
|
|
142
|
+
'Ａ': 'A', 'Ｂ': 'B', 'Ｃ': 'C', 'Ｄ': 'D', 'Ｅ': 'E', 'Ｆ': 'F',
|
|
143
|
+
'Ｇ': 'G', 'Ｈ': 'H', 'Ｉ': 'I', 'Ｊ': 'J', 'Ｋ': 'K', 'Ｌ': 'L',
|
|
144
|
+
'Ｍ': 'M', 'Ｎ': 'N', 'Ｏ': 'O', 'Ｐ': 'P', 'Ｑ': 'Q', 'Ｒ': 'R',
|
|
145
|
+
'Ｓ': 'S', 'Ｔ': 'T', 'Ｕ': 'U', 'Ｖ': 'V', 'Ｗ': 'W', 'Ｘ': 'X',
|
|
146
|
+
'Ｙ': 'Y', 'Ｚ': 'Z',
|
|
147
|
+
# Lowercase Alphabet
|
|
148
|
+
'ａ': 'a', 'ｂ': 'b', 'ｃ': 'c', 'ｄ': 'd', 'ｅ': 'e', 'ｆ': 'f',
|
|
149
|
+
'ｇ': 'g', 'ｈ': 'h', 'ｉ': 'i', 'ｊ': 'j', 'ｋ': 'k', 'ｌ': 'l',
|
|
150
|
+
'ｍ': 'm', 'ｎ': 'n', 'ｏ': 'o', 'ｐ': 'p', 'ｑ': 'q', 'ｒ': 'r',
|
|
151
|
+
'ｓ': 's', 'ｔ': 't', 'ｕ': 'u', 'ｖ': 'v', 'ｗ': 'w', 'ｘ': 'x',
|
|
152
|
+
'ｙ': 'y', 'ｚ': 'z',
|
|
153
|
+
# Punctuation
|
|
134
154
|
'》': '>', '《': '<', '：': ':', '，': ',', '。': '.', '；': ';', '【': '[', '】': ']',
|
|
135
|
-
'（': '(', '）': ')', '？': '?', '！': '!', '～': '~', '＠': '@', '＃': '#',
|
|
136
|
-
'＄': '$', '％': '%', '＾': '^', '＆': '&', '＊': '*', '＼': '\\', '｜': '|',
|
|
155
|
+
'（': '(', '）': ')', '？': '?', '！': '!', '～': '~', '＠': '@', '＃': '#', '＋': '+', '－': '-',
|
|
156
|
+
'＄': '$', '％': '%', '＾': '^', '＆': '&', '＊': '*', '＼': '\\', '｜': '|', '、':',', '≈':'=',
|
|
137
157
|
|
|
138
158
|
# Others
|
|
139
159
|
'©': '',
|
|
@@ -148,9 +168,9 @@ def basic_clean(input_filepath: Union[str,Path], output_filepath: Union[str,Path
|
|
|
148
168
|
# Typographical standardization
|
|
149
169
|
# Unify various dashes and hyphens to a standard hyphen-minus
|
|
150
170
|
r'[—–―]': '-',
|
|
151
|
-
# Unify various quote types to standard
|
|
171
|
+
# Unify various quote types to standard quotes
|
|
152
172
|
r'[“”]': "'",
|
|
153
|
-
r'[
|
|
173
|
+
r'[‘’′]': "'",
|
|
154
174
|
|
|
155
175
|
# 2. Internal Whitespace Consolidation
|
|
156
176
|
# Collapse any sequence of whitespace chars (including non-breaking spaces) to a single space
|
|
@@ -162,7 +182,7 @@ def basic_clean(input_filepath: Union[str,Path], output_filepath: Union[str,Path
|
|
|
162
182
|
|
|
163
183
|
# 4. Textual Null Standardization (New Step)
|
|
164
184
|
# Convert common null-like text to actual nulls. (?i) makes it case-insensitive.
|
|
165
|
-
r'^(N/A
|
|
185
|
+
r'^(N/A|无|NA|NULL|NONE|NIL|)$': None,
|
|
166
186
|
|
|
167
187
|
# 5. Final Nullification of Empty Strings
|
|
168
188
|
# After all cleaning, if a string is now empty, convert it to a null
|
|
@@ -238,14 +258,6 @@ class ColumnCleaner:
|
|
|
238
258
|
_LOGGER.error("The 'rules' argument must be a dictionary.")
|
|
239
259
|
raise TypeError()
|
|
240
260
|
|
|
241
|
-
# Validate each regex pattern for correctness
|
|
242
|
-
for pattern in rules.keys():
|
|
243
|
-
try:
|
|
244
|
-
re.compile(pattern)
|
|
245
|
-
except re.error:
|
|
246
|
-
_LOGGER.error(f"Invalid regex pattern '{pattern}'.")
|
|
247
|
-
raise
|
|
248
|
-
|
|
249
261
|
self.column_name = column_name
|
|
250
262
|
self.rules = rules
|
|
251
263
|
self.case_insensitive = case_insensitive
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/dragon_ml_toolbox.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
{dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/dragon_ml_toolbox.egg-info/requires.txt
RENAMED
|
File without changes
|
{dragon_ml_toolbox-10.0.1 → dragon_ml_toolbox-10.1.0}/dragon_ml_toolbox.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|