dragon-ml-toolbox 3.2.0__tar.gz → 3.2.1__tar.gz
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dragon-ml-toolbox might be problematic. Click here for more details.
- {dragon_ml_toolbox-3.2.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-3.2.1}/PKG-INFO +1 -1
- {dragon_ml_toolbox-3.2.0 → dragon_ml_toolbox-3.2.1/dragon_ml_toolbox.egg-info}/PKG-INFO +1 -1
- {dragon_ml_toolbox-3.2.0 → dragon_ml_toolbox-3.2.1}/ml_tools/ETL_engineering.py +14 -2
- {dragon_ml_toolbox-3.2.0 → dragon_ml_toolbox-3.2.1}/pyproject.toml +1 -1
- {dragon_ml_toolbox-3.2.0 → dragon_ml_toolbox-3.2.1}/LICENSE +0 -0
- {dragon_ml_toolbox-3.2.0 → dragon_ml_toolbox-3.2.1}/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-3.2.0 → dragon_ml_toolbox-3.2.1}/README.md +0 -0
- {dragon_ml_toolbox-3.2.0 → dragon_ml_toolbox-3.2.1}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
- {dragon_ml_toolbox-3.2.0 → dragon_ml_toolbox-3.2.1}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
- {dragon_ml_toolbox-3.2.0 → dragon_ml_toolbox-3.2.1}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
- {dragon_ml_toolbox-3.2.0 → dragon_ml_toolbox-3.2.1}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
- {dragon_ml_toolbox-3.2.0 → dragon_ml_toolbox-3.2.1}/ml_tools/GUI_tools.py +0 -0
- {dragon_ml_toolbox-3.2.0 → dragon_ml_toolbox-3.2.1}/ml_tools/MICE_imputation.py +0 -0
- {dragon_ml_toolbox-3.2.0 → dragon_ml_toolbox-3.2.1}/ml_tools/ML_callbacks.py +0 -0
- {dragon_ml_toolbox-3.2.0 → dragon_ml_toolbox-3.2.1}/ml_tools/ML_evaluation.py +0 -0
- {dragon_ml_toolbox-3.2.0 → dragon_ml_toolbox-3.2.1}/ml_tools/ML_trainer.py +0 -0
- {dragon_ml_toolbox-3.2.0 → dragon_ml_toolbox-3.2.1}/ml_tools/ML_tutorial.py +0 -0
- {dragon_ml_toolbox-3.2.0 → dragon_ml_toolbox-3.2.1}/ml_tools/PSO_optimization.py +0 -0
- {dragon_ml_toolbox-3.2.0 → dragon_ml_toolbox-3.2.1}/ml_tools/RNN_forecast.py +0 -0
- {dragon_ml_toolbox-3.2.0 → dragon_ml_toolbox-3.2.1}/ml_tools/VIF_factor.py +0 -0
- {dragon_ml_toolbox-3.2.0 → dragon_ml_toolbox-3.2.1}/ml_tools/__init__.py +0 -0
- {dragon_ml_toolbox-3.2.0 → dragon_ml_toolbox-3.2.1}/ml_tools/_particle_swarm_optimization.py +0 -0
- {dragon_ml_toolbox-3.2.0 → dragon_ml_toolbox-3.2.1}/ml_tools/_pytorch_models.py +0 -0
- {dragon_ml_toolbox-3.2.0 → dragon_ml_toolbox-3.2.1}/ml_tools/data_exploration.py +0 -0
- {dragon_ml_toolbox-3.2.0 → dragon_ml_toolbox-3.2.1}/ml_tools/datasetmaster.py +0 -0
- {dragon_ml_toolbox-3.2.0 → dragon_ml_toolbox-3.2.1}/ml_tools/ensemble_learning.py +0 -0
- {dragon_ml_toolbox-3.2.0 → dragon_ml_toolbox-3.2.1}/ml_tools/handle_excel.py +0 -0
- {dragon_ml_toolbox-3.2.0 → dragon_ml_toolbox-3.2.1}/ml_tools/logger.py +0 -0
- {dragon_ml_toolbox-3.2.0 → dragon_ml_toolbox-3.2.1}/ml_tools/utilities.py +0 -0
- {dragon_ml_toolbox-3.2.0 → dragon_ml_toolbox-3.2.1}/setup.cfg +0 -0
|
@@ -417,14 +417,17 @@ class KeywordDummifier:
|
|
|
417
417
|
`group_name` at the same index and contains the keywords to search for.
|
|
418
418
|
case_insensitive (bool):
|
|
419
419
|
If True, keyword matching ignores case.
|
|
420
|
+
drop_empty (bool):
|
|
421
|
+
If True, columns that contain no positive matches (all zeros) will be dropped from the final output.
|
|
420
422
|
"""
|
|
421
|
-
def __init__(self, group_names: List[str], group_keywords: List[List[str]], case_insensitive: bool = True):
|
|
423
|
+
def __init__(self, group_names: List[str], group_keywords: List[List[str]], case_insensitive: bool = True, drop_empty: bool = True):
|
|
422
424
|
if len(group_names) != len(group_keywords):
|
|
423
425
|
raise ValueError("Initialization failed: 'group_names' and 'group_keywords' must have the same length.")
|
|
424
426
|
|
|
425
427
|
self.group_names = group_names
|
|
426
428
|
self.group_keywords = group_keywords
|
|
427
429
|
self.case_insensitive = case_insensitive
|
|
430
|
+
self.drop_empty = drop_empty
|
|
428
431
|
|
|
429
432
|
def __call__(self, column: pl.Series) -> pl.DataFrame:
|
|
430
433
|
"""
|
|
@@ -471,7 +474,16 @@ class KeywordDummifier:
|
|
|
471
474
|
# If a group had no matches, create a column of zeros
|
|
472
475
|
final_columns.append(pl.lit(0, dtype=pl.UInt8).alias(name))
|
|
473
476
|
|
|
474
|
-
|
|
477
|
+
# First, create a full DataFrame with all potential columns
|
|
478
|
+
result_df = pl.DataFrame(final_columns)
|
|
479
|
+
|
|
480
|
+
# If drop_empty is True, filter out all-zero columns
|
|
481
|
+
if self.drop_empty:
|
|
482
|
+
# A column is kept if its sum is greater than 0
|
|
483
|
+
cols_to_keep = [col for col in result_df if col.sum() > 0]
|
|
484
|
+
return result_df.select(cols_to_keep)
|
|
485
|
+
|
|
486
|
+
return result_df
|
|
475
487
|
|
|
476
488
|
|
|
477
489
|
class NumberExtractor:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dragon_ml_toolbox-3.2.0 → dragon_ml_toolbox-3.2.1}/dragon_ml_toolbox.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
{dragon_ml_toolbox-3.2.0 → dragon_ml_toolbox-3.2.1}/dragon_ml_toolbox.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dragon_ml_toolbox-3.2.0 → dragon_ml_toolbox-3.2.1}/ml_tools/_particle_swarm_optimization.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|