dragon-ml-toolbox 20.0.0__py3-none-any.whl → 20.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dragon_ml_toolbox-20.0.0.dist-info/METADATA → dragon_ml_toolbox-20.1.0.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 20.0.0
+Version: 20.1.0
 Summary: Complete pipelines and helper tools for data science and machine learning projects.
 Author-email: Karl Luigi Loza Vidaurre <luigiloza@gmail.com>
 License-Expression: MIT
dragon_ml_toolbox-20.0.0.dist-info/RECORD → dragon_ml_toolbox-20.1.0.dist-info/RECORD RENAMED
@@ -1,5 +1,5 @@
-dragon_ml_toolbox-20.0.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
-dragon_ml_toolbox-20.0.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=0-HBRMMgKuwtGy6nMJZvIn1fLxhx_ksyyVB2U_iyYZU,2818
+dragon_ml_toolbox-20.1.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
+dragon_ml_toolbox-20.1.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=0-HBRMMgKuwtGy6nMJZvIn1fLxhx_ksyyVB2U_iyYZU,2818
 ml_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ml_tools/constants.py,sha256=3br5Rk9cL2IUo638eJuMOGdbGQaWssaUecYEvSeRBLM,3322
 ml_tools/ETL_cleaning/__init__.py,sha256=TytE8RKmtW4KQlkaTxpYKlJAbCu-VAc82eDdHwVD3Jo,427
@@ -21,7 +21,7 @@ ml_tools/IO_tools/__init__.py,sha256=ZeEM5bbZ5udgRXFAL51uRXzoCzPLO8TWZ4AiME7NNy0
 ml_tools/IO_tools/_imprimir.py,sha256=eN-V60xtDNFINThuRTjXknMxtbK8Ah0MWgc8l2GTXMA,250
 ml_tools/MICE/_MICE_imputation.py,sha256=N1cDwVYfoHvIZz7FLLcW-guZUo8iFKedtkfS7CU6TVE,5318
 ml_tools/MICE/__init__.py,sha256=i5N_fd3rxpEgLsKKDoLbokW0rHm-ADEg8r3gBB5426E,313
-ml_tools/MICE/_dragon_mice.py,sha256=E6LyCe7JjEvDeKJfDfDd1iKJS86pDQLYGYoajahtuyg,17736
+ml_tools/MICE/_dragon_mice.py,sha256=qEOy9Gx1QzVBvkvGR8790TkvKw8-fp06vCDGWM6j9os,17806
 ml_tools/MICE/_imprimir.py,sha256=YVhgZlUQ-NrDUVhHTK3u8s1QEbZ_jvDVF7-0FptVsxs,215
 ml_tools/ML_callbacks/__init__.py,sha256=dF37KXezy6P3VArhZbm5CI6si65GA-qVY70jvZFZYkA,427
 ml_tools/ML_callbacks/_base.py,sha256=xLVAFOhBHjqnf8a_wKgW1F-tn2u6EqV3IHXsXKTn2NE,3269
@@ -29,10 +29,11 @@ ml_tools/ML_callbacks/_checkpoint.py,sha256=Ioj9wn8XlsR_S1NnmWbyT9lkO8o2_DcHVMrF
 ml_tools/ML_callbacks/_early_stop.py,sha256=qzTzxfDCDim0qj7QQ7ykJNIOBWbXtviDptMCczXXy_k,8073
 ml_tools/ML_callbacks/_imprimir.py,sha256=Wz6NXhiCFSJsAZh3JnQ4qt7tj2_qhu14DTwu-gkkzZs,257
 ml_tools/ML_callbacks/_scheduler.py,sha256=mn97_VH8Lp37KH3zSgmPemGQV8g-K8GfhRNHTftaNcg,7390
-ml_tools/ML_chain/__init__.py,sha256=rUBVwB96fAoq-Q9zY3s0fL_TFU5W2axlg7XZzrCXrSU,399
-ml_tools/ML_chain/_chaining_tools.py,sha256=ASi0Zr9WBVA7wd-pYVN69VIZFOIuB4QpGlrSl9Ob-90,13788
-ml_tools/ML_chain/_dragon_chain.py,sha256=wFlknv0rlL8P3K0ls8kj_oup4SvPNFqSxDmiBdPfGt4,5737
-ml_tools/ML_chain/_imprimir.py,sha256=JCVslxnrmvJ_LJOmexL2u5-OYykHFe1H49EkrJPpAIg,254
+ml_tools/ML_chain/__init__.py,sha256=UVD1xaJ59pft_ysg8z_ihqjEDQqPRQwmhui_zNRFp7I,491
+ml_tools/ML_chain/_chaining_tools.py,sha256=BDwTvgJFbJ-wgy3IkP6_SNpNaWpHGXV3PhAM7sYmHeU,13675
+ml_tools/ML_chain/_dragon_chain.py,sha256=x3fN136C5N9WcXJJW9zkNrBzP8QoBaXpxz7SPF3txjg,5601
+ml_tools/ML_chain/_imprimir.py,sha256=tHVXoGhMlbpkpcoGKwtkYVFlHFEllRCsYdpiAFI1aZk,285
+ml_tools/ML_chain/_update_schema.py,sha256=z1Us7lv6hy6GwSu1mcid50Jmqq3sh91hMQ0LnQjhte8,3806
 ml_tools/ML_configuration/__init__.py,sha256=wSpfk8bHRSoYjcKJmjd5ivB4Fw8UFjyOZL4hct9rJT0,2637
 ml_tools/ML_configuration/_base_model_config.py,sha256=95L3IfobNFMtnNr79zYpDGerC1q1v7M05tWZvTS2cwE,2247
 ml_tools/ML_configuration/_finalize.py,sha256=l_n13bLu0avMdJ8hNRrH8V_wOBQZM1UGsTydKBkTysM,15047
@@ -125,11 +126,11 @@ ml_tools/_core/__init__.py,sha256=m-VP0RW0tOTm9N5NI3kFNcpM7WtVgs0RK9pK3ZJRZQQ,14
 ml_tools/_core/_logger.py,sha256=xzhn_FouMDRVNwXGBGlPC9Ruq6i5uCrmNaS5jesguMU,4972
 ml_tools/_core/_schema_load_ops.py,sha256=KLs9vBzANz5ESe2wlP-C41N4VlgGil-ywcfvWKSOGss,1551
 ml_tools/_core/_script_info.py,sha256=LtFGt10gEvCnhIRMKJPi2yXkiGLcdr7lE-oIP2XGHzQ,234
-ml_tools/data_exploration/__init__.py,sha256=a4hlq6Pyc_cQjiys_2CUFd5nIvzqPc4g8asWEHJz9Es,1674
+ml_tools/data_exploration/__init__.py,sha256=w9dM6wjmxfbEXQCWGFVL_cIuLHtYVP364aQvzRwfZXY,1674
 ml_tools/data_exploration/_analysis.py,sha256=H6LryV56FFCHWjvQdkhZbtprZy6aP8EqU_hC2Cf9CLE,7832
 ml_tools/data_exploration/_cleaning.py,sha256=LpoOHOB6HVtdObZExg-B8SxZW-JUc51tblnkCFDZxKg,20846
 ml_tools/data_exploration/_features.py,sha256=wW-M8n2aLIy05DR2z4fI8wjpPjn3mOAnm9aSGYbMKwI,23363
-ml_tools/data_exploration/_imprimir.py,sha256=PkvDvQkYTQC_KnfI1gxxUxtC-XeSRePniM1TyJj8Caw,876
+ml_tools/data_exploration/_imprimir.py,sha256=0nXu60HpeJZ8s83mpVoRtdKILK3t8EHRFVk7d9vRVUo,876
 ml_tools/data_exploration/_plotting.py,sha256=zH1dPcIoAlOuww23xIoBCsQOAshPPv9OyGposOA2RvI,19883
 ml_tools/data_exploration/_schema_ops.py,sha256=PoFeHaS9dXI9gfL0SRD-8uSP4owqmbQFbtfA-HxkLnY,7108
 ml_tools/ensemble_evaluation/__init__.py,sha256=Xxx-F-_TvSVzMaocKXOo_tEXLibMJtf_YY85Ac3U0EI,483
@@ -146,7 +147,7 @@ ml_tools/excel_handler/_excel_handler.py,sha256=TODudmeQgDSdxUKzLfAzizs--VL-g8Wx
 ml_tools/excel_handler/_imprimir.py,sha256=QHazgqjRMzthRbDt33EVpvR7GqufSzng6jHw7IVCdtI,306
 ml_tools/keys/__init__.py,sha256=DV52KLOY5GfpLwJdDAHlFVz0qAmyh-KWg3gZorFdMSk,336
 ml_tools/keys/_imprimir.py,sha256=4qmwdia16DPq3OtlWGMkgLPT5R3lcM-ka3tQdCLx5qk,197
-ml_tools/keys/_keys.py,sha256=wyUpNY7iZIGIqvnT2BSahnkkNkK_vvZALOtRWZ7h50A,8800
+ml_tools/keys/_keys.py,sha256=fArSyT_UGGSH4PHjG-R0kefFznAtAxSAasDCQ7-89a8,8899
 ml_tools/math_utilities/__init__.py,sha256=NuTcb_Ogdwx5x-oDieBt1EAqCoZRnXbkZbUrwB6ItH0,337
 ml_tools/math_utilities/_imprimir.py,sha256=kk5DQb_BV9g767uTdXQiRjEEHgQwJpEXU3jxO3QV2Fw,238
 ml_tools/math_utilities/_math_utilities.py,sha256=BYHIVcM9tuKIhVrkgLLiM5QalJ39zx7dXYy_M9aGgiM,9012
@@ -162,7 +163,7 @@ ml_tools/plot_fonts/__init__.py,sha256=l-vSSpjZb6IeWjjgPTcNmEs7M-vbw0lqgEKD5jhtX
 ml_tools/plot_fonts/_imprimir.py,sha256=zNi6naa5eWBFfa_yV569MhUtSAL44H0xDjMcgrJSlXk,131
 ml_tools/plot_fonts/_plot_fonts.py,sha256=mfjXNT9P59ymHoTI85Q8CcvfxfK5BIFBWtTZH-hNIC4,2209
 ml_tools/schema/__init__.py,sha256=9LQtKz3OO9wm-1piUgAhCJZVZT-F-YSg5QLus9pxfgA,263
-ml_tools/schema/_feature_schema.py,sha256=QLsxBS3_CIJp4c4dknvMs7RHZl_GZDEBJQ0MxLrQo6Y,8536
+ml_tools/schema/_feature_schema.py,sha256=ICymTIL05n1qs61TvyY7rapDOJ9PlaOHi0F86N4tNlU,8547
 ml_tools/schema/_gui_schema.py,sha256=IVwN4THAdFrvh2TpV4SFd_zlzMX3eioF-w-qcSVTndE,7245
 ml_tools/schema/_imprimir.py,sha256=waNHozZmkCKKNFWSw0HFf9489FkSXogl6KuT5cn5V74,190
 ml_tools/serde/__init__.py,sha256=Gj6B8Sgf0-ad72jFXq2W_k5pXOT2iNx5Dvzwrd7Tj1U,229
@@ -172,7 +173,7 @@ ml_tools/utilities/__init__.py,sha256=pkR2HxUIlKZMDderP2awYXVIFxkU2Xt3FkJmcmuRIp
 ml_tools/utilities/_imprimir.py,sha256=sV3ASBOsTdVYvGojOTIpZYFyrnd4panS5h_4HcMzob4,432
 ml_tools/utilities/_utility_save_load.py,sha256=7skiiuYGVLVMK_nU9uLfUZw16ePvF3i9ub7G7LMyUgs,16085
 ml_tools/utilities/_utility_tools.py,sha256=bN0J9d1S0W5wNzNntBWqDsJcEAK7-1OgQg3X2fwXns0,6918
-dragon_ml_toolbox-20.0.0.dist-info/METADATA,sha256=ILeGioHn8qeLS5vaaqOs-zId8QvQxoWZcjKgHYmeuPo,7866
-dragon_ml_toolbox-20.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-dragon_ml_toolbox-20.0.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
-dragon_ml_toolbox-20.0.0.dist-info/RECORD,,
+dragon_ml_toolbox-20.1.0.dist-info/METADATA,sha256=g8BdKr-giBfa-J0TWjinoX1W4lzGaTFZEovm_Fv_43w,7866
+dragon_ml_toolbox-20.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dragon_ml_toolbox-20.1.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
+dragon_ml_toolbox-20.1.0.dist-info/RECORD,,
ml_tools/MICE/_dragon_mice.py CHANGED
@@ -197,7 +197,7 @@ class DragonMICE:
                 _LOGGER.error(f"Index mismatch in dataset {subname}")
                 raise ValueError()
 
-        _LOGGER.info("Schema-based MICE imputation complete.")
+        _LOGGER.info("⬅️ Schema-based MICE imputation complete.")
 
         return kernel, imputed_datasets, imputed_dataset_names
 
@@ -237,9 +237,6 @@ class DragonMICE:
             # We pass an empty DF as 'targets' to save_imputed_datasets to prevent duplication.
             df_input = df
             df_targets_to_save = pd.DataFrame(index=df.index)
-
-            # Monitor all columns that had NaNs
-            imputed_column_names = [col for col in df.columns if df[col].isna().any()]
         else:
             # Explicitly cast tuple to list for Pandas indexing
             feature_cols = list(self._schema.feature_names)
@@ -253,8 +250,9 @@ class DragonMICE:
            df_input = df[feature_cols]
            # Drop features to get targets (more robust than explicit selection if targets vary)
            df_targets_to_save = df.drop(columns=feature_cols)
-
-        imputed_column_names = _get_na_column_names(df=df_input) # type: ignore
+
+        # Monitor all columns that had NaNs
+        imputed_column_names = [col for col in df_input.columns if df_input[col].isna().any()]
 
         # Run core logic
         kernel, imputed_datasets, imputed_dataset_names = self._run_mice(df=df_input, df_name=df_name) # type: ignore
@@ -316,35 +314,41 @@ def get_convergence_diagnostic(kernel: mf.ImputationKernel, imputed_dataset_name
 
     # iterate over each imputed dataset
     for dataset_id, imputed_dataset_name in zip(range(dataset_count), imputed_dataset_names):
-        #Check directory for current dataset
         dataset_file_dir = f"Convergence_Metrics_{imputed_dataset_name}"
         local_save_dir = make_fullpath(input_path=root_path / dataset_file_dir, make=True)
 
-        for feature_name in column_names:
-            means_per_iteration = []
-            for iteration in range(iterations_cap):
-                current_imputed = kernel.complete_data(dataset=dataset_id, iteration=iteration)
-                means_per_iteration.append(np.mean(current_imputed[feature_name])) # type: ignore
-
+        # 1. Pre-calculate means for all features across all iterations
+        # Structure: {feature_name: [mean_iter_0, mean_iter_1, ...]}
+        history = {col: [] for col in column_names}
+
+        for iteration in range(iterations_cap):
+            # Resolve dataset ONLY ONCE per iteration
+            current_imputed = kernel.complete_data(dataset=dataset_id, iteration=iteration)
+
+            for col in column_names:
+                # Fast lookup
+                val = np.mean(current_imputed[col])
+                history[col].append(val)
+
+        # 2. Plotting loop
+        for feature_name, means_per_iteration in history.items():
            plt.figure(figsize=(10, 8))
            plt.plot(means_per_iteration, marker='o')
            plt.xlabel("Iteration", **label_font)
            plt.ylabel("Mean of Imputed Values", **label_font)
            plt.title(f"Mean Convergence for '{feature_name}'", **label_font)
 
-            # Adjust plot display for the X axis
            _ticks = np.arange(iterations_cap)
            _labels = np.arange(1, iterations_cap + 1)
-            plt.xticks(ticks=_ticks, labels=_labels) # type: ignore
+            plt.xticks(ticks=_ticks, labels=_labels)
            plt.grid(True)
 
-            feature_save_name = sanitize_filename(feature_name)
-            feature_save_name = feature_save_name + ".svg"
+            feature_save_name = sanitize_filename(feature_name) + ".svg"
            save_path = local_save_dir / feature_save_name
            plt.savefig(save_path, bbox_inches='tight', format="svg")
            plt.close()
 
-        _LOGGER.info(f"{dataset_file_dir} process completed.")
+    _LOGGER.info(f"📉 Convergence diagnostics complete.")
 
 
 # Imputed distributions
@@ -431,5 +435,5 @@ def get_imputed_distributions(kernel: mf.ImputationKernel, df_name: str, root_di
         fig = kernel.plot_imputed_distributions(variables=[feature])
         _process_figure(fig, feature)
 
-    _LOGGER.info(f"{local_dir_name} completed.")
+    _LOGGER.info(f"📊 Imputed distributions complete.")
 
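For context on the `get_convergence_diagnostic` refactor above: the old loop resolved `kernel.complete_data` once per (feature, iteration) pair, while the new code resolves each iteration once and reads every monitored column from that single completed frame. A minimal sketch of the new aggregation pattern, assuming only that `complete_data(iteration)` returns a pandas DataFrame; the helper name and its arguments are illustrative, not part of the package:

import numpy as np

def collect_mean_history(complete_data, column_names, iterations_cap):
    # One running list of per-iteration means for each monitored column.
    history = {col: [] for col in column_names}
    for iteration in range(iterations_cap):
        # Materialize the completed dataset once per iteration...
        frame = complete_data(iteration)
        for col in column_names:
            # ...then read every monitored column from that same frame.
            history[col].append(np.mean(frame[col]))
    return history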
ml_tools/ML_chain/__init__.py CHANGED
@@ -8,11 +8,16 @@ from ._chaining_tools import (
     prepare_chaining_dataset,
 )
 
+from ._update_schema import (
+    derive_next_step_schema
+)
+
 from ._imprimir import info
 
 
 __all__ = [
     "DragonChainOrchestrator",
+    "derive_next_step_schema",
     "augment_dataset_with_predictions",
     "augment_dataset_with_predictions_multi",
     "prepare_chaining_dataset",
ml_tools/ML_chain/_chaining_tools.py CHANGED
@@ -5,7 +5,7 @@ from typing import Optional, Literal
 
 from ..ML_inference import DragonInferenceHandler
 
-from ..keys._keys import MLTaskKeys, PyTorchInferenceKeys
+from ..keys._keys import MLTaskKeys, PyTorchInferenceKeys, ChainKeys
 from .._core import get_logger
 
 
@@ -23,11 +23,10 @@ def augment_dataset_with_predictions(
     handler: DragonInferenceHandler,
     dataset: pd.DataFrame,
     ground_truth_targets: list[str],
-    prediction_col_prefix: str = "pred_",
     batch_size: int = 4096
 ) -> pd.DataFrame:
     """
-    Uses a DragonInferenceHandler to generate predictions for a dataset and appends them as new feature columns.
+    Uses a DragonInferenceHandler to generate predictions for a dataset and appends them as new feature columns with a standardized prefix.
 
     This function splits the features from the ground truth targets, runs inference in batches to ensure
     memory efficiency, and returns a unified DataFrame containing:
@@ -38,8 +37,6 @@ def augment_dataset_with_predictions(
         dataset (pd.DataFrame): The input pandas DataFrame containing features and ground truth targets.
         ground_truth_targets (List[str]): A list of column names in `dataset` representing the actual targets.
             These are removed from the input features during inference and appended to the end of the result.
-        prediction_col_prefix (str, optional): A string to prepend when creating the
-            new prediction columns.
         batch_size (int, optional): The number of samples to process in a single inference step.
             Prevents OOM errors on large datasets. Defaults to 4096.
 
@@ -107,7 +104,7 @@ def augment_dataset_with_predictions(
     full_prediction_array = np.vstack(all_predictions)
 
     # Generate new column names
-    new_col_names = [f"{prediction_col_prefix}{tid}" for tid in handler.target_ids]
+    new_col_names = [f"{ChainKeys.CHAIN_PREDICTION_PREFIX}{tid}" for tid in handler.target_ids]
 
     # Verify dimensions match
     if full_prediction_array.shape[1] != len(new_col_names):
ml_tools/ML_chain/_dragon_chain.py CHANGED
@@ -77,18 +77,16 @@ class DragonChainOrchestrator:
     def update_with_inference(
         self,
         handler: DragonInferenceHandler,
-        prefix: str = "pred_",
         batch_size: int = 4096
     ) -> None:
         """
         Runs inference using the provided handler on the full internal dataset and appends the results as new features.
 
         This updates the internal state of the Orchestrator. Subsequent calls to `get_training_data`
-        will include these new prediction columns as features.
+        will include these new prediction columns as features with a standardized prefix.
 
         Args:
             handler (DragonInferenceHandler): The trained model handler.
-            prefix (str): Prefix for the new prediction columns (e.g., "m1_", "step2_").
             batch_size (int): Batch size for inference.
         """
         _LOGGER.info(f"Orchestrator: Updating internal state with predictions from handler (Targets: {handler.target_ids})...")
@@ -99,7 +97,6 @@ class DragonChainOrchestrator:
             handler=handler,
             dataset=self.current_dataset,
             ground_truth_targets=self.all_targets,
-            prediction_col_prefix=prefix,
             batch_size=batch_size
         )
 
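Together with the `_chaining_tools.py` hunks above, this removes the caller-chosen prefix from the chaining API: prediction columns are now always named with `ChainKeys.CHAIN_PREDICTION_PREFIX`. A hedged migration sketch, where `orchestrator` and `handler` stand in for an already-constructed `DragonChainOrchestrator` and `DragonInferenceHandler`:

# 20.0.0: the prefix was a keyword argument.
# orchestrator.update_with_inference(handler, prefix="step1_", batch_size=4096)

# 20.1.0: the argument is gone; new columns are always named
# f"{ChainKeys.CHAIN_PREDICTION_PREFIX}{target_id}", i.e. "pred_<target_id>".
orchestrator.update_with_inference(handler, batch_size=4096)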
ml_tools/ML_chain/_imprimir.py CHANGED
@@ -2,6 +2,7 @@ from .._core import _imprimir_disponibles
 
 _GRUPOS = [
     "DragonChainOrchestrator",
+    "derive_next_step_schema",
     "augment_dataset_with_predictions",
     "augment_dataset_with_predictions_multi",
     "prepare_chaining_dataset",
ml_tools/ML_chain/_update_schema.py ADDED
@@ -0,0 +1,96 @@
+from ..schema import FeatureSchema
+from ..ML_inference import DragonInferenceHandler
+
+from ..keys._keys import MLTaskKeys, ChainKeys
+from .._core import get_logger
+
+
+_LOGGER = get_logger("Schema Updater")
+
+
+__all__ = [
+    "derive_next_step_schema",
+]
+
+
+def derive_next_step_schema(
+    current_schema: FeatureSchema,
+    handler: DragonInferenceHandler,
+    verbose: bool = True
+) -> FeatureSchema:
+    """
+    Creates the FeatureSchema for the NEXT step in the chain by appending the current handler's predictions as new features.
+
+    Args:
+        current_schema (FeatureSchema): The current FeatureSchema.
+        handler (DragonInferenceHandler): The inference handler of the model trained using the current schema.
+
+    Returns:
+        FeatureSchema: An updated schema including new predicted features.
+    """
+    # 1. Determine New Column Names
+    # Match logic from _chaining_tools.py
+    if handler.target_ids is None:
+        _LOGGER.error("Handler target_ids is None; cannot derive schema.")
+        raise ValueError()
+
+    new_cols = [f"{ChainKeys.CHAIN_PREDICTION_PREFIX}{tid}" for tid in handler.target_ids]
+
+    # 2. Base Lists (Convert tuples to lists for mutation)
+    new_feature_names = list(current_schema.feature_names) + new_cols
+    new_cont_names = list(current_schema.continuous_feature_names)
+    new_cat_names = list(current_schema.categorical_feature_names)
+
+    # Copy existing maps (handle None case)
+    new_cat_index_map = dict(current_schema.categorical_index_map) if current_schema.categorical_index_map else {}
+    new_cat_mappings = dict(current_schema.categorical_mappings) if current_schema.categorical_mappings else {}
+
+    # 3. Determine Feature Type based on Task
+    is_categorical = False
+    cardinality = 0
+
+    if handler.task in [MLTaskKeys.BINARY_CLASSIFICATION, MLTaskKeys.MULTILABEL_BINARY_CLASSIFICATION]:
+        is_categorical = True
+        cardinality = 2
+
+    elif handler.task == MLTaskKeys.MULTICLASS_CLASSIFICATION:
+        is_categorical = True
+        # We rely on the class map to know the 'vocabulary' size
+        if handler._class_map is None:
+            _LOGGER.error("Handler class_map is None, cannot determine cardinality for multiclass classification model.")
+            raise ValueError()
+        cardinality = len(handler._class_map)
+
+    # 4. Append New Metadata
+    current_total_feats = len(current_schema.feature_names)
+
+    for i, col_name in enumerate(new_cols):
+        # Calculate the absolute index of this new column
+        # If we had 10 features (0-9), the new one is at index 10 + i
+        new_index = current_total_feats + i
+
+        if is_categorical:
+            new_cat_names.append(col_name)
+
+            # A. Update Cardinality for Embeddings
+            new_cat_index_map[new_index] = cardinality
+
+            # B. Create Identity Mapping (Dummy Encoding)
+            # Maps string representation of int back to the int.
+            identity_map = {str(k): k for k in range(cardinality)}
+            new_cat_mappings[col_name] = identity_map
+        else:
+            # Regression / Multitarget Regression
+            new_cont_names.append(col_name)
+
+    if verbose:
+        _LOGGER.info(f"Derived next step schema with {len(new_feature_names)} features:\n {len(new_cont_names)} continuous\n {len(new_cat_names)} categorical")
+
+    # 5. Return New Immutable Schema
+    return FeatureSchema(
+        feature_names=tuple(new_feature_names),
+        continuous_feature_names=tuple(new_cont_names),
+        categorical_feature_names=tuple(new_cat_names),
+        categorical_index_map=new_cat_index_map if new_cat_index_map else None,
+        categorical_mappings=new_cat_mappings if new_cat_mappings else None
+    )
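The new module closes a gap in multi-step chaining: after training step N, the schema for step N+1 can be derived from the step-N handler instead of being rebuilt by hand. A usage sketch, assuming `schema_step1` and `handler_step1` are the `FeatureSchema` and `DragonInferenceHandler` from the previous step (variable names are illustrative):

from ml_tools.ML_chain import derive_next_step_schema

schema_step2 = derive_next_step_schema(
    current_schema=schema_step1,
    handler=handler_step1,
)
# schema_step2 appends one "pred_<target_id>" feature per handler target:
# continuous for regression tasks, categorical (with cardinality and an
# identity mapping) for classification tasks.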
ml_tools/data_exploration/__init__.py CHANGED
@@ -53,13 +53,13 @@ __all__ = [
     "split_features_targets",
     "split_continuous_binary",
     "split_continuous_categorical_targets",
-    "encode_categorical_features",
     "clip_outliers_single",
     "clip_outliers_multi",
     "drop_outlier_samples",
     "plot_continuous_vs_target",
     "plot_categorical_vs_target",
     "plot_correlation_heatmap",
+    "encode_categorical_features",
     "finalize_feature_schema",
     "apply_feature_schema",
     "match_and_filter_columns_by_regex",
ml_tools/data_exploration/_imprimir.py CHANGED
@@ -12,13 +12,13 @@ _GRUPOS = [
     "split_features_targets",
     "split_continuous_binary",
     "split_continuous_categorical_targets",
-    "encode_categorical_features",
     "clip_outliers_single",
     "clip_outliers_multi",
     "drop_outlier_samples",
     "plot_continuous_vs_target",
     "plot_categorical_vs_target",
     "plot_correlation_heatmap",
+    "encode_categorical_features",
     "finalize_feature_schema",
     "apply_feature_schema",
     "match_and_filter_columns_by_regex",
ml_tools/keys/_keys.py CHANGED
@@ -278,6 +278,11 @@ class SchemaKeys:
     OPTIONAL_LABELS = "optional_labels"
 
 
+class ChainKeys:
+    """Used by the ML chaining module."""
+    CHAIN_PREDICTION_PREFIX = "pred_"
+
+
 class _EvaluationConfig:
     """Set config values for evaluation modules."""
     DPI = 400
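`ChainKeys` centralizes the prefix that 20.0.0 duplicated as the `"pred_"` default of two function signatures. A one-line illustration with a hypothetical target id `"price"`:

from ml_tools.keys._keys import ChainKeys

column = f"{ChainKeys.CHAIN_PREDICTION_PREFIX}price"  # -> "pred_price"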
ml_tools/schema/_feature_schema.py CHANGED
@@ -44,7 +44,7 @@ class FeatureSchema(NamedTuple):
         Handles conversion of Tuple->List and IntKeys->StrKeys automatically.
         """
         # validate path
-        dir_path = make_fullpath(directory, enforce="directory")
+        dir_path = make_fullpath(directory, make=True, enforce="directory")
         file_path = dir_path / SchemaKeys.SCHEMA_FILENAME
 
         try:
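Assuming `make_fullpath(..., make=True)` creates missing directories, as in its use by `get_convergence_diagnostic` above, this last change means writing a `FeatureSchema` to a directory that does not yet exist should now create that directory on demand instead of failing path validation.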