dragon-ml-toolbox 20.0.0__py3-none-any.whl → 20.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: dragon-ml-toolbox
- Version: 20.0.0
+ Version: 20.0.1
  Summary: Complete pipelines and helper tools for data science and machine learning projects.
  Author-email: Karl Luigi Loza Vidaurre <luigiloza@gmail.com>
  License-Expression: MIT
@@ -1,5 +1,5 @@
- dragon_ml_toolbox-20.0.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
- dragon_ml_toolbox-20.0.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=0-HBRMMgKuwtGy6nMJZvIn1fLxhx_ksyyVB2U_iyYZU,2818
+ dragon_ml_toolbox-20.0.1.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
+ dragon_ml_toolbox-20.0.1.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=0-HBRMMgKuwtGy6nMJZvIn1fLxhx_ksyyVB2U_iyYZU,2818
  ml_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  ml_tools/constants.py,sha256=3br5Rk9cL2IUo638eJuMOGdbGQaWssaUecYEvSeRBLM,3322
  ml_tools/ETL_cleaning/__init__.py,sha256=TytE8RKmtW4KQlkaTxpYKlJAbCu-VAc82eDdHwVD3Jo,427
@@ -21,7 +21,7 @@ ml_tools/IO_tools/__init__.py,sha256=ZeEM5bbZ5udgRXFAL51uRXzoCzPLO8TWZ4AiME7NNy0
  ml_tools/IO_tools/_imprimir.py,sha256=eN-V60xtDNFINThuRTjXknMxtbK8Ah0MWgc8l2GTXMA,250
  ml_tools/MICE/_MICE_imputation.py,sha256=N1cDwVYfoHvIZz7FLLcW-guZUo8iFKedtkfS7CU6TVE,5318
  ml_tools/MICE/__init__.py,sha256=i5N_fd3rxpEgLsKKDoLbokW0rHm-ADEg8r3gBB5426E,313
- ml_tools/MICE/_dragon_mice.py,sha256=E6LyCe7JjEvDeKJfDfDd1iKJS86pDQLYGYoajahtuyg,17736
+ ml_tools/MICE/_dragon_mice.py,sha256=qEOy9Gx1QzVBvkvGR8790TkvKw8-fp06vCDGWM6j9os,17806
  ml_tools/MICE/_imprimir.py,sha256=YVhgZlUQ-NrDUVhHTK3u8s1QEbZ_jvDVF7-0FptVsxs,215
  ml_tools/ML_callbacks/__init__.py,sha256=dF37KXezy6P3VArhZbm5CI6si65GA-qVY70jvZFZYkA,427
  ml_tools/ML_callbacks/_base.py,sha256=xLVAFOhBHjqnf8a_wKgW1F-tn2u6EqV3IHXsXKTn2NE,3269
@@ -125,11 +125,11 @@ ml_tools/_core/__init__.py,sha256=m-VP0RW0tOTm9N5NI3kFNcpM7WtVgs0RK9pK3ZJRZQQ,14
  ml_tools/_core/_logger.py,sha256=xzhn_FouMDRVNwXGBGlPC9Ruq6i5uCrmNaS5jesguMU,4972
  ml_tools/_core/_schema_load_ops.py,sha256=KLs9vBzANz5ESe2wlP-C41N4VlgGil-ywcfvWKSOGss,1551
  ml_tools/_core/_script_info.py,sha256=LtFGt10gEvCnhIRMKJPi2yXkiGLcdr7lE-oIP2XGHzQ,234
- ml_tools/data_exploration/__init__.py,sha256=a4hlq6Pyc_cQjiys_2CUFd5nIvzqPc4g8asWEHJz9Es,1674
+ ml_tools/data_exploration/__init__.py,sha256=w9dM6wjmxfbEXQCWGFVL_cIuLHtYVP364aQvzRwfZXY,1674
  ml_tools/data_exploration/_analysis.py,sha256=H6LryV56FFCHWjvQdkhZbtprZy6aP8EqU_hC2Cf9CLE,7832
  ml_tools/data_exploration/_cleaning.py,sha256=LpoOHOB6HVtdObZExg-B8SxZW-JUc51tblnkCFDZxKg,20846
  ml_tools/data_exploration/_features.py,sha256=wW-M8n2aLIy05DR2z4fI8wjpPjn3mOAnm9aSGYbMKwI,23363
- ml_tools/data_exploration/_imprimir.py,sha256=PkvDvQkYTQC_KnfI1gxxUxtC-XeSRePniM1TyJj8Caw,876
+ ml_tools/data_exploration/_imprimir.py,sha256=0nXu60HpeJZ8s83mpVoRtdKILK3t8EHRFVk7d9vRVUo,876
  ml_tools/data_exploration/_plotting.py,sha256=zH1dPcIoAlOuww23xIoBCsQOAshPPv9OyGposOA2RvI,19883
  ml_tools/data_exploration/_schema_ops.py,sha256=PoFeHaS9dXI9gfL0SRD-8uSP4owqmbQFbtfA-HxkLnY,7108
  ml_tools/ensemble_evaluation/__init__.py,sha256=Xxx-F-_TvSVzMaocKXOo_tEXLibMJtf_YY85Ac3U0EI,483
@@ -162,7 +162,7 @@ ml_tools/plot_fonts/__init__.py,sha256=l-vSSpjZb6IeWjjgPTcNmEs7M-vbw0lqgEKD5jhtX
  ml_tools/plot_fonts/_imprimir.py,sha256=zNi6naa5eWBFfa_yV569MhUtSAL44H0xDjMcgrJSlXk,131
  ml_tools/plot_fonts/_plot_fonts.py,sha256=mfjXNT9P59ymHoTI85Q8CcvfxfK5BIFBWtTZH-hNIC4,2209
  ml_tools/schema/__init__.py,sha256=9LQtKz3OO9wm-1piUgAhCJZVZT-F-YSg5QLus9pxfgA,263
- ml_tools/schema/_feature_schema.py,sha256=QLsxBS3_CIJp4c4dknvMs7RHZl_GZDEBJQ0MxLrQo6Y,8536
+ ml_tools/schema/_feature_schema.py,sha256=ICymTIL05n1qs61TvyY7rapDOJ9PlaOHi0F86N4tNlU,8547
  ml_tools/schema/_gui_schema.py,sha256=IVwN4THAdFrvh2TpV4SFd_zlzMX3eioF-w-qcSVTndE,7245
  ml_tools/schema/_imprimir.py,sha256=waNHozZmkCKKNFWSw0HFf9489FkSXogl6KuT5cn5V74,190
  ml_tools/serde/__init__.py,sha256=Gj6B8Sgf0-ad72jFXq2W_k5pXOT2iNx5Dvzwrd7Tj1U,229
@@ -172,7 +172,7 @@ ml_tools/utilities/__init__.py,sha256=pkR2HxUIlKZMDderP2awYXVIFxkU2Xt3FkJmcmuRIp
  ml_tools/utilities/_imprimir.py,sha256=sV3ASBOsTdVYvGojOTIpZYFyrnd4panS5h_4HcMzob4,432
  ml_tools/utilities/_utility_save_load.py,sha256=7skiiuYGVLVMK_nU9uLfUZw16ePvF3i9ub7G7LMyUgs,16085
  ml_tools/utilities/_utility_tools.py,sha256=bN0J9d1S0W5wNzNntBWqDsJcEAK7-1OgQg3X2fwXns0,6918
- dragon_ml_toolbox-20.0.0.dist-info/METADATA,sha256=ILeGioHn8qeLS5vaaqOs-zId8QvQxoWZcjKgHYmeuPo,7866
- dragon_ml_toolbox-20.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- dragon_ml_toolbox-20.0.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
- dragon_ml_toolbox-20.0.0.dist-info/RECORD,,
+ dragon_ml_toolbox-20.0.1.dist-info/METADATA,sha256=ApSFj2vI7jdgUYtlYgjBpAXFQw9OKcd6em0ssSVZvGg,7866
+ dragon_ml_toolbox-20.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ dragon_ml_toolbox-20.0.1.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
+ dragon_ml_toolbox-20.0.1.dist-info/RECORD,,
@@ -197,7 +197,7 @@ class DragonMICE:
                  _LOGGER.error(f"Index mismatch in dataset {subname}")
                  raise ValueError()

-         _LOGGER.info("Schema-based MICE imputation complete.")
+         _LOGGER.info("⬅️ Schema-based MICE imputation complete.")

          return kernel, imputed_datasets, imputed_dataset_names

@@ -237,9 +237,6 @@ class DragonMICE:
              # We pass an empty DF as 'targets' to save_imputed_datasets to prevent duplication.
              df_input = df
              df_targets_to_save = pd.DataFrame(index=df.index)
-
-             # Monitor all columns that had NaNs
-             imputed_column_names = [col for col in df.columns if df[col].isna().any()]
          else:
              # Explicitly cast tuple to list for Pandas indexing
              feature_cols = list(self._schema.feature_names)
@@ -253,8 +250,9 @@ class DragonMICE:
              df_input = df[feature_cols]
              # Drop features to get targets (more robust than explicit selection if targets vary)
              df_targets_to_save = df.drop(columns=feature_cols)
-
-             imputed_column_names = _get_na_column_names(df=df_input) # type: ignore
+
+         # Monitor all columns that had NaNs
+         imputed_column_names = [col for col in df_input.columns if df_input[col].isna().any()]

          # Run core logic
          kernel, imputed_datasets, imputed_dataset_names = self._run_mice(df=df_input, df_name=df_name) # type: ignore
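To make the relocated NaN-monitoring logic above easy to read outside the diff, here is a minimal, self-contained sketch of the same pattern; the frame, column names, and values are illustrative and not part of the package:

```python
import numpy as np
import pandas as pd

# Illustrative frame: two feature columns and one target column.
df = pd.DataFrame({
    "feat_a": [1.0, np.nan, 3.0],
    "feat_b": [4.0, 5.0, np.nan],
    "target": [np.nan, 0.0, 1.0],
})
feature_cols = ["feat_a", "feat_b"]

# Impute only the feature block; keep the targets aside untouched.
df_input = df[feature_cols]
df_targets_to_save = df.drop(columns=feature_cols)

# Monitor NaNs on the feature block only, so target columns are never
# reported as imputed.
imputed_column_names = [col for col in df_input.columns if df_input[col].isna().any()]
print(imputed_column_names)  # ['feat_a', 'feat_b']
```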
@@ -316,35 +314,41 @@ def get_convergence_diagnostic(kernel: mf.ImputationKernel, imputed_dataset_name

      # iterate over each imputed dataset
      for dataset_id, imputed_dataset_name in zip(range(dataset_count), imputed_dataset_names):
-         #Check directory for current dataset
          dataset_file_dir = f"Convergence_Metrics_{imputed_dataset_name}"
          local_save_dir = make_fullpath(input_path=root_path / dataset_file_dir, make=True)

-         for feature_name in column_names:
-             means_per_iteration = []
-             for iteration in range(iterations_cap):
-                 current_imputed = kernel.complete_data(dataset=dataset_id, iteration=iteration)
-                 means_per_iteration.append(np.mean(current_imputed[feature_name])) # type: ignore
-
+         # 1. Pre-calculate means for all features across all iterations
+         # Structure: {feature_name: [mean_iter_0, mean_iter_1, ...]}
+         history = {col: [] for col in column_names}
+
+         for iteration in range(iterations_cap):
+             # Resolve dataset ONLY ONCE per iteration
+             current_imputed = kernel.complete_data(dataset=dataset_id, iteration=iteration)
+
+             for col in column_names:
+                 # Fast lookup
+                 val = np.mean(current_imputed[col])
+                 history[col].append(val)
+
+         # 2. Plotting loop
+         for feature_name, means_per_iteration in history.items():
              plt.figure(figsize=(10, 8))
              plt.plot(means_per_iteration, marker='o')
              plt.xlabel("Iteration", **label_font)
              plt.ylabel("Mean of Imputed Values", **label_font)
              plt.title(f"Mean Convergence for '{feature_name}'", **label_font)

-             # Adjust plot display for the X axis
              _ticks = np.arange(iterations_cap)
              _labels = np.arange(1, iterations_cap + 1)
-             plt.xticks(ticks=_ticks, labels=_labels) # type: ignore
+             plt.xticks(ticks=_ticks, labels=_labels)
              plt.grid(True)

-             feature_save_name = sanitize_filename(feature_name)
-             feature_save_name = feature_save_name + ".svg"
+             feature_save_name = sanitize_filename(feature_name) + ".svg"
              save_path = local_save_dir / feature_save_name
              plt.savefig(save_path, bbox_inches='tight', format="svg")
              plt.close()

-     _LOGGER.info(f"{dataset_file_dir} process completed.")
+     _LOGGER.info(f"📉 Convergence diagnostics complete.")


  # Imputed distributions
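The restructured loop above resolves each completed dataset once per iteration and reuses it for every monitored column, instead of calling kernel.complete_data() once per (feature, iteration) pair. A minimal sketch of that access pattern follows, using a stand-in for the expensive completion call; all names below are illustrative:

```python
import numpy as np
import pandas as pd

def complete_data(iteration: int) -> pd.DataFrame:
    # Stand-in for an expensive per-iteration materialization such as
    # kernel.complete_data(...); it just fabricates a frame for the demo.
    rng = np.random.default_rng(iteration)
    return pd.DataFrame(rng.normal(size=(50, 2)), columns=["x", "y"])

column_names = ["x", "y"]
iterations_cap = 5

# One completion per iteration, shared by every feature, instead of one
# completion per (feature, iteration) pair.
history: dict[str, list[float]] = {col: [] for col in column_names}
for iteration in range(iterations_cap):
    frame = complete_data(iteration)
    for col in column_names:
        history[col].append(float(frame[col].mean()))

for feature_name, means_per_iteration in history.items():
    print(feature_name, [round(m, 3) for m in means_per_iteration])
```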
@@ -431,5 +435,5 @@ def get_imputed_distributions(kernel: mf.ImputationKernel, df_name: str, root_di
          fig = kernel.plot_imputed_distributions(variables=[feature])
          _process_figure(fig, feature)

-     _LOGGER.info(f"{local_dir_name} completed.")
+     _LOGGER.info(f"📊 Imputed distributions complete.")

@@ -53,13 +53,13 @@ __all__ = [
      "split_features_targets",
      "split_continuous_binary",
      "split_continuous_categorical_targets",
-     "encode_categorical_features",
      "clip_outliers_single",
      "clip_outliers_multi",
      "drop_outlier_samples",
      "plot_continuous_vs_target",
      "plot_categorical_vs_target",
      "plot_correlation_heatmap",
+     "encode_categorical_features",
      "finalize_feature_schema",
      "apply_feature_schema",
      "match_and_filter_columns_by_regex",
@@ -12,13 +12,13 @@ _GRUPOS = [
      "split_features_targets",
      "split_continuous_binary",
      "split_continuous_categorical_targets",
-     "encode_categorical_features",
      "clip_outliers_single",
      "clip_outliers_multi",
      "drop_outlier_samples",
      "plot_continuous_vs_target",
      "plot_categorical_vs_target",
      "plot_correlation_heatmap",
+     "encode_categorical_features",
      "finalize_feature_schema",
      "apply_feature_schema",
      "match_and_filter_columns_by_regex",
@@ -44,7 +44,7 @@ class FeatureSchema(NamedTuple):
          Handles conversion of Tuple->List and IntKeys->StrKeys automatically.
          """
          # validate path
-         dir_path = make_fullpath(directory, enforce="directory")
+         dir_path = make_fullpath(directory, make=True, enforce="directory")
          file_path = dir_path / SchemaKeys.SCHEMA_FILENAME

          try:
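The new make=True argument appears to ask make_fullpath to create the target directory when it does not exist yet, mirroring its other use earlier in this diff; that reading of the helper's semantics is an assumption. A rough pathlib-only sketch of that behavior, with a hypothetical filename:

```python
from pathlib import Path

def ensure_directory(directory: str | Path) -> Path:
    # Illustrative stand-in for make_fullpath(directory, make=True, enforce="directory"):
    # resolve the path and create the directory if it is missing.
    dir_path = Path(directory).expanduser().resolve()
    dir_path.mkdir(parents=True, exist_ok=True)  # raises if the path exists but is a file
    return dir_path

schema_dir = ensure_directory("outputs/schema")  # created on first use instead of erroring out
file_path = schema_dir / "feature_schema.json"   # hypothetical filename
```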