PyPI - dragon-ml-toolbox - Versions diffs - 20.0.1__py3-none-any.whl → 20.1.0__py3-none-any.whl - Mend

dragon-ml-toolbox 20.0.1__py3-none-any.whl → 20.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

{dragon_ml_toolbox-20.0.1.dist-info → dragon_ml_toolbox-20.1.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 20.0.1
+Version: 20.1.0
 Summary: Complete pipelines and helper tools for data science and machine learning projects.
 Author-email: Karl Luigi Loza Vidaurre <luigiloza@gmail.com>
 License-Expression: MIT

{dragon_ml_toolbox-20.0.1.dist-info → dragon_ml_toolbox-20.1.0.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
-dragon_ml_toolbox-20.0.1.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
-dragon_ml_toolbox-20.0.1.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=0-HBRMMgKuwtGy6nMJZvIn1fLxhx_ksyyVB2U_iyYZU,2818
+dragon_ml_toolbox-20.1.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
+dragon_ml_toolbox-20.1.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=0-HBRMMgKuwtGy6nMJZvIn1fLxhx_ksyyVB2U_iyYZU,2818
 ml_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ml_tools/constants.py,sha256=3br5Rk9cL2IUo638eJuMOGdbGQaWssaUecYEvSeRBLM,3322
 ml_tools/ETL_cleaning/__init__.py,sha256=TytE8RKmtW4KQlkaTxpYKlJAbCu-VAc82eDdHwVD3Jo,427
@@ -29,10 +29,11 @@ ml_tools/ML_callbacks/_checkpoint.py,sha256=Ioj9wn8XlsR_S1NnmWbyT9lkO8o2_DcHVMrF
 ml_tools/ML_callbacks/_early_stop.py,sha256=qzTzxfDCDim0qj7QQ7ykJNIOBWbXtviDptMCczXXy_k,8073
 ml_tools/ML_callbacks/_imprimir.py,sha256=Wz6NXhiCFSJsAZh3JnQ4qt7tj2_qhu14DTwu-gkkzZs,257
 ml_tools/ML_callbacks/_scheduler.py,sha256=mn97_VH8Lp37KH3zSgmPemGQV8g-K8GfhRNHTftaNcg,7390
-ml_tools/ML_chain/__init__.py,sha256=rUBVwB96fAoq-Q9zY3s0fL_TFU5W2axlg7XZzrCXrSU,399
-ml_tools/ML_chain/_chaining_tools.py,sha256=ASi0Zr9WBVA7wd-pYVN69VIZFOIuB4QpGlrSl9Ob-90,13788
-ml_tools/ML_chain/_dragon_chain.py,sha256=wFlknv0rlL8P3K0ls8kj_oup4SvPNFqSxDmiBdPfGt4,5737
-ml_tools/ML_chain/_imprimir.py,sha256=JCVslxnrmvJ_LJOmexL2u5-OYykHFe1H49EkrJPpAIg,254
+ml_tools/ML_chain/__init__.py,sha256=UVD1xaJ59pft_ysg8z_ihqjEDQqPRQwmhui_zNRFp7I,491
+ml_tools/ML_chain/_chaining_tools.py,sha256=BDwTvgJFbJ-wgy3IkP6_SNpNaWpHGXV3PhAM7sYmHeU,13675
+ml_tools/ML_chain/_dragon_chain.py,sha256=x3fN136C5N9WcXJJW9zkNrBzP8QoBaXpxz7SPF3txjg,5601
+ml_tools/ML_chain/_imprimir.py,sha256=tHVXoGhMlbpkpcoGKwtkYVFlHFEllRCsYdpiAFI1aZk,285
+ml_tools/ML_chain/_update_schema.py,sha256=z1Us7lv6hy6GwSu1mcid50Jmqq3sh91hMQ0LnQjhte8,3806
 ml_tools/ML_configuration/__init__.py,sha256=wSpfk8bHRSoYjcKJmjd5ivB4Fw8UFjyOZL4hct9rJT0,2637
 ml_tools/ML_configuration/_base_model_config.py,sha256=95L3IfobNFMtnNr79zYpDGerC1q1v7M05tWZvTS2cwE,2247
 ml_tools/ML_configuration/_finalize.py,sha256=l_n13bLu0avMdJ8hNRrH8V_wOBQZM1UGsTydKBkTysM,15047
@@ -146,7 +147,7 @@ ml_tools/excel_handler/_excel_handler.py,sha256=TODudmeQgDSdxUKzLfAzizs--VL-g8Wx
 ml_tools/excel_handler/_imprimir.py,sha256=QHazgqjRMzthRbDt33EVpvR7GqufSzng6jHw7IVCdtI,306
 ml_tools/keys/__init__.py,sha256=DV52KLOY5GfpLwJdDAHlFVz0qAmyh-KWg3gZorFdMSk,336
 ml_tools/keys/_imprimir.py,sha256=4qmwdia16DPq3OtlWGMkgLPT5R3lcM-ka3tQdCLx5qk,197
-ml_tools/keys/_keys.py,sha256=wyUpNY7iZIGIqvnT2BSahnkkNkK_vvZALOtRWZ7h50A,8800
+ml_tools/keys/_keys.py,sha256=fArSyT_UGGSH4PHjG-R0kefFznAtAxSAasDCQ7-89a8,8899
 ml_tools/math_utilities/__init__.py,sha256=NuTcb_Ogdwx5x-oDieBt1EAqCoZRnXbkZbUrwB6ItH0,337
 ml_tools/math_utilities/_imprimir.py,sha256=kk5DQb_BV9g767uTdXQiRjEEHgQwJpEXU3jxO3QV2Fw,238
 ml_tools/math_utilities/_math_utilities.py,sha256=BYHIVcM9tuKIhVrkgLLiM5QalJ39zx7dXYy_M9aGgiM,9012
@@ -172,7 +173,7 @@ ml_tools/utilities/__init__.py,sha256=pkR2HxUIlKZMDderP2awYXVIFxkU2Xt3FkJmcmuRIp
 ml_tools/utilities/_imprimir.py,sha256=sV3ASBOsTdVYvGojOTIpZYFyrnd4panS5h_4HcMzob4,432
 ml_tools/utilities/_utility_save_load.py,sha256=7skiiuYGVLVMK_nU9uLfUZw16ePvF3i9ub7G7LMyUgs,16085
 ml_tools/utilities/_utility_tools.py,sha256=bN0J9d1S0W5wNzNntBWqDsJcEAK7-1OgQg3X2fwXns0,6918
-dragon_ml_toolbox-20.0.1.dist-info/METADATA,sha256=ApSFj2vI7jdgUYtlYgjBpAXFQw9OKcd6em0ssSVZvGg,7866
-dragon_ml_toolbox-20.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-dragon_ml_toolbox-20.0.1.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
-dragon_ml_toolbox-20.0.1.dist-info/RECORD,,
+dragon_ml_toolbox-20.1.0.dist-info/METADATA,sha256=g8BdKr-giBfa-J0TWjinoX1W4lzGaTFZEovm_Fv_43w,7866
+dragon_ml_toolbox-20.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dragon_ml_toolbox-20.1.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
+dragon_ml_toolbox-20.1.0.dist-info/RECORD,,

ml_tools/ML_chain/__init__.py CHANGED Viewed

@@ -8,11 +8,16 @@ from ._chaining_tools import (
     prepare_chaining_dataset,
 )
+from ._update_schema import (
+    derive_next_step_schema
+)
 from ._imprimir import info
 __all__ = [
     "DragonChainOrchestrator",
+    "derive_next_step_schema",
     "augment_dataset_with_predictions",
     "augment_dataset_with_predictions_multi",
     "prepare_chaining_dataset",

ml_tools/ML_chain/_chaining_tools.py CHANGED Viewed

@@ -5,7 +5,7 @@ from typing import Optional, Literal
 from ..ML_inference import DragonInferenceHandler
-from ..keys._keys import MLTaskKeys, PyTorchInferenceKeys
+from ..keys._keys import MLTaskKeys, PyTorchInferenceKeys, ChainKeys
 from .._core import get_logger
@@ -23,11 +23,10 @@ def augment_dataset_with_predictions(
     handler: DragonInferenceHandler,
     dataset: pd.DataFrame,
     ground_truth_targets: list[str],
-    prediction_col_prefix: str = "pred_",
     batch_size: int = 4096
 ) -> pd.DataFrame:
     """
-    Uses a DragonInferenceHandler to generate predictions for a dataset and appends them as new feature columns.
+    Uses a DragonInferenceHandler to generate predictions for a dataset and appends them as new feature columns with a standardized prefix.
     This function splits the features from the ground truth targets, runs inference in batches to ensure
     memory efficiency, and returns a unified DataFrame containing:
@@ -38,8 +37,6 @@ def augment_dataset_with_predictions(
         dataset (pd.DataFrame): The input pandas DataFrame containing features and ground truth targets.
         ground_truth_targets (List[str]): A list of column names in `dataset` representing the actual targets.
             These are removed from the input features during inference and appended to the end of the result.
-        prediction_col_prefix (str, optional): A string to prepend when creating the
-            new prediction columns.
         batch_size (int, optional): The number of samples to process in a single inference step.
             Prevents OOM errors on large datasets. Defaults to 4096.
@@ -107,7 +104,7 @@ def augment_dataset_with_predictions(
     full_prediction_array = np.vstack(all_predictions)
     # Generate new column names
-    new_col_names = [f"{prediction_col_prefix}{tid}" for tid in handler.target_ids]
+    new_col_names = [f"{ChainKeys.CHAIN_PREDICTION_PREFIX}{tid}" for tid in handler.target_ids]
     # Verify dimensions match
     if full_prediction_array.shape[1] != len(new_col_names):

ml_tools/ML_chain/_dragon_chain.py CHANGED Viewed

@@ -77,18 +77,16 @@ class DragonChainOrchestrator:
     def update_with_inference(
         self,
         handler: DragonInferenceHandler,
-        prefix: str = "pred_",
         batch_size: int = 4096
     ) -> None:
         """
         Runs inference using the provided handler on the full internal dataset and appends the results as new features.
         This updates the internal state of the Orchestrator. Subsequent calls to `get_training_data`
-        will include these new prediction columns as features.
+        will include these new prediction columns as features with a standardized prefix.
         Args:
             handler (DragonInferenceHandler): The trained model handler.
-            prefix (str): Prefix for the new prediction columns (e.g., "m1_", "step2_").
             batch_size (int): Batch size for inference.
         """
         _LOGGER.info(f"Orchestrator: Updating internal state with predictions from handler (Targets: {handler.target_ids})...")
@@ -99,7 +97,6 @@ class DragonChainOrchestrator:
             handler=handler,
             dataset=self.current_dataset,
             ground_truth_targets=self.all_targets,
-            prediction_col_prefix=prefix,
             batch_size=batch_size
         )

ml_tools/ML_chain/_imprimir.py CHANGED Viewed

@@ -2,6 +2,7 @@ from .._core import _imprimir_disponibles
 _GRUPOS = [
     "DragonChainOrchestrator",
+    "derive_next_step_schema",
     "augment_dataset_with_predictions",
     "augment_dataset_with_predictions_multi",
     "prepare_chaining_dataset",

ml_tools/ML_chain/_update_schema.py ADDED Viewed

@@ -0,0 +1,96 @@
+from ..schema import FeatureSchema
+from ..ML_inference import DragonInferenceHandler
+from ..keys._keys import MLTaskKeys, ChainKeys
+from .._core import get_logger
+_LOGGER = get_logger("Schema Updater")
+__all__ = [
+    "derive_next_step_schema",
+]
+def derive_next_step_schema(
+    current_schema: FeatureSchema,
+    handler: DragonInferenceHandler,
+    verbose: bool = True
+) -> FeatureSchema:
+    """
+    Creates the FeatureSchema for the NEXT step in the chain by appending the current handler's predictions as new features.
+    Args:
+        current_schema (FeatureSchema): The current FeatureSchema.
+        handler (DragonInferenceHandler): The inference handler of the model trained using the current schema.
+    Returns:
+        FeatureSchema: An updated schema including new predicted features.
+    """
+    # 1. Determine New Column Names
+    # Match logic from _chaining_tools.py
+    if handler.target_ids is None:
+        _LOGGER.error("Handler target_ids is None; cannot derive schema.")
+        raise ValueError()
+    new_cols = [f"{ChainKeys.CHAIN_PREDICTION_PREFIX}{tid}" for tid in handler.target_ids]
+    # 2. Base Lists (Convert tuples to lists for mutation)
+    new_feature_names = list(current_schema.feature_names) + new_cols
+    new_cont_names = list(current_schema.continuous_feature_names)
+    new_cat_names = list(current_schema.categorical_feature_names)
+    # Copy existing maps (handle None case)
+    new_cat_index_map = dict(current_schema.categorical_index_map) if current_schema.categorical_index_map else {}
+    new_cat_mappings = dict(current_schema.categorical_mappings) if current_schema.categorical_mappings else {}
+    # 3. Determine Feature Type based on Task
+    is_categorical = False
+    cardinality = 0
+    if handler.task in [MLTaskKeys.BINARY_CLASSIFICATION, MLTaskKeys.MULTILABEL_BINARY_CLASSIFICATION]:
+        is_categorical = True
+        cardinality = 2
+    elif handler.task == MLTaskKeys.MULTICLASS_CLASSIFICATION:
+        is_categorical = True
+        # We rely on the class map to know the 'vocabulary' size
+        if handler._class_map is None:
+            _LOGGER.error("Handler class_map is None, cannot determine cardinality for multiclass classification model.")
+            raise ValueError()
+        cardinality = len(handler._class_map)
+    # 4. Append New Metadata
+    current_total_feats = len(current_schema.feature_names)
+    for i, col_name in enumerate(new_cols):
+        # Calculate the absolute index of this new column
+        # If we had 10 features (0-9), the new one is at index 10 + i
+        new_index = current_total_feats + i
+        if is_categorical:
+            new_cat_names.append(col_name)
+            # A. Update Cardinality for Embeddings
+            new_cat_index_map[new_index] = cardinality
+            # B. Create Identity Mapping (Dummy Encoding)
+            # Maps string representation of int back to the int.
+            identity_map = {str(k): k for k in range(cardinality)}
+            new_cat_mappings[col_name] = identity_map
+        else:
+            # Regression / Multitarget Regression
+            new_cont_names.append(col_name)
+    if verbose:
+        _LOGGER.info(f"Derived next step schema with {len(new_feature_names)} features:\n    {len(new_cont_names)} continuous\n    {len(new_cat_names)} categorical")
+    # 5. Return New Immutable Schema
+    return FeatureSchema(
+        feature_names=tuple(new_feature_names),
+        continuous_feature_names=tuple(new_cont_names),
+        categorical_feature_names=tuple(new_cat_names),
+        categorical_index_map=new_cat_index_map if new_cat_index_map else None,
+        categorical_mappings=new_cat_mappings if new_cat_mappings else None
+    )

ml_tools/keys/_keys.py CHANGED Viewed

@@ -278,6 +278,11 @@ class SchemaKeys:
     OPTIONAL_LABELS = "optional_labels"
+class ChainKeys:
+    """Used by the ML chaining module."""
+    CHAIN_PREDICTION_PREFIX = "pred_"
 class _EvaluationConfig:
     """Set config values for evaluation modules."""
     DPI = 400

{dragon_ml_toolbox-20.0.1.dist-info → dragon_ml_toolbox-20.1.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{dragon_ml_toolbox-20.0.1.dist-info → dragon_ml_toolbox-20.1.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{dragon_ml_toolbox-20.0.1.dist-info → dragon_ml_toolbox-20.1.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md RENAMED Viewed

File without changes

{dragon_ml_toolbox-20.0.1.dist-info → dragon_ml_toolbox-20.1.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

dragon-ml-toolbox 20.0.1__py3-none-any.whl → 20.1.0__py3-none-any.whl

dragon-ml-toolbox 20.0.1__py3-none-any.whl → 20.1.0__py3-none-any.whl