dragon-ml-toolbox 10.9.0__tar.gz → 10.10.1__tar.gz

This diff compares publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as published.

Potentially problematic release.


This version of dragon-ml-toolbox might be problematic.

Files changed (41)
  1. {dragon_ml_toolbox-10.9.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-10.10.1}/PKG-INFO +1 -1
  2. {dragon_ml_toolbox-10.9.0 → dragon_ml_toolbox-10.10.1/dragon_ml_toolbox.egg-info}/PKG-INFO +1 -1
  3. {dragon_ml_toolbox-10.9.0 → dragon_ml_toolbox-10.10.1}/ml_tools/ML_datasetmaster.py +1 -1
  4. {dragon_ml_toolbox-10.9.0 → dragon_ml_toolbox-10.10.1}/ml_tools/ML_evaluation.py +6 -6
  5. {dragon_ml_toolbox-10.9.0 → dragon_ml_toolbox-10.10.1}/ml_tools/ML_models.py +28 -40
  6. {dragon_ml_toolbox-10.9.0 → dragon_ml_toolbox-10.10.1}/ml_tools/ML_scaler.py +1 -1
  7. {dragon_ml_toolbox-10.9.0 → dragon_ml_toolbox-10.10.1}/ml_tools/ML_trainer.py +14 -6
  8. {dragon_ml_toolbox-10.9.0 → dragon_ml_toolbox-10.10.1}/ml_tools/SQL.py +17 -7
  9. {dragon_ml_toolbox-10.9.0 → dragon_ml_toolbox-10.10.1}/pyproject.toml +1 -1
  10. {dragon_ml_toolbox-10.9.0 → dragon_ml_toolbox-10.10.1}/LICENSE +0 -0
  11. {dragon_ml_toolbox-10.9.0 → dragon_ml_toolbox-10.10.1}/LICENSE-THIRD-PARTY.md +0 -0
  12. {dragon_ml_toolbox-10.9.0 → dragon_ml_toolbox-10.10.1}/README.md +0 -0
  13. {dragon_ml_toolbox-10.9.0 → dragon_ml_toolbox-10.10.1}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
  14. {dragon_ml_toolbox-10.9.0 → dragon_ml_toolbox-10.10.1}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
  15. {dragon_ml_toolbox-10.9.0 → dragon_ml_toolbox-10.10.1}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
  16. {dragon_ml_toolbox-10.9.0 → dragon_ml_toolbox-10.10.1}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
  17. {dragon_ml_toolbox-10.9.0 → dragon_ml_toolbox-10.10.1}/ml_tools/ETL_cleaning.py +0 -0
  18. {dragon_ml_toolbox-10.9.0 → dragon_ml_toolbox-10.10.1}/ml_tools/ETL_engineering.py +0 -0
  19. {dragon_ml_toolbox-10.9.0 → dragon_ml_toolbox-10.10.1}/ml_tools/GUI_tools.py +0 -0
  20. {dragon_ml_toolbox-10.9.0 → dragon_ml_toolbox-10.10.1}/ml_tools/MICE_imputation.py +0 -0
  21. {dragon_ml_toolbox-10.9.0 → dragon_ml_toolbox-10.10.1}/ml_tools/ML_callbacks.py +0 -0
  22. {dragon_ml_toolbox-10.9.0 → dragon_ml_toolbox-10.10.1}/ml_tools/ML_evaluation_multi.py +0 -0
  23. {dragon_ml_toolbox-10.9.0 → dragon_ml_toolbox-10.10.1}/ml_tools/ML_inference.py +0 -0
  24. {dragon_ml_toolbox-10.9.0 → dragon_ml_toolbox-10.10.1}/ml_tools/ML_optimization.py +0 -0
  25. {dragon_ml_toolbox-10.9.0 → dragon_ml_toolbox-10.10.1}/ml_tools/PSO_optimization.py +0 -0
  26. {dragon_ml_toolbox-10.9.0 → dragon_ml_toolbox-10.10.1}/ml_tools/RNN_forecast.py +0 -0
  27. {dragon_ml_toolbox-10.9.0 → dragon_ml_toolbox-10.10.1}/ml_tools/VIF_factor.py +0 -0
  28. {dragon_ml_toolbox-10.9.0 → dragon_ml_toolbox-10.10.1}/ml_tools/__init__.py +0 -0
  29. {dragon_ml_toolbox-10.9.0 → dragon_ml_toolbox-10.10.1}/ml_tools/_logger.py +0 -0
  30. {dragon_ml_toolbox-10.9.0 → dragon_ml_toolbox-10.10.1}/ml_tools/_script_info.py +0 -0
  31. {dragon_ml_toolbox-10.9.0 → dragon_ml_toolbox-10.10.1}/ml_tools/custom_logger.py +0 -0
  32. {dragon_ml_toolbox-10.9.0 → dragon_ml_toolbox-10.10.1}/ml_tools/data_exploration.py +0 -0
  33. {dragon_ml_toolbox-10.9.0 → dragon_ml_toolbox-10.10.1}/ml_tools/ensemble_evaluation.py +0 -0
  34. {dragon_ml_toolbox-10.9.0 → dragon_ml_toolbox-10.10.1}/ml_tools/ensemble_inference.py +0 -0
  35. {dragon_ml_toolbox-10.9.0 → dragon_ml_toolbox-10.10.1}/ml_tools/ensemble_learning.py +0 -0
  36. {dragon_ml_toolbox-10.9.0 → dragon_ml_toolbox-10.10.1}/ml_tools/handle_excel.py +0 -0
  37. {dragon_ml_toolbox-10.9.0 → dragon_ml_toolbox-10.10.1}/ml_tools/keys.py +0 -0
  38. {dragon_ml_toolbox-10.9.0 → dragon_ml_toolbox-10.10.1}/ml_tools/optimization_tools.py +0 -0
  39. {dragon_ml_toolbox-10.9.0 → dragon_ml_toolbox-10.10.1}/ml_tools/path_manager.py +0 -0
  40. {dragon_ml_toolbox-10.9.0 → dragon_ml_toolbox-10.10.1}/ml_tools/utilities.py +0 -0
  41. {dragon_ml_toolbox-10.9.0 → dragon_ml_toolbox-10.10.1}/setup.cfg +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 10.9.0
+Version: 10.10.1
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: Karl Loza <luigiloza@gmail.com>
 License-Expression: MIT
dragon_ml_toolbox.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 10.9.0
+Version: 10.10.1
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: Karl Loza <luigiloza@gmail.com>
 License-Expression: MIT
ml_tools/ML_datasetmaster.py
@@ -200,7 +200,7 @@ class _BaseDatasetMaker(ABC):
         filepath = save_path / filename
         self.scaler.save(filepath, verbose=False)
         if verbose:
-            _LOGGER.info(f"Scaler for dataset '{self.id}' saved to '{filepath.name}'.")
+            _LOGGER.info(f"Scaler for dataset '{self.id}' saved as '{filepath.name}'.")


 # Single target dataset
ml_tools/ML_evaluation.py
@@ -353,7 +353,7 @@ def shap_summary_plot(model,
     plt.ion()


-def plot_attention_importance(weights: List[torch.Tensor], feature_names: Optional[List[str]], save_dir: Union[str, Path]):
+def plot_attention_importance(weights: List[torch.Tensor], feature_names: Optional[List[str]], save_dir: Union[str, Path], top_n: int = 10):
     """
     Aggregates attention weights and plots global feature importance.

@@ -364,6 +364,7 @@ def plot_attention_importance(weights: List[torch.Tensor], feature_names: Option
         weights (List[torch.Tensor]): A list of attention weight tensors from each batch.
         feature_names (List[str] | None): Names of the features for plot labeling.
         save_dir (str | Path): Directory to save the plot and summary CSV.
+        top_n (int): The number of top features to display in the plot.
     """
     if not weights:
         _LOGGER.error("Attention weights list is empty. Skipping importance plot.")
@@ -392,11 +393,10 @@ def plot_attention_importance(weights: List[torch.Tensor], feature_names: Option
     summary_df.to_csv(summary_path, index=False)
     _LOGGER.info(f"📝 Attention summary data saved as '{summary_path.name}'")

-    # --- Step 3: Create and save the plot ---
-    plt.figure(figsize=(10, 8), dpi=100)
+    # --- Step 3: Create and save the plot for top N features ---
+    plot_df = summary_df.head(top_n).sort_values('mean_attention', ascending=True)

-    # Sort for plotting
-    plot_df = summary_df.sort_values('mean_attention', ascending=True)
+    plt.figure(figsize=(10, 8), dpi=100)

     # Create horizontal bar plot with error bars
     plt.barh(
@@ -410,7 +410,7 @@ def plot_attention_importance(weights: List[torch.Tensor], feature_names: Option
         color='cornflowerblue'
     )

-    plt.title('Global Feature Importance')
+    plt.title('Top Features by Attention')
     plt.xlabel('Average Attention Weight')
     plt.ylabel('Feature')
     plt.grid(axis='x', linestyle='--', alpha=0.6)
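In practice the new `top_n` argument only trims the bar plot; the full per-feature summary CSV is still written before the `head(top_n)` call. A minimal sketch of calling the updated function — the import path is inferred from this package's file layout, and the tensors and feature names are made-up placeholders:

```python
# Hedged sketch: assumes plot_attention_importance is importable from
# ml_tools.ML_evaluation; the attention tensors here are synthetic.
from pathlib import Path
import torch
from ml_tools.ML_evaluation import plot_attention_importance

# Three batches of attention weights, 32 samples x 5 features each (placeholder data).
batch_weights = [torch.rand(32, 5) for _ in range(3)]
names = ["age", "dose", "weight", "height", "bmi"]  # hypothetical feature names

plot_attention_importance(
    weights=batch_weights,
    feature_names=names,
    save_dir=Path("reports/attention"),
    top_n=5,  # new in 10.10.1: plot only the 5 highest-ranked features
)
```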
ml_tools/ML_models.py
@@ -43,7 +43,7 @@ class _ArchitectureHandlerMixin:
             json.dump(config, f, indent=4)

         if verbose:
-            _LOGGER.info(f"Architecture for '{self.__class__.__name__}' saved to '{path_dir.name}'")
+            _LOGGER.info(f"Architecture for '{self.__class__.__name__}' saved as '{full_path.name}'")

     @classmethod
     def load(cls: type, file_or_dir: Union[str, Path], verbose: bool = True) -> nn.Module:
@@ -147,6 +147,30 @@ class _BaseMLP(nn.Module, _ArchitectureHandlerMixin):
         return f"{name}(arch: {arch_str})"


+class _BaseAttention(_BaseMLP):
+    """
+    Abstract base class for MLP models that incorporate an attention mechanism
+    before the main MLP layers.
+    """
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # By default, models inheriting this do not have the flag.
+        self.has_interpretable_attention = False
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Defines the standard forward pass."""
+        logits, _attention_weights = self.forward_attention(x)
+        return logits
+
+    def forward_attention(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
+        """Returns logits and attention weights."""
+        # This logic is now shared and defined in one place
+        x, attention_weights = self.attention(x)
+        x = self.mlp(x)
+        logits = self.output_layer(x)
+        return logits, attention_weights
+
+
 class MultilayerPerceptron(_BaseMLP):
     """
     Creates a versatile Multilayer Perceptron (MLP) for regression or classification tasks.
@@ -184,7 +208,7 @@ class MultilayerPerceptron(_BaseMLP):
         return self._repr_helper(name="MultilayerPerceptron", mlp_layers=layer_sizes)


-class AttentionMLP(_BaseMLP):
+class AttentionMLP(_BaseAttention):
     """
     A Multilayer Perceptron (MLP) that incorporates an Attention layer to dynamically weigh input features.

@@ -205,25 +229,7 @@ class AttentionMLP(_BaseMLP):
         super().__init__(in_features, out_targets, hidden_layers, drop_out)
         # Attention
         self.attention = _AttentionLayer(in_features)
-
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        """
-        Defines the standard forward pass.
-        """
-        logits, _attention_weights = self.forward_attention(x)
-        return logits
-
-    def forward_attention(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
-        """
-        Returns logits and attention weights
-        """
-        # The attention layer returns the processed x and the weights
-        x, attention_weights = self.attention(x)
-
-        # Pass the attention-modified tensor through the MLP
-        logits = self.mlp(x)
-
-        return logits, attention_weights
+        self.has_interpretable_attention = True


     def __repr__(self) -> str:
         """Returns the developer-friendly string representation of the model."""
@@ -238,7 +244,7 @@ class AttentionMLP(_BaseMLP):
         return self._repr_helper(name="AttentionMLP", mlp_layers=arch)


-class MultiHeadAttentionMLP(_BaseMLP):
+class MultiHeadAttentionMLP(_BaseAttention):
     """
     An MLP that incorporates a standard `nn.MultiheadAttention` layer to process
     the input features.
@@ -267,24 +273,6 @@ class MultiHeadAttentionMLP(_BaseMLP):
             dropout=attention_dropout
         )

-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        """Defines the standard forward pass of the model."""
-        logits, _attention_weights = self.forward_attention(x)
-        return logits
-
-    def forward_attention(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
-        """
-        Returns logits and attention weights.
-        """
-        # The attention layer returns the processed x and the weights
-        x, attention_weights = self.attention(x)
-
-        # Pass the attention-modified tensor through the MLP and prediction head
-        x = self.mlp(x)
-        logits = self.output_layer(x)
-
-        return logits, attention_weights
-
     def get_architecture_config(self) -> Dict[str, Any]:
         """Returns the full configuration of the model."""
         config = super().get_architecture_config()
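The refactor moves the duplicated `forward`/`forward_attention` logic into the shared `_BaseAttention` base class and replaces it with a single flag: `AttentionMLP` marks itself `has_interpretable_attention = True`, while `MultiHeadAttentionMLP` keeps the default `False`. A minimal sketch of how the two entry points behave after the change — the constructor keyword names follow the `super().__init__` call in the hunk above, but the concrete values are placeholders:

```python
# Hedged sketch: assumes AttentionMLP is importable from ml_tools.ML_models and
# accepts the arguments shown in the diff; sizes below are arbitrary.
import torch
from ml_tools.ML_models import AttentionMLP

model = AttentionMLP(in_features=8, out_targets=1,
                     hidden_layers=[32, 16], drop_out=0.2)
x = torch.rand(4, 8)

logits = model(x)                          # plain forward pass, shared in _BaseAttention
logits, attn = model.forward_attention(x)  # also returns the per-feature attention weights

print(model.has_interpretable_attention)   # True; MultiHeadAttentionMLP leaves it False
```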
ml_tools/ML_scaler.py
@@ -164,7 +164,7 @@ class PytorchScaler:
         }
         torch.save(state, path_obj)
         if verbose:
-            _LOGGER.info(f"PytorchScaler state saved to '{path_obj.name}'.")
+            _LOGGER.info(f"PytorchScaler state saved as '{path_obj.name}'.")

     @staticmethod
     def load(filepath: Union[str, Path], verbose: bool=True) -> 'PytorchScaler':
ml_tools/ML_trainer.py
@@ -472,23 +472,30 @@ class MLTrainer:

         yield attention_weights

-    def explain_attention(self, save_dir: Union[str, Path], feature_names: Optional[List[str]], explain_dataset: Optional[Dataset] = None):
+    def explain_attention(self, save_dir: Union[str, Path],
+                          feature_names: Optional[List[str]],
+                          explain_dataset: Optional[Dataset] = None,
+                          plot_n_features: int = 10):
         """
         Generates and saves a feature importance plot based on attention weights.

-        This method only works for models with a `forward_attention` method.
+        This method only works for models with 'has_interpretable_attention'.

         Args:
             save_dir (str | Path): Directory to save the plot and summary data.
-            feature_names (List[str] | None): Names for the features for plot labeling.
+            feature_names (List[str] | None): Names for the features for plot labeling. If not given, generic names will be used.
             explain_dataset (Dataset, optional): A specific dataset to explain. If None, the trainer's test dataset is used.
+            plot_n_features (int): Number of top features to plot.
         """

         print("\n--- Attention Analysis ---")

         # --- Step 1: Check if the model supports this explanation ---
-        if not hasattr(self.model, 'forward_attention'):
-            _LOGGER.error("Model does not have a `forward_attention` method. Skipping attention explanation.")
+        if not getattr(self.model, 'has_interpretable_attention', False):
+            _LOGGER.warning(
+                "Model is not flagged for interpretable attention analysis. "
+                "Skipping. This is the correct behavior for models like MultiHeadAttentionMLP."
+            )
             return

         # --- Step 2: Set up the dataloader ---
@@ -514,7 +521,8 @@ class MLTrainer:
             plot_attention_importance(
                 weights=all_weights,
                 feature_names=feature_names,
-                save_dir=save_dir
+                save_dir=save_dir,
+                top_n=plot_n_features
             )
         else:
             _LOGGER.error("No attention weights were collected from the model.")
ml_tools/SQL.py
@@ -120,12 +120,14 @@ class DatabaseManager:
         if not self.cursor:
             _LOGGER.error("Database connection is not open.")
             raise sqlite3.Error()
+
+        sanitized_table_name = sanitize_filename(table_name)

         columns = ', '.join(f'"{k}"' for k in data.keys())
         placeholders = ', '.join(['?'] * len(data))
         values = list(data.values())

-        query = f"INSERT INTO {table_name} ({columns}) VALUES ({placeholders})"
+        query = f'INSERT INTO "{sanitized_table_name}" ({columns}) VALUES ({placeholders})'

         self.cursor.execute(query, values)

@@ -187,6 +189,8 @@ class DatabaseManager:
         if not data:
             _LOGGER.warning("'insert_many' called with empty data list. No action taken.")
             return
+
+        sanitized_table_name = sanitize_filename(table_name)

         # Assume all dicts have the same keys as the first one
         first_row = data[0]
@@ -196,10 +200,10 @@ class DatabaseManager:
         # Create a list of tuples, where each tuple is a row of values
         values_to_insert = [list(row.values()) for row in data]

-        query = f"INSERT INTO {table_name} ({columns}) VALUES ({placeholders})"
+        query = f'INSERT INTO "{sanitized_table_name}" ({columns}) VALUES ({placeholders})'

         self.cursor.executemany(query, values_to_insert)
-        _LOGGER.info(f"➡️ Bulk inserted {len(values_to_insert)} rows into '{table_name}'.")
+        _LOGGER.info(f"➡️ Bulk inserted {len(values_to_insert)} rows into '{sanitized_table_name}'.")

     def insert_from_dataframe(self, table_name: str, df: pd.DataFrame, if_exists: Literal['fail', 'replace', 'append'] = 'append'):
         """
@@ -220,9 +224,11 @@ class DatabaseManager:
         if not self.conn:
             _LOGGER.error("Database connection is not open.")
             raise sqlite3.Error()
+
+        sanitized_table_name = sanitize_filename(table_name)

         df.to_sql(
-            table_name,
+            sanitized_table_name,
             self.conn,
             if_exists=if_exists,
             index=False  # Typically, we don't want to save the DataFrame index
@@ -248,9 +254,11 @@ class DatabaseManager:
         if not self.conn:
             _LOGGER.error("Database connection is not open.")
             raise sqlite3.Error()
+
+        sanitized_table_name = sanitize_filename(table_name)

         # PRAGMA is a special SQL command in SQLite for database metadata
-        return pd.read_sql_query(f'PRAGMA table_info("{table_name}");', self.conn)
+        return pd.read_sql_query(f'PRAGMA table_info("{sanitized_table_name}");', self.conn)

     def create_index(self, table_name: str, column_name: str, unique: bool = False):
         """
@@ -269,11 +277,13 @@ class DatabaseManager:
         if not self.cursor:
             _LOGGER.error("Database connection is not open.")
             raise sqlite3.Error()
+
+        sanitized_table_name = sanitize_filename(table_name)

-        index_name = f"idx_{table_name}_{column_name}"
+        index_name = f"idx_{sanitized_table_name}_{column_name}"
         unique_clause = "UNIQUE" if unique else ""

-        query = f"CREATE {unique_clause} INDEX IF NOT EXISTS {index_name} ON {table_name} ({column_name})"
+        query = f'CREATE {unique_clause} INDEX IF NOT EXISTS "{index_name}" ON "{sanitized_table_name}" ("{column_name}")'

         _LOGGER.info(f"➡️ Executing: {query}")
         self.cursor.execute(query)
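The underlying pattern: SQLite's `?` placeholders can only bind values, never identifiers, so table and index names are now passed through `sanitize_filename` and double-quoted while row values stay parameterized. A standalone sqlite3 sketch of that pattern — `sanitize()` below is a simplified stand-in for the toolbox helper, not its actual implementation, and it does not use the package's `DatabaseManager` API:

```python
# General hardening pattern (illustrative only, not the package's API).
import re
import sqlite3

def sanitize(name: str) -> str:
    # Stand-in sanitizer: keep only characters that are safe in an identifier.
    return re.sub(r"[^0-9A-Za-z_]", "_", name)

conn = sqlite3.connect(":memory:")
cur = conn.cursor()

table = sanitize("results; DROP TABLE users")   # -> "results__DROP_TABLE_users"
cur.execute(f'CREATE TABLE IF NOT EXISTS "{table}" (id INTEGER, score REAL)')

data = {"id": 1, "score": 0.93}
columns = ", ".join(f'"{k}"' for k in data)      # identifiers are sanitized and quoted...
placeholders = ", ".join("?" for _ in data)      # ...while values stay parameterized
cur.execute(f'INSERT INTO "{table}" ({columns}) VALUES ({placeholders})',
            list(data.values()))
conn.commit()
```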
pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dragon-ml-toolbox"
-version = "10.9.0"
+version = "10.10.1"
 description = "A collection of tools for data science and machine learning projects."
 authors = [
     { name = "Karl Loza", email = "luigiloza@gmail.com" }