dragon-ml-toolbox 3.12.6__py3-none-any.whl → 4.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of dragon-ml-toolbox might be problematic.
- dragon_ml_toolbox-4.1.0.dist-info/METADATA +253 -0
- dragon_ml_toolbox-4.1.0.dist-info/RECORD +30 -0
- ml_tools/ETL_engineering.py +2 -2
- ml_tools/GUI_tools.py +2 -2
- ml_tools/MICE_imputation.py +4 -3
- ml_tools/ML_callbacks.py +8 -4
- ml_tools/ML_evaluation.py +11 -6
- ml_tools/ML_inference.py +131 -0
- ml_tools/ML_trainer.py +17 -8
- ml_tools/PSO_optimization.py +116 -62
- ml_tools/RNN_forecast.py +5 -0
- ml_tools/SQL.py +272 -0
- ml_tools/VIF_factor.py +4 -3
- ml_tools/_logger.py +36 -0
- ml_tools/_pytorch_models.py +1 -1
- ml_tools/_script_info.py +8 -0
- ml_tools/{logger.py → custom_logger.py} +4 -66
- ml_tools/data_exploration.py +2 -66
- ml_tools/datasetmaster.py +3 -2
- ml_tools/ensemble_inference.py +249 -0
- ml_tools/ensemble_learning.py +40 -294
- ml_tools/handle_excel.py +3 -2
- ml_tools/keys.py +13 -2
- ml_tools/path_manager.py +194 -31
- ml_tools/utilities.py +2 -180
- dragon_ml_toolbox-3.12.6.dist-info/METADATA +0 -137
- dragon_ml_toolbox-3.12.6.dist-info/RECORD +0 -26
- ml_tools/ML_tutorial.py +0 -300
- {dragon_ml_toolbox-3.12.6.dist-info → dragon_ml_toolbox-4.1.0.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-3.12.6.dist-info → dragon_ml_toolbox-4.1.0.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-3.12.6.dist-info → dragon_ml_toolbox-4.1.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-3.12.6.dist-info → dragon_ml_toolbox-4.1.0.dist-info}/top_level.txt +0 -0
ml_tools/PSO_optimization.py
CHANGED
@@ -2,28 +2,24 @@ import numpy as np
 from pathlib import Path
 import xgboost as xgb
 import lightgbm as lgb
-from sklearn.ensemble import HistGradientBoostingRegressor
-from sklearn.base import ClassifierMixin
 from typing import Literal, Union, Tuple, Dict, Optional
 import pandas as pd
 from copy import deepcopy
 from .utilities import (
-    _script_info,
-    list_csv_paths,
     threshold_binary_values,
     threshold_binary_values_batch,
-    deserialize_object,
-
-
-    make_fullpath,
-    yield_dataframes_from_dir,
-    sanitize_filename)
+    deserialize_object,
+    yield_dataframes_from_dir)
+from .path_manager import sanitize_filename, make_fullpath, list_files_by_extension, list_csv_paths
 import torch
 from tqdm import trange
 import matplotlib.pyplot as plt
 import seaborn as sns
-from .
+from ._logger import _LOGGER
 from .keys import ModelSaveKeys
+from ._script_info import _script_info
+from .SQL import DatabaseManager
+from contextlib import nullcontext
 
 
 __all__ = [

@@ -125,7 +121,7 @@ class ObjectiveFunction():
         return features_array * noise
 
     def check_model(self):
-        if isinstance(self.model,
+        if isinstance(self.model, xgb.XGBClassifier) or isinstance(self.model, lgb.LGBMClassifier):
            raise ValueError(f"[Model Check Failed] ❌\nThe loaded model ({type(self.model).__name__}) is a Classifier.\nOptimization is not suitable for standard classification tasks.")
         if self.model is None:
             raise ValueError("Loaded model is None")

@@ -187,45 +183,73 @@ def _set_feature_names(size: int, names: Union[list[str], None]):
     else:
         assert len(names) == size, "List with feature names do not match the number of features"
         return names
-
 
-
-
-
-
+
+def _save_result(result_dict: dict,
+                 save_format: Literal['csv', 'sqlite', 'both'],
+                 csv_path: Path,
+                 db_manager: Optional[DatabaseManager] = None,
+                 db_table_name: Optional[str] = None):
+    """
+    Handles saving a single result to CSV, SQLite, or both.
+    """
+    # Save to CSV
+    if save_format in ['csv', 'both']:
+        _save_or_append_to_csv(result_dict, csv_path)
+
+    # Save to SQLite
+    if save_format in ['sqlite', 'both']:
+        if db_manager and db_table_name:
+            db_manager.insert_row(db_table_name, result_dict)
+        else:
+            _LOGGER.warning("SQLite saving requested but db_manager or table_name not provided.")
+
+
+def _save_or_append_to_csv(data_dict: dict, save_path: Path):
+    """
+    Saves or appends a dictionary of data as a single row to a CSV file.
+
+    If the file doesn't exist, it creates it and writes the header.
+    If the file exists, it appends the new data without the header.
+    """
+    df_row = pd.DataFrame([data_dict])
 
-
+    file_exists = save_path.exists()
 
-
+    df_row.to_csv(
+        save_path,
+        mode='a',                # 'a' for append mode
+        index=False,             # Don't write the DataFrame index
+        header=not file_exists   # Write header only if file does NOT exist
+    )
 
 
-def _run_single_pso(objective_function: ObjectiveFunction, pso_args: dict, feature_names: list[str], target_name: str, random_state: int):
-    """Helper for a single PSO run."""
+def _run_single_pso(objective_function: ObjectiveFunction, pso_args: dict, feature_names: list[str], target_name: str, random_state: int, save_format: Literal['csv', 'sqlite', 'both'], csv_path: Path, db_manager: Optional[DatabaseManager], db_table_name: str):
+    """Helper for a single PSO run that also handles saving."""
     pso_args.update({"seed": random_state})
 
     best_features, best_target, *_ = _pso(**pso_args)
 
-    # Flip best_target if maximization was used
     if objective_function.task == "maximization":
         best_target = -best_target
 
-    # Threshold binary features
     binary_number = objective_function.binary_features
     best_features_threshold = threshold_binary_values(best_features, binary_number)
 
-    # Name features and target
     best_features_named = {name: value for name, value in zip(feature_names, best_features_threshold)}
     best_target_named = {target_name: best_target}
 
+    # Save the result using the new helper
+    combined_dict = {**best_features_named, **best_target_named}
+    _save_result(combined_dict, save_format, csv_path, db_manager, db_table_name)
+
     return best_features_named, best_target_named
 
 
-def _run_post_hoc_pso(objective_function: ObjectiveFunction, pso_args: dict, feature_names: list[str], target_name: str, repetitions: int):
-    """Helper for post-hoc
-
-
-
-    for _ in range(repetitions):
+def _run_post_hoc_pso(objective_function: ObjectiveFunction, pso_args: dict, feature_names: list[str], target_name: str, repetitions: int, save_format: Literal['csv', 'sqlite', 'both'], csv_path: Path, db_manager: Optional[DatabaseManager], db_table_name: str):
+    """Helper for post-hoc analysis that saves results incrementally."""
+    progress = trange(repetitions, desc="Post-Hoc PSO", unit="run")
+    for _ in progress:
         best_features, best_target, *_ = _pso(**pso_args)
 
         if objective_function.task == "maximization":

@@ -234,28 +258,25 @@ def _run_post_hoc_pso(objective_function: ObjectiveFunction, pso_args: dict, fea
         binary_number = objective_function.binary_features
         best_features_threshold = threshold_binary_values(best_features, binary_number)
 
-        for
-
-
-
-
-    all_best_features_named = {name: lst for name, lst in zip(feature_names, all_best_features)}
-    all_best_targets_named = {target_name: all_best_targets}
-
-    return all_best_features_named, all_best_targets_named
+        result_dict = {name: value for name, value in zip(feature_names, best_features_threshold)}
+        result_dict[target_name] = best_target
+
+        # Save each result incrementally
+        _save_result(result_dict, save_format, csv_path, db_manager, db_table_name)
 
 
 def run_pso(lower_boundaries: list[float],
             upper_boundaries: list[float],
             objective_function: ObjectiveFunction,
             save_results_dir: Union[str,Path],
+            save_format: Literal['csv', 'sqlite', 'both'] = 'csv',
             auto_binary_boundaries: bool=True,
             target_name: Union[str, None]=None,
             feature_names: Union[list[str], None]=None,
             swarm_size: int=200,
             max_iterations: int=3000,
             random_state: int=101,
-            post_hoc_analysis: Optional[int]=10) -> Tuple[Dict[str, float
+            post_hoc_analysis: Optional[int]=10) -> Optional[Tuple[Dict[str, float], Dict[str, float]]]:
     """
     Executes Particle Swarm Optimization (PSO) to optimize a given objective function and saves the results as a CSV file.
 

@@ -269,6 +290,11 @@ def run_pso(lower_boundaries: list[float],
         A callable object encapsulating a tree-based regression model.
     save_results_dir : str | Path
         Directory path to save the results CSV file.
+    save_format : {'csv', 'sqlite', 'both'}, default 'csv'
+        The format for saving optimization results.
+        - 'csv': Saves results to a CSV file.
+        - 'sqlite': Saves results to an SQLite database file. ⚠️ If a database exists, new tables will be created using the target name.
+        - 'both': Saves results to both formats.
     auto_binary_boundaries : bool
         Use `ObjectiveFunction.binary_features` to append as many binary boundaries as needed to `lower_boundaries` and `upper_boundaries` automatically.
     target_name : str or None, optional

@@ -284,14 +310,11 @@ def run_pso(lower_boundaries: list[float],
 
     Returns
     -------
-    Tuple[Dict[str, float
-    If `post_hoc_analysis` is None, returns two dictionaries
-
-
-
-    If `post_hoc_analysis` is an integer, returns two dictionaries:
-    - feature_names: Lists of best feature values (after inverse scaling) for each repetition.
-    - target_name: List of best target values across repetitions.
+    Tuple[Dict[str, float], Dict[str, float]] or None
+        - If `post_hoc_analysis` is None, returns two dictionaries containing the
+          single best features and the corresponding target value.
+        - If `post_hoc_analysis` is active, results are streamed directly to a CSV file
+          and this function returns `None`.
 
     Notes
     -----

@@ -316,8 +339,9 @@ def run_pso(lower_boundaries: list[float],
     # Append binary boundaries
     binary_number = objective_function.binary_features
     if auto_binary_boundaries and binary_number > 0:
-
-
+        # simplify binary search by constraining range
+        local_lower_boundaries.extend([0.45] * binary_number)
+        local_upper_boundaries.extend([0.55] * binary_number)
 
     # Set the total length of features
     size_of_features = len(local_lower_boundaries)

@@ -333,7 +357,25 @@ def run_pso(lower_boundaries: list[float],
     if target_name is None and objective_function.target_name is not None:
         target_name = objective_function.target_name
     if target_name is None:
-
+        raise ValueError(f"'target' name was not provided and was not found in the .joblib object.")
+
+    # --- Setup: Saving Infrastructure ---
+    sanitized_target_name = sanitize_filename(target_name)
+    save_dir_path = make_fullpath(save_results_dir, make=True, enforce="directory")
+    base_filename = f"Optimization_{sanitized_target_name}"
+    csv_path = save_dir_path / f"{base_filename}.csv"
+    db_path = save_dir_path / "Optimization.db"
+    db_table_name = f"{sanitized_target_name}"
+
+    if save_format in ['sqlite', 'both']:
+        # Dynamically create the schema for the database table
+        schema = {name: "REAL" for name in names}
+        schema[target_name] = "REAL"
+        schema = {"result_id": "INTEGER PRIMARY KEY AUTOINCREMENT", **schema}
+
+        # Create table
+        with DatabaseManager(db_path) as db:
+            db.create_table(db_table_name, schema)
 
     pso_arguments = {
         "func":objective_function,

@@ -345,17 +387,29 @@ def run_pso(lower_boundaries: list[float],
         "particle_output": False,
     }
 
-    # Dispatcher
-
-
-
-
-
-
-
-
-
+    # --- Dispatcher ---
+    # Use a real or dummy context manager to handle the DB connection cleanly
+    db_context = DatabaseManager(db_path) if save_format in ['sqlite', 'both'] else nullcontext()
+
+    with db_context as db_manager:
+        if post_hoc_analysis is None or post_hoc_analysis <= 1:
+            # --- Single Run Logic ---
+            features_dict, target_dict = _run_single_pso(
+                objective_function, pso_arguments, names, target_name, random_state,
+                save_format, csv_path, db_manager, db_table_name
+            )
+            _LOGGER.info(f"✅ Single optimization complete.")
+            return features_dict, target_dict
+
+        else:
+            # --- Post-Hoc Analysis Logic ---
+            _LOGGER.info(f"🏁 Starting post-hoc analysis with {post_hoc_analysis} repetitions...")
+            _run_post_hoc_pso(
+                objective_function, pso_arguments, names, target_name, post_hoc_analysis,
+                save_format, csv_path, db_manager, db_table_name
+            )
+            _LOGGER.info("✅ Post-hoc analysis complete. Results saved.")
+            return None
 
 
 def _pso(func: ObjectiveFunction,
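For orientation, a minimal usage sketch of the reworked run_pso entry point. The ObjectiveFunction constructor arguments are not shown in this diff, so that call is hypothetical; boundary values and the target name are illustrative:

from ml_tools.PSO_optimization import ObjectiveFunction, run_pso

# Hypothetical: the ObjectiveFunction constructor is not shown in this diff.
objective = ObjectiveFunction(...)

# With post_hoc_analysis active, every repetition is streamed to
# pso_results/Optimization_<target>.csv and, for 'sqlite' or 'both', to a
# table named after the target inside pso_results/Optimization.db;
# run_pso then returns None instead of the per-run dictionaries.
run_pso(
    lower_boundaries=[0.0, 0.0],
    upper_boundaries=[10.0, 5.0],
    objective_function=objective,
    save_results_dir="pso_results",
    save_format="both",
    target_name="target",
    post_hoc_analysis=25,
)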
ml_tools/RNN_forecast.py
CHANGED
@@ -1,6 +1,7 @@
 import torch
 from torch import nn
 import numpy as np
+from ._script_info import _script_info
 
 __all__ = [
     "rnn_forecast"

@@ -47,3 +48,7 @@ def rnn_forecast(model: nn.Module, start_sequence: torch.Tensor, steps: int, dev
 
     # Concatenate all predictions and flatten the array for easy use
     return np.concatenate(predictions).flatten()
+
+
+def info():
+    _script_info(__all__)
ml_tools/SQL.py
ADDED
@@ -0,0 +1,272 @@
import sqlite3
import pandas as pd
from pathlib import Path
from typing import Union, Dict, Any, Optional, List, Literal
from ._logger import _LOGGER
from ._script_info import _script_info
from .path_manager import make_fullpath


__all__ = [
    "DatabaseManager",
]


class DatabaseManager:
    """
    A user-friendly context manager for handling SQLite database operations.

    This class abstracts the underlying sqlite3 connection and cursor management,
    providing simple methods to execute queries, create tables, and handle data
    insertion and retrieval using pandas DataFrames.

    Parameters
    ----------
    db_path : Union[str, Path]
        The file path to the SQLite database. If the file does not exist,
        it will be created upon connection.

    Example
    -------
    >>> schema = {
    ...     "id": "INTEGER PRIMARY KEY AUTOINCREMENT",
    ...     "run_name": "TEXT NOT NULL",
    ...     "feature_a": "REAL",
    ...     "score": "REAL"
    ... }
    >>> with DatabaseManager("my_results.db") as db:
    ...     db.create_table("experiments", schema)
    ...     data = {"run_name": "first_run", "feature_a": 0.123, "score": 95.5}
    ...     db.insert_row("experiments", data)
    ...     df = db.query_to_dataframe("SELECT * FROM experiments")
    ...     print(df)
    """
    def __init__(self, db_path: Union[str, Path]):
        """Initializes the DatabaseManager with the path to the database file."""
        if isinstance(db_path, str):
            if not db_path.endswith(".db"):
                db_path = db_path + ".db"
        elif isinstance(db_path, Path):
            if db_path.suffix != ".db":
                db_path = db_path.with_suffix(".db")

        self.db_path = make_fullpath(db_path, make=True, enforce="file")
        self.conn: Optional[sqlite3.Connection] = None
        self.cursor: Optional[sqlite3.Cursor] = None

    def __enter__(self):
        """Establishes the database connection and returns the manager instance."""
        try:
            self.conn = sqlite3.connect(self.db_path)
            self.cursor = self.conn.cursor()
            _LOGGER.info(f"✅ Successfully connected to database: {self.db_path}")
            return self
        except sqlite3.Error as e:
            _LOGGER.error(f"❌ Database connection failed: {e}")
            raise  # Re-raise the exception after logging

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Commits changes and closes the database connection."""
        if self.conn:
            if exc_type:  # If an exception occurred, rollback
                self.conn.rollback()
                _LOGGER.warning("⚠️ Rolling back transaction due to an error.")
            else:  # Otherwise, commit the transaction
                self.conn.commit()
            self.conn.close()
            _LOGGER.info(f"❇️ Database connection closed: {self.db_path.name}")

    def create_table(self, table_name: str, schema: Dict[str, str], if_not_exists: bool = True):
        """
        Creates a new table in the database based on a provided schema.

        Parameters
        ----------
        table_name : str
            The name of the table to create.
        schema : Dict[str, str]
            A dictionary where keys are column names and values are their SQL data types
            (e.g., {"id": "INTEGER PRIMARY KEY", "name": "TEXT NOT NULL"}).
        if_not_exists : bool, default=True
            If True, adds "IF NOT EXISTS" to the SQL statement to prevent errors
            if the table already exists.
        """
        if not self.cursor:
            raise sqlite3.Error("Database connection is not open.")

        columns_def = ", ".join([f'"{col_name}" {col_type}' for col_name, col_type in schema.items()])
        exists_clause = "IF NOT EXISTS" if if_not_exists else ""

        query = f"CREATE TABLE {exists_clause} {table_name} ({columns_def})"

        _LOGGER.info(f"🗂️ Executing: {query}")
        self.cursor.execute(query)

    def insert_row(self, table_name: str, data: Dict[str, Any]):
        """
        Inserts a single row of data into the specified table.

        Parameters
        ----------
        table_name : str
            The name of the target table.
        data : Dict[str, Any]
            A dictionary where keys correspond to column names and values are the
            data to be inserted.
        """
        if not self.cursor:
            raise sqlite3.Error("Database connection is not open.")

        columns = ', '.join(f'"{k}"' for k in data.keys())
        placeholders = ', '.join(['?'] * len(data))
        values = list(data.values())

        query = f"INSERT INTO {table_name} ({columns}) VALUES ({placeholders})"

        self.cursor.execute(query, values)

    def query_to_dataframe(self, query: str, params: Optional[tuple] = None) -> pd.DataFrame:
        """
        Executes a SELECT query and returns the results as a pandas DataFrame.

        Parameters
        ----------
        query : str
            The SQL SELECT statement to execute.
        params : Optional[tuple], default=None
            An optional tuple of parameters to pass to the query for safety
            against SQL injection.

        Returns
        -------
        pd.DataFrame
            A DataFrame containing the query results.
        """
        if not self.conn:
            raise sqlite3.Error("Database connection is not open.")

        return pd.read_sql_query(query, self.conn, params=params)

    def execute_sql(self, query: str, params: Optional[tuple] = None):
        """
        Executes an arbitrary SQL command that does not return data (e.g., UPDATE, DELETE).

        Parameters
        ----------
        query : str
            The SQL statement to execute.
        params : Optional[tuple], default=None
            An optional tuple of parameters for the query.
        """
        if not self.cursor:
            raise sqlite3.Error("Database connection is not open.")

        self.cursor.execute(query, params if params else ())

    def insert_many(self, table_name: str, data: List[Dict[str, Any]]):
        """
        Inserts multiple rows into the specified table in a single, efficient transaction.

        Parameters
        ----------
        table_name : str
            The name of the target table.
        data : List[Dict[str, Any]]
            A list of dictionaries, where each dictionary represents a row to be inserted.
            All dictionaries should have the same keys.
        """
        if not self.cursor:
            raise sqlite3.Error("Database connection is not open.")
        if not data:
            _LOGGER.warning("⚠️ insert_many called with empty data list. No action taken.")
            return

        # Assume all dicts have the same keys as the first one
        first_row = data[0]
        columns = ', '.join(f'"{k}"' for k in first_row.keys())
        placeholders = ', '.join(['?'] * len(first_row))

        # Create a list of tuples, where each tuple is a row of values
        values_to_insert = [list(row.values()) for row in data]

        query = f"INSERT INTO {table_name} ({columns}) VALUES ({placeholders})"

        self.cursor.executemany(query, values_to_insert)
        _LOGGER.info(f"✅ Bulk inserted {len(values_to_insert)} rows into '{table_name}'.")

    def insert_from_dataframe(self, table_name: str, df: pd.DataFrame, if_exists: Literal['fail', 'replace', 'append'] = 'append'):
        """
        Writes records from a pandas DataFrame to the specified SQL table.

        Parameters
        ----------
        table_name : str
            The name of the target SQL table.
        df : pd.DataFrame
            The DataFrame to be written.
        if_exists : str, default 'append'
            How to behave if the table already exists.
            - 'fail': Raise a ValueError.
            - 'replace': Drop the table before inserting new values.
            - 'append': Insert new values to the existing table.
        """
        if not self.conn:
            raise sqlite3.Error("Database connection is not open.")

        df.to_sql(
            table_name,
            self.conn,
            if_exists=if_exists,
            index=False  # Typically, we don't want to save the DataFrame index
        )
        _LOGGER.info(f"✅ Wrote {len(df)} rows from DataFrame to table '{table_name}' using mode '{if_exists}'.")

    def list_tables(self) -> List[str]:
        """Returns a list of all table names in the database."""
        if not self.cursor:
            raise sqlite3.Error("Database connection is not open.")

        self.cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        # The result of the fetch is a list of tuples, e.g., [('table1',), ('table2',)]
        return [table[0] for table in self.cursor.fetchall()]

    def get_table_schema(self, table_name: str) -> pd.DataFrame:
        """
        Retrieves the schema of a specific table and returns it as a DataFrame.

        Returns a DataFrame with columns: cid, name, type, notnull, dflt_value, pk
        """
        if not self.conn:
            raise sqlite3.Error("Database connection is not open.")

        # PRAGMA is a special SQL command in SQLite for database metadata
        return pd.read_sql_query(f'PRAGMA table_info("{table_name}");', self.conn)

    def create_index(self, table_name: str, column_name: str, unique: bool = False):
        """
        Creates an index on a column of a specified table to speed up queries.

        Parameters
        ----------
        table_name : str
            The name of the table containing the column.
        column_name : str
            The name of the column to be indexed.
        unique : bool, default=False
            If True, creates a unique index, which ensures all values in the
            column are unique.
        """
        if not self.cursor:
            raise sqlite3.Error("Database connection is not open.")

        index_name = f"idx_{table_name}_{column_name}"
        unique_clause = "UNIQUE" if unique else ""

        query = f"CREATE {unique_clause} INDEX IF NOT EXISTS {index_name} ON {table_name} ({column_name})"

        _LOGGER.info(f"🗂️ Executing: {query}")
        self.cursor.execute(query)


def info():
    _script_info(__all__)
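Beyond the docstring example above, the new methods compose into a full round trip. A short sketch using only calls defined in this file; the database, table, and column names are illustrative:

from ml_tools.SQL import DatabaseManager

rows = [
    {"run_name": "trial_1", "feature_a": 0.10, "score": 90.0},
    {"run_name": "trial_2", "feature_a": 0.25, "score": 93.5},
]

with DatabaseManager("my_results.db") as db:
    db.create_table("experiments", {
        "id": "INTEGER PRIMARY KEY AUTOINCREMENT",
        "run_name": "TEXT NOT NULL",
        "feature_a": "REAL",
        "score": "REAL",
    })
    db.insert_many("experiments", rows)         # one executemany transaction
    db.create_index("experiments", "run_name")  # speeds up lookups by run_name
    best = db.query_to_dataframe(
        "SELECT run_name, score FROM experiments WHERE score > ? ORDER BY score DESC",
        params=(90.0,),
    )
# Leaving the `with` block commits the transaction and closes the connection.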
ml_tools/VIF_factor.py
CHANGED
@@ -7,9 +7,10 @@ from statsmodels.stats.outliers_influence import variance_inflation_factor
 from statsmodels.tools.tools import add_constant
 import warnings
 from pathlib import Path
-from .utilities import
-from .
-
+from .utilities import yield_dataframes_from_dir, save_dataframe
+from .path_manager import sanitize_filename, make_fullpath
+from ._logger import _LOGGER
+from ._script_info import _script_info
 
 __all__ = [
     "compute_vif",
ml_tools/_logger.py
ADDED
@@ -0,0 +1,36 @@
import logging
import sys


def _get_logger(name: str = "ml_tools", level: int = logging.INFO):
    """
    Initializes and returns a configured logger instance.

    - `logger.info()`
    - `logger.warning()`
    - `logger.error()` the program can potentially recover.
    - `logger.critical()` the program is going to crash.
    """
    logger = logging.getLogger(name)
    logger.setLevel(level)

    # Prevents adding handlers multiple times if the function is called again
    if not logger.handlers:
        handler = logging.StreamHandler(sys.stdout)

        # Define the format string and the date format separately
        log_format = '\n🐉%(asctime)s - %(name)s - %(levelname)s - %(message)s'
        date_format = '%Y-%m-%d %H:%M'  # Format: Year-Month-Day Hour:Minute

        # Pass both the format and the date format to the Formatter
        formatter = logging.Formatter(log_format, datefmt=date_format)

        handler.setFormatter(formatter)
        logger.addHandler(handler)

    logger.propagate = False

    return logger


# Create a single logger instance to be imported by other modules
_LOGGER = _get_logger()
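A quick sketch of how the other modules in this release consume the shared logger, matching the imports added in the diffs above; the messages and timestamp are illustrative:

from ml_tools._logger import _LOGGER

_LOGGER.info("Connected to database.")        # routine progress
_LOGGER.warning("Falling back to CSV only.")  # recoverable problem
_LOGGER.error("Database connection failed.")  # the program can potentially recover

# Given the formatter above, each record prints to stdout as, e.g.:
# 🐉2025-01-01 12:00 - ml_tools - INFO - Connected to database.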
ml_tools/_pytorch_models.py
CHANGED