PyPI - sdg-core-lib - Versions diffs - 0.1.0__tar.gz - Mend

sdg-core-lib 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of sdg-core-lib might be problematic. Click here for more details.

Files changed (76) hide show

sdg_core_lib-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,9 @@
+Metadata-Version: 2.3
+Name: sdg-core-lib
+Version: 0.1.0
+Summary: Add your description here
+Author: emiliocimino
+Author-email: emiliocimino <emilio.cimino@outlook.it>
+Requires-Python: >=3.12
+Description-Content-Type: text/markdown

sdg_core_lib-0.1.0/README.md ADDED Viewed

File without changes

sdg_core_lib-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,35 @@
+[project]
+name = "sdg-core-lib"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+authors = [
+    { name = "emiliocimino", email = "emilio.cimino@outlook.it" }
+]
+requires-python = ">=3.12"
+[dependency-groups]
+dev = [
+    "numpy==2.0.2",
+    "pandas==2.2.3",
+    "seaborn==0.13.2",
+    "scikit-learn==1.5.2",
+    "keras==3.6.0",
+    "tensorflow==2.18.0",
+    "loguru",
+    "skops",
+    "statsmodels"
+]
+test = [
+    {include-group = "dev"},
+    "pytest"
+]
+[project.scripts]
+sdg-core-lib = "sdg_core_lib:main"
+[build-system]
+requires = ["uv_build>=0.8.22,<0.9.0"]
+build-backend = "uv_build"

sdg_core_lib-0.1.0/src/sdg_core_lib/NumericDataset.py ADDED Viewed

@@ -0,0 +1,150 @@
+import numpy as np
+import pandas as pd
+NUMERICAL = "continuous"  # "column_type" value that marks a column as continuous
+CATEGORICAL = "categorical"  # "column_type" value that marks a column as categorical
+OTHER = "none"  # label reported for columns that are neither continuous nor categorical
+class NumericDataset:
+    """
+    Class to handle numeric datasets.
+    The class loads a dataset from a list of dictionaries into a pandas DataFrame.
+    It also identifies which columns are numerical and which are categorical.
+    """
+    def __init__(self, dataset: list[dict]):
+        self.dataset: list[dict] = dataset
+        self.dataframe: pd.DataFrame = pd.DataFrame()
+        self.columns: list[str] = []
+        self.continuous_columns = []
+        self.categorical_columns = []
+        self.unrecognized_columns = []
+        self.continuous_data: pd.DataFrame = pd.DataFrame()
+        self.categorical_data: pd.DataFrame = pd.DataFrame()
+        self.input_shape: str = ""
+        self._configure()
+    def _configure(self):
+        """
+        Convert data from requests into an easy-to-process dataframe
+        dataset: [{
+            column_data: [ ... ],
+            column_name: str,
+            column_type: str [continuous/categorical],
+            column_datatype: str
+        }]
+        :return: a pandas Dataframe where each column is structured as expected
+        :raises: ValueError if dataset is empty
+        """
+        data = self.dataset
+        if len(self.dataset) == 0:
+            raise ValueError("Dataset is empty")
+        column_names = []
+        categorical_columns = []
+        numerical_columns = []
+        unrecognized_columns = []
+        data_structure = []
+        for col in data:
+            content = col.get("column_data", [])
+            content_type = col.get("column_datatype", "object")
+            column_name = col.get("column_name", "")
+            column_type = col.get("column_type", "")
+            data_structure.append(np.array(content, dtype=content_type))
+            column_names.append(column_name)
+            if column_type == NUMERICAL:
+                numerical_columns.append(column_name)
+            elif column_type == CATEGORICAL:
+                categorical_columns.append(column_name)
+            else:
+                unrecognized_columns.append(column_name)
+        input_data = {
+            col["column_name"]: np.array(col.get("column_data", [])).tolist()
+            for col in data
+        }
+        data_frame = pd.DataFrame(input_data)
+        data_structure = np.array(data_frame.to_numpy().tolist())
+        self.dataframe = data_frame
+        self.columns = column_names
+        self.categorical_columns = categorical_columns
+        self.continuous_columns = numerical_columns
+        self.unrecognized_columns = unrecognized_columns
+        self.continuous_data = data_frame[numerical_columns]
+        self.categorical_data = data_frame[categorical_columns]
+        self.input_shape = str(data_structure.shape[1:])
+    def _categorize_column(self, col):
+        if col in self.continuous_columns:
+            return NUMERICAL
+        if col in self.categorical_columns:
+            return CATEGORICAL
+        return OTHER
+    def parse_tabular_data_json(self) -> list[dict]:
+        """
+        Converts data from a dataframe into a list of dictionaries
+        :return: a dictionary in form of:
+        dataset: [{
+            column_data: [ ... ],
+            column_name: str,
+            column_type: str [numerical/categorical],
+            column_datatype: str
+        }]
+        """
+        return [
+            {
+                "column_data": self.dataframe[col].to_numpy().tolist(),
+                "column_name": col,
+                "column_type": self._categorize_column(col),
+                "column_datatype": str(self.dataframe[col].to_numpy().dtype),
+            }
+            for col in self.dataframe.columns
+        ]
+    def parse_data_to_registry(self) -> list[dict]:
+        """
+        Translates data structure from input coherence to a structured feature list
+        :return:
+        """
+        feature_list = []
+        for idx, col in enumerate(self.dataset):
+            feat = {
+                "feature_name": col.get("column_name", ""),
+                "feature_position": idx,
+                "is_categorical": (
+                    True if col.get("column_type", "") == CATEGORICAL else False
+                ),
+                "type": col.get("column_datatype", ""),
+            }
+            feature_list.append(feat)
+        return feature_list
+    def get_data(self) -> tuple[pd.DataFrame, list[str], list[str], list[str]]:
+        """
+        Returns the data in the dataset as a tuple of 4 elements:
+        1. The pandas DataFrame containing the data
+        2. A list of column names
+        3. A list of continuous column names
+        4. A list of categorical column names
+        :return: (dataframe, columns, continuous_columns, categorical_columns)
+        :rtype: tuple[pandas.DataFrame, list[str], list[str], list[str]]
+        """
+        return (
+            self.dataframe,
+            self.columns,
+            self.continuous_columns,
+            self.categorical_columns,
+        )
+    @staticmethod
+    def get_numpy_data(dataframe: pd.DataFrame) -> np.ndarray:
+        """
+        Correctly Returns numpy array with complex structures, like columns with type list
+        :param dataframe: numpy dataframe
+        :return: correctly structured numpy array
+        """
+        return np.array(dataframe.to_numpy().tolist())

sdg_core_lib-0.1.0/src/sdg_core_lib/__init__.py ADDED Viewed

File without changes

sdg_core_lib-0.1.0/src/sdg_core_lib/browser.py ADDED Viewed

@@ -0,0 +1,73 @@
+import os
+from pathlib import Path
+import importlib
+from typing import Generator
+def find_implementations(
+    root_path: str, implementation_folder: str = "implementation"
+) -> list[str]:
+    """
+    Takes a root path and a name of a folder. Returns all modules existing in each of the so-named folders
+    :param implementation_folder: folder name where implemented modules exist
+    :param root_path: root path in which to explore
+    :return: list of stringed modules represented in py-like dot-notation
+    """
+    root_dir = Path(root_path).resolve()  # Ensure absolute path
+    implementation_dirs = root_dir.rglob(
+        implementation_folder
+    )  # Find all 'implementation' folders
+    module_paths = []
+    for impl_dir in implementation_dirs:
+        py_files = [
+            file for file in impl_dir.glob("*.py") if file.name != "__init__.py"
+        ]
+        for file in py_files:
+            rel_path = file.relative_to(root_dir).with_suffix("")  # Remove extension
+            module_path = ".".join(rel_path.parts)  # Convert to module notation
+            module_paths.append(module_path)
+    return module_paths
+def browse(path: str, package: str) -> Generator[dict | None, None, None]:
+    """
+    Generator function to iterate.
+    It exploits the find_implementations function to gather all module names, then extract from each module
+    the main class. Each main class so extracted provides a dictionary description.
+    :return: dictionary description of each implementation existing in sdg_core_lib
+    """
+    modules = find_implementations(path)
+    list_module_names = [f"{package}{module}" for module in modules]
+    for module_name in list_module_names:
+        class_name = module_name.split(".")[-1]
+        try:
+            module = importlib.import_module(module_name)
+        except ImportError:
+            yield None
+            continue
+        Class = getattr(module, class_name)
+        yield Class.self_describe()
+def browse_functions():
+    base_function_package = "sdg_core_lib.post_process.functions."
+    base_function_path = os.path.join(
+        os.path.dirname(os.path.abspath(__file__)), "post_process/functions/"
+    )
+    return browse(base_function_path, base_function_package)
+def browse_algorithms():
+    base_model_package = "sdg_core_lib.data_generator.models."
+    base_model_path = os.path.join(
+        os.path.dirname(os.path.abspath(__file__)), "data_generator/models/"
+    )
+    return browse(base_model_path, base_model_package)

sdg_core_lib-0.1.0/src/sdg_core_lib/data_generator/__init__.py ADDED Viewed

File without changes

sdg_core_lib-0.1.0/src/sdg_core_lib/data_generator/model_factory.py ADDED Viewed

@@ -0,0 +1,72 @@
+import importlib
+from sdg_core_lib.data_generator.models.UnspecializedModel import UnspecializedModel
+def dynamic_import(class_name: str):
+    """
+    Dynamically imports a class given its name.
+    :param class_name: a string with the full name of the class to import
+    :return: the class itself
+    """
+    module_name, class_name = class_name.rsplit(".", 1)
+    module = importlib.import_module(module_name)
+    return getattr(module, class_name)
+def model_factory(model_dict: dict, input_shape: str = None) -> UnspecializedModel:
+    """
+    This function is a generic model factory. Takes a dictionary containing useful model information and plugs
+    them in the model itself.
+    Input shape may be passed as an argument (i.e) from the request data itself, or [alternatively] may be present in
+    model dictionary. If not explicitly passed, it will use the model dictionary
+    :param model_dict: A dictionary containing model information, structured as follows:
+    {
+        "image" -> contains the possible path where to find the model image. If not none, model will be loaded from there
+        "metadata" -> a dictionary itself, containing miscellaneous information
+        "algorithm_name" -> includes the model class module to _load
+        "model_name" -> the model name, used to identify the model itself
+        "input_shape" [optional] -> contains a stringed tuple that identifies the input layer shape
+    }
+    :param input_shape:
+    :return: An instance of a BaseModel class or any subclass
+    """
+    model_file, metadata, model_type, model_name, input_shape_model = parse_model_info(
+        model_dict
+    )
+    if input_shape is None:
+        input_shape = input_shape_model
+    ModelClass = dynamic_import(model_type)
+    model = ModelClass(
+        metadata=metadata,
+        model_name=model_name,
+        input_shape=input_shape,
+        load_path=model_file,
+    )
+    return model
+def parse_model_info(model_dict: dict):
+    """
+    Extracts the necessary information from the model dictionary and returns them as separate arguments.
+    :param model_dict: A dictionary containing model information, structured as follows:
+    {
+        "image" -> contains the possible path where to find the model image. If not none, model will be loaded from there
+        "metadata" -> a dictionary itself, containing miscellaneous information
+        "algorithm_name" -> includes the model class module to _load
+        "model_name" -> the model name, used to identify the model itself
+        "input_shape" [optional] -> contains a stringed tuple that identifies the input layer shape
+    }
+    :return: model_file, metadata, model_type, model_name, input_shape
+    """
+    model_file = model_dict.get("image", None)
+    metadata = model_dict.get("metadata", {})
+    model_type = model_dict.get("algorithm_name")
+    model_name = model_dict.get("model_name")
+    input_shape = model_dict.get("input_shape", "")
+    return model_file, metadata, model_type, model_name, input_shape

sdg_core_lib-0.1.0/src/sdg_core_lib/data_generator/models/ModelInfo.py ADDED Viewed

@@ -0,0 +1,42 @@
+class AllowedData:
+    def __init__(self, dtype: str, is_categorical: bool):
+        self.dtype = dtype
+        self.is_categorical = is_categorical
+    def to_json(self):
+        return {"type": self.dtype, "is_categorical": self.is_categorical}
+class ModelInfo:
+    def __init__(
+        self,
+        name: str,
+        default_loss_function: str,
+        description: str,
+        allowed_data: list[AllowedData],
+    ):
+        self.name = name
+        self.default_loss_function = default_loss_function
+        self.description = description
+        self.allowed_data = allowed_data
+    def get_model_info(self):
+        """
+        Returns a dictionary containing the model information.
+        The dictionary includes the model's name, default loss function, description,
+        and a list of allowed data types with their categorical status.
+        :return: dict containing the model's information
+        """
+        allowed_data = [ad.to_json() for ad in self.allowed_data]
+        system_model_info = {
+            "algorithm": {
+                "name": self.name,
+                "default_loss_function": self.default_loss_function,
+                "description": self.description,
+            },
+            "datatypes": allowed_data,
+        }
+        return system_model_info

sdg_core_lib-0.1.0/src/sdg_core_lib/data_generator/models/TrainingInfo.py ADDED Viewed

@@ -0,0 +1,40 @@
+import json
+class TrainingInfo:
+    def __init__(
+        self,
+        loss_fn: str,
+        train_samples: int,
+        train_loss: float,
+        validation_samples: int = None,
+        validation_loss: float = None,
+    ):
+        self._loss_fn = loss_fn
+        self._train_samples = train_samples
+        self._train_loss = train_loss
+        self._validation_samples = validation_samples
+        self._validation_loss = validation_loss
+    def to_dict(self) -> dict:
+        """
+        Convert the TrainingInfo to a dictionary
+        :return: dict: A dictionary with the training info
+        """
+        return {
+            "loss_function": self._loss_fn,
+            "train_samples": self._train_samples,
+            "train_loss": self._train_loss,
+            "val_samples": self._validation_samples,
+            "val_loss": self._validation_loss,
+        }
+    def to_json(self) -> str:
+        """
+        Convert the TrainingInfo to a JSON string
+        :return: str: A JSON string with the training info
+        """
+        return json.dumps(self.to_dict())

sdg_core_lib-0.1.0/src/sdg_core_lib/data_generator/models/UnspecializedModel.py ADDED Viewed

@@ -0,0 +1,106 @@
+import numpy as np
+from abc import ABC, abstractmethod
+from sdg_core_lib import NumericDataset
+class UnspecializedModel(ABC):
+    """
+    Abstract class for all models. Implements common functionalities and defines abstract methods that must be implemented
+    by all subclasses.
+    Attributes:
+        _metadata (dict): A dictionary containing miscellaneous information about the model.
+        model_name (str): The model name, used to identify the model itself.
+        input_shape (tuple): A tuple containing the input shape of the model.
+        _load_path (str): A string containing the path where to load the model from.
+        _model (keras.Model): The model instance.
+        _scaler (Scaler): The scaler instance.
+        training_info (TrainingInfo): The training info instance.
+    """
+    def __init__(
+        self,
+        metadata: dict,
+        model_name: str,
+        input_shape: str = None,
+        load_path: str = None,
+    ):
+        self._metadata = metadata
+        self.model_name = model_name
+        self.input_shape = self._parse_stringed_input_shape(input_shape)
+        self._load_path = load_path
+        self._model = None  # Placeholder for the model instance
+        self._scaler = None  # Placeholder for model scaler
+        self.training_info = None  # Placeholder for training info
+        self._model_misc = None  # Placeholder for model miscellaneous info
+    @abstractmethod
+    def _build(self, input_shape: str):
+        raise NotImplementedError
+    @abstractmethod
+    def _load(self, model_filepath: str):
+        """Load trained_models weights."""
+        raise NotImplementedError
+    @abstractmethod
+    def _instantiate(self):
+        raise NotImplementedError
+    @abstractmethod
+    def _scale(self, data: np.array):
+        """Scale inputs with its logic"""
+        raise NotImplementedError
+    @abstractmethod
+    def _inverse_scale(self, data: np.array):
+        """Inverse scale inputs with its logic"""
+        raise NotImplementedError
+    @abstractmethod
+    def _pre_process(self, data: NumericDataset, **kwargs):
+        """Pre-process data"""
+        raise NotImplementedError
+    @abstractmethod
+    def train(self, data):
+        """Train the model."""
+        raise NotImplementedError
+    @abstractmethod
+    def fine_tune(self, data: np.array, **kwargs):
+        """Fine-tune the model."""
+        raise NotImplementedError
+    @abstractmethod
+    def infer(self, n_rows: int, **kwargs):
+        """Run inference."""
+        raise NotImplementedError
+    @abstractmethod
+    def save(self, folder_path):
+        """Save Model."""
+        raise NotImplementedError
+    @abstractmethod
+    def set_hyperparameters(self, **kwargs):
+        """Set Hyperparameters"""
+        raise NotImplementedError
+    @classmethod
+    def self_describe(cls):
+        raise NotImplementedError
+    @staticmethod
+    def _parse_stringed_input_shape(stringed_shape: str) -> tuple[int, ...]:
+        """
+        Parses a stringed list of numbers into a tuple
+        :param stringed_shape: a stringed list of number in format "[x,y,z]"
+        :return: a tuple of numbers, in format (x, y, z)
+        """
+        brackets = ["(", ")", "[", "]", "{", "}"]
+        for b in brackets:
+            stringed_shape = stringed_shape.replace(b, "")
+        return tuple([int(n) for n in stringed_shape.split(",") if n != ""])

sdg_core_lib-0.1.0/src/sdg_core_lib/data_generator/models/__init__.py ADDED Viewed

File without changes