PyPI - sdg-core-lib - Versions diffs - 0.1.6.dev4__tar.gz - Mend

sdg-core-lib 0.1.6.dev4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (110) hide show

sdg_core_lib-0.1.6.dev4/PKG-INFO ADDED Viewed

@@ -0,0 +1,20 @@
+Metadata-Version: 2.3
+Name: sdg-core-lib
+Version: 0.1.6.dev4
+Summary: Add your description here
+Author: emiliocimino
+Author-email: emiliocimino <emilio.cimino@outlook.it>
+Requires-Dist: numpy==2.0.2
+Requires-Dist: pandas==2.2.3
+Requires-Dist: seaborn==0.13.2
+Requires-Dist: scikit-learn==1.5.2
+Requires-Dist: keras==3.6.0
+Requires-Dist: tensorflow==2.18.0
+Requires-Dist: loguru==0.7.3
+Requires-Dist: skops==0.13.0
+Requires-Dist: statsmodels==0.14.5
+Requires-Dist: tslearn==0.7.0
+Requires-Dist: keras-tuner==1.4.8
+Requires-Python: >=3.12
+Description-Content-Type: text/markdown

sdg_core_lib-0.1.6.dev4/README.md ADDED Viewed

File without changes

sdg_core_lib-0.1.6.dev4/pyproject.toml ADDED Viewed

@@ -0,0 +1,51 @@
+[project]
+name = "sdg-core-lib"
+version = "0.1.6.dev4"
+description = "Add your description here"
+readme = "README.md"
+authors = [
+    { name = "emiliocimino", email = "emilio.cimino@outlook.it" }
+]
+requires-python = ">=3.12"
+dependencies = [
+    "numpy==2.0.2",
+    "pandas==2.2.3",
+    "seaborn==0.13.2",
+    "scikit-learn==1.5.2",
+    "keras==3.6.0",
+    "tensorflow==2.18.0",
+    "loguru==0.7.3",
+    "skops==0.13.0",
+    "statsmodels==0.14.5",
+    "tslearn==0.7.0",
+    "keras-tuner==1.4.8",
+]
+[dependency-groups]
+dev = [
+    "numpy==2.0.2",
+    "pandas==2.2.3",
+    "seaborn==0.13.2",
+    "scikit-learn==1.5.2",
+    "keras==3.6.0",
+    "tensorflow==2.18.0",
+    "loguru==0.7.3",
+    "skops==0.13.0",
+    "statsmodels==0.14.5",
+    "tslearn==0.7.0",
+    "keras-tuner==1.4.8"
+]
+test = [
+    {include-group = "dev"},
+    "pytest"
+]
+[project.scripts]
+sdg-core-lib = "sdg_core_lib:main"
+[build-system]
+requires = ["uv_build>=0.8.22,<0.9.0"]
+build-backend = "uv_build"

sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/__init__.py ADDED Viewed

File without changes

sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/browser.py ADDED Viewed

@@ -0,0 +1,73 @@
+import os
+from pathlib import Path
+import importlib
+from typing import Generator
+def find_implementations(
+    root_path: str, implementation_folder: str = "implementation"
+) -> list[str]:
+    """
+    Collect the dotted names of every Python module stored in a folder called
+    ``implementation_folder`` anywhere below ``root_path``. ``__init__.py`` files are skipped.
+    :param root_path: directory that is explored recursively
+    :param implementation_folder: name of the folders whose modules are collected
+    :return: list of module names in py-like dot-notation, relative to root_path
+    """
+    base_dir = Path(root_path).resolve()  # absolute anchor used by relative_to below
+    return [
+        # drop the ".py" suffix, then join the remaining path parts with dots
+        ".".join(source.relative_to(base_dir).with_suffix("").parts)
+        for folder in base_dir.rglob(implementation_folder)
+        for source in folder.glob("*.py")
+        if source.name != "__init__.py"
+    ]
+def browse(path: str, package: str) -> Generator[dict | None, None, None]:
+    """
+    Lazily describe every implementation module found below ``path``.
+    Module names come from find_implementations and are prefixed with ``package``. Each module is
+    imported and the attribute named like the module's last dotted component is asked to
+    ``self_describe()``.
+    :param path: directory handed to find_implementations
+    :param package: dotted prefix (including the trailing dot) prepended to each module name
+    :return: generator yielding one description per module, or None when its import fails
+    """
+    for relative_module in find_implementations(path):
+        qualified_name = f"{package}{relative_module}"
+        try:
+            loaded = importlib.import_module(qualified_name)
+        except ImportError:
+            # A module that cannot be imported is reported as None rather than aborting the scan
+            yield None
+        else:
+            # The main class is expected to share its name with the module file
+            main_class = getattr(loaded, qualified_name.split(".")[-1])
+            yield main_class.self_describe()
+def browse_functions():
+    """
+    Browse the modules under the ``post_process/functions/`` folder located next to this file.
+    :return: the generator produced by browse for the post-process functions package
+    """
+    package_dir = os.path.dirname(os.path.abspath(__file__))
+    functions_dir = os.path.join(package_dir, "post_process/functions/")
+    return browse(functions_dir, "sdg_core_lib.post_process.functions.")
+def browse_algorithms():
+    """
+    Browse the modules under the ``data_generator/models/`` folder located next to this file.
+    :return: the generator produced by browse for the data-generator models package
+    """
+    package_dir = os.path.dirname(os.path.abspath(__file__))
+    models_dir = os.path.join(package_dir, "data_generator/models/")
+    return browse(models_dir, "sdg_core_lib.data_generator.models.")

sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/config.py ADDED Viewed

@@ -0,0 +1,9 @@
+import os
+def get_hyperparameters() -> dict:
+    """
+    Read the training hyperparameters from the process environment.
+    :return: dict with keys "epochs", "learning_rate" and "batch_size"; each value is the raw
+        environment string, or None when the variable is not set
+    """
+    variable_by_key = (
+        ("epochs", "EPOCHS"),
+        ("learning_rate", "LEARNING_RATE"),
+        ("batch_size", "BATCH_SIZE"),
+    )
+    return {key: os.environ.get(variable) for key, variable in variable_by_key}

sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/data_generator/__init__.py ADDED Viewed

File without changes

sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/data_generator/model_factory.py ADDED Viewed

@@ -0,0 +1,72 @@
+import importlib
+from sdg_core_lib.data_generator.models.UnspecializedModel import UnspecializedModel
+def dynamic_import(class_name: str):
+    """
+    Resolve a fully qualified dotted name to the object it designates.
+    :param class_name: dotted path whose last component is the attribute to fetch from the
+        module named by everything before the final dot
+    :return: the attribute (typically a class) found in the imported module
+    """
+    module_path, attribute = class_name.rsplit(".", 1)
+    return getattr(importlib.import_module(module_path), attribute)
+def model_factory(model_dict: dict, input_shape: str = None) -> UnspecializedModel:
+    """
+    Generic model factory: reads the model description, imports the class named by
+    "algorithm_name" and instantiates it.
+    The input shape may be given explicitly (e.g. taken from the request data); when it is None the
+    value found in the model dictionary is used instead.
+    :param model_dict: A dictionary containing model information, structured as follows:
+    {
+        "image" -> possible path where the model image is found; passed to the model as load_path
+        "metadata" -> a dictionary itself, containing miscellaneous information
+        "algorithm_name" -> dotted path of the model class to import
+        "model_name" -> the model name, used to identify the model itself
+        "input_shape" [optional] -> a stringed tuple that identifies the input layer shape
+    }
+    :param input_shape: optional stringed shape overriding the one stored in model_dict
+    :return: an instance of the class designated by "algorithm_name"
+    """
+    model_file, metadata, model_type, model_name, fallback_shape = parse_model_info(
+        model_dict
+    )
+    # An explicit argument wins; otherwise fall back to the shape carried by the dictionary
+    chosen_shape = fallback_shape if input_shape is None else input_shape
+    model_class = dynamic_import(model_type)
+    return model_class(
+        metadata=metadata,
+        model_name=model_name,
+        input_shape=chosen_shape,
+        load_path=model_file,
+    )
+def parse_model_info(model_dict: dict):
+    """
+    Pull the individual model fields out of the model dictionary.
+    :param model_dict: A dictionary containing model information, structured as follows:
+    {
+        "image" -> possible path where the model image is found (defaults to None)
+        "metadata" -> a dictionary itself, containing miscellaneous information (defaults to {})
+        "algorithm_name" -> dotted path of the model class (defaults to None)
+        "model_name" -> the model name, used to identify the model itself (defaults to None)
+        "input_shape" [optional] -> a stringed tuple for the input layer shape (defaults to "")
+    }
+    :return: model_file, metadata, model_type, model_name, input_shape
+    """
+    lookup = model_dict.get
+    return (
+        lookup("image", None),
+        lookup("metadata", {}),
+        lookup("algorithm_name"),
+        lookup("model_name"),
+        lookup("input_shape", ""),
+    )

sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/data_generator/models/ModelInfo.py ADDED Viewed

@@ -0,0 +1,42 @@
+class AllowedData:
+    """Pairs a data type name with a flag telling whether that data is categorical."""
+    def __init__(self, dtype: str, is_categorical: bool):
+        self.dtype, self.is_categorical = dtype, is_categorical
+    def to_json(self):
+        """
+        :return: dict with the data type under "type" and the flag under "is_categorical"
+        """
+        payload = {"type": self.dtype}
+        payload["is_categorical"] = self.is_categorical
+        return payload
+class ModelInfo:
+    """Static description of an algorithm: its name, default loss, text description and accepted data."""
+    def __init__(
+        self,
+        name: str,
+        default_loss_function: str,
+        description: str,
+        allowed_data: list[AllowedData],
+    ):
+        self.name = name
+        self.default_loss_function = default_loss_function
+        self.description = description
+        self.allowed_data = allowed_data
+    def get_model_info(self):
+        """
+        Build the dictionary representation of this model description.
+        The "algorithm" entry carries name, default loss function and description; the
+        "datatypes" entry lists the to_json() form of every allowed data item.
+        :return: dict containing the model's information
+        """
+        return {
+            "algorithm": {
+                "name": self.name,
+                "default_loss_function": self.default_loss_function,
+                "description": self.description,
+            },
+            "datatypes": [entry.to_json() for entry in self.allowed_data],
+        }

sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/data_generator/models/TrainingInfo.py ADDED Viewed

@@ -0,0 +1,40 @@
+import json
+class TrainingInfo:
+    """Summary of a training run: loss function name, sample counts and final losses."""
+    def __init__(
+        self,
+        loss_fn: str,
+        train_samples: int,
+        train_loss: float,
+        validation_samples: int = None,
+        validation_loss: float = None,
+    ):
+        self._loss_fn = loss_fn
+        self._train_samples = train_samples
+        self._train_loss = train_loss
+        self._validation_samples = validation_samples
+        self._validation_loss = validation_loss
+    def to_dict(self) -> dict:
+        """
+        Express the training info as a plain dictionary
+        :return: dict: keys "loss_function", "train_samples", "train_loss", "val_samples", "val_loss"
+        """
+        pairs = (
+            ("loss_function", self._loss_fn),
+            ("train_samples", self._train_samples),
+            ("train_loss", self._train_loss),
+            ("val_samples", self._validation_samples),
+            ("val_loss", self._validation_loss),
+        )
+        return dict(pairs)
+    def to_json(self) -> str:
+        """
+        Serialize the dictionary form of the training info with json.dumps
+        :return: str: A JSON string with the training info
+        """
+        as_dict = self.to_dict()
+        return json.dumps(as_dict)

sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/data_generator/models/UnspecializedModel.py ADDED Viewed

@@ -0,0 +1,88 @@
+import numpy as np
+from abc import ABC, abstractmethod
+class UnspecializedModel(ABC):
+    """
+    Abstract class for all models. Implements common functionalities and defines abstract methods that must be implemented
+    by all subclasses.
+    Attributes:
+        _metadata (dict): A dictionary containing miscellaneous information about the model.
+        model_name (str): The model name, used to identify the model itself.
+        input_shape (tuple): A tuple containing the input shape of the model (empty when no shape is given).
+        _load_path (str): A string containing the path where to load the model from.
+        _model: The underlying model instance, None until a subclass sets it.
+        training_info: The training info instance, None until a subclass sets it.
+        _model_misc: Miscellaneous model information, None until a subclass sets it.
+    """
+    def __init__(
+        self,
+        metadata: dict,
+        model_name: str,
+        input_shape: str = None,
+        load_path: str = None,
+    ):
+        """
+        :param metadata: miscellaneous information about the model
+        :param model_name: name used to identify the model
+        :param input_shape: stringed shape such as "[x,y,z]"; None or "" yields an empty tuple
+        :param load_path: optional path a saved model can be loaded from
+        """
+        self._metadata = metadata
+        self.model_name = model_name
+        self.input_shape = self._parse_stringed_input_shape(input_shape)
+        self._load_path = load_path
+        self._model = None  # Placeholder for the model instance
+        self.training_info = None  # Placeholder for training info
+        self._model_misc = None  # Placeholder for model miscellaneous info
+    @abstractmethod
+    def _build(self, input_shape: str):
+        """Build the underlying model for the given input shape."""
+        # NOTE(review): annotated as str, but self.input_shape is a parsed tuple — confirm what callers pass
+        raise NotImplementedError
+    @abstractmethod
+    def _load(self, model_filepath: str):
+        """Load trained_models weights."""
+        raise NotImplementedError
+    @abstractmethod
+    def _instantiate(self):
+        """Create or load the underlying model."""
+        raise NotImplementedError
+    @abstractmethod
+    def train(self, data: np.ndarray):
+        """Train the model."""
+        raise NotImplementedError
+    @abstractmethod
+    def fine_tune(self, data: np.ndarray, **kwargs):
+        """Fine-tune the model."""
+        raise NotImplementedError
+    @abstractmethod
+    def infer(self, n_rows: int, **kwargs):
+        """Run inference."""
+        raise NotImplementedError
+    @abstractmethod
+    def save(self, folder_path):
+        """Save Model."""
+        raise NotImplementedError
+    @abstractmethod
+    def set_hyperparameters(self, **kwargs):
+        """Set Hyperparameters"""
+        raise NotImplementedError
+    @classmethod
+    def self_describe(cls):
+        """Return a description of the model class; not implemented at this level."""
+        raise NotImplementedError
+    @staticmethod
+    def _parse_stringed_input_shape(stringed_shape: str | None) -> tuple[int, ...]:
+        """
+        Parses a stringed list of numbers into a tuple
+        :param stringed_shape: a stringed list of number in format "[x,y,z]"; round, square and curly
+            brackets are all accepted. None is treated like an empty string.
+        :return: a tuple of numbers, in format (x, y, z); empty when there is nothing to parse
+        :raises ValueError: if a non-blank element is not an integer
+        """
+        # The constructor defaults input_shape to None, so None must not reach str.replace
+        if stringed_shape is None:
+            return ()
+        for bracket in "()[]{}":
+            stringed_shape = stringed_shape.replace(bracket, "")
+        # Blank tokens (e.g. after a trailing ", ") are skipped instead of failing in int()
+        return tuple(int(token) for token in stringed_shape.split(",") if token.strip())

sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/data_generator/models/__init__.py ADDED Viewed

File without changes

sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/data_generator/models/keras/KerasBaseVAE.py ADDED Viewed

@@ -0,0 +1,154 @@
+from abc import ABC
+import numpy as np
+import os
+import keras
+from keras import saving
+from sdg_core_lib.data_generator.models.UnspecializedModel import UnspecializedModel
+from sdg_core_lib.data_generator.models.TrainingInfo import TrainingInfo
+# Select the TensorFlow backend for Keras through the KERAS_BACKEND environment variable.
+# NOTE(review): this assignment runs after the `import keras` statement above, so it may be too
+# late to influence a backend chosen at import time — confirm whether it belongs before that import.
+os.environ["KERAS_BACKEND"] = "tensorflow"
+class KerasBaseVAE(UnspecializedModel, ABC):
+    """
+    Abstract base class for keras VAE models.
+    This class provides a common interface for keras VAE models: hyperparameter handling, training,
+    sampling-based inference and the saving and loading of the encoder and decoder.
+    ``_build``, ``_load_model``, ``fine_tune`` and ``self_describe`` are left to subclasses.
+    """
+    def __init__(
+        self,
+        metadata: dict,
+        model_name: str,
+        input_shape: str,
+        load_path: str | None,
+        latent_dim: int,
+    ):
+        """
+        :param metadata: miscellaneous information about the model
+        :param model_name: name used to identify the model
+        :param input_shape: stringed input shape, parsed by the base class
+        :param load_path: folder holding a saved encoder/decoder, or None
+        :param latent_dim: size of the latent vectors sampled by infer
+        """
+        super().__init__(metadata, model_name, input_shape, load_path)
+        self._latent_dim = latent_dim
+        self._beta = None
+        self._learning_rate = None
+        self._batch_size = None
+        self._epochs = None
+    def _require_model(self):
+        """
+        Return the underlying model, failing with a clear message when there is none.
+        :raises AttributeError: if the model has not been built or loaded yet
+        :return: the model instance
+        """
+        if self._model is None:
+            raise AttributeError(
+                "Model is not instantiated: build or load it before using it"
+            )
+        return self._model
+    @staticmethod
+    def _load_files(folder_path: str):
+        """
+        Loads the saved VAE model files from the given folder path.
+        :param folder_path: path to the folder containing "encoder.keras" and "decoder.keras"
+        :return: encoder and decoder models
+        """
+        encoder_filename = os.path.join(folder_path, "encoder.keras")
+        decoder_filename = os.path.join(folder_path, "decoder.keras")
+        encoder = saving.load_model(encoder_filename)
+        decoder = saving.load_model(decoder_filename)
+        return encoder, decoder
+    def _load_model(self, encoder, decoder):
+        """Assemble the model from a loaded encoder and decoder; left to subclasses."""
+        raise NotImplementedError
+    def _load(self, folder_path: str):
+        """
+        Loads encoder and decoder from the folder and hands them to _load_model.
+        :param folder_path: path to the folder containing the saved model files
+        :return: None
+        """
+        encoder, decoder = self._load_files(folder_path)
+        self._load_model(encoder, decoder)
+    def _instantiate(self):
+        """
+        Instantiates the model and loads the saved model if the load_path is given.
+        Without a load_path, the model is built only when none exists yet and an input shape is known.
+        :return: None
+        """
+        if self._load_path is not None:
+            self._load(self._load_path)
+            return
+        if not self._model and self.input_shape:
+            self._model = self._build(self.input_shape)
+    def save(self, folder_path: str):
+        """
+        Saves the encoder and the decoder of the model to the given folder path,
+        as "encoder.keras" and "decoder.keras".
+        :param folder_path: path to the folder to save the model
+        :raises AttributeError: if the model does not exist
+        :return: None
+        """
+        model = self._require_model()
+        encoder_filename = os.path.join(folder_path, "encoder.keras")
+        decoder_filename = os.path.join(folder_path, "decoder.keras")
+        saving.save_model(model.encoder, encoder_filename)
+        saving.save_model(model.decoder, decoder_filename)
+    def fine_tune(self, data: np.ndarray, **kwargs):
+        """Fine-tune the model; not implemented at this level."""
+        raise NotImplementedError
+    def _build(self, input_shape: str):
+        """Build the model for the given input shape; left to subclasses."""
+        raise NotImplementedError
+    def _set_hyperparams(self, learning_rate, batch_size, epochs):
+        """
+        Stores the given hyperparameters, converting them to float/int/int.
+        Values that are None leave the current setting untouched.
+        """
+        if learning_rate is not None:
+            self._learning_rate = float(learning_rate)
+        if batch_size is not None:
+            self._batch_size = int(batch_size)
+        if epochs is not None:
+            self._epochs = int(epochs)
+    def set_hyperparameters(self, **kwargs):
+        """
+        Sets "learning_rate", "batch_size" and "epochs" from keyword arguments.
+        Missing keys fall back to the values currently stored on the instance.
+        :return: None
+        """
+        learning_rate = kwargs.get("learning_rate", self._learning_rate)
+        batch_size = kwargs.get("batch_size", self._batch_size)
+        epochs = kwargs.get("epochs", self._epochs)
+        self._set_hyperparams(learning_rate, batch_size, epochs)
+    def train(
+        self,
+        data: np.ndarray,
+        learning_rate: float = None,
+        batch_size: int = None,
+        epochs: int = None,
+    ):
+        """
+        Trains the VAE model on the provided data with an Adam optimizer and records a
+        TrainingInfo in ``training_info`` (validation loss -1 and 0 validation samples).
+        :param data: the array to train the model on
+        :param learning_rate: optional, the learning rate to use for training
+        :param batch_size: optional, the batch size to use for training
+        :param epochs: optional, the number of epochs to train the model for
+        :raises AttributeError: if the model is not instantiated
+        :return: None
+        """
+        model = self._require_model()
+        # Explicit arguments win; otherwise use the hyperparameters stored on the instance
+        learning_rate = (
+            learning_rate if learning_rate is not None else self._learning_rate
+        )
+        batch_size = batch_size if batch_size is not None else self._batch_size
+        epochs = epochs if epochs is not None else self._epochs
+        model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate))
+        history = model.fit(data, epochs=epochs, batch_size=batch_size, verbose=1)
+        # The recorded loss may be a backend tensor or an already-converted number;
+        # unwrap step by step so both end up as a plain Python scalar.
+        final_loss = history.history["loss"][-1]
+        if hasattr(final_loss, "numpy"):
+            final_loss = final_loss.numpy()
+        if hasattr(final_loss, "item"):
+            final_loss = final_loss.item()
+        self.training_info = TrainingInfo(
+            loss_fn="ELBO",
+            train_loss=final_loss,
+            train_samples=data.shape[0],
+            validation_loss=-1,
+            validation_samples=0,
+        )
+    def infer(self, n_rows: int, **kwargs):
+        """
+        Performs inference using the trained VAE model: draws standard-normal latent vectors
+        and decodes them.
+        :param n_rows: Number of rows to generate.
+        :param kwargs: Additional arguments for inference, not currently used.
+        :raises AttributeError: If the model is not instantiated.
+        :return: The decoder predictions for the sampled latent vectors.
+        """
+        model = self._require_model()
+        z_random = np.random.normal(size=(n_rows, self._latent_dim))
+        results = model.decoder.predict(z_random)
+        return results
+    @classmethod
+    def self_describe(cls):
+        """Return a description of the model class; left to subclasses."""
+        raise NotImplementedError

sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/data_generator/models/keras/VAE.py ADDED Viewed

@@ -0,0 +1,78 @@
+import keras
+from keras.api import layers, ops
+import tensorflow as tf
+class Sampling(layers.Layer):
+    """Draws a latent vector z from (z_mean, z_log_var) as z_mean + exp(0.5 * z_log_var) * noise."""
+    def __init__(self, seed: int = 42, **kwargs):
+        super().__init__(**kwargs)
+        # Seed generator handed to keras.random.normal on every call
+        self.seed_generator = keras.random.SeedGenerator(seed)
+    def call(self, inputs):
+        """
+        :param inputs: pair (z_mean, z_log_var)
+        :return: the sampled latent tensor
+        """
+        z_mean, z_log_var = inputs
+        noise_shape = (ops.shape(z_mean)[0], ops.shape(z_mean)[1])
+        noise = keras.random.normal(shape=noise_shape, seed=self.seed_generator)
+        scale = ops.exp(0.5 * z_log_var)
+        return z_mean + scale * noise
+class VAE(keras.Model):
+    """
+    Variational autoencoder made of an encoder and a decoder model.
+    The loss is the reconstruction loss (absolute error summed over the last axis, averaged over
+    the batch) plus ``beta`` times the KL term computed from z_mean and z_log_var.
+    """
+    def __init__(self, encoder, decoder, beta=1, **kwargs):
+        """
+        :param encoder: model returning (z_mean, z_log_var, z) for a batch
+        :param decoder: model mapping z back to a reconstruction
+        :param beta: weight applied to the KL term in the total loss
+        """
+        super().__init__(**kwargs)
+        self.encoder = encoder
+        self.decoder = decoder
+        self._beta = beta
+        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
+        self.reconstruction_loss_tracker = keras.metrics.Mean(
+            name="reconstruction_loss"
+        )
+        self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")
+    @property
+    def metrics(self):
+        """The three running-mean trackers: total, reconstruction and KL loss."""
+        return [
+            self.total_loss_tracker,
+            self.reconstruction_loss_tracker,
+            self.kl_loss_tracker,
+        ]
+    def _vae_losses(self, data):
+        """
+        Run encoder and decoder on a batch and compute the losses shared by train and test steps.
+        :param data: the batch, or a tuple whose first element is the batch
+        :return: total_loss, reconstruction_loss, kl_loss
+        """
+        # Only the inputs are used; any extra tuple members are ignored
+        if isinstance(data, tuple):
+            data = data[0]
+        z_mean, z_log_var, z = self.encoder(data)
+        reconstruction = self.decoder(z)
+        reconstruction_loss = ops.mean(
+            ops.sum(ops.abs(data - reconstruction), axis=-1)
+        )
+        kl_loss = -0.5 * (1 + z_log_var - ops.square(z_mean) - ops.exp(z_log_var))
+        kl_loss = ops.mean(ops.sum(kl_loss, axis=1))
+        total_loss = reconstruction_loss + self._beta * kl_loss
+        return total_loss, reconstruction_loss, kl_loss
+    def train_step(self, data):
+        """
+        One optimisation step: compute the losses under a gradient tape, apply the gradients
+        and update the loss trackers.
+        :return: dict with the tracker results under "loss", "reconstruction_loss" and "kl_loss"
+        """
+        with tf.GradientTape() as tape:
+            total_loss, reconstruction_loss, kl_loss = self._vae_losses(data)
+        grads = tape.gradient(total_loss, self.trainable_weights)
+        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
+        self.total_loss_tracker.update_state(total_loss)
+        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
+        self.kl_loss_tracker.update_state(kl_loss)
+        return {
+            "loss": self.total_loss_tracker.result(),
+            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
+            "kl_loss": self.kl_loss_tracker.result(),
+        }
+    def test_step(self, data):
+        """
+        One evaluation step: compute the losses for the batch without touching the trackers.
+        :return: dict with the batch values under "loss", "reconstruction_loss" and "kl_loss"
+        """
+        total_loss, reconstruction_loss, kl_loss = self._vae_losses(data)
+        return {
+            "loss": total_loss,
+            "reconstruction_loss": reconstruction_loss,
+            "kl_loss": kl_loss,
+        }

sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/data_generator/models/keras/__init__.py ADDED Viewed

File without changes