radnn 0.1.4__tar.gz → 0.1.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {radnn-0.1.4/src/radnn.egg-info → radnn-0.1.6}/PKG-INFO +1 -1
- {radnn-0.1.4 → radnn-0.1.6}/pyproject.toml +1 -1
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/__init__.py +4 -1
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/__init__.py +2 -2
- radnn-0.1.6/src/radnn/data/constants.py +8 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/custom_data_set.py +44 -29
- radnn-0.1.6/src/radnn/data/dataset_base.py +263 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/dataset_base_legacy.py +1 -1
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/errors.py +11 -6
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/sample_preprocessor.py +3 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/sample_set.py +50 -31
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/sample_set_kind.py +21 -5
- radnn-0.1.6/src/radnn/data/sample_set_simple.py +94 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/sequence_dataset.py +4 -4
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/experiment/ml_experiment.py +2 -2
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/experiment/ml_experiment_log.py +25 -19
- radnn-0.1.6/src/radnn/learn/constants.py +24 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/learn/torch/ml_model_freezer.py +1 -1
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/plots/__init__.py +3 -2
- radnn-0.1.6/src/radnn/plots/plot_histogram_of_classes.py +65 -0
- radnn-0.1.6/src/radnn/plots/plot_legacy.py +103 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/plots/plot_roc.py +1 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/hosts/windows_host.py +1 -1
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/utils.py +7 -4
- {radnn-0.1.4 → radnn-0.1.6/src/radnn.egg-info}/PKG-INFO +1 -1
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn.egg-info/SOURCES.txt +3 -0
- radnn-0.1.4/src/radnn/data/dataset_base.py +0 -165
- radnn-0.1.4/src/radnn/data/sample_set_simple.py +0 -33
- radnn-0.1.4/src/radnn/plots/plot_histogram_of_classes.py +0 -143
- {radnn-0.1.4 → radnn-0.1.6}/LICENSE.txt +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/setup.cfg +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/benchmark/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/benchmark/latency.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/benchmark/vram.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/core.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/data_hyperparams.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/dataset_factory.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/structs/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/structs/tree.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/data_feed.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/dataset_base.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/dataset_folder.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/image_dataset.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/image_dataset_files.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/preprocess/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/preprocess/normalizer.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/preprocess/standardizer.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/sample_set.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/sequence_dataset.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/structures/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/structures/dictionary.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/subset_type.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/tf_classification_data_feed.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/errors.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/evaluation/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/evaluation/evaluate_classification.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/experiment/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/experiment/identification.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/experiment/ml_experiment_config.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/experiment/ml_experiment_env.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/experiment/ml_experiment_store.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/images/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/images/colors.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/images/image_processor.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/images/transforms.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/learn/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/learn/keras/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/learn/keras/keras_best_state_saver.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/learn/keras/keras_learning_algorithm.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/learn/keras/keras_learning_rate_scheduler.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/learn/keras/keras_optimization_combo.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/learn/torch/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/learn/torch/gradient_descent_method.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/learn/torch/losses/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/learn/torch/losses/rmse.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/learn/torch/lr_schedulers.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/learn/torch/ml_trainer.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/ml_system.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/models/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/models/cnn/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/models/cnn/cnn_stem_setup.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/models/model_factory.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/models/model_hyperparams.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/models/model_info.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/models/torch/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/models/torch/model_utils.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/models/torch/torch_model_build_adapter.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/plots/plot_auto_multi_image.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/plots/plot_confusion_matrix.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/plots/plot_function.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/plots/plot_learning_curve.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/plots/plot_multi_scatter.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/plots/plot_visualize_dataset2d.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/plots/plot_voronoi_2d.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/stats/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/stats/descriptive_stats.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/files/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/files/csvfile.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/files/filelist.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/files/fileobject.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/files/imgfile.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/files/jsonfile.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/files/picklefile.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/files/textfile.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/files/zipfile.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/filestore.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/filesystem.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/hosts/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/hosts/colab_host.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/hosts/linux_host.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/log.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/tee_logger.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/threads/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/threads/semaphore_lock.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/threads/thread_context.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/threads/thread_safe_queue.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/threads/thread_safe_string_collection.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/threads/thread_worker.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/test/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/test/tensor_hash.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn.egg-info/dependency_links.txt +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn.egg-info/requires.txt +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn.egg-info/top_level.txt +0 -0
{radnn-0.1.4 → radnn-0.1.6}/src/radnn/__init__.py

@@ -8,10 +8,13 @@
 # Version 0.1.0 [2026-01-07]
 # Version 0.1.1 [2025-01-08]
 # Version 0.1.4 [2025-01-26]
-
+# Version 0.1.5 [2025-02-02]
+# Version 0.1.6 [2025-02-03]
+__version__ = "0.1.6"

 from .system import FileStore, FileSystem
 from .ml_system import MLSystem
 from .ml_system import mlsys
 from .utils import print_tensor, order_str
 from .errors import Errors
+from .learn.constants import MLTask
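Besides the changelog comments, 0.1.6 introduces a top-level `__version__` attribute and re-exports `MLTask` from `radnn.learn.constants`. A minimal check of the installed release, assuming radnn 0.1.6 is installed:

```python
import radnn
from radnn import MLTask  # newly re-exported from radnn.learn.constants in 0.1.6

print(radnn.__version__)  # expected: "0.1.6"
```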
{radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/__init__.py

@@ -6,5 +6,5 @@ from .sample_set_kind import SampleSetKind
 from .sample_preprocessor import SamplePreprocessor

 from .dataset_factory import DatasetFactory, DatasetBuildAdapter
-
-from .custom_data_set import LegacyDataSet
+from .constants import DataPreprocessingKind
+from .custom_data_set import LegacyDataSet
radnn-0.1.6/src/radnn/data/constants.py

@@ -0,0 +1,8 @@
+from enum import Enum
+
+# =========================================================================================================================
+class DataPreprocessingKind(Enum):
+  MIN_MAX_NORMALIZE = 0
+  STANDARDIZE = 1
+# =========================================================================================================================
+
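The new `DataPreprocessingKind` enum is what the reworked `split()` methods (below) use to pick a scikit-learn scaler, fitted on the training portion only. A minimal sketch of that dispatch, using the same scaler classes the package imports; the sample array here is a hypothetical stand-in:

```python
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from radnn.data import DataPreprocessingKind  # re-exported by data/__init__.py in 0.1.6

train_samples = np.random.rand(100, 4)        # hypothetical training features
kind = DataPreprocessingKind.STANDARDIZE

# Mirrors the branching inside LegacyDataSet.split() and DataSetBase.split()
if kind == DataPreprocessingKind.MIN_MAX_NORMALIZE:
  preprocessor = MinMaxScaler().fit(train_samples)
elif kind == DataPreprocessingKind.STANDARDIZE:
  preprocessor = StandardScaler().fit(train_samples)
else:
  preprocessor = None

if preprocessor is not None:
  train_samples = preprocessor.transform(train_samples)
```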
{radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/custom_data_set.py

@@ -23,9 +23,13 @@

 # ......................................................................................

-from sklearn.model_selection import train_test_split # import a standalone procedure
+from sklearn.model_selection import train_test_split # import a standalone procedure toyfunction from the pacakge
+from sklearn.preprocessing import StandardScaler, MinMaxScaler
 from radnn import mlsys
 from radnn.data.sample_set_simple import SampleSet
+from .constants import DataPreprocessingKind
+from enum import Enum
+

 # =========================================================================================================================
 class LegacyDataSet(object):
@@ -41,27 +45,49 @@ class LegacyDataSet(object):

     self.samples = None
     self.labels = None
-
+    self.preprocessor = None
+    self.mean = None
+    self.std = None
+
     # training set object
     self.ts: SampleSet | None = None
     # validation set object
     self.vs: SampleSet | None = None
     # unknown test set object
-    self.
+    self.us: SampleSet | None = None
     # ................................................................
     if self.random_seed is not None:
       mlsys.random_seed_all(self.random_seed)
-
+  # --------------------------------------------------------------------------------------
+  def split(self, p_nValidationSamplesPC=0.10, preprocessing: DataPreprocessingKind | None = DataPreprocessingKind.STANDARDIZE):
+    oTS_Samples, oVS_Samples, oTS_Labels, oVS_Labels = train_test_split(self.samples, self.labels ,
+                                                                        test_size=p_nValidationSamplesPC,
+                                                                        random_state=2021)
+    if preprocessing == DataPreprocessingKind.MIN_MAX_NORMALIZE:
+      self.preprocessor = MinMaxScaler().fit(oTS_Samples)
+    elif preprocessing == DataPreprocessingKind.STANDARDIZE:
+      self.preprocessor = StandardScaler().fit(oTS_Samples)
+
+    if self.preprocessor is not None:
+      oTS_Samples = self.preprocessor.transform(oTS_Samples)
+      oVS_Samples = self.preprocessor.transform(oVS_Samples)
+
+    # (Re)creating the subsets of the dataset after the splits have been created
+    self.ts = SampleSet(self, oTS_Samples, oTS_Labels)
+    self.vs = SampleSet(self, oVS_Samples, oVS_Labels)
+
+    print("%d samples in the Training Set" % self.ts.sample_count)
+    print("%d samples in the Validation Set" % self.vs.sample_count)
   # --------------------------------------------------------------------------------------
   # Backwards Compatibility
   @property
   def TSSamples(self):
     return self.ts.samples
-
+
   @property
   def ts_samples(self):
     return self.ts_samples
-
+
   @property
   def TSLabels(self):
     return self.ts.labels
@@ -69,7 +95,7 @@ class LegacyDataSet(object):
   @property
   def ts_labels(self):
     return self.ts.labels
-
+
   @property
   def TSSampleCount(self):
     return self.ts.sample_count
@@ -77,40 +103,42 @@ class LegacyDataSet(object):
   @property
   def ts_sample_count(self):
     return self.ts.sample_count
-
+
   def VSSamples(self):
     return self.vs.samples
-
+
   @property
   def vs_samples(self):
     return self.vs.samples
-
+
   @property
   def VSLabels(self):
     return self.vs.labels
-
+
   @property
   def vs_labels(self):
     return self.vs.labels
-
+
   @property
   def VSSampleCount(self):
     return self.vs.sample_count
-
+
   @property
   def vs_sample_count(self):
     return self.vs.sample_count
-
+
   @property
   def FeatureCount(self):
     return self.feature_count
+
   @property
   def ClassCount(self):
     return self.class_count
+
   @property
   def ClassCount(self):
     return self.class_count
-
+
   @property
   def SampleCount(self):
     return self.sample_count
@@ -118,21 +146,8 @@ class LegacyDataSet(object):
   @property
   def Samples(self):
     return self.samples
-
+
   @property
   def Labels(self):
     return self.labels
-  # --------------------------------------------------------------------------------------
-  def split(self, p_nValidationSamplesPC=0.10):
-    oTS_Samples, oVS_Samples, oTS_Labels, oVS_Labels = train_test_split(self.samples, self.labels ,
-                                                                        test_size=p_nValidationSamplesPC,
-                                                                        random_state=2021)
-
-    # (Re)creating the subsets of the dataset after the splits have been created
-    self.ts = SampleSet(self, oTS_Samples, oTS_Labels)
-    self.vs = SampleSet(self, oVS_Samples, oVS_Labels)
-
-    print("%d samples in the Training Set" % self.ts.sample_count)
-    print("%d samples in the Validation Set" % self.vs.sample_count)
-  # --------------------------------------------------------------------------------------
 # =========================================================================================================================
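Functionally, `LegacyDataSet.split()` moved up in the class and gained a `preprocessing` argument whose default is `DataPreprocessingKind.STANDARDIZE`, so existing callers now get features scaled with statistics fitted on the training portion unless they opt out. A hedged sketch of both call styles, assuming an already-populated `LegacyDataSet` instance (its construction and loading are not part of this hunk):

```python
from radnn.data import LegacyDataSet, DataPreprocessingKind

def split_legacy(dataset: LegacyDataSet):
  # dataset is assumed to already have its samples/labels arrays filled in.

  # New default in 0.1.6: standardize with statistics fitted on the training split
  dataset.split(p_nValidationSamplesPC=0.10,
                preprocessing=DataPreprocessingKind.STANDARDIZE)

  # Pre-0.1.6 behaviour (split only, no scaling) remains available by opting out:
  # dataset.split(p_nValidationSamplesPC=0.10, preprocessing=None)

  return dataset.ts, dataset.vs
```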
radnn-0.1.6/src/radnn/data/dataset_base.py

@@ -0,0 +1,263 @@
+# ......................................................................................
+# MIT License
+
+# Copyright (c) 2019-2025 Pantelis I. Kaplanoglou
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# ......................................................................................
+import os
+import numpy as np
+import pandas as pd
+from abc import ABC, abstractmethod
+from .sample_set_simple import SampleSet
+from .sample_set_kind import SampleSetKind
+from .sample_preprocessor import SamplePreprocessor, VoidPreprocessor
+from .errors import *
+from radnn import FileStore
+from radnn import mlsys
+from .constants import DataPreprocessingKind
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import MinMaxScaler, StandardScaler
+
+# ======================================================================================================================
+class DataSetCallbacks(object):
+  # --------------------------------------------------------------------------------------------------------------------
+  def __init__(self, download_method=None, seed_method=None):
+    self._lazy_download = download_method
+    self._random_seed = seed_method
+  # --------------------------------------------------------------------------------------------------------------------
+  def lazy_download(self, fs):
+    self._lazy_download(fs)
+  # --------------------------------------------------------------------------------------------------------------------
+  def random_seed(self, seed: int):
+    self._random_seed(seed)
+  # --------------------------------------------------------------------------------------------------------------------
+# ======================================================================================================================s
+
+
+
+
+
+# ======================================================================================================================
+class DataSetBase(ABC):
+  # --------------------------------------------------------------------------------------------------------------------
+  # Constructor
+  def __init__(self, name: str, variant: str | None = None, file_store=None, random_seed: int | None = None,
+               callbacks: DataSetCallbacks | None = None):
+    # ..................// Instance Fields \\.........................
+    self.name = name
+    self.variant = variant
+    self.fs = None
+    self._determine_local_filestore(file_store)
+    assert self.fs is not None, ERR_DS_MUST_PROVIDE_LOCAL_FILESTORE
+    self.random_seed = random_seed
+
+    self.all_ids = None
+    self.all_samples = None
+    self.all_labels = None
+
+    self.feature_count = None
+    self.class_count = None
+    self.sample_count = None
+
+    self.callbacks: DataSetCallbacks = callbacks
+
+    self.hprm: dict | None = None
+    self.ts: SampleSet | None = None
+    self.vs: SampleSet | None = None
+    self.us: SampleSet | None = None
+    self.preprocessor: SamplePreprocessor = VoidPreprocessor(self)
+
+    self.is_split = False
+    # ................................................................
+
+  # --------------------------------------------------------------------------------------
+  def split(self, validation_samples_pc=0.10,
+            preprocessing: DataPreprocessingKind | None = DataPreprocessingKind.STANDARDIZE,
+            random_seed: int=2021):
+
+    nTS_Samples, nVS_Samples, nTS_Labels, nVS_Labels = train_test_split(self.all_samples, self.all_labels,
+                                                                        test_size=validation_samples_pc,
+                                                                        random_state=random_seed)
+    if preprocessing == DataPreprocessingKind.MIN_MAX_NORMALIZE:
+      self.preprocessor = MinMaxScaler().fit(nTS_Samples)
+    elif preprocessing == DataPreprocessingKind.STANDARDIZE:
+      self.preprocessor = StandardScaler().fit(nTS_Samples)
+    else:
+      self.preprocessor = None
+
+    if self.preprocessor is not None:
+      nTS_Samples = self.preprocessor.transform(nTS_Samples)
+      nVS_Samples = self.preprocessor.transform(nVS_Samples)
+
+    # (Re)creating the subsets of the dataset after the splits have been created
+    self.ts = SampleSet(self, nTS_Samples, nTS_Labels, kind=SampleSetKind.TRAINING_SET)
+    if preprocessing == DataPreprocessingKind.STANDARDIZE:
+      self.ts.mean = self.preprocessor.mean_
+      self.ts.std = self.preprocessor.scale_
+
+    self.vs = SampleSet(self, nVS_Samples, nVS_Labels, kind=SampleSetKind.VALIDATION_SET)
+
+    self.is_split = True
+    return self
+  # --------------------------------------------------------------------------------------------------------------------
+  @property
+  def dataset_code(self):
+    sUniqueName = f"{self.name.upper()}"
+    if self.variant is not None:
+      sUniqueName += f"_{self.variant.upper()}"
+    return sUniqueName
+  # --------------------------------------------------------------------------------------------------------------------
+  def _determine_local_filestore(self, file_store):
+    if (file_store is not None):
+      if isinstance(file_store, FileStore):
+        self.fs = file_store
+      elif isinstance(file_store, str):
+        if not os.path.exists(file_store):
+          raise Exception(ERR_DS_FOLDER_NOT_FOUND % file_store)
+        self.fs = FileStore(file_store)
+    else:
+      assert mlsys.filesys is not None, ERR_MLSYS_FILESYS_NOT_INITIALIZED
+
+      self.fs: FileStore = mlsys.filesys.datasets.subfs(self.dataset_code)
+  # --------------------------------------------------------------------------------------------------------------------
+  @property
+  def filesystem_folder(self):
+    return self.fs.absolute_path
+  # --------------------------------------------------------------------------------------------------------------------
+  def read_hyperparams(self):
+    pass # Optionally override
+  # --------------------------------------------------------------------------------------------------------------------
+  @abstractmethod
+  def load_data(self):
+    pass # Must implement
+  # --------------------------------------------------------------------------------------------------------------------
+  def load_cache(self, is_vector_samples=True, is_last_axis_features=True):
+    nSuffix = ""
+    if is_vector_samples:
+      nSuffix = "-vec"
+    elif not is_last_axis_features:
+      nSuffix = "-torch"
+
+    nTSSamples = self.fs.obj.load(f"{self.dataset_code}-TS-Samples{nSuffix}.pkl")
+    nVSSamples = self.fs.obj.load(f"{self.dataset_code}-VS-Samples{nSuffix}.pkl")
+
+    nTSLabels = self.fs.obj.load(f"{self.dataset_code}-TS-Labels{nSuffix}.pkl")
+    nVSLabels = self.fs.obj.load(f"{self.dataset_code}-VS-Labels{nSuffix}.pkl")
+
+    return nTSSamples, nVSSamples, nTSLabels, nVSLabels
+  # --------------------------------------------------------------------------------------------------------------------
+  def save_cache(self, ts_samples, vs_samples, ts_labels, vs_labels, is_vector_samples=True, is_last_axis_features=True):
+    nSuffix = ""
+    if is_vector_samples:
+      nSuffix = "-vec"
+    elif not is_last_axis_features:
+      nSuffix = "-torch"
+    self.fs.obj.save(ts_samples, f"{self.dataset_code}-TS-Samples{nSuffix}.pkl")
+    self.fs.obj.save(vs_samples, f"{self.dataset_code}-VS-Samples{nSuffix}.pkl")
+
+    self.fs.obj.save(ts_labels, f"{self.dataset_code}-TS-Labels{nSuffix}.pkl")
+    self.fs.obj.save(vs_labels, f"{self.dataset_code}-VS-Labels{nSuffix}.pkl")
+  # --------------------------------------------------------------------------------------------------------------------
+  def prepare(self, hyperparams: dict | None = None):
+    self.hprm = hyperparams
+
+    # VIRTUAL CALL: Reads the hyperparameters into instance variables
+    if self.hprm is not None:
+      self.read_hyperparams()
+
+    if (self.callbacks is not None):
+      assert self.callbacks._lazy_download is not None, ERR_DS_NO_RANDOM_SEED_INITIALIZER_CALLBACK
+      if self.callbacks._lazy_download is not None:
+        self.callbacks.lazy_download(self.fs)
+
+    if (self.random_seed is not None):
+      bIsInitRandomSeed = False
+      if self.callbacks is not None:
+        if self.callbacks._random_seed is not None:
+          self.callbacks.random_seed(self.random_seed)
+          bIsInitRandomSeed = True
+      if not bIsInitRandomSeed:
+        mlsys.random_seed_all(self.random_seed)
+
+    self.ts = None
+    self.vs = None
+    self.us = None
+
+    # VIRTUAL CALL: Imports the dataset from the source local/remote filestore to the local cache.
+    self.load_data()
+
+    if self.is_split:
+      assert self.ts is not None, ERR_DS_SUBSET_MUST_HAVE_TS
+      assert self.ts.kind == SampleSetKind.TRAINING_SET, ERR_DS_SUBSET_INVALID_SETUP
+      if self.vs is not None:
+        assert self.vs.kind == SampleSetKind.VALIDATION_SET, ERR_DS_SUBSET_INVALID_SETUP
+
+      if self.us is not None:
+        assert self.us.kind == SampleSetKind.UNKNOWN_TEST_SET, ERR_DS_SUBSET_INVALID_SETUP
+
+    return self
+  # --------------------------------------------------------------------------------------------------------------------
+  def assign(self, data, label_columns: range):
+    self.all_samples, self.all_labels, self.all_ids = None, None, None
+    if isinstance(data, tuple):
+      self.all_samples, self.all_labels = data
+    elif isinstance(data, np.ndarray):
+      self.all_samples = data
+    elif isinstance(data, dict):
+      if ("samples" in dict):
+        self.all_samples = data["samples"]
+      if ("labels" in dict):
+        self.all_labels = data["labels"]
+      if ("ids" in dict):
+        self.all_ids = data["ids"]
+    elif isinstance(data, pd.DataFrame):
+      if isinstance(data.columns, pd.Index):
+        nData = data.iloc[1:].to_numpy()
+      else:
+        nData = data.to_numpy()
+
+      if label_columns is None:
+        self.all_samples = nData
+      else:
+        if label_columns.start >= 0:
+          if label_columns.stop is None:
+            self.all_labels = nData[:, label_columns.start]
+            self.all_samples = nData[:, label_columns.start + 1:]
+          else:
+            self.all_labels = nData[:, label_columns.start:label_columns.stop + 1]
+            self.all_samples = nData[:, label_columns.stop + 1:]
+        else:
+          self.all_samples = nData[:, :label_columns.start]
+          self.all_labels = nData[:, label_columns.start:]
+
+    if self.all_ids is None:
+      self.all_ids = range(len(self.all_samples)) + 1
+
+    return self
+  # --------------------------------------------------------------------------------------------------------------------
+  def print_info(self):
+    print(f"Dataset [{self.dataset_code}]")
+    self.ts.print_info()
+    if self.vs is not None:
+      self.vs.print_info()
+    if self.us is not None:
+      self.us.print_info()
+  # --------------------------------------------------------------------------------------------------------------------
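The new `DataSetBase` turns the split/preprocess/cache plumbing into a template: a subclass only has to implement `load_data()` (and optionally `read_hyperparams()`), after which callers chain `prepare()` and `split()`. A minimal sketch of such a subclass; the dataset name, folder, and CSV layout are hypothetical, and `file_store` must point at an existing folder:

```python
import os
import pandas as pd
from radnn.data.dataset_base import DataSetBase
from radnn.data import DataPreprocessingKind

# Hypothetical subclass: load_data() is the only abstract method of DataSetBase
class MyTabularDataSet(DataSetBase):
  def load_data(self):
    # Hypothetical CSV sitting inside the dataset's local filestore folder
    dfData = pd.read_csv(os.path.join(self.filesystem_folder, "my_tabular.csv"))
    self.all_labels  = dfData["target"].to_numpy()              # "target" is a hypothetical label column
    self.all_samples = dfData.drop(columns=["target"]).to_numpy()

# file_store may be a FileStore instance or an existing folder path (see _determine_local_filestore)
oDataset = MyTabularDataSet(name="mytab", file_store="/data/mytab", random_seed=2021)
oDataset.prepare().split(validation_samples_pc=0.15,
                         preprocessing=DataPreprocessingKind.MIN_MAX_NORMALIZE)
print(oDataset.ts.sample_count, oDataset.vs.sample_count)
```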
{radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/dataset_base_legacy.py

@@ -25,7 +25,7 @@


 import numpy as np
-from sklearn.model_selection import train_test_split # import a standalone procedure
+from sklearn.model_selection import train_test_split # import a standalone procedure toyfunction from the pacakge


 # =========================================================================================================================
{radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/errors.py

@@ -22,11 +22,16 @@
 # SOFTWARE.

 # ......................................................................................
+
+ERR_MLSYS_FILESYS_NOT_INITIALIZED = "The filesystem for the Machine Learning host system has not been initialized."
+
+
 ERR_NO_CALLBACKS = "You should assign callbacks to the dataset perform proper random seed initialization for your framework."
-
+ERR_DS_NO_RANDOM_SEED_INITIALIZER_CALLBACK = "Callback method for random seed initialization has not been defined."
+ERR_DS_CALLBACKS_NO_LAZY_DOWNLOADER = "Callback method for downloading the dataset has not been defined."

-
-
-
-
-
+ERR_DS_SUBSET_MUST_HAVE_TS = "A dataset must have at least a training subset."
+ERR_DS_SUBSET_INVALID_SETUP = "Invalid sample subset setup. Please use one of the valid kinds: 'training/train/ts', 'validation/val/vs', 'testing/test/us'."
+ERR_DS_SUBSET_MUST_HAVE_SAMPLES = "The subset has no samples, check the implementation of your dataset class."
+ERR_DS_FOLDER_NOT_FOUND = "The dataset was not found under the folder %s"
+ERR_DS_MUST_PROVIDE_LOCAL_FILESTORE = "You must provide a local filestore/path for the dataset"
{radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/sample_preprocessor.py

@@ -47,6 +47,9 @@ class SamplePreprocessor(ABC):
     pass
   # --------------------------------------------------------------------------------------------------------------------

+
+
+
 # ======================================================================================================================
 class VoidPreprocessor(SamplePreprocessor):
   # --------------------------------------------------------------------------------------------------------------------