PyPI - radnn - Versions diffs - 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl - Mend

radnn 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

radnn/__init__.py +4 -1
radnn/data/__init__.py +2 -2
radnn/data/constants.py +8 -0
radnn/data/custom_data_set.py +44 -29
radnn/data/dataset_base.py +174 -76
radnn/data/dataset_base_legacy.py +1 -1
radnn/data/errors.py +11 -6
radnn/data/sample_preprocessor.py +3 -0
radnn/data/sample_set.py +50 -31
radnn/data/sample_set_kind.py +21 -5
radnn/data/sample_set_simple.py +62 -1
radnn/data/sequence_dataset.py +4 -4
radnn/experiment/ml_experiment.py +2 -2
radnn/experiment/ml_experiment_log.py +25 -19
radnn/learn/constants.py +24 -0
radnn/learn/torch/ml_model_freezer.py +1 -1
radnn/plots/__init__.py +3 -2
radnn/plots/plot_histogram_of_classes.py +6 -84
radnn/plots/plot_legacy.py +103 -0
radnn/plots/plot_roc.py +1 -0
radnn/system/hosts/windows_host.py +1 -1
radnn/utils.py +7 -4
{radnn-0.1.4.dist-info → radnn-0.1.6.dist-info}/METADATA +1 -1
{radnn-0.1.4.dist-info → radnn-0.1.6.dist-info}/RECORD +27 -24
{radnn-0.1.4.dist-info → radnn-0.1.6.dist-info}/WHEEL +0 -0
{radnn-0.1.4.dist-info → radnn-0.1.6.dist-info}/licenses/LICENSE.txt +0 -0
{radnn-0.1.4.dist-info → radnn-0.1.6.dist-info}/top_level.txt +0 -0

radnn/__init__.py CHANGED Viewed

@@ -8,10 +8,13 @@
 # Version 0.1.0    [2026-01-07]
 # Version 0.1.1    [2025-01-08]
 # Version 0.1.4    [2025-01-26]
-__version__ = "0.1.4"
+# Version 0.1.5    [2025-02-02]
+# Version 0.1.6    [2025-02-03]
+__version__ = "0.1.6"
 from .system import FileStore, FileSystem
 from .ml_system import MLSystem
 from .ml_system import mlsys
 from .utils import print_tensor, order_str
 from .errors import Errors
+from .learn.constants import MLTask

radnn/data/__init__.py CHANGED Viewed

@@ -6,5 +6,5 @@ from .sample_set_kind import SampleSetKind
 from .sample_preprocessor import SamplePreprocessor
 from .dataset_factory import DatasetFactory, DatasetBuildAdapter
-from .custom_data_set import LegacyDataSet
+from .constants import DataPreprocessingKind
+from .custom_data_set import LegacyDataSet

radnn/data/constants.py ADDED Viewed

@@ -0,0 +1,8 @@
+from enum import Enum
+# =========================================================================================================================
+class DataPreprocessingKind(Enum):
+  MIN_MAX_NORMALIZE = 0
+  STANDARDIZE = 1
+# =========================================================================================================================

radnn/data/custom_data_set.py CHANGED Viewed

@@ -23,9 +23,13 @@
 # ......................................................................................
-from sklearn.model_selection import train_test_split  # import a standalone procedure function from the pacakge
+from sklearn.model_selection import train_test_split  # import a standalone procedure toyfunction from the pacakge
+from sklearn.preprocessing import StandardScaler, MinMaxScaler
 from radnn import mlsys
 from radnn.data.sample_set_simple import SampleSet
+from .constants import DataPreprocessingKind
+from enum import Enum
 # =========================================================================================================================
 class LegacyDataSet(object):
@@ -41,27 +45,49 @@ class LegacyDataSet(object):
     self.samples = None
     self.labels = None
+    self.preprocessor = None
+    self.mean = None
+    self.std = None
     # training set object
     self.ts: SampleSet | None = None
     # validation set object
     self.vs: SampleSet | None = None
     # unknown test set object
-    self.ut: SampleSet | None = None
+    self.us: SampleSet | None = None
     # ................................................................
     if self.random_seed is not None:
       mlsys.random_seed_all(self.random_seed)
+  # --------------------------------------------------------------------------------------
+  def split(self, p_nValidationSamplesPC=0.10, preprocessing: DataPreprocessingKind | None = DataPreprocessingKind.STANDARDIZE):
+    oTS_Samples, oVS_Samples, oTS_Labels, oVS_Labels = train_test_split(self.samples, self.labels ,
+                                                                        test_size=p_nValidationSamplesPC,
+                                                                        random_state=2021)
+    if preprocessing == DataPreprocessingKind.MIN_MAX_NORMALIZE:
+      self.preprocessor = MinMaxScaler().fit(oTS_Samples)
+    elif preprocessing == DataPreprocessingKind.STANDARDIZE:
+      self.preprocessor = StandardScaler().fit(oTS_Samples)
+    if self.preprocessor is not None:
+      oTS_Samples = self.preprocessor.transform(oTS_Samples)
+      oVS_Samples = self.preprocessor.transform(oVS_Samples)
+    # (Re)creating the subsets of the dataset after the splits have been created
+    self.ts = SampleSet(self, oTS_Samples, oTS_Labels)
+    self.vs = SampleSet(self, oVS_Samples, oVS_Labels)
+    print("%d samples in the Training Set" % self.ts.sample_count)
+    print("%d samples in the Validation Set" % self.vs.sample_count)
   # --------------------------------------------------------------------------------------
   # Backwards Compatibility
   @property
   def TSSamples(self):
     return self.ts.samples
   @property
   def ts_samples(self):
     return self.ts_samples
   @property
   def TSLabels(self):
     return self.ts.labels
@@ -69,7 +95,7 @@ class LegacyDataSet(object):
   @property
   def ts_labels(self):
     return self.ts.labels
   @property
   def TSSampleCount(self):
     return self.ts.sample_count
@@ -77,40 +103,42 @@ class LegacyDataSet(object):
   @property
   def ts_sample_count(self):
     return self.ts.sample_count
   def VSSamples(self):
     return self.vs.samples
   @property
   def vs_samples(self):
     return self.vs.samples
   @property
   def VSLabels(self):
     return self.vs.labels
   @property
   def vs_labels(self):
     return self.vs.labels
   @property
   def VSSampleCount(self):
     return self.vs.sample_count
   @property
   def vs_sample_count(self):
     return self.vs.sample_count
   @property
   def FeatureCount(self):
     return self.feature_count
   @property
   def ClassCount(self):
     return self.class_count
   @property
   def ClassCount(self):
     return self.class_count
   @property
   def SampleCount(self):
     return self.sample_count
@@ -118,21 +146,8 @@ class LegacyDataSet(object):
   @property
   def Samples(self):
     return self.samples
   @property
   def Labels(self):
     return self.labels
-  # --------------------------------------------------------------------------------------
-  def split(self, p_nValidationSamplesPC=0.10):
-    oTS_Samples, oVS_Samples, oTS_Labels, oVS_Labels = train_test_split(self.samples, self.labels ,
-                                                                        test_size=p_nValidationSamplesPC,
-                                                                        random_state=2021)
-    # (Re)creating the subsets of the dataset after the splits have been created
-    self.ts = SampleSet(self, oTS_Samples, oTS_Labels)
-    self.vs = SampleSet(self, oVS_Samples, oVS_Labels)
-    print("%d samples in the Training Set" % self.ts.sample_count)
-    print("%d samples in the Validation Set" % self.vs.sample_count)
-  # --------------------------------------------------------------------------------------
 # =========================================================================================================================

radnn/data/dataset_base.py CHANGED Viewed

@@ -26,140 +26,238 @@ import os
 import numpy as np
 import pandas as pd
 from abc import ABC, abstractmethod
-from .sample_set import SampleSet
+from .sample_set_simple import SampleSet
 from .sample_set_kind import SampleSetKind
 from .sample_preprocessor import SamplePreprocessor, VoidPreprocessor
 from .errors import *
 from radnn import FileStore
+from radnn import mlsys
+from .constants import DataPreprocessingKind
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import MinMaxScaler, StandardScaler
 # ======================================================================================================================
 class DataSetCallbacks(object):
   # --------------------------------------------------------------------------------------------------------------------
-  def __init__(self, lazy_loader=None, random_seeder=None):
-    self.lazy_loader = lazy_loader
-    self.random_seeder = random_seeder
+  def __init__(self, download_method=None, seed_method=None):
+    self._lazy_download = download_method
+    self._random_seed = seed_method
   # --------------------------------------------------------------------------------------------------------------------
-  def lazy_load(self):
-    self.lazy_loader()
+  def lazy_download(self, fs):
+    self._lazy_download(fs)
   # --------------------------------------------------------------------------------------------------------------------
-  def initialize_random_seed(self, seed: int):
-    self.random_seeder(seed)
+  def random_seed(self, seed: int):
+    self._random_seed(seed)
   # --------------------------------------------------------------------------------------------------------------------
+# ======================================================================================================================s
 # ======================================================================================================================
 class DataSetBase(ABC):
   # --------------------------------------------------------------------------------------------------------------------
   # Constructor
-  def __init__(self, name: str, variant: str|None=None, file_store=None, random_seed: int | None=None, callbacks: DataSetCallbacks | None = None):
+  def __init__(self, name: str, variant: str | None = None, file_store=None, random_seed: int | None = None,
+               callbacks: DataSetCallbacks | None = None):
     # ..................// Instance Fields \\.........................
-    self.fs: FileStore|None = file_store
-    if (file_store is not None) and isinstance(file_store, str):
-      if not os.path.exists(file_store):
-        raise Exception(ERR_DATASET_FOLDER_NOT_FOUND % file_store)
-      self.fs = FileStore(file_store)
-    assert self.fs is not None, ERR_DATASET_MUST_PROVIDE_LOCAL_FILESTORE
-    self.name                 = name
-    self.variant              = variant
-    self.random_seed          = random_seed
-    self.callbacks: DataSetCallbacks = callbacks
+    self.name = name
+    self.variant = variant
+    self.fs = None
+    self._determine_local_filestore(file_store)
+    assert self.fs is not None, ERR_DS_MUST_PROVIDE_LOCAL_FILESTORE
+    self.random_seed = random_seed
+    self.all_ids = None
+    self.all_samples = None
+    self.all_labels = None
+    self.feature_count = None
+    self.class_count = None
+    self.sample_count = None
-    self.hparams :dict|None = None
-    self.ts: SampleSet|None = None
-    self.vs: SampleSet|None = None
-    self.ut: SampleSet|None = None
+    self.callbacks: DataSetCallbacks = callbacks
+    self.hprm: dict | None = None
+    self.ts: SampleSet | None = None
+    self.vs: SampleSet | None = None
+    self.us: SampleSet | None = None
     self.preprocessor: SamplePreprocessor = VoidPreprocessor(self)
+    self.is_split = False
     # ................................................................
-    if (self.random_seed is not None):
-      assert self.callbacks is not None, ERR_NO_CALLBACKS
-      assert self.callbacks.random_seeder is not None, ERR_NO_RANDOM_SEED_INITIALIZER_CALLBACK
-      self.callbacks.initialize_random_seed(self.random_seed)
+  # --------------------------------------------------------------------------------------
+  def split(self, validation_samples_pc=0.10,
+            preprocessing: DataPreprocessingKind | None = DataPreprocessingKind.STANDARDIZE,
+            random_seed: int=2021):
+    nTS_Samples, nVS_Samples, nTS_Labels, nVS_Labels = train_test_split(self.all_samples, self.all_labels,
+                                                                        test_size=validation_samples_pc,
+                                                                        random_state=random_seed)
+    if preprocessing == DataPreprocessingKind.MIN_MAX_NORMALIZE:
+      self.preprocessor = MinMaxScaler().fit(nTS_Samples)
+    elif preprocessing == DataPreprocessingKind.STANDARDIZE:
+      self.preprocessor = StandardScaler().fit(nTS_Samples)
+    else:
+      self.preprocessor = None
+    if self.preprocessor is not None:
+      nTS_Samples = self.preprocessor.transform(nTS_Samples)
+      nVS_Samples = self.preprocessor.transform(nVS_Samples)
+    # (Re)creating the subsets of the dataset after the splits have been created
+    self.ts = SampleSet(self, nTS_Samples, nTS_Labels, kind=SampleSetKind.TRAINING_SET)
+    if preprocessing == DataPreprocessingKind.STANDARDIZE:
+      self.ts.mean = self.preprocessor.mean_
+      self.ts.std = self.preprocessor.scale_
+    self.vs = SampleSet(self, nVS_Samples, nVS_Labels, kind=SampleSetKind.VALIDATION_SET)
+    self.is_split = True
+    return self
   # --------------------------------------------------------------------------------------------------------------------
   @property
-  def filesystem_folder(self):
-      return self.fs.absolute_path
+  def dataset_code(self):
+    sUniqueName = f"{self.name.upper()}"
+    if self.variant is not None:
+      sUniqueName += f"_{self.variant.upper()}"
+    return sUniqueName
   # --------------------------------------------------------------------------------------------------------------------
-  @abstractmethod
-  def do_read_hyperparams(self):
-    pass # must implement concrete method
+  def _determine_local_filestore(self, file_store):
+    if (file_store is not None):
+      if isinstance(file_store, FileStore):
+        self.fs = file_store
+      elif isinstance(file_store, str):
+        if not os.path.exists(file_store):
+          raise Exception(ERR_DS_FOLDER_NOT_FOUND % file_store)
+        self.fs = FileStore(file_store)
+    else:
+      assert mlsys.filesys is not None, ERR_MLSYS_FILESYS_NOT_INITIALIZED
+      self.fs: FileStore = mlsys.filesys.datasets.subfs(self.dataset_code)
   # --------------------------------------------------------------------------------------------------------------------
-  @abstractmethod
-  def do_import_data(self):
-    pass # must implement concrete method
+  @property
+  def filesystem_folder(self):
+    return self.fs.absolute_path
   # --------------------------------------------------------------------------------------------------------------------
-  @abstractmethod
-  def do_prepare_data(self):
-    pass # could optionally override
+  def read_hyperparams(self):
+    pass # Optionally override
   # --------------------------------------------------------------------------------------------------------------------
   @abstractmethod
-  def do_create_sample_sets(self):
-    pass # must implement concrete method
+  def load_data(self):
+    pass # Must implement
+  # --------------------------------------------------------------------------------------------------------------------
+  def load_cache(self, is_vector_samples=True, is_last_axis_features=True):
+    nSuffix = ""
+    if is_vector_samples:
+      nSuffix = "-vec"
+    elif not is_last_axis_features:
+      nSuffix = "-torch"
+    nTSSamples = self.fs.obj.load(f"{self.dataset_code}-TS-Samples{nSuffix}.pkl")
+    nVSSamples = self.fs.obj.load(f"{self.dataset_code}-VS-Samples{nSuffix}.pkl")
+    nTSLabels = self.fs.obj.load(f"{self.dataset_code}-TS-Labels{nSuffix}.pkl")
+    nVSLabels = self.fs.obj.load(f"{self.dataset_code}-VS-Labels{nSuffix}.pkl")
+    return nTSSamples, nVSSamples, nTSLabels, nVSLabels
   # --------------------------------------------------------------------------------------------------------------------
-  def prepare(self, hyperparams: dict|None = None):
-    self.hparams = hyperparams
-    if self.hparams is not None:
-      self.do_read_hyperparams()
+  def save_cache(self, ts_samples, vs_samples, ts_labels, vs_labels, is_vector_samples=True, is_last_axis_features=True):
+    nSuffix = ""
+    if is_vector_samples:
+      nSuffix = "-vec"
+    elif not is_last_axis_features:
+      nSuffix = "-torch"
+    self.fs.obj.save(ts_samples, f"{self.dataset_code}-TS-Samples{nSuffix}.pkl")
+    self.fs.obj.save(vs_samples, f"{self.dataset_code}-VS-Samples{nSuffix}.pkl")
+    self.fs.obj.save(ts_labels, f"{self.dataset_code}-TS-Labels{nSuffix}.pkl")
+    self.fs.obj.save(vs_labels, f"{self.dataset_code}-VS-Labels{nSuffix}.pkl")
+  # --------------------------------------------------------------------------------------------------------------------
+  def prepare(self, hyperparams: dict | None = None):
+    self.hprm = hyperparams
+    # VIRTUAL CALL: Reads the hyperparameters into instance variables
+    if self.hprm is not None:
+      self.read_hyperparams()
     if (self.callbacks is not None):
-      if self.callbacks.lazy_loader is not None:
-        self.callbacks.lazy_loader()
-    self.do_import_data()
-    self.do_prepare_data()
+      assert self.callbacks._lazy_download is not None, ERR_DS_NO_RANDOM_SEED_INITIALIZER_CALLBACK
+      if self.callbacks._lazy_download is not None:
+        self.callbacks.lazy_download(self.fs)
+    if (self.random_seed is not None):
+      bIsInitRandomSeed = False
+      if self.callbacks is not None:
+        if self.callbacks._random_seed is not None:
+          self.callbacks.random_seed(self.random_seed)
+          bIsInitRandomSeed = True
+      if not bIsInitRandomSeed:
+        mlsys.random_seed_all(self.random_seed)
     self.ts = None
     self.vs = None
     self.us = None
-    self.do_create_sample_sets()
-    assert self.ts is not None, ERR_SUBSET_MUST_HAVE_TS
-    assert self.ts.info.kind == SampleSetKind.TRAINING_SET.value, ERR_SUBSET_INVALID_SETUP
-    if self.vs is not None:
-      assert self.ts.info.kind == SampleSetKind.TRAINING_SET.value, ERR_SUBSET_INVALID_SETUP
+    # VIRTUAL CALL:  Imports the dataset from the source local/remote filestore to the local cache.
+    self.load_data()
+    if self.is_split:
+      assert self.ts is not None, ERR_DS_SUBSET_MUST_HAVE_TS
+      assert self.ts.kind == SampleSetKind.TRAINING_SET, ERR_DS_SUBSET_INVALID_SETUP
+      if self.vs is not None:
+        assert self.vs.kind == SampleSetKind.VALIDATION_SET, ERR_DS_SUBSET_INVALID_SETUP
     if self.us is not None:
-      assert self.ts.info.kind == SampleSetKind.TRAINING_SET.value, ERR_SUBSET_INVALID_SETUP
+      assert self.us.kind == SampleSetKind.UNKNOWN_TEST_SET, ERR_DS_SUBSET_INVALID_SETUP
+    return self
   # --------------------------------------------------------------------------------------------------------------------
   def assign(self, data, label_columns: range):
+    self.all_samples, self.all_labels, self.all_ids = None, None, None
     if isinstance(data, tuple):
-      self.samples, self.labels = data
+      self.all_samples, self.all_labels = data
     elif isinstance(data, np.ndarray):
-      self.samples = data
+      self.all_samples = data
     elif isinstance(data, dict):
-      if ("samples" in dict) and ("labels" in dict):
-        self.samples = data["samples"]
-        self.labels = data["labels"]
-      else:
-        pass # Support other formats
+      if ("samples" in dict):
+        self.all_samples = data["samples"]
+      if ("labels" in dict):
+        self.all_labels = data["labels"]
+      if ("ids" in dict):
+        self.all_ids = data["ids"]
     elif isinstance(data, pd.DataFrame):
       if isinstance(data.columns, pd.Index):
         nData = data.iloc[1:].to_numpy()
       else:
         nData = data.to_numpy()
       if label_columns is None:
-        self.samples = nData
+        self.all_samples = nData
       else:
         if label_columns.start >= 0:
           if label_columns.stop is None:
-            self.labels = nData[:, label_columns.start]
-            self.samples = nData[:, label_columns.start + 1:]
+            self.all_labels = nData[:, label_columns.start]
+            self.all_samples = nData[:, label_columns.start + 1:]
           else:
-            self.labels = nData[:, label_columns.start:label_columns.stop + 1]
-            self.samples = nData[:, label_columns.stop + 1:]
+            self.all_labels = nData[:, label_columns.start:label_columns.stop + 1]
+            self.all_samples = nData[:, label_columns.stop + 1:]
         else:
-          self.samples = nData[:, :label_columns.start]
-          self.labels  = nData[:, label_columns.start:]
+          self.all_samples = nData[:, :label_columns.start]
+          self.all_labels = nData[:, label_columns.start:]
+    if self.all_ids is None:
+      self.all_ids = range(len(self.all_samples)) + 1
     return self
   # --------------------------------------------------------------------------------------------------------------------
   def print_info(self):
-    if self.variant is not None:
-      print(f"Dataset [{self.name}] {self.variant}")
-    else:
-      print(f"Dataset [{self.name}]")
+    print(f"Dataset [{self.dataset_code}]")
     self.ts.print_info()
     if self.vs is not None:
       self.vs.print_info()
-    if self.ut is not None:
-      self.ut.print_info()
+    if self.us is not None:
+      self.us.print_info()
   # --------------------------------------------------------------------------------------------------------------------

radnn/data/dataset_base_legacy.py CHANGED Viewed

@@ -25,7 +25,7 @@
 import numpy as np
-from sklearn.model_selection import train_test_split  # import a standalone procedure function from the pacakge
+from sklearn.model_selection import train_test_split  # import a standalone procedure toyfunction from the pacakge
 # =========================================================================================================================

radnn/data/errors.py CHANGED Viewed

@@ -22,11 +22,16 @@
 # SOFTWARE.
 # ......................................................................................
+ERR_MLSYS_FILESYS_NOT_INITIALIZED = "The filesystem for the Machine Learning host system has not been initialized."
 ERR_NO_CALLBACKS = "You should assign callbacks to the dataset perform proper random seed initialization for your framework."
-ERR_NO_RANDOM_SEED_INITIALIZER_CALLBACK = "Callback method for random seed initialization has not been defined."
+ERR_DS_NO_RANDOM_SEED_INITIALIZER_CALLBACK = "Callback method for random seed initialization has not been defined."
+ERR_DS_CALLBACKS_NO_LAZY_DOWNLOADER = "Callback method for downloading the dataset has not been defined."
-ERR_SUBSET_MUST_HAVE_TS = "A dataset must have at least a training subset."
-ERR_SUBSET_INVALID_SETUP = "Invalid sample subset setup. Please use one of the valid kinds: 'training/train/ts', 'validation/val/vs', 'testing/test/us'."
-ERR_SUBSET_MUST_HAVE_SAMPLES = "The subset has no samples, check the implementation of your dataset class."
-ERR_DATASET_FOLDER_NOT_FOUND = "The dataset was not found under the folder %s"
-ERR_DATASET_MUST_PROVIDE_LOCAL_FILESTORE = "You must provide a local filestore/path for the dataset"
+ERR_DS_SUBSET_MUST_HAVE_TS = "A dataset must have at least a training subset."
+ERR_DS_SUBSET_INVALID_SETUP = "Invalid sample subset setup. Please use one of the valid kinds: 'training/train/ts', 'validation/val/vs', 'testing/test/us'."
+ERR_DS_SUBSET_MUST_HAVE_SAMPLES = "The subset has no samples, check the implementation of your dataset class."
+ERR_DS_FOLDER_NOT_FOUND = "The dataset was not found under the folder %s"
+ERR_DS_MUST_PROVIDE_LOCAL_FILESTORE = "You must provide a local filestore/path for the dataset"

radnn/data/sample_preprocessor.py CHANGED Viewed

@@ -47,6 +47,9 @@ class SamplePreprocessor(ABC):
     pass
   # --------------------------------------------------------------------------------------------------------------------
 # ======================================================================================================================
 class VoidPreprocessor(SamplePreprocessor):
   # --------------------------------------------------------------------------------------------------------------------

radnn 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl

radnn 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl