radnn 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
radnn/__init__.py CHANGED
@@ -8,10 +8,12 @@
8
8
  # Version 0.1.0 [2026-01-07]
9
9
  # Version 0.1.1 [2025-01-08]
10
10
  # Version 0.1.4 [2025-01-26]
11
- __version__ = "0.1.4"
11
+ # Version 0.1.5 [2025-02-02]
12
+ __version__ = "0.1.5"
12
13
 
13
14
  from .system import FileStore, FileSystem
14
15
  from .ml_system import MLSystem
15
16
  from .ml_system import mlsys
16
17
  from .utils import print_tensor, order_str
17
18
  from .errors import Errors
19
+ from .learn.constants import MLTask
radnn/data/__init__.py CHANGED
@@ -6,5 +6,5 @@ from .sample_set_kind import SampleSetKind
6
6
  from .sample_preprocessor import SamplePreprocessor
7
7
 
8
8
  from .dataset_factory import DatasetFactory, DatasetBuildAdapter
9
-
10
- from .custom_data_set import LegacyDataSet
9
+ from .constants import DataPreprocessingKind
10
+ from .custom_data_set import LegacyDataSet
@@ -0,0 +1,8 @@
1
+ from enum import Enum
2
+
3
+ # =========================================================================================================================
4
+ class DataPreprocessingKind(Enum):
5
+ MIN_MAX_NORMALIZE = 0
6
+ STANDARDIZE = 1
7
+ # =========================================================================================================================
8
+
@@ -24,8 +24,12 @@
24
24
  # ......................................................................................
25
25
 
26
26
  from sklearn.model_selection import train_test_split # import a standalone procedure function from the package
27
+ from sklearn.preprocessing import StandardScaler, MinMaxScaler
27
28
  from radnn import mlsys
28
29
  from radnn.data.sample_set_simple import SampleSet
30
+ from .constants import DataPreprocessingKind
31
+ from enum import Enum
32
+
29
33
 
30
34
  # =========================================================================================================================
31
35
  class LegacyDataSet(object):
@@ -41,27 +45,49 @@ class LegacyDataSet(object):
41
45
 
42
46
  self.samples = None
43
47
  self.labels = None
44
-
48
+ self.preprocessor = None
49
+ self.mean = None
50
+ self.std = None
51
+
45
52
  # training set object
46
53
  self.ts: SampleSet | None = None
47
54
  # validation set object
48
55
  self.vs: SampleSet | None = None
49
56
  # unknown test set object
50
- self.ut: SampleSet | None = None
57
+ self.us: SampleSet | None = None
51
58
  # ................................................................
52
59
  if self.random_seed is not None:
53
60
  mlsys.random_seed_all(self.random_seed)
54
-
61
+ # --------------------------------------------------------------------------------------
62
+ def split(self, p_nValidationSamplesPC=0.10, preprocessing: DataPreprocessingKind | None = DataPreprocessingKind.STANDARDIZE):
63
+ oTS_Samples, oVS_Samples, oTS_Labels, oVS_Labels = train_test_split(self.samples, self.labels ,
64
+ test_size=p_nValidationSamplesPC,
65
+ random_state=2021)
66
+ if preprocessing == DataPreprocessingKind.MIN_MAX_NORMALIZE:
67
+ self.preprocessor = MinMaxScaler().fit(oTS_Samples)
68
+ elif preprocessing == DataPreprocessingKind.STANDARDIZE:
69
+ self.preprocessor = StandardScaler().fit(oTS_Samples)
70
+
71
+ if self.preprocessor is not None:
72
+ oTS_Samples = self.preprocessor.transform(oTS_Samples)
73
+ oVS_Samples = self.preprocessor.transform(oVS_Samples)
74
+
75
+ # (Re)creating the subsets of the dataset after the splits have been created
76
+ self.ts = SampleSet(self, oTS_Samples, oTS_Labels)
77
+ self.vs = SampleSet(self, oVS_Samples, oVS_Labels)
78
+
79
+ print("%d samples in the Training Set" % self.ts.sample_count)
80
+ print("%d samples in the Validation Set" % self.vs.sample_count)
55
81
  # --------------------------------------------------------------------------------------
56
82
  # Backwards Compatibility
57
83
  @property
58
84
  def TSSamples(self):
59
85
  return self.ts.samples
60
-
86
+
61
87
  @property
62
88
  def ts_samples(self):
63
89
  return self.ts_samples
64
-
90
+
65
91
  @property
66
92
  def TSLabels(self):
67
93
  return self.ts.labels
@@ -69,7 +95,7 @@ class LegacyDataSet(object):
69
95
  @property
70
96
  def ts_labels(self):
71
97
  return self.ts.labels
72
-
98
+
73
99
  @property
74
100
  def TSSampleCount(self):
75
101
  return self.ts.sample_count
@@ -77,40 +103,42 @@ class LegacyDataSet(object):
77
103
  @property
78
104
  def ts_sample_count(self):
79
105
  return self.ts.sample_count
80
-
106
+
81
107
  def VSSamples(self):
82
108
  return self.vs.samples
83
-
109
+
84
110
  @property
85
111
  def vs_samples(self):
86
112
  return self.vs.samples
87
-
113
+
88
114
  @property
89
115
  def VSLabels(self):
90
116
  return self.vs.labels
91
-
117
+
92
118
  @property
93
119
  def vs_labels(self):
94
120
  return self.vs.labels
95
-
121
+
96
122
  @property
97
123
  def VSSampleCount(self):
98
124
  return self.vs.sample_count
99
-
125
+
100
126
  @property
101
127
  def vs_sample_count(self):
102
128
  return self.vs.sample_count
103
-
129
+
104
130
  @property
105
131
  def FeatureCount(self):
106
132
  return self.feature_count
133
+
107
134
  @property
108
135
  def ClassCount(self):
109
136
  return self.class_count
137
+
110
138
  @property
111
139
  def ClassCount(self):
112
140
  return self.class_count
113
-
141
+
114
142
  @property
115
143
  def SampleCount(self):
116
144
  return self.sample_count
@@ -118,21 +146,8 @@ class LegacyDataSet(object):
118
146
  @property
119
147
  def Samples(self):
120
148
  return self.samples
121
-
149
+
122
150
  @property
123
151
  def Labels(self):
124
152
  return self.labels
125
- # --------------------------------------------------------------------------------------
126
- def split(self, p_nValidationSamplesPC=0.10):
127
- oTS_Samples, oVS_Samples, oTS_Labels, oVS_Labels = train_test_split(self.samples, self.labels ,
128
- test_size=p_nValidationSamplesPC,
129
- random_state=2021)
130
-
131
- # (Re)creating the subsets of the dataset after the splits have been created
132
- self.ts = SampleSet(self, oTS_Samples, oTS_Labels)
133
- self.vs = SampleSet(self, oVS_Samples, oVS_Labels)
134
-
135
- print("%d samples in the Training Set" % self.ts.sample_count)
136
- print("%d samples in the Validation Set" % self.vs.sample_count)
137
- # --------------------------------------------------------------------------------------
138
153
  # =========================================================================================================================
@@ -31,135 +31,172 @@ from .sample_set_kind import SampleSetKind
31
31
  from .sample_preprocessor import SamplePreprocessor, VoidPreprocessor
32
32
  from .errors import *
33
33
  from radnn import FileStore
34
+ from radnn import mlsys
34
35
 
35
36
  # ======================================================================================================================
36
37
  class DataSetCallbacks(object):
37
38
  # --------------------------------------------------------------------------------------------------------------------
38
- def __init__(self, lazy_loader=None, random_seeder=None):
39
- self.lazy_loader = lazy_loader
40
- self.random_seeder = random_seeder
39
+ def __init__(self, download_method=None, seed_method=None):
40
+ self._lazy_download = download_method
41
+ self._random_seed = seed_method
41
42
  # --------------------------------------------------------------------------------------------------------------------
42
- def lazy_load(self):
43
- self.lazy_loader()
43
+ def lazy_download(self, fs):
44
+ self._lazy_download(fs)
44
45
  # --------------------------------------------------------------------------------------------------------------------
45
- def initialize_random_seed(self, seed: int):
46
- self.random_seeder(seed)
46
+ def random_seed(self, seed: int):
47
+ self._random_seed(seed)
47
48
  # --------------------------------------------------------------------------------------------------------------------
49
+ # ======================================================================================================================
50
+
51
+
52
+
48
53
 
49
54
 
50
55
  # ======================================================================================================================
51
56
  class DataSetBase(ABC):
52
57
  # --------------------------------------------------------------------------------------------------------------------
53
58
  # Constructor
54
- def __init__(self, name: str, variant: str|None=None, file_store=None, random_seed: int | None=None, callbacks: DataSetCallbacks | None = None):
59
+ def __init__(self, name: str, variant: str | None = None, file_store=None, random_seed: int | None = None,
60
+ callbacks: DataSetCallbacks | None = None):
55
61
  # ..................// Instance Fields \\.........................
56
- self.fs: FileStore|None = file_store
57
- if (file_store is not None) and isinstance(file_store, str):
58
- if not os.path.exists(file_store):
59
- raise Exception(ERR_DATASET_FOLDER_NOT_FOUND % file_store)
60
- self.fs = FileStore(file_store)
61
- assert self.fs is not None, ERR_DATASET_MUST_PROVIDE_LOCAL_FILESTORE
62
-
63
- self.name = name
64
- self.variant = variant
65
- self.random_seed = random_seed
62
+ self.name = name
63
+ self.variant = variant
64
+ self.fs = None
65
+ self._determine_local_filestore(file_store)
66
+ assert self.fs is not None, ERR_DS_MUST_PROVIDE_LOCAL_FILESTORE
67
+ self.random_seed = random_seed
68
+
69
+ self.all_ids = None
70
+ self.all_samples = None
71
+ self.all_labels = None
72
+
73
+ self.feature_count = None
74
+ self.class_count = None
75
+ self.sample_count = None
76
+
66
77
  self.callbacks: DataSetCallbacks = callbacks
67
-
68
- self.hparams :dict|None = None
69
- self.ts: SampleSet|None = None
70
- self.vs: SampleSet|None = None
71
- self.ut: SampleSet|None = None
78
+
79
+ self.hprm: dict | None = None
80
+ self.ts: SampleSet | None = None
81
+ self.vs: SampleSet | None = None
82
+ self.us: SampleSet | None = None
72
83
  self.preprocessor: SamplePreprocessor = VoidPreprocessor(self)
73
84
  # ................................................................
74
- if (self.random_seed is not None):
75
- assert self.callbacks is not None, ERR_NO_CALLBACKS
76
- assert self.callbacks.random_seeder is not None, ERR_NO_RANDOM_SEED_INITIALIZER_CALLBACK
77
- self.callbacks.initialize_random_seed(self.random_seed)
78
85
 
79
86
  # --------------------------------------------------------------------------------------------------------------------
80
87
  @property
88
+ def dataset_code(self):
89
+ sUniqueName = f"{self.name.upper()}"
90
+ if self.variant is not None:
91
+ sUniqueName += f"_{self.variant.upper()}"
92
+ return sUniqueName
93
+ # --------------------------------------------------------------------------------------------------------------------
94
+ def _determine_local_filestore(self, file_store):
95
+ if (file_store is not None):
96
+ if isinstance(file_store, FileStore):
97
+ self.fs = file_store
98
+ elif isinstance(file_store, str):
99
+ if not os.path.exists(file_store):
100
+ raise Exception(ERR_DS_FOLDER_NOT_FOUND % file_store)
101
+ self.fs = FileStore(file_store)
102
+ else:
103
+ assert mlsys.filesys is not None, ERR_MLSYS_FILESYS_NOT_INITIALIZED
104
+
105
+ self.fs: FileStore = mlsys.filesys.datasets.subfs(self.dataset_code)
106
+ # --------------------------------------------------------------------------------------------------------------------
107
+ @property
81
108
  def filesystem_folder(self):
82
- return self.fs.absolute_path
109
+ return self.fs.absolute_path
83
110
  # --------------------------------------------------------------------------------------------------------------------
84
- @abstractmethod
85
- def do_read_hyperparams(self):
86
- pass # must implement concrete method
111
+ def read_hyperparams(self):
112
+ pass # Optionally override
87
113
  # --------------------------------------------------------------------------------------------------------------------
88
114
  @abstractmethod
89
- def do_import_data(self):
90
- pass # must implement concrete method
115
+ def load_data(self):
116
+ pass # Must implement
91
117
  # --------------------------------------------------------------------------------------------------------------------
92
- @abstractmethod
93
- def do_prepare_data(self):
94
- pass # could optionally override
118
+ def load_cache(self):
119
+ pass # Optionally override
95
120
  # --------------------------------------------------------------------------------------------------------------------
96
- @abstractmethod
97
- def do_create_sample_sets(self):
98
- pass # must implement concrete method
121
+ def save_cache(self):
122
+ pass # Optionally override
99
123
  # --------------------------------------------------------------------------------------------------------------------
100
- def prepare(self, hyperparams: dict|None = None):
101
- self.hparams = hyperparams
102
- if self.hparams is not None:
103
- self.do_read_hyperparams()
104
-
124
+ def prepare(self, hyperparams: dict | None = None):
125
+ self.hprm = hyperparams
126
+
127
+ # VIRTUAL CALL: Reads the hyperparameters into instance variables
128
+ if self.hprm is not None:
129
+ self.read_hyperparams()
130
+
105
131
  if (self.callbacks is not None):
106
- if self.callbacks.lazy_loader is not None:
107
- self.callbacks.lazy_loader()
108
- self.do_import_data()
109
- self.do_prepare_data()
110
-
132
+ assert self.callbacks._lazy_download is not None, ERR_DS_NO_RANDOM_SEED_INITIALIZER_CALLBACK
133
+ if self.callbacks._lazy_download is not None:
134
+ self.callbacks.lazy_download(self.fs)
135
+
136
+ if (self.random_seed is not None):
137
+ assert self.callbacks is not None, ERR_NO_CALLBACKS
138
+ assert self.callbacks._random_seed is not None, ERR_DS_NO_RANDOM_SEED_INITIALIZER_CALLBACK
139
+ self.callbacks.random_seed(self.random_seed)
140
+
111
141
  self.ts = None
112
142
  self.vs = None
113
143
  self.us = None
114
- self.do_create_sample_sets()
115
-
116
- assert self.ts is not None, ERR_SUBSET_MUST_HAVE_TS
117
- assert self.ts.info.kind == SampleSetKind.TRAINING_SET.value, ERR_SUBSET_INVALID_SETUP
144
+ # VIRTUAL CALL: Imports the data from the source local/remote filestore to the local cache.
145
+ self.load_data()
146
+
147
+ assert self.ts is not None, ERR_DS_SUBSET_MUST_HAVE_TS
148
+ assert self.ts.kind == SampleSetKind.TRAINING_SET, ERR_DS_SUBSET_INVALID_SETUP
118
149
  if self.vs is not None:
119
- assert self.ts.info.kind == SampleSetKind.TRAINING_SET.value, ERR_SUBSET_INVALID_SETUP
150
+ assert self.ts.kind == SampleSetKind.TRAINING_SET, ERR_DS_SUBSET_INVALID_SETUP
120
151
  if self.us is not None:
121
- assert self.ts.info.kind == SampleSetKind.TRAINING_SET.value, ERR_SUBSET_INVALID_SETUP
152
+ assert self.ts.kind == SampleSetKind.TRAINING_SET, ERR_DS_SUBSET_INVALID_SETUP
153
+
154
+
155
+ return self
122
156
  # --------------------------------------------------------------------------------------------------------------------
123
157
  def assign(self, data, label_columns: range):
158
+ self.all_samples, self.all_labels, self.all_ids = None, None, None
124
159
  if isinstance(data, tuple):
125
- self.samples, self.labels = data
160
+ self.all_samples, self.all_labels = data
126
161
  elif isinstance(data, np.ndarray):
127
- self.samples = data
162
+ self.all_samples = data
128
163
  elif isinstance(data, dict):
129
- if ("samples" in dict) and ("labels" in dict):
130
- self.samples = data["samples"]
131
- self.labels = data["labels"]
132
- else:
133
- pass # Support other formats
164
+ if ("samples" in dict):
165
+ self.all_samples = data["samples"]
166
+ if ("labels" in dict):
167
+ self.all_labels = data["labels"]
168
+ if ("ids" in dict):
169
+ self.all_ids = data["ids"]
134
170
  elif isinstance(data, pd.DataFrame):
135
171
  if isinstance(data.columns, pd.Index):
136
172
  nData = data.iloc[1:].to_numpy()
137
173
  else:
138
174
  nData = data.to_numpy()
139
-
175
+
140
176
  if label_columns is None:
141
- self.samples = nData
177
+ self.all_samples = nData
142
178
  else:
143
179
  if label_columns.start >= 0:
144
180
  if label_columns.stop is None:
145
- self.labels = nData[:, label_columns.start]
146
- self.samples = nData[:, label_columns.start + 1:]
181
+ self.all_labels = nData[:, label_columns.start]
182
+ self.all_samples = nData[:, label_columns.start + 1:]
147
183
  else:
148
- self.labels = nData[:, label_columns.start:label_columns.stop + 1]
149
- self.samples = nData[:, label_columns.stop + 1:]
184
+ self.all_labels = nData[:, label_columns.start:label_columns.stop + 1]
185
+ self.all_samples = nData[:, label_columns.stop + 1:]
150
186
  else:
151
- self.samples = nData[:, :label_columns.start]
152
- self.labels = nData[:, label_columns.start:]
187
+ self.all_samples = nData[:, :label_columns.start]
188
+ self.all_labels = nData[:, label_columns.start:]
189
+
190
+ if self.all_ids is None:
191
+ self.all_ids = range(len(self.all_samples)) + 1
192
+
153
193
  return self
154
194
  # --------------------------------------------------------------------------------------------------------------------
155
195
  def print_info(self):
156
- if self.variant is not None:
157
- print(f"Dataset [{self.name}] {self.variant}")
158
- else:
159
- print(f"Dataset [{self.name}]")
196
+ print(f"Dataset [{self.dataset_code}]")
160
197
  self.ts.print_info()
161
198
  if self.vs is not None:
162
199
  self.vs.print_info()
163
- if self.ut is not None:
164
- self.ut.print_info()
200
+ if self.us is not None:
201
+ self.us.print_info()
165
202
  # --------------------------------------------------------------------------------------------------------------------
radnn/data/errors.py CHANGED
@@ -22,11 +22,16 @@
22
22
  # SOFTWARE.
23
23
 
24
24
  # ......................................................................................
25
+
26
+ ERR_MLSYS_FILESYS_NOT_INITIALIZED = "The filesystem for the Machine Learning host system has not been initialized."
27
+
28
+
25
29
  ERR_NO_CALLBACKS = "You should assign callbacks to the dataset perform proper random seed initialization for your framework."
26
- ERR_NO_RANDOM_SEED_INITIALIZER_CALLBACK = "Callback method for random seed initialization has not been defined."
30
+ ERR_DS_NO_RANDOM_SEED_INITIALIZER_CALLBACK = "Callback method for random seed initialization has not been defined."
31
+ ERR_DS_CALLBACKS_NO_LAZY_DOWNLOADER = "Callback method for downloading the data has not been defined."
27
32
 
28
- ERR_SUBSET_MUST_HAVE_TS = "A dataset must have at least a training subset."
29
- ERR_SUBSET_INVALID_SETUP = "Invalid sample subset setup. Please use one of the valid kinds: 'training/train/ts', 'validation/val/vs', 'testing/test/us'."
30
- ERR_SUBSET_MUST_HAVE_SAMPLES = "The subset has no samples, check the implementation of your dataset class."
31
- ERR_DATASET_FOLDER_NOT_FOUND = "The dataset was not found under the folder %s"
32
- ERR_DATASET_MUST_PROVIDE_LOCAL_FILESTORE = "You must provide a local filestore/path for the dataset"
33
+ ERR_DS_SUBSET_MUST_HAVE_TS = "A dataset must have at least a training subset."
34
+ ERR_DS_SUBSET_INVALID_SETUP = "Invalid sample subset setup. Please use one of the valid kinds: 'training/train/ts', 'validation/val/vs', 'testing/test/us'."
35
+ ERR_DS_SUBSET_MUST_HAVE_SAMPLES = "The subset has no samples, check the implementation of your dataset class."
36
+ ERR_DS_FOLDER_NOT_FOUND = "The dataset was not found under the folder %s"
37
+ ERR_DS_MUST_PROVIDE_LOCAL_FILESTORE = "You must provide a local filestore/path for the dataset"