radnn 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- radnn/__init__.py +4 -1
- radnn/data/__init__.py +2 -2
- radnn/data/constants.py +8 -0
- radnn/data/custom_data_set.py +44 -29
- radnn/data/dataset_base.py +174 -76
- radnn/data/dataset_base_legacy.py +1 -1
- radnn/data/errors.py +11 -6
- radnn/data/sample_preprocessor.py +3 -0
- radnn/data/sample_set.py +50 -31
- radnn/data/sample_set_kind.py +21 -5
- radnn/data/sample_set_simple.py +62 -1
- radnn/data/sequence_dataset.py +4 -4
- radnn/experiment/ml_experiment.py +2 -2
- radnn/experiment/ml_experiment_log.py +25 -19
- radnn/learn/constants.py +24 -0
- radnn/learn/torch/ml_model_freezer.py +1 -1
- radnn/plots/__init__.py +3 -2
- radnn/plots/plot_histogram_of_classes.py +6 -84
- radnn/plots/plot_legacy.py +103 -0
- radnn/plots/plot_roc.py +1 -0
- radnn/system/hosts/windows_host.py +1 -1
- radnn/utils.py +7 -4
- {radnn-0.1.4.dist-info → radnn-0.1.6.dist-info}/METADATA +1 -1
- {radnn-0.1.4.dist-info → radnn-0.1.6.dist-info}/RECORD +27 -24
- {radnn-0.1.4.dist-info → radnn-0.1.6.dist-info}/WHEEL +0 -0
- {radnn-0.1.4.dist-info → radnn-0.1.6.dist-info}/licenses/LICENSE.txt +0 -0
- {radnn-0.1.4.dist-info → radnn-0.1.6.dist-info}/top_level.txt +0 -0
radnn/__init__.py
CHANGED
|
@@ -8,10 +8,13 @@
|
|
|
8
8
|
# Version 0.1.0 [2026-01-07]
|
|
9
9
|
# Version 0.1.1 [2025-01-08]
|
|
10
10
|
# Version 0.1.4 [2025-01-26]
|
|
11
|
-
|
|
11
|
+
# Version 0.1.5 [2025-02-02]
|
|
12
|
+
# Version 0.1.6 [2025-02-03]
|
|
13
|
+
__version__ = "0.1.6"
|
|
12
14
|
|
|
13
15
|
from .system import FileStore, FileSystem
|
|
14
16
|
from .ml_system import MLSystem
|
|
15
17
|
from .ml_system import mlsys
|
|
16
18
|
from .utils import print_tensor, order_str
|
|
17
19
|
from .errors import Errors
|
|
20
|
+
from .learn.constants import MLTask
|
radnn/data/__init__.py
CHANGED
|
@@ -6,5 +6,5 @@ from .sample_set_kind import SampleSetKind
|
|
|
6
6
|
from .sample_preprocessor import SamplePreprocessor
|
|
7
7
|
|
|
8
8
|
from .dataset_factory import DatasetFactory, DatasetBuildAdapter
|
|
9
|
-
|
|
10
|
-
from .custom_data_set import LegacyDataSet
|
|
9
|
+
from .constants import DataPreprocessingKind
|
|
10
|
+
from .custom_data_set import LegacyDataSet
|
radnn/data/constants.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
from enum import Enum
|
|
2
|
+
|
|
3
|
+
# =========================================================================================================================
|
|
4
|
+
class DataPreprocessingKind(Enum):
|
|
5
|
+
MIN_MAX_NORMALIZE = 0
|
|
6
|
+
STANDARDIZE = 1
|
|
7
|
+
# =========================================================================================================================
|
|
8
|
+
|
radnn/data/custom_data_set.py
CHANGED
|
@@ -23,9 +23,13 @@
|
|
|
23
23
|
|
|
24
24
|
# ......................................................................................
|
|
25
25
|
|
|
26
|
-
from sklearn.model_selection import train_test_split # import a standalone procedure
|
|
26
|
+
from sklearn.model_selection import train_test_split # import a standalone procedure toyfunction from the package
|
|
27
|
+
from sklearn.preprocessing import StandardScaler, MinMaxScaler
|
|
27
28
|
from radnn import mlsys
|
|
28
29
|
from radnn.data.sample_set_simple import SampleSet
|
|
30
|
+
from .constants import DataPreprocessingKind
|
|
31
|
+
from enum import Enum
|
|
32
|
+
|
|
29
33
|
|
|
30
34
|
# =========================================================================================================================
|
|
31
35
|
class LegacyDataSet(object):
|
|
@@ -41,27 +45,49 @@ class LegacyDataSet(object):
|
|
|
41
45
|
|
|
42
46
|
self.samples = None
|
|
43
47
|
self.labels = None
|
|
44
|
-
|
|
48
|
+
self.preprocessor = None
|
|
49
|
+
self.mean = None
|
|
50
|
+
self.std = None
|
|
51
|
+
|
|
45
52
|
# training set object
|
|
46
53
|
self.ts: SampleSet | None = None
|
|
47
54
|
# validation set object
|
|
48
55
|
self.vs: SampleSet | None = None
|
|
49
56
|
# unknown test set object
|
|
50
|
-
self.
|
|
57
|
+
self.us: SampleSet | None = None
|
|
51
58
|
# ................................................................
|
|
52
59
|
if self.random_seed is not None:
|
|
53
60
|
mlsys.random_seed_all(self.random_seed)
|
|
54
|
-
|
|
61
|
+
# --------------------------------------------------------------------------------------
|
|
62
|
+
def split(self, p_nValidationSamplesPC=0.10, preprocessing: DataPreprocessingKind | None = DataPreprocessingKind.STANDARDIZE):
|
|
63
|
+
oTS_Samples, oVS_Samples, oTS_Labels, oVS_Labels = train_test_split(self.samples, self.labels ,
|
|
64
|
+
test_size=p_nValidationSamplesPC,
|
|
65
|
+
random_state=2021)
|
|
66
|
+
if preprocessing == DataPreprocessingKind.MIN_MAX_NORMALIZE:
|
|
67
|
+
self.preprocessor = MinMaxScaler().fit(oTS_Samples)
|
|
68
|
+
elif preprocessing == DataPreprocessingKind.STANDARDIZE:
|
|
69
|
+
self.preprocessor = StandardScaler().fit(oTS_Samples)
|
|
70
|
+
|
|
71
|
+
if self.preprocessor is not None:
|
|
72
|
+
oTS_Samples = self.preprocessor.transform(oTS_Samples)
|
|
73
|
+
oVS_Samples = self.preprocessor.transform(oVS_Samples)
|
|
74
|
+
|
|
75
|
+
# (Re)creating the subsets of the dataset after the splits have been created
|
|
76
|
+
self.ts = SampleSet(self, oTS_Samples, oTS_Labels)
|
|
77
|
+
self.vs = SampleSet(self, oVS_Samples, oVS_Labels)
|
|
78
|
+
|
|
79
|
+
print("%d samples in the Training Set" % self.ts.sample_count)
|
|
80
|
+
print("%d samples in the Validation Set" % self.vs.sample_count)
|
|
55
81
|
# --------------------------------------------------------------------------------------
|
|
56
82
|
# Backwards Compatibility
|
|
57
83
|
@property
|
|
58
84
|
def TSSamples(self):
|
|
59
85
|
return self.ts.samples
|
|
60
|
-
|
|
86
|
+
|
|
61
87
|
@property
|
|
62
88
|
def ts_samples(self):
|
|
63
89
|
return self.ts_samples
|
|
64
|
-
|
|
90
|
+
|
|
65
91
|
@property
|
|
66
92
|
def TSLabels(self):
|
|
67
93
|
return self.ts.labels
|
|
@@ -69,7 +95,7 @@ class LegacyDataSet(object):
|
|
|
69
95
|
@property
|
|
70
96
|
def ts_labels(self):
|
|
71
97
|
return self.ts.labels
|
|
72
|
-
|
|
98
|
+
|
|
73
99
|
@property
|
|
74
100
|
def TSSampleCount(self):
|
|
75
101
|
return self.ts.sample_count
|
|
@@ -77,40 +103,42 @@ class LegacyDataSet(object):
|
|
|
77
103
|
@property
|
|
78
104
|
def ts_sample_count(self):
|
|
79
105
|
return self.ts.sample_count
|
|
80
|
-
|
|
106
|
+
|
|
81
107
|
def VSSamples(self):
|
|
82
108
|
return self.vs.samples
|
|
83
|
-
|
|
109
|
+
|
|
84
110
|
@property
|
|
85
111
|
def vs_samples(self):
|
|
86
112
|
return self.vs.samples
|
|
87
|
-
|
|
113
|
+
|
|
88
114
|
@property
|
|
89
115
|
def VSLabels(self):
|
|
90
116
|
return self.vs.labels
|
|
91
|
-
|
|
117
|
+
|
|
92
118
|
@property
|
|
93
119
|
def vs_labels(self):
|
|
94
120
|
return self.vs.labels
|
|
95
|
-
|
|
121
|
+
|
|
96
122
|
@property
|
|
97
123
|
def VSSampleCount(self):
|
|
98
124
|
return self.vs.sample_count
|
|
99
|
-
|
|
125
|
+
|
|
100
126
|
@property
|
|
101
127
|
def vs_sample_count(self):
|
|
102
128
|
return self.vs.sample_count
|
|
103
|
-
|
|
129
|
+
|
|
104
130
|
@property
|
|
105
131
|
def FeatureCount(self):
|
|
106
132
|
return self.feature_count
|
|
133
|
+
|
|
107
134
|
@property
|
|
108
135
|
def ClassCount(self):
|
|
109
136
|
return self.class_count
|
|
137
|
+
|
|
110
138
|
@property
|
|
111
139
|
def ClassCount(self):
|
|
112
140
|
return self.class_count
|
|
113
|
-
|
|
141
|
+
|
|
114
142
|
@property
|
|
115
143
|
def SampleCount(self):
|
|
116
144
|
return self.sample_count
|
|
@@ -118,21 +146,8 @@ class LegacyDataSet(object):
|
|
|
118
146
|
@property
|
|
119
147
|
def Samples(self):
|
|
120
148
|
return self.samples
|
|
121
|
-
|
|
149
|
+
|
|
122
150
|
@property
|
|
123
151
|
def Labels(self):
|
|
124
152
|
return self.labels
|
|
125
|
-
# --------------------------------------------------------------------------------------
|
|
126
|
-
def split(self, p_nValidationSamplesPC=0.10):
|
|
127
|
-
oTS_Samples, oVS_Samples, oTS_Labels, oVS_Labels = train_test_split(self.samples, self.labels ,
|
|
128
|
-
test_size=p_nValidationSamplesPC,
|
|
129
|
-
random_state=2021)
|
|
130
|
-
|
|
131
|
-
# (Re)creating the subsets of the dataset after the splits have been created
|
|
132
|
-
self.ts = SampleSet(self, oTS_Samples, oTS_Labels)
|
|
133
|
-
self.vs = SampleSet(self, oVS_Samples, oVS_Labels)
|
|
134
|
-
|
|
135
|
-
print("%d samples in the Training Set" % self.ts.sample_count)
|
|
136
|
-
print("%d samples in the Validation Set" % self.vs.sample_count)
|
|
137
|
-
# --------------------------------------------------------------------------------------
|
|
138
153
|
# =========================================================================================================================
|
radnn/data/dataset_base.py
CHANGED
|
@@ -26,140 +26,238 @@ import os
|
|
|
26
26
|
import numpy as np
|
|
27
27
|
import pandas as pd
|
|
28
28
|
from abc import ABC, abstractmethod
|
|
29
|
-
from .
|
|
29
|
+
from .sample_set_simple import SampleSet
|
|
30
30
|
from .sample_set_kind import SampleSetKind
|
|
31
31
|
from .sample_preprocessor import SamplePreprocessor, VoidPreprocessor
|
|
32
32
|
from .errors import *
|
|
33
33
|
from radnn import FileStore
|
|
34
|
+
from radnn import mlsys
|
|
35
|
+
from .constants import DataPreprocessingKind
|
|
36
|
+
from sklearn.model_selection import train_test_split
|
|
37
|
+
from sklearn.preprocessing import MinMaxScaler, StandardScaler
|
|
34
38
|
|
|
35
39
|
# ======================================================================================================================
|
|
36
40
|
class DataSetCallbacks(object):
|
|
37
41
|
# --------------------------------------------------------------------------------------------------------------------
|
|
38
|
-
def __init__(self,
|
|
39
|
-
self.
|
|
40
|
-
self.
|
|
42
|
+
def __init__(self, download_method=None, seed_method=None):
|
|
43
|
+
self._lazy_download = download_method
|
|
44
|
+
self._random_seed = seed_method
|
|
41
45
|
# --------------------------------------------------------------------------------------------------------------------
|
|
42
|
-
def
|
|
43
|
-
self.
|
|
46
|
+
def lazy_download(self, fs):
|
|
47
|
+
self._lazy_download(fs)
|
|
44
48
|
# --------------------------------------------------------------------------------------------------------------------
|
|
45
|
-
def
|
|
46
|
-
self.
|
|
49
|
+
def random_seed(self, seed: int):
|
|
50
|
+
self._random_seed(seed)
|
|
47
51
|
# --------------------------------------------------------------------------------------------------------------------
|
|
52
|
+
# ======================================================================================================================
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
|
|
48
56
|
|
|
49
57
|
|
|
50
58
|
# ======================================================================================================================
|
|
51
59
|
class DataSetBase(ABC):
|
|
52
60
|
# --------------------------------------------------------------------------------------------------------------------
|
|
53
61
|
# Constructor
|
|
54
|
-
def __init__(self, name: str, variant: str|None=None, file_store=None, random_seed: int | None
|
|
62
|
+
def __init__(self, name: str, variant: str | None = None, file_store=None, random_seed: int | None = None,
|
|
63
|
+
callbacks: DataSetCallbacks | None = None):
|
|
55
64
|
# ..................// Instance Fields \\.........................
|
|
56
|
-
self.
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
self.
|
|
64
|
-
self.
|
|
65
|
-
self.
|
|
66
|
-
|
|
65
|
+
self.name = name
|
|
66
|
+
self.variant = variant
|
|
67
|
+
self.fs = None
|
|
68
|
+
self._determine_local_filestore(file_store)
|
|
69
|
+
assert self.fs is not None, ERR_DS_MUST_PROVIDE_LOCAL_FILESTORE
|
|
70
|
+
self.random_seed = random_seed
|
|
71
|
+
|
|
72
|
+
self.all_ids = None
|
|
73
|
+
self.all_samples = None
|
|
74
|
+
self.all_labels = None
|
|
75
|
+
|
|
76
|
+
self.feature_count = None
|
|
77
|
+
self.class_count = None
|
|
78
|
+
self.sample_count = None
|
|
67
79
|
|
|
68
|
-
self.
|
|
69
|
-
|
|
70
|
-
self.
|
|
71
|
-
self.
|
|
80
|
+
self.callbacks: DataSetCallbacks = callbacks
|
|
81
|
+
|
|
82
|
+
self.hprm: dict | None = None
|
|
83
|
+
self.ts: SampleSet | None = None
|
|
84
|
+
self.vs: SampleSet | None = None
|
|
85
|
+
self.us: SampleSet | None = None
|
|
72
86
|
self.preprocessor: SamplePreprocessor = VoidPreprocessor(self)
|
|
87
|
+
|
|
88
|
+
self.is_split = False
|
|
73
89
|
# ................................................................
|
|
74
|
-
if (self.random_seed is not None):
|
|
75
|
-
assert self.callbacks is not None, ERR_NO_CALLBACKS
|
|
76
|
-
assert self.callbacks.random_seeder is not None, ERR_NO_RANDOM_SEED_INITIALIZER_CALLBACK
|
|
77
|
-
self.callbacks.initialize_random_seed(self.random_seed)
|
|
78
90
|
|
|
91
|
+
# --------------------------------------------------------------------------------------
|
|
92
|
+
def split(self, validation_samples_pc=0.10,
|
|
93
|
+
preprocessing: DataPreprocessingKind | None = DataPreprocessingKind.STANDARDIZE,
|
|
94
|
+
random_seed: int=2021):
|
|
95
|
+
|
|
96
|
+
nTS_Samples, nVS_Samples, nTS_Labels, nVS_Labels = train_test_split(self.all_samples, self.all_labels,
|
|
97
|
+
test_size=validation_samples_pc,
|
|
98
|
+
random_state=random_seed)
|
|
99
|
+
if preprocessing == DataPreprocessingKind.MIN_MAX_NORMALIZE:
|
|
100
|
+
self.preprocessor = MinMaxScaler().fit(nTS_Samples)
|
|
101
|
+
elif preprocessing == DataPreprocessingKind.STANDARDIZE:
|
|
102
|
+
self.preprocessor = StandardScaler().fit(nTS_Samples)
|
|
103
|
+
else:
|
|
104
|
+
self.preprocessor = None
|
|
105
|
+
|
|
106
|
+
if self.preprocessor is not None:
|
|
107
|
+
nTS_Samples = self.preprocessor.transform(nTS_Samples)
|
|
108
|
+
nVS_Samples = self.preprocessor.transform(nVS_Samples)
|
|
109
|
+
|
|
110
|
+
# (Re)creating the subsets of the dataset after the splits have been created
|
|
111
|
+
self.ts = SampleSet(self, nTS_Samples, nTS_Labels, kind=SampleSetKind.TRAINING_SET)
|
|
112
|
+
if preprocessing == DataPreprocessingKind.STANDARDIZE:
|
|
113
|
+
self.ts.mean = self.preprocessor.mean_
|
|
114
|
+
self.ts.std = self.preprocessor.scale_
|
|
115
|
+
|
|
116
|
+
self.vs = SampleSet(self, nVS_Samples, nVS_Labels, kind=SampleSetKind.VALIDATION_SET)
|
|
117
|
+
|
|
118
|
+
self.is_split = True
|
|
119
|
+
return self
|
|
79
120
|
# --------------------------------------------------------------------------------------------------------------------
|
|
80
121
|
@property
|
|
81
|
-
def
|
|
82
|
-
|
|
122
|
+
def dataset_code(self):
|
|
123
|
+
sUniqueName = f"{self.name.upper()}"
|
|
124
|
+
if self.variant is not None:
|
|
125
|
+
sUniqueName += f"_{self.variant.upper()}"
|
|
126
|
+
return sUniqueName
|
|
83
127
|
# --------------------------------------------------------------------------------------------------------------------
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
128
|
+
def _determine_local_filestore(self, file_store):
|
|
129
|
+
if (file_store is not None):
|
|
130
|
+
if isinstance(file_store, FileStore):
|
|
131
|
+
self.fs = file_store
|
|
132
|
+
elif isinstance(file_store, str):
|
|
133
|
+
if not os.path.exists(file_store):
|
|
134
|
+
raise Exception(ERR_DS_FOLDER_NOT_FOUND % file_store)
|
|
135
|
+
self.fs = FileStore(file_store)
|
|
136
|
+
else:
|
|
137
|
+
assert mlsys.filesys is not None, ERR_MLSYS_FILESYS_NOT_INITIALIZED
|
|
138
|
+
|
|
139
|
+
self.fs: FileStore = mlsys.filesys.datasets.subfs(self.dataset_code)
|
|
87
140
|
# --------------------------------------------------------------------------------------------------------------------
|
|
88
|
-
@
|
|
89
|
-
def
|
|
90
|
-
|
|
141
|
+
@property
|
|
142
|
+
def filesystem_folder(self):
|
|
143
|
+
return self.fs.absolute_path
|
|
91
144
|
# --------------------------------------------------------------------------------------------------------------------
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
pass # could optionally override
|
|
145
|
+
def read_hyperparams(self):
|
|
146
|
+
pass # Optionally override
|
|
95
147
|
# --------------------------------------------------------------------------------------------------------------------
|
|
96
148
|
@abstractmethod
|
|
97
|
-
def
|
|
98
|
-
pass #
|
|
149
|
+
def load_data(self):
|
|
150
|
+
pass # Must implement
|
|
151
|
+
# --------------------------------------------------------------------------------------------------------------------
|
|
152
|
+
def load_cache(self, is_vector_samples=True, is_last_axis_features=True):
|
|
153
|
+
nSuffix = ""
|
|
154
|
+
if is_vector_samples:
|
|
155
|
+
nSuffix = "-vec"
|
|
156
|
+
elif not is_last_axis_features:
|
|
157
|
+
nSuffix = "-torch"
|
|
158
|
+
|
|
159
|
+
nTSSamples = self.fs.obj.load(f"{self.dataset_code}-TS-Samples{nSuffix}.pkl")
|
|
160
|
+
nVSSamples = self.fs.obj.load(f"{self.dataset_code}-VS-Samples{nSuffix}.pkl")
|
|
161
|
+
|
|
162
|
+
nTSLabels = self.fs.obj.load(f"{self.dataset_code}-TS-Labels{nSuffix}.pkl")
|
|
163
|
+
nVSLabels = self.fs.obj.load(f"{self.dataset_code}-VS-Labels{nSuffix}.pkl")
|
|
164
|
+
|
|
165
|
+
return nTSSamples, nVSSamples, nTSLabels, nVSLabels
|
|
99
166
|
# --------------------------------------------------------------------------------------------------------------------
|
|
100
|
-
def
|
|
101
|
-
|
|
102
|
-
if
|
|
103
|
-
|
|
167
|
+
def save_cache(self, ts_samples, vs_samples, ts_labels, vs_labels, is_vector_samples=True, is_last_axis_features=True):
|
|
168
|
+
nSuffix = ""
|
|
169
|
+
if is_vector_samples:
|
|
170
|
+
nSuffix = "-vec"
|
|
171
|
+
elif not is_last_axis_features:
|
|
172
|
+
nSuffix = "-torch"
|
|
173
|
+
self.fs.obj.save(ts_samples, f"{self.dataset_code}-TS-Samples{nSuffix}.pkl")
|
|
174
|
+
self.fs.obj.save(vs_samples, f"{self.dataset_code}-VS-Samples{nSuffix}.pkl")
|
|
104
175
|
|
|
176
|
+
self.fs.obj.save(ts_labels, f"{self.dataset_code}-TS-Labels{nSuffix}.pkl")
|
|
177
|
+
self.fs.obj.save(vs_labels, f"{self.dataset_code}-VS-Labels{nSuffix}.pkl")
|
|
178
|
+
# --------------------------------------------------------------------------------------------------------------------
|
|
179
|
+
def prepare(self, hyperparams: dict | None = None):
|
|
180
|
+
self.hprm = hyperparams
|
|
181
|
+
|
|
182
|
+
# VIRTUAL CALL: Reads the hyperparameters into instance variables
|
|
183
|
+
if self.hprm is not None:
|
|
184
|
+
self.read_hyperparams()
|
|
185
|
+
|
|
105
186
|
if (self.callbacks is not None):
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
187
|
+
assert self.callbacks._lazy_download is not None, ERR_DS_NO_RANDOM_SEED_INITIALIZER_CALLBACK
|
|
188
|
+
if self.callbacks._lazy_download is not None:
|
|
189
|
+
self.callbacks.lazy_download(self.fs)
|
|
190
|
+
|
|
191
|
+
if (self.random_seed is not None):
|
|
192
|
+
bIsInitRandomSeed = False
|
|
193
|
+
if self.callbacks is not None:
|
|
194
|
+
if self.callbacks._random_seed is not None:
|
|
195
|
+
self.callbacks.random_seed(self.random_seed)
|
|
196
|
+
bIsInitRandomSeed = True
|
|
197
|
+
if not bIsInitRandomSeed:
|
|
198
|
+
mlsys.random_seed_all(self.random_seed)
|
|
110
199
|
|
|
111
200
|
self.ts = None
|
|
112
201
|
self.vs = None
|
|
113
202
|
self.us = None
|
|
114
|
-
self.do_create_sample_sets()
|
|
115
203
|
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
204
|
+
# VIRTUAL CALL: Imports the dataset from the source local/remote filestore to the local cache.
|
|
205
|
+
self.load_data()
|
|
206
|
+
|
|
207
|
+
if self.is_split:
|
|
208
|
+
assert self.ts is not None, ERR_DS_SUBSET_MUST_HAVE_TS
|
|
209
|
+
assert self.ts.kind == SampleSetKind.TRAINING_SET, ERR_DS_SUBSET_INVALID_SETUP
|
|
210
|
+
if self.vs is not None:
|
|
211
|
+
assert self.vs.kind == SampleSetKind.VALIDATION_SET, ERR_DS_SUBSET_INVALID_SETUP
|
|
212
|
+
|
|
120
213
|
if self.us is not None:
|
|
121
|
-
assert self.
|
|
214
|
+
assert self.us.kind == SampleSetKind.UNKNOWN_TEST_SET, ERR_DS_SUBSET_INVALID_SETUP
|
|
215
|
+
|
|
216
|
+
return self
|
|
122
217
|
# --------------------------------------------------------------------------------------------------------------------
|
|
123
218
|
def assign(self, data, label_columns: range):
|
|
219
|
+
self.all_samples, self.all_labels, self.all_ids = None, None, None
|
|
124
220
|
if isinstance(data, tuple):
|
|
125
|
-
self.
|
|
221
|
+
self.all_samples, self.all_labels = data
|
|
126
222
|
elif isinstance(data, np.ndarray):
|
|
127
|
-
self.
|
|
223
|
+
self.all_samples = data
|
|
128
224
|
elif isinstance(data, dict):
|
|
129
|
-
if ("samples" in dict)
|
|
130
|
-
self.
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
225
|
+
if ("samples" in dict):
|
|
226
|
+
self.all_samples = data["samples"]
|
|
227
|
+
if ("labels" in dict):
|
|
228
|
+
self.all_labels = data["labels"]
|
|
229
|
+
if ("ids" in dict):
|
|
230
|
+
self.all_ids = data["ids"]
|
|
134
231
|
elif isinstance(data, pd.DataFrame):
|
|
135
232
|
if isinstance(data.columns, pd.Index):
|
|
136
233
|
nData = data.iloc[1:].to_numpy()
|
|
137
234
|
else:
|
|
138
235
|
nData = data.to_numpy()
|
|
139
|
-
|
|
236
|
+
|
|
140
237
|
if label_columns is None:
|
|
141
|
-
self.
|
|
238
|
+
self.all_samples = nData
|
|
142
239
|
else:
|
|
143
240
|
if label_columns.start >= 0:
|
|
144
241
|
if label_columns.stop is None:
|
|
145
|
-
self.
|
|
146
|
-
self.
|
|
242
|
+
self.all_labels = nData[:, label_columns.start]
|
|
243
|
+
self.all_samples = nData[:, label_columns.start + 1:]
|
|
147
244
|
else:
|
|
148
|
-
self.
|
|
149
|
-
self.
|
|
245
|
+
self.all_labels = nData[:, label_columns.start:label_columns.stop + 1]
|
|
246
|
+
self.all_samples = nData[:, label_columns.stop + 1:]
|
|
150
247
|
else:
|
|
151
|
-
self.
|
|
152
|
-
self.
|
|
248
|
+
self.all_samples = nData[:, :label_columns.start]
|
|
249
|
+
self.all_labels = nData[:, label_columns.start:]
|
|
250
|
+
|
|
251
|
+
if self.all_ids is None:
|
|
252
|
+
self.all_ids = range(len(self.all_samples)) + 1
|
|
253
|
+
|
|
153
254
|
return self
|
|
154
255
|
# --------------------------------------------------------------------------------------------------------------------
|
|
155
256
|
def print_info(self):
|
|
156
|
-
|
|
157
|
-
print(f"Dataset [{self.name}] {self.variant}")
|
|
158
|
-
else:
|
|
159
|
-
print(f"Dataset [{self.name}]")
|
|
257
|
+
print(f"Dataset [{self.dataset_code}]")
|
|
160
258
|
self.ts.print_info()
|
|
161
259
|
if self.vs is not None:
|
|
162
260
|
self.vs.print_info()
|
|
163
|
-
if self.
|
|
164
|
-
self.
|
|
261
|
+
if self.us is not None:
|
|
262
|
+
self.us.print_info()
|
|
165
263
|
# --------------------------------------------------------------------------------------------------------------------
|
|
@@ -25,7 +25,7 @@
|
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
import numpy as np
|
|
28
|
-
from sklearn.model_selection import train_test_split # import a standalone procedure
|
|
28
|
+
from sklearn.model_selection import train_test_split # import a standalone procedure toyfunction from the package
|
|
29
29
|
|
|
30
30
|
|
|
31
31
|
# =========================================================================================================================
|
radnn/data/errors.py
CHANGED
|
@@ -22,11 +22,16 @@
|
|
|
22
22
|
# SOFTWARE.
|
|
23
23
|
|
|
24
24
|
# ......................................................................................
|
|
25
|
+
|
|
26
|
+
ERR_MLSYS_FILESYS_NOT_INITIALIZED = "The filesystem for the Machine Learning host system has not been initialized."
|
|
27
|
+
|
|
28
|
+
|
|
25
29
|
ERR_NO_CALLBACKS = "You should assign callbacks to the dataset perform proper random seed initialization for your framework."
|
|
26
|
-
|
|
30
|
+
ERR_DS_NO_RANDOM_SEED_INITIALIZER_CALLBACK = "Callback method for random seed initialization has not been defined."
|
|
31
|
+
ERR_DS_CALLBACKS_NO_LAZY_DOWNLOADER = "Callback method for downloading the dataset has not been defined."
|
|
27
32
|
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
+
ERR_DS_SUBSET_MUST_HAVE_TS = "A dataset must have at least a training subset."
|
|
34
|
+
ERR_DS_SUBSET_INVALID_SETUP = "Invalid sample subset setup. Please use one of the valid kinds: 'training/train/ts', 'validation/val/vs', 'testing/test/us'."
|
|
35
|
+
ERR_DS_SUBSET_MUST_HAVE_SAMPLES = "The subset has no samples, check the implementation of your dataset class."
|
|
36
|
+
ERR_DS_FOLDER_NOT_FOUND = "The dataset was not found under the folder %s"
|
|
37
|
+
ERR_DS_MUST_PROVIDE_LOCAL_FILESTORE = "You must provide a local filestore/path for the dataset"
|
|
@@ -47,6 +47,9 @@ class SamplePreprocessor(ABC):
|
|
|
47
47
|
pass
|
|
48
48
|
# --------------------------------------------------------------------------------------------------------------------
|
|
49
49
|
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
|
|
50
53
|
# ======================================================================================================================
|
|
51
54
|
class VoidPreprocessor(SamplePreprocessor):
|
|
52
55
|
# --------------------------------------------------------------------------------------------------------------------
|