radnn 0.1.4__tar.gz → 0.1.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {radnn-0.1.4/src/radnn.egg-info → radnn-0.1.6}/PKG-INFO +1 -1
- {radnn-0.1.4 → radnn-0.1.6}/pyproject.toml +1 -1
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/__init__.py +4 -1
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/__init__.py +2 -2
- radnn-0.1.6/src/radnn/data/constants.py +8 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/custom_data_set.py +44 -29
- radnn-0.1.6/src/radnn/data/dataset_base.py +263 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/dataset_base_legacy.py +1 -1
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/errors.py +11 -6
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/sample_preprocessor.py +3 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/sample_set.py +50 -31
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/sample_set_kind.py +21 -5
- radnn-0.1.6/src/radnn/data/sample_set_simple.py +94 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/sequence_dataset.py +4 -4
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/experiment/ml_experiment.py +2 -2
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/experiment/ml_experiment_log.py +25 -19
- radnn-0.1.6/src/radnn/learn/constants.py +24 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/learn/torch/ml_model_freezer.py +1 -1
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/plots/__init__.py +3 -2
- radnn-0.1.6/src/radnn/plots/plot_histogram_of_classes.py +65 -0
- radnn-0.1.6/src/radnn/plots/plot_legacy.py +103 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/plots/plot_roc.py +1 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/hosts/windows_host.py +1 -1
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/utils.py +7 -4
- {radnn-0.1.4 → radnn-0.1.6/src/radnn.egg-info}/PKG-INFO +1 -1
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn.egg-info/SOURCES.txt +3 -0
- radnn-0.1.4/src/radnn/data/dataset_base.py +0 -165
- radnn-0.1.4/src/radnn/data/sample_set_simple.py +0 -33
- radnn-0.1.4/src/radnn/plots/plot_histogram_of_classes.py +0 -143
- {radnn-0.1.4 → radnn-0.1.6}/LICENSE.txt +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/setup.cfg +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/benchmark/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/benchmark/latency.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/benchmark/vram.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/core.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/data_hyperparams.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/dataset_factory.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/structs/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/structs/tree.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/data_feed.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/dataset_base.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/dataset_folder.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/image_dataset.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/image_dataset_files.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/preprocess/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/preprocess/normalizer.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/preprocess/standardizer.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/sample_set.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/sequence_dataset.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/structures/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/structures/dictionary.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/subset_type.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/tf_classification_data_feed.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/errors.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/evaluation/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/evaluation/evaluate_classification.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/experiment/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/experiment/identification.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/experiment/ml_experiment_config.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/experiment/ml_experiment_env.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/experiment/ml_experiment_store.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/images/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/images/colors.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/images/image_processor.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/images/transforms.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/learn/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/learn/keras/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/learn/keras/keras_best_state_saver.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/learn/keras/keras_learning_algorithm.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/learn/keras/keras_learning_rate_scheduler.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/learn/keras/keras_optimization_combo.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/learn/torch/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/learn/torch/gradient_descent_method.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/learn/torch/losses/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/learn/torch/losses/rmse.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/learn/torch/lr_schedulers.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/learn/torch/ml_trainer.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/ml_system.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/models/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/models/cnn/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/models/cnn/cnn_stem_setup.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/models/model_factory.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/models/model_hyperparams.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/models/model_info.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/models/torch/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/models/torch/model_utils.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/models/torch/torch_model_build_adapter.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/plots/plot_auto_multi_image.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/plots/plot_confusion_matrix.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/plots/plot_function.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/plots/plot_learning_curve.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/plots/plot_multi_scatter.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/plots/plot_visualize_dataset2d.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/plots/plot_voronoi_2d.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/stats/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/stats/descriptive_stats.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/files/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/files/csvfile.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/files/filelist.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/files/fileobject.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/files/imgfile.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/files/jsonfile.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/files/picklefile.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/files/textfile.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/files/zipfile.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/filestore.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/filesystem.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/hosts/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/hosts/colab_host.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/hosts/linux_host.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/log.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/tee_logger.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/threads/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/threads/semaphore_lock.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/threads/thread_context.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/threads/thread_safe_queue.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/threads/thread_safe_string_collection.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/threads/thread_worker.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/test/__init__.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn/test/tensor_hash.py +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn.egg-info/dependency_links.txt +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn.egg-info/requires.txt +0 -0
- {radnn-0.1.4 → radnn-0.1.6}/src/radnn.egg-info/top_level.txt +0 -0
{radnn-0.1.4 → radnn-0.1.6}/src/radnn/__init__.py

@@ -8,10 +8,13 @@
 # Version 0.1.0 [2026-01-07]
 # Version 0.1.1 [2025-01-08]
 # Version 0.1.4 [2025-01-26]
-
+# Version 0.1.5 [2025-02-02]
+# Version 0.1.6 [2025-02-03]
+__version__ = "0.1.6"

 from .system import FileStore, FileSystem
 from .ml_system import MLSystem
 from .ml_system import mlsys
 from .utils import print_tensor, order_str
 from .errors import Errors
+from .learn.constants import MLTask
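Besides the changelog comments, 0.1.6 introduces a top-level `__version__` attribute and re-exports `MLTask` from `radnn.learn.constants`. A minimal check of the installed release, assuming radnn 0.1.6 is installed:

```python
import radnn
from radnn import MLTask  # newly re-exported from radnn.learn.constants in 0.1.6

print(radnn.__version__)  # expected: "0.1.6"
```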
{radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/__init__.py

@@ -6,5 +6,5 @@ from .sample_set_kind import SampleSetKind
 from .sample_preprocessor import SamplePreprocessor

 from .dataset_factory import DatasetFactory, DatasetBuildAdapter
-
-from .custom_data_set import LegacyDataSet
+from .constants import DataPreprocessingKind
+from .custom_data_set import LegacyDataSet
radnn-0.1.6/src/radnn/data/constants.py

@@ -0,0 +1,8 @@
+from enum import Enum
+
+# =========================================================================================================================
+class DataPreprocessingKind(Enum):
+  MIN_MAX_NORMALIZE = 0
+  STANDARDIZE = 1
+# =========================================================================================================================
+
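The new `DataPreprocessingKind` enum is what the reworked `split()` methods (below) use to pick a scikit-learn scaler, fitted on the training portion only. A minimal sketch of that dispatch, using the same scaler classes the package imports; the sample array here is a hypothetical stand-in:

```python
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from radnn.data import DataPreprocessingKind  # re-exported by data/__init__.py in 0.1.6

train_samples = np.random.rand(100, 4)        # hypothetical training features
kind = DataPreprocessingKind.STANDARDIZE

# Mirrors the branching inside LegacyDataSet.split() and DataSetBase.split()
if kind == DataPreprocessingKind.MIN_MAX_NORMALIZE:
  preprocessor = MinMaxScaler().fit(train_samples)
elif kind == DataPreprocessingKind.STANDARDIZE:
  preprocessor = StandardScaler().fit(train_samples)
else:
  preprocessor = None

if preprocessor is not None:
  train_samples = preprocessor.transform(train_samples)
```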
{radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/custom_data_set.py

@@ -23,9 +23,13 @@

 # ......................................................................................

-from sklearn.model_selection import train_test_split # import a standalone procedure
+from sklearn.model_selection import train_test_split # import a standalone procedure toyfunction from the pacakge
+from sklearn.preprocessing import StandardScaler, MinMaxScaler
 from radnn import mlsys
 from radnn.data.sample_set_simple import SampleSet
+from .constants import DataPreprocessingKind
+from enum import Enum
+

 # =========================================================================================================================
 class LegacyDataSet(object):
@@ -41,27 +45,49 @@ class LegacyDataSet(object):

     self.samples = None
     self.labels = None
-
+    self.preprocessor = None
+    self.mean = None
+    self.std = None
+
     # training set object
     self.ts: SampleSet | None = None
     # validation set object
     self.vs: SampleSet | None = None
     # unknown test set object
-    self.
+    self.us: SampleSet | None = None
     # ................................................................
     if self.random_seed is not None:
       mlsys.random_seed_all(self.random_seed)
-
+  # --------------------------------------------------------------------------------------
+  def split(self, p_nValidationSamplesPC=0.10, preprocessing: DataPreprocessingKind | None = DataPreprocessingKind.STANDARDIZE):
+    oTS_Samples, oVS_Samples, oTS_Labels, oVS_Labels = train_test_split(self.samples, self.labels ,
+                                                                        test_size=p_nValidationSamplesPC,
+                                                                        random_state=2021)
+    if preprocessing == DataPreprocessingKind.MIN_MAX_NORMALIZE:
+      self.preprocessor = MinMaxScaler().fit(oTS_Samples)
+    elif preprocessing == DataPreprocessingKind.STANDARDIZE:
+      self.preprocessor = StandardScaler().fit(oTS_Samples)
+
+    if self.preprocessor is not None:
+      oTS_Samples = self.preprocessor.transform(oTS_Samples)
+      oVS_Samples = self.preprocessor.transform(oVS_Samples)
+
+    # (Re)creating the subsets of the dataset after the splits have been created
+    self.ts = SampleSet(self, oTS_Samples, oTS_Labels)
+    self.vs = SampleSet(self, oVS_Samples, oVS_Labels)
+
+    print("%d samples in the Training Set" % self.ts.sample_count)
+    print("%d samples in the Validation Set" % self.vs.sample_count)
   # --------------------------------------------------------------------------------------
   # Backwards Compatibility
   @property
   def TSSamples(self):
     return self.ts.samples
-
+
   @property
   def ts_samples(self):
     return self.ts_samples
-
+
   @property
   def TSLabels(self):
     return self.ts.labels
@@ -69,7 +95,7 @@ class LegacyDataSet(object):
   @property
   def ts_labels(self):
     return self.ts.labels
-
+
   @property
   def TSSampleCount(self):
     return self.ts.sample_count
@@ -77,40 +103,42 @@ class LegacyDataSet(object):
   @property
   def ts_sample_count(self):
     return self.ts.sample_count
-
+
   def VSSamples(self):
     return self.vs.samples
-
+
   @property
   def vs_samples(self):
     return self.vs.samples
-
+
   @property
   def VSLabels(self):
     return self.vs.labels
-
+
   @property
   def vs_labels(self):
     return self.vs.labels
-
+
   @property
   def VSSampleCount(self):
     return self.vs.sample_count
-
+
   @property
   def vs_sample_count(self):
     return self.vs.sample_count
-
+
   @property
   def FeatureCount(self):
     return self.feature_count
+
   @property
   def ClassCount(self):
     return self.class_count
+
   @property
   def ClassCount(self):
     return self.class_count
-
+
   @property
   def SampleCount(self):
     return self.sample_count
@@ -118,21 +146,8 @@ class LegacyDataSet(object):
   @property
   def Samples(self):
     return self.samples
-
+
   @property
   def Labels(self):
     return self.labels
-  # --------------------------------------------------------------------------------------
-  def split(self, p_nValidationSamplesPC=0.10):
-    oTS_Samples, oVS_Samples, oTS_Labels, oVS_Labels = train_test_split(self.samples, self.labels ,
-                                                                        test_size=p_nValidationSamplesPC,
-                                                                        random_state=2021)
-
-    # (Re)creating the subsets of the dataset after the splits have been created
-    self.ts = SampleSet(self, oTS_Samples, oTS_Labels)
-    self.vs = SampleSet(self, oVS_Samples, oVS_Labels)
-
-    print("%d samples in the Training Set" % self.ts.sample_count)
-    print("%d samples in the Validation Set" % self.vs.sample_count)
-  # --------------------------------------------------------------------------------------
 # =========================================================================================================================
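Functionally, `LegacyDataSet.split()` moved up in the class and gained a `preprocessing` argument whose default is `DataPreprocessingKind.STANDARDIZE`, so existing callers now get features scaled with statistics fitted on the training portion unless they opt out. A hedged sketch of both call styles, assuming an already-populated `LegacyDataSet` instance (its construction and loading are not part of this hunk):

```python
from radnn.data import LegacyDataSet, DataPreprocessingKind

def split_legacy(dataset: LegacyDataSet):
  # dataset is assumed to already have its samples/labels arrays filled in.

  # New default in 0.1.6: standardize with statistics fitted on the training split
  dataset.split(p_nValidationSamplesPC=0.10,
                preprocessing=DataPreprocessingKind.STANDARDIZE)

  # Pre-0.1.6 behaviour (split only, no scaling) remains available by opting out:
  # dataset.split(p_nValidationSamplesPC=0.10, preprocessing=None)

  return dataset.ts, dataset.vs
```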
radnn-0.1.6/src/radnn/data/dataset_base.py

@@ -0,0 +1,263 @@
+# ......................................................................................
+# MIT License
+
+# Copyright (c) 2019-2025 Pantelis I. Kaplanoglou
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# ......................................................................................
+import os
+import numpy as np
+import pandas as pd
+from abc import ABC, abstractmethod
+from .sample_set_simple import SampleSet
+from .sample_set_kind import SampleSetKind
+from .sample_preprocessor import SamplePreprocessor, VoidPreprocessor
+from .errors import *
+from radnn import FileStore
+from radnn import mlsys
+from .constants import DataPreprocessingKind
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import MinMaxScaler, StandardScaler
+
+# ======================================================================================================================
+class DataSetCallbacks(object):
+  # --------------------------------------------------------------------------------------------------------------------
+  def __init__(self, download_method=None, seed_method=None):
+    self._lazy_download = download_method
+    self._random_seed = seed_method
+  # --------------------------------------------------------------------------------------------------------------------
+  def lazy_download(self, fs):
+    self._lazy_download(fs)
+  # --------------------------------------------------------------------------------------------------------------------
+  def random_seed(self, seed: int):
+    self._random_seed(seed)
+  # --------------------------------------------------------------------------------------------------------------------
+# ======================================================================================================================s
+
+
+
+
+
+# ======================================================================================================================
+class DataSetBase(ABC):
+  # --------------------------------------------------------------------------------------------------------------------
+  # Constructor
+  def __init__(self, name: str, variant: str | None = None, file_store=None, random_seed: int | None = None,
+               callbacks: DataSetCallbacks | None = None):
+    # ..................// Instance Fields \\.........................
+    self.name = name
+    self.variant = variant
+    self.fs = None
+    self._determine_local_filestore(file_store)
+    assert self.fs is not None, ERR_DS_MUST_PROVIDE_LOCAL_FILESTORE
+    self.random_seed = random_seed
+
+    self.all_ids = None
+    self.all_samples = None
+    self.all_labels = None
+
+    self.feature_count = None
+    self.class_count = None
+    self.sample_count = None
+
+    self.callbacks: DataSetCallbacks = callbacks
+
+    self.hprm: dict | None = None
+    self.ts: SampleSet | None = None
+    self.vs: SampleSet | None = None
+    self.us: SampleSet | None = None
+    self.preprocessor: SamplePreprocessor = VoidPreprocessor(self)
+
+    self.is_split = False
+    # ................................................................
+
+  # --------------------------------------------------------------------------------------
+  def split(self, validation_samples_pc=0.10,
+            preprocessing: DataPreprocessingKind | None = DataPreprocessingKind.STANDARDIZE,
+            random_seed: int=2021):
+
+    nTS_Samples, nVS_Samples, nTS_Labels, nVS_Labels = train_test_split(self.all_samples, self.all_labels,
+                                                                        test_size=validation_samples_pc,
+                                                                        random_state=random_seed)
+    if preprocessing == DataPreprocessingKind.MIN_MAX_NORMALIZE:
+      self.preprocessor = MinMaxScaler().fit(nTS_Samples)
+    elif preprocessing == DataPreprocessingKind.STANDARDIZE:
+      self.preprocessor = StandardScaler().fit(nTS_Samples)
+    else:
+      self.preprocessor = None
+
+    if self.preprocessor is not None:
+      nTS_Samples = self.preprocessor.transform(nTS_Samples)
+      nVS_Samples = self.preprocessor.transform(nVS_Samples)
+
+    # (Re)creating the subsets of the dataset after the splits have been created
+    self.ts = SampleSet(self, nTS_Samples, nTS_Labels, kind=SampleSetKind.TRAINING_SET)
+    if preprocessing == DataPreprocessingKind.STANDARDIZE:
+      self.ts.mean = self.preprocessor.mean_
+      self.ts.std = self.preprocessor.scale_
+
+    self.vs = SampleSet(self, nVS_Samples, nVS_Labels, kind=SampleSetKind.VALIDATION_SET)
+
+    self.is_split = True
+    return self
+  # --------------------------------------------------------------------------------------------------------------------
+  @property
+  def dataset_code(self):
+    sUniqueName = f"{self.name.upper()}"
+    if self.variant is not None:
+      sUniqueName += f"_{self.variant.upper()}"
+    return sUniqueName
+  # --------------------------------------------------------------------------------------------------------------------
+  def _determine_local_filestore(self, file_store):
+    if (file_store is not None):
+      if isinstance(file_store, FileStore):
+        self.fs = file_store
+      elif isinstance(file_store, str):
+        if not os.path.exists(file_store):
+          raise Exception(ERR_DS_FOLDER_NOT_FOUND % file_store)
+        self.fs = FileStore(file_store)
+    else:
+      assert mlsys.filesys is not None, ERR_MLSYS_FILESYS_NOT_INITIALIZED
+
+      self.fs: FileStore = mlsys.filesys.datasets.subfs(self.dataset_code)
+  # --------------------------------------------------------------------------------------------------------------------
+  @property
+  def filesystem_folder(self):
+    return self.fs.absolute_path
+  # --------------------------------------------------------------------------------------------------------------------
+  def read_hyperparams(self):
+    pass # Optionally override
+  # --------------------------------------------------------------------------------------------------------------------
+  @abstractmethod
+  def load_data(self):
+    pass # Must implement
+  # --------------------------------------------------------------------------------------------------------------------
+  def load_cache(self, is_vector_samples=True, is_last_axis_features=True):
+    nSuffix = ""
+    if is_vector_samples:
+      nSuffix = "-vec"
+    elif not is_last_axis_features:
+      nSuffix = "-torch"
+
+    nTSSamples = self.fs.obj.load(f"{self.dataset_code}-TS-Samples{nSuffix}.pkl")
+    nVSSamples = self.fs.obj.load(f"{self.dataset_code}-VS-Samples{nSuffix}.pkl")
+
+    nTSLabels = self.fs.obj.load(f"{self.dataset_code}-TS-Labels{nSuffix}.pkl")
+    nVSLabels = self.fs.obj.load(f"{self.dataset_code}-VS-Labels{nSuffix}.pkl")
+
+    return nTSSamples, nVSSamples, nTSLabels, nVSLabels
+  # --------------------------------------------------------------------------------------------------------------------
+  def save_cache(self, ts_samples, vs_samples, ts_labels, vs_labels, is_vector_samples=True, is_last_axis_features=True):
+    nSuffix = ""
+    if is_vector_samples:
+      nSuffix = "-vec"
+    elif not is_last_axis_features:
+      nSuffix = "-torch"
+    self.fs.obj.save(ts_samples, f"{self.dataset_code}-TS-Samples{nSuffix}.pkl")
+    self.fs.obj.save(vs_samples, f"{self.dataset_code}-VS-Samples{nSuffix}.pkl")
+
+    self.fs.obj.save(ts_labels, f"{self.dataset_code}-TS-Labels{nSuffix}.pkl")
+    self.fs.obj.save(vs_labels, f"{self.dataset_code}-VS-Labels{nSuffix}.pkl")
+  # --------------------------------------------------------------------------------------------------------------------
+  def prepare(self, hyperparams: dict | None = None):
+    self.hprm = hyperparams
+
+    # VIRTUAL CALL: Reads the hyperparameters into instance variables
+    if self.hprm is not None:
+      self.read_hyperparams()
+
+    if (self.callbacks is not None):
+      assert self.callbacks._lazy_download is not None, ERR_DS_NO_RANDOM_SEED_INITIALIZER_CALLBACK
+      if self.callbacks._lazy_download is not None:
+        self.callbacks.lazy_download(self.fs)
+
+    if (self.random_seed is not None):
+      bIsInitRandomSeed = False
+      if self.callbacks is not None:
+        if self.callbacks._random_seed is not None:
+          self.callbacks.random_seed(self.random_seed)
+          bIsInitRandomSeed = True
+      if not bIsInitRandomSeed:
+        mlsys.random_seed_all(self.random_seed)
+
+    self.ts = None
+    self.vs = None
+    self.us = None
+
+    # VIRTUAL CALL: Imports the dataset from the source local/remote filestore to the local cache.
+    self.load_data()
+
+    if self.is_split:
+      assert self.ts is not None, ERR_DS_SUBSET_MUST_HAVE_TS
+      assert self.ts.kind == SampleSetKind.TRAINING_SET, ERR_DS_SUBSET_INVALID_SETUP
+      if self.vs is not None:
+        assert self.vs.kind == SampleSetKind.VALIDATION_SET, ERR_DS_SUBSET_INVALID_SETUP
+
+      if self.us is not None:
+        assert self.us.kind == SampleSetKind.UNKNOWN_TEST_SET, ERR_DS_SUBSET_INVALID_SETUP
+
+    return self
+  # --------------------------------------------------------------------------------------------------------------------
+  def assign(self, data, label_columns: range):
+    self.all_samples, self.all_labels, self.all_ids = None, None, None
+    if isinstance(data, tuple):
+      self.all_samples, self.all_labels = data
+    elif isinstance(data, np.ndarray):
+      self.all_samples = data
+    elif isinstance(data, dict):
+      if ("samples" in dict):
+        self.all_samples = data["samples"]
+      if ("labels" in dict):
+        self.all_labels = data["labels"]
+      if ("ids" in dict):
+        self.all_ids = data["ids"]
+    elif isinstance(data, pd.DataFrame):
+      if isinstance(data.columns, pd.Index):
+        nData = data.iloc[1:].to_numpy()
+      else:
+        nData = data.to_numpy()
+
+      if label_columns is None:
+        self.all_samples = nData
+      else:
+        if label_columns.start >= 0:
+          if label_columns.stop is None:
+            self.all_labels = nData[:, label_columns.start]
+            self.all_samples = nData[:, label_columns.start + 1:]
+          else:
+            self.all_labels = nData[:, label_columns.start:label_columns.stop + 1]
+            self.all_samples = nData[:, label_columns.stop + 1:]
+        else:
+          self.all_samples = nData[:, :label_columns.start]
+          self.all_labels = nData[:, label_columns.start:]
+
+    if self.all_ids is None:
+      self.all_ids = range(len(self.all_samples)) + 1
+
+    return self
+  # --------------------------------------------------------------------------------------------------------------------
+  def print_info(self):
+    print(f"Dataset [{self.dataset_code}]")
+    self.ts.print_info()
+    if self.vs is not None:
+      self.vs.print_info()
+    if self.us is not None:
+      self.us.print_info()
+  # --------------------------------------------------------------------------------------------------------------------
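The new `DataSetBase` turns the split/preprocess/cache plumbing into a template: a subclass only has to implement `load_data()` (and optionally `read_hyperparams()`), after which callers chain `prepare()` and `split()`. A minimal sketch of such a subclass; the dataset name, folder, and CSV layout are hypothetical, and `file_store` must point at an existing folder:

```python
import os
import pandas as pd
from radnn.data.dataset_base import DataSetBase
from radnn.data import DataPreprocessingKind

# Hypothetical subclass: load_data() is the only abstract method of DataSetBase
class MyTabularDataSet(DataSetBase):
  def load_data(self):
    # Hypothetical CSV sitting inside the dataset's local filestore folder
    dfData = pd.read_csv(os.path.join(self.filesystem_folder, "my_tabular.csv"))
    self.all_labels  = dfData["target"].to_numpy()              # "target" is a hypothetical label column
    self.all_samples = dfData.drop(columns=["target"]).to_numpy()

# file_store may be a FileStore instance or an existing folder path (see _determine_local_filestore)
oDataset = MyTabularDataSet(name="mytab", file_store="/data/mytab", random_seed=2021)
oDataset.prepare().split(validation_samples_pc=0.15,
                         preprocessing=DataPreprocessingKind.MIN_MAX_NORMALIZE)
print(oDataset.ts.sample_count, oDataset.vs.sample_count)
```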
{radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/dataset_base_legacy.py

@@ -25,7 +25,7 @@


 import numpy as np
-from sklearn.model_selection import train_test_split # import a standalone procedure
+from sklearn.model_selection import train_test_split # import a standalone procedure toyfunction from the pacakge


 # =========================================================================================================================
{radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/errors.py

@@ -22,11 +22,16 @@
 # SOFTWARE.

 # ......................................................................................
+
+ERR_MLSYS_FILESYS_NOT_INITIALIZED = "The filesystem for the Machine Learning host system has not been initialized."
+
+
 ERR_NO_CALLBACKS = "You should assign callbacks to the dataset perform proper random seed initialization for your framework."
-
+ERR_DS_NO_RANDOM_SEED_INITIALIZER_CALLBACK = "Callback method for random seed initialization has not been defined."
+ERR_DS_CALLBACKS_NO_LAZY_DOWNLOADER = "Callback method for downloading the dataset has not been defined."

-
-
-
-
-
+ERR_DS_SUBSET_MUST_HAVE_TS = "A dataset must have at least a training subset."
+ERR_DS_SUBSET_INVALID_SETUP = "Invalid sample subset setup. Please use one of the valid kinds: 'training/train/ts', 'validation/val/vs', 'testing/test/us'."
+ERR_DS_SUBSET_MUST_HAVE_SAMPLES = "The subset has no samples, check the implementation of your dataset class."
+ERR_DS_FOLDER_NOT_FOUND = "The dataset was not found under the folder %s"
+ERR_DS_MUST_PROVIDE_LOCAL_FILESTORE = "You must provide a local filestore/path for the dataset"
{radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/sample_preprocessor.py

@@ -47,6 +47,9 @@ class SamplePreprocessor(ABC):
     pass
   # --------------------------------------------------------------------------------------------------------------------

+
+
+
 # ======================================================================================================================
 class VoidPreprocessor(SamplePreprocessor):
   # --------------------------------------------------------------------------------------------------------------------