radnn 0.0.6__tar.gz → 0.0.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {radnn-0.0.6/src/radnn.egg-info → radnn-0.0.7}/PKG-INFO +2 -2
- {radnn-0.0.6 → radnn-0.0.7}/pyproject.toml +2 -2
- radnn-0.0.7/src/radnn/__init__.py +14 -0
- {radnn-0.0.6 → radnn-0.0.7}/src/radnn/core.py +9 -0
- radnn-0.0.7/src/radnn/data/__init__.py +4 -0
- radnn-0.0.7/src/radnn/data/dataset_base.py +327 -0
- radnn-0.0.7/src/radnn/data/image_dataset.py +100 -0
- radnn-0.0.7/src/radnn/data/preprocess/__init__.py +2 -0
- radnn-0.0.7/src/radnn/data/preprocess/normalizer.py +111 -0
- radnn-0.0.7/src/radnn/data/preprocess/standardizer.py +100 -0
- radnn-0.0.7/src/radnn/data/sample_set.py +153 -0
- radnn-0.0.7/src/radnn/data/sequence_dataset.py +136 -0
- radnn-0.0.7/src/radnn/errors.py +2 -0
- {radnn-0.0.6 → radnn-0.0.7}/src/radnn/evaluation/evaluate_classification.py +2 -2
- radnn-0.0.7/src/radnn/experiment/ml_experiment.py +389 -0
- {radnn-0.0.6 → radnn-0.0.7}/src/radnn/experiment/ml_experiment_config.py +37 -6
- {radnn-0.0.6 → radnn-0.0.7}/src/radnn/experiment/ml_experiment_env.py +7 -1
- radnn-0.0.7/src/radnn/experiment/ml_experiment_store.py +10 -0
- radnn-0.0.7/src/radnn/learn/__init__.py +7 -0
- radnn-0.0.7/src/radnn/learn/keras_learning_rate_scheduler.py +31 -0
- radnn-0.0.7/src/radnn/learn/keras_optimization_algorithm.py +32 -0
- radnn-0.0.7/src/radnn/learn/learning_algorithm.py +35 -0
- radnn-0.0.7/src/radnn/learn/state/__init__.py +4 -0
- radnn-0.0.7/src/radnn/learn/state/keras_best_state_saver.py +17 -0
- radnn-0.0.7/src/radnn/ml_system.py +96 -0
- {radnn-0.0.6 → radnn-0.0.7}/src/radnn/plots/__init__.py +2 -1
- radnn-0.0.7/src/radnn/plots/plot_auto_multi_image.py +112 -0
- {radnn-0.0.6 → radnn-0.0.7}/src/radnn/system/files/jsonfile.py +4 -1
- {radnn-0.0.6 → radnn-0.0.7}/src/radnn/system/filesystem.py +2 -6
- radnn-0.0.7/src/radnn/utils.py +89 -0
- {radnn-0.0.6 → radnn-0.0.7/src/radnn.egg-info}/PKG-INFO +2 -2
- {radnn-0.0.6 → radnn-0.0.7}/src/radnn.egg-info/SOURCES.txt +31 -1
- {radnn-0.0.6 → radnn-0.0.7}/src/radnn.egg-info/requires.txt +1 -1
- radnn-0.0.7/test/test_dataset_base.py +56 -0
- radnn-0.0.7/test/test_dataset_from_pandas.py +41 -0
- radnn-0.0.7/test/test_json.py +9 -0
- radnn-0.0.7/test/test_ml_system.py +6 -0
- radnn-0.0.7/test/test_mnist.py +43 -0
- radnn-0.0.7/test/test_normalizer.py +45 -0
- radnn-0.0.7/test/test_normalizer_div_zero.py +23 -0
- radnn-0.0.7/test/test_sample_set.py +42 -0
- radnn-0.0.7/test/test_standardizer.py +46 -0
- radnn-0.0.7/test/test_train.py +57 -0
- radnn-0.0.6/src/radnn/__init__.py +0 -11
- {radnn-0.0.6 → radnn-0.0.7}/LICENSE.txt +0 -0
- {radnn-0.0.6 → radnn-0.0.7}/README.md +0 -0
- {radnn-0.0.6 → radnn-0.0.7}/setup.cfg +0 -0
- {radnn-0.0.6 → radnn-0.0.7}/src/radnn/evaluation/__init__.py +0 -0
- {radnn-0.0.6 → radnn-0.0.7}/src/radnn/experiment/__init__.py +0 -0
- {radnn-0.0.6 → radnn-0.0.7}/src/radnn/plots/plot_confusion_matrix.py +0 -0
- {radnn-0.0.6 → radnn-0.0.7}/src/radnn/plots/plot_learning_curve.py +0 -0
- {radnn-0.0.6 → radnn-0.0.7}/src/radnn/plots/plot_multi_scatter.py +0 -0
- {radnn-0.0.6 → radnn-0.0.7}/src/radnn/plots/plot_roc.py +0 -0
- {radnn-0.0.6 → radnn-0.0.7}/src/radnn/plots/plot_voronoi_2d.py +0 -0
- {radnn-0.0.6 → radnn-0.0.7}/src/radnn/system/__init__.py +0 -0
- {radnn-0.0.6 → radnn-0.0.7}/src/radnn/system/files/__init__.py +0 -0
- {radnn-0.0.6 → radnn-0.0.7}/src/radnn/system/files/csvfile.py +0 -0
- {radnn-0.0.6 → radnn-0.0.7}/src/radnn/system/files/fileobject.py +0 -0
- {radnn-0.0.6 → radnn-0.0.7}/src/radnn/system/files/imgfile.py +0 -0
- {radnn-0.0.6 → radnn-0.0.7}/src/radnn/system/files/picklefile.py +0 -0
- {radnn-0.0.6 → radnn-0.0.7}/src/radnn/system/files/textfile.py +0 -0
- {radnn-0.0.6 → radnn-0.0.7}/src/radnn/system/filestore.py +0 -0
- {radnn-0.0.6 → radnn-0.0.7}/src/radnn/system/hosts/__init__.py +0 -0
- {radnn-0.0.6 → radnn-0.0.7}/src/radnn/system/hosts/colab_host.py +0 -0
- {radnn-0.0.6 → radnn-0.0.7}/src/radnn/system/hosts/linux_host.py +0 -0
- {radnn-0.0.6 → radnn-0.0.7}/src/radnn/system/hosts/windows_host.py +0 -0
- {radnn-0.0.6 → radnn-0.0.7}/src/radnn/system/tee_logger.py +0 -0
- {radnn-0.0.6 → radnn-0.0.7}/src/radnn.egg-info/dependency_links.txt +0 -0
- {radnn-0.0.6 → radnn-0.0.7}/src/radnn.egg-info/top_level.txt +0 -0
- {radnn-0.0.6 → radnn-0.0.7}/test/test_config.py +0 -0
- {radnn-0.0.6 → radnn-0.0.7}/test/test_experiment_env.py +0 -0
- {radnn-0.0.6 → radnn-0.0.7}/test/test_filestore.py +0 -0
- {radnn-0.0.6 → radnn-0.0.7}/test/test_filesystem.py +0 -0
- {radnn-0.0.6 → radnn-0.0.7}/test/test_hosts.py +0 -0
- {radnn-0.0.6 → radnn-0.0.7}/test/test_sort.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: radnn
|
|
3
|
-
Version: 0.0.6
|
|
3
|
+
Version: 0.0.7
|
|
4
4
|
Summary: Rapid Deep Neural Networks
|
|
5
5
|
Author-email: "Pantelis I. Kaplanoglou" <pikaplanoglou@ihu.gr>
|
|
6
6
|
License: MIT License
|
|
@@ -45,7 +45,7 @@ Description-Content-Type: text/markdown
|
|
|
45
45
|
License-File: LICENSE.txt
|
|
46
46
|
Requires-Dist: numpy>=1.26.4
|
|
47
47
|
Requires-Dist: matplotlib>=3.8.4
|
|
48
|
-
Requires-Dist: pandas>=2.2.
|
|
48
|
+
Requires-Dist: pandas>=2.2.1
|
|
49
49
|
Requires-Dist: scikit-learn>=1.4.2
|
|
50
50
|
|
|
51
51
|
# radnn - Rapid Deep Neural Networks
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "radnn"
|
|
3
|
-
version = "0.0.6"
|
|
3
|
+
version = "0.0.7"
|
|
4
4
|
description = "Rapid Deep Neural Networks"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
authors = [
|
|
@@ -27,7 +27,7 @@ classifiers=[
|
|
|
27
27
|
dependencies = [
|
|
28
28
|
"numpy >= 1.26.4",
|
|
29
29
|
"matplotlib >= 3.8.4",
|
|
30
|
-
"pandas >= 2.2.
|
|
30
|
+
"pandas >= 2.2.1",
|
|
31
31
|
"scikit-learn >= 1.4.2"
|
|
32
32
|
]
|
|
33
33
|
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# Version 0.0.3 [2025-01-25]
|
|
2
|
+
# Version 0.0.5 [2025-01-26]
|
|
3
|
+
# Version 0.0.6 [2025-02-04]
|
|
4
|
+
# Version 0.0.7 [2025-02-17]
|
|
5
|
+
__version__ = "0.0.7"
|
|
6
|
+
|
|
7
|
+
from .system import FileStore, FileSystem
|
|
8
|
+
from .ml_system import MLSystem
|
|
9
|
+
from .ml_system import mlsys
|
|
10
|
+
from .utils import print_tensor
|
|
11
|
+
from .errors import Errors
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
|
|
@@ -10,6 +10,15 @@ import importlib.util
|
|
|
10
10
|
def is_opencv_installed():
    """Report whether the OpenCV Python bindings (module ``cv2``) can be located.

    Returns:
        bool: True when ``importlib.util.find_spec("cv2")`` yields a module spec.
    """
    oSpec = importlib.util.find_spec("cv2")
    return oSpec is not None
|
|
12
12
|
# ----------------------------------------------------------------------------------------------------------------------
|
|
13
|
+
def is_tensorflow_installed():
    """Report whether TensorFlow can be located in the current environment.

    The importable ``tensorflow`` module is probed first. If it is not found,
    the name ``tensorflow-gpu`` is probed as a fallback.

    Returns:
        bool: True when either probe yields a module spec.
    """
    bIsInstalled = importlib.util.find_spec("tensorflow") is not None
    # The fallback has to test the probe result. Testing the function object
    # itself is always truthy, which made this branch unreachable.
    if not bIsInstalled:
        # NOTE(review): "tensorflow-gpu" looks like a distribution name rather
        # than an importable module name - confirm this probe can ever succeed.
        bIsInstalled = importlib.util.find_spec("tensorflow-gpu") is not None
    return bIsInstalled
|
|
18
|
+
# ----------------------------------------------------------------------------------------------------------------------
|
|
19
|
+
def is_torch_installed():
    """Report whether the ``torch`` module can be located.

    Returns:
        bool: True when ``importlib.util.find_spec("torch")`` yields a module spec.
    """
    oSpec = importlib.util.find_spec("torch")
    return oSpec is not None
|
|
21
|
+
# ----------------------------------------------------------------------------------------------------------------------
|
|
13
22
|
|
|
14
23
|
|
|
15
24
|
|
|
@@ -0,0 +1,327 @@
|
|
|
1
|
+
# ......................................................................................
|
|
2
|
+
# MIT License
|
|
3
|
+
|
|
4
|
+
# Copyright (c) 2019-2025 Pantelis I. Kaplanoglou
|
|
5
|
+
|
|
6
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
7
|
+
# of this software and associated documentation files (the "Software"), to deal
|
|
8
|
+
# in the Software without restriction, including without limitation the rights
|
|
9
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
10
|
+
# copies of the Software, and to permit persons to whom the Software is
|
|
11
|
+
# furnished to do so, subject to the following conditions:
|
|
12
|
+
|
|
13
|
+
# The above copyright notice and this permission notice shall be included in all
|
|
14
|
+
# copies or substantial portions of the Software.
|
|
15
|
+
|
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
17
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
18
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
19
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
20
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
21
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
22
|
+
# SOFTWARE.
|
|
23
|
+
|
|
24
|
+
# ......................................................................................
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
import numpy as np
|
|
28
|
+
import pandas as pd
|
|
29
|
+
from sklearn.model_selection import train_test_split
|
|
30
|
+
from radnn import FileSystem, FileStore, MLSystem, Errors
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class DataSetBase(object):
    """Base container for a dataset and its training / validation / test subsets.

    Field prefixes: ``ts_`` is the training set, ``vs_`` the validation set and
    ``ut_`` the unknown test set. ``samples`` / ``labels`` hold the full,
    unsplit data assigned through :meth:`assign`.
    """

    # --------------------------------------------------------------------------------------------------------------------
    # Constructor
    def __init__(self, fs=None, name=None, variant=None, random_seed=None, is_classification=False):
        """Resolve the dataset filestore and reset all dataset fields.

        Args:
            fs: A ``FileSystem``, a ``FileStore`` or a path string. When None,
                the ``filesys`` of the ``MLSystem`` singleton is used.
            name: Dataset name. Required when ``fs`` is a ``FileSystem``,
                because it names the dataset sub-filestore.
            variant: Optional suffix appended to the name (``name_variant``)
                when deriving the sub-filestore from a ``FileSystem``.
            random_seed: When not None, passed to ``MLSystem.random_seed_all``
                and used as the default seed of :meth:`split`.
            is_classification: Marks the dataset as a classification task.

        Raises:
            Exception: When no filesystem can be determined, when ``fs`` has an
                unsupported type, or when ``name`` is missing for a ``FileSystem``.
        """
        # ..................// Instance Fields \\.........................
        self.name = name
        self.fs = fs
        self.variant = variant
        self.ts = None
        self.vs = None
        self.ut = None

        if self.fs is None:
            # Fall back to the filesystem registered on the MLSystem singleton
            self.fs = MLSystem.Instance().filesys
            if self.fs is None:
                raise Exception(Errors.MLSYS_NO_FILESYS)

        if isinstance(self.fs, FileSystem):
            if name is None:
                raise Exception("A dataset name is required when fs is a filesystem")
            if variant is not None:
                name = name + "_" + variant
            self.filestore = self.fs.datasets.subfs(name.upper())
        elif isinstance(self.fs, FileStore):
            self.filestore = self.fs
        elif isinstance(self.fs, str):
            # A path becomes a FileStore: the cache methods below call
            # FileStore members (exists / obj / json) on self.filestore.
            self.filestore = FileStore(self.fs)
        else:
            raise Exception("The parameter fs could be a path, a filestore or a filesystem")

        self.random_seed = random_seed
        self.is_classification = is_classification

        self.feature_count = None
        self.class_count = None
        self.class_names = None
        self.sample_count = None

        self.samples = None
        self.labels = None

        self.ts_sample_ids = None
        self.ts_samples = None
        self.ts_labels = None
        self.ts_sample_count = 0

        self.vs_sample_ids = None
        self.vs_samples = None
        self.vs_labels = None
        self.vs_sample_count = 0

        self.ut_sample_ids = None
        self.ut_samples = None
        self.ut_labels = None
        self.ut_sample_count = None

        self.card = dict()
        # At this point `name` carries the variant suffix when one was applied
        self.card["name"] = name
        # ................................................................
        if self.random_seed is not None:
            MLSystem.Instance().random_seed_all(self.random_seed)

    # --------------------------------------------------------------------------------------------------------------------
    def open(self):
        """Do nothing; placeholder in the base class."""
        pass

    # --------------------------------------------------------------------------------------------------------------------
    def close(self):
        """Do nothing; placeholder in the base class."""
        pass

    # --------------------------------------------------------------------------------------------------------------------
    def for_classification(self, class_count, class_names=None):
        """Mark the dataset as a classification task.

        Args:
            class_count: Number of classes.
            class_names: Optional dict of class names. A list or set is turned
                into a dict keyed by enumeration index (a set enumerates in
                its own iteration order).

        Returns:
            DataSetBase: self, for call chaining.
        """
        self.is_classification = True
        self.class_count = class_count
        if class_names is not None:
            # We assume class_names is a dictionary, in other cases we turn it into a dictionary
            if isinstance(class_names, (set, list)):
                class_names = dict(enumerate(class_names))
            self.class_names = class_names
        return self

    # --------------------------------------------------------------------------------------------------------------------
    def count_samples(self):
        """Refresh the per-subset sample counts from the first axis of each subset."""
        if self.ts_samples is not None:
            self.ts_sample_count = int(self.ts_samples.shape[0])

        if self.vs_samples is not None:
            self.vs_sample_count = int(self.vs_samples.shape[0])

        # sample_count is only touched when at least one of the two subsets exists
        if (self.ts_samples is not None) or (self.vs_samples is not None):
            self.sample_count = self.ts_sample_count + self.vs_sample_count

        # The test set samples are not included in the available sample count
        if self.ut_samples is not None:
            self.ut_sample_count = int(self.ut_samples.shape[0])

    # --------------------------------------------------------------------------------------------------------------------
    def assign(self, data, label_start_column=None, label_end_column=None):
        """Assign the full (unsplit) samples and labels.

        Args:
            data: One of
                * a ``(samples, labels)`` tuple,
                * a numpy array holding samples only,
                * a dict with the keys ``"samples"`` and ``"labels"``,
                * a pandas DataFrame, split by the label column arguments.
            label_start_column: DataFrame only. None keeps every column as
                samples. A value >= 0 takes labels from that column and the
                samples from the columns after the labels. A negative value
                takes the trailing columns as labels and the rest as samples.
            label_end_column: DataFrame only. Inclusive last label column,
                used when ``label_start_column`` >= 0.

        Returns:
            DataSetBase: self, for call chaining.
        """
        if isinstance(data, tuple):
            self.samples, self.labels = data
        elif isinstance(data, np.ndarray):
            self.samples = data
        elif isinstance(data, dict):
            # Membership has to be tested on the argument, not on the builtin type
            if ("samples" in data) and ("labels" in data):
                self.samples = data["samples"]
                self.labels = data["labels"]
            else:
                pass  # Support other formats
        elif isinstance(data, pd.DataFrame):
            # NOTE(review): DataFrame.columns is always a pd.Index, so the first
            # row is always skipped and the else branch never runs - confirm
            # that dropping the first data row is intended.
            if isinstance(data.columns, pd.Index):
                nData = data.iloc[1:].to_numpy()
            else:
                nData = data.to_numpy()

            if label_start_column is None:
                self.samples = nData
            elif label_start_column >= 0:
                if label_end_column is None:
                    self.labels = nData[:, label_start_column]
                    self.samples = nData[:, label_start_column + 1:]
                else:
                    self.labels = nData[:, label_start_column:label_end_column + 1]
                    self.samples = nData[:, label_end_column + 1:]
            else:
                self.samples = nData[:, :label_start_column]
                self.labels = nData[:, label_start_column:]
        return self

    # --------------------------------------------------------------------------------------------------------------------
    def assign_training_set(self, samples, labels):
        """Assign the training set and derive the counts that are still unset.

        ``feature_count`` and ``class_count`` are only computed when they are None.

        Returns:
            DataSetBase: self, for call chaining.
        """
        self.ts_samples = samples
        self.ts_labels = labels
        self.count_samples()
        self.ts_sample_ids = np.arange(0, self.ts_sample_count)

        # Feature count is calculated on samples that are flattened as vectors
        if self.feature_count is None:
            self.feature_count = int(np.prod(self.ts_samples.shape[1:]))

        if self.class_count is None:
            if self.is_classification:
                self.class_count = len(np.unique(self.ts_labels))
            else:
                self.class_count = 0
        return self

    # --------------------------------------------------------------------------------------------------------------------
    def assign_validation_set(self, samples, labels):
        """Assign the validation set and refresh the sample counts.

        Returns:
            DataSetBase: self, for call chaining.
        """
        self.vs_samples = samples
        self.vs_labels = labels
        self.count_samples()
        self.vs_sample_ids = np.arange(0, self.vs_sample_count)

        return self

    # --------------------------------------------------------------------------------------------------------------------
    def assign_unknown_test_set(self, samples, labels):
        """Assign the unknown test set and refresh the sample counts.

        Returns:
            DataSetBase: self, for call chaining.
        """
        self.ut_samples = samples
        self.ut_labels = labels
        self.count_samples()
        self.ut_sample_ids = np.arange(0, self.ut_sample_count)

        return self

    # --------------------------------------------------------------------------------------------------------------------
    def infox(self):
        """Alias of :meth:`print_info`."""
        self.print_info()

    # --------------------------------------------------------------------------------------------------------------------
    def print_info(self):
        """Print the feature/class counts and the shape of every assigned subset."""
        print("Dataset [%s]" % self.name)
        print(" |__ FeatureCount:", self.feature_count)
        if self.is_classification:
            print(" |__ ClassCount:", self.class_count)
            if self.class_names is not None:
                print(" |__ Classes:", self.class_names)

        if self.ts_samples is not None:
            print(" |__ Training set samples : %d shape:%s" % (self.ts_sample_count, self.ts_samples.shape))
        if self.ts_labels is not None:
            print(" |__ Training set targets : %d shape:%s" % (self.ts_sample_count, self.ts_labels.shape))

        if self.vs_samples is not None:
            print(" |__ Validation set samples: %d shape:%s" % (self.vs_sample_count, self.vs_samples.shape))
        if self.vs_labels is not None:
            print(" |__ Validation set targets: %d shape:%s" % (self.vs_sample_count, self.vs_labels.shape))

        if self.ut_samples is not None:
            print(" |__ MemoryTest set samples : %d shape:%s" % (self.ut_sample_count, self.ut_samples.shape))
        if self.ut_labels is not None:
            print(" |__ MemoryTest set targets : %d shape:%s" % (self.ut_sample_count, self.ut_labels.shape))

    # --------------------------------------------------------------------------------------------------------------------
    def split(self, training_samples_pc, random_seed=None):
        """Split ``samples`` / ``labels`` into a training and a validation set.

        Args:
            training_samples_pc: Fraction of the samples kept for training;
                the validation fraction is ``1.0 - training_samples_pc``.
            random_seed: Seed for the shuffle; defaults to ``self.random_seed``.

        Returns:
            DataSetBase: self, for call chaining.
        """
        if random_seed is None:
            random_seed = self.random_seed

        # NOTE(review): the split is always stratified on the labels, also when
        # is_classification is False - confirm this is intended for regression targets.
        nTSSamples, nVSSamples, nTSTargets, nVSTargets = train_test_split(
            self.samples, self.labels,
            test_size=1.0 - training_samples_pc,
            random_state=random_seed,
            shuffle=True,
            stratify=self.labels,
        )
        self.assign_training_set(nTSSamples, nTSTargets)
        self.assign_validation_set(nVSSamples, nVSTargets)
        self.count_samples()
        return self

    # --------------------------------------------------------------------------------------------------------------------
    def has_cache(self, samples_file_prefix="Samples"):
        """Return whether a full-sample or training-sample pickle exists in the filestore."""
        return self.filestore.exists("%s.pkl" % samples_file_prefix) or self.filestore.exists("%s.TS.pkl" % samples_file_prefix)

    # --------------------------------------------------------------------------------------------------------------------
    def load_cache(self, filestore: "FileStore" = None, samples_file_prefix="Samples", targets_file_prefix="Labels", is_verbose=False):
        """Load the dataset from the pickles written by :meth:`save_cache`.

        Args:
            filestore: Filestore to read from; defaults to ``self.filestore``.
            samples_file_prefix: File name prefix of the sample pickles.
            targets_file_prefix: File name prefix of the label pickles.
            is_verbose: Print a progress message for each subset.

        Returns:
            bool: True when a cache was found and loading was attempted.

        Raises:
            Exception: When no filestore is available.
        """
        if filestore is None:
            filestore = self.filestore
        if filestore is None:
            raise Exception("To use load_cache() without providing a filestore, you should provide a filesystem or filestore during instantiation.")

        bResult = filestore.exists("%s.pkl" % samples_file_prefix) or filestore.exists("%s.TS.pkl" % samples_file_prefix)

        if bResult:
            if is_verbose:
                print("Loading known data set ...")

            dInfo = filestore.json.load(f"{self.name}_info.json")
            if dInfo is None:
                # save_cache() writes "<name>_card.json"; fall back to it so the
                # metadata it stored can be read back.
                dInfo = filestore.json.load(f"{self.name}_card.json")
            if dInfo is not None:
                if "class_names" in dInfo:
                    self.class_names = dInfo["class_names"]
                if "feature_count" in dInfo:
                    self.feature_count = dInfo["feature_count"]
                if "class_count" in dInfo:
                    self.is_classification = True
                    self.class_count = dInfo["class_count"]

            self.samples = filestore.obj.load("%s.pkl" % samples_file_prefix)
            self.labels = filestore.obj.load("%s.pkl" % targets_file_prefix)

            if is_verbose:
                print("Loading training set ...")
            nTSSamples = filestore.obj.load("%s.TS.pkl" % samples_file_prefix)
            # Same guard as the unknown test set below: a missing subset is
            # skipped instead of failing inside assign_training_set().
            if nTSSamples is not None:
                nTSTargets = filestore.obj.load("%s.TS.pkl" % targets_file_prefix)
                self.assign_training_set(nTSSamples, nTSTargets)

            if is_verbose:
                print("Loading validation set ...")
            nVSSamples = filestore.obj.load("%s.VS.pkl" % samples_file_prefix)
            if nVSSamples is not None:
                nVSTargets = filestore.obj.load("%s.VS.pkl" % targets_file_prefix)
                self.assign_validation_set(nVSSamples, nVSTargets)

            if is_verbose:
                print("Loading unknown test data set ...")
            nUTSamples = filestore.obj.load("%s.UT.pkl" % samples_file_prefix)
            if nUTSamples is not None:
                nUTTargets = filestore.obj.load("%s.UT.pkl" % targets_file_prefix)
                self.assign_unknown_test_set(nUTSamples, nUTTargets)

        return bResult

    # --------------------------------------------------------------------------------------------------------------------
    def save_cache(self, filestore: "FileStore" = None, samples_file_prefix="Samples", targets_file_prefix="Labels"):
        """Save the dataset subsets as pickles and the dataset card as JSON.

        Args:
            filestore: Filestore to write to; defaults to ``self.filestore``.
            samples_file_prefix: File name prefix of the sample pickles.
            targets_file_prefix: File name prefix of the label pickles.

        Raises:
            Exception: When no filestore is available.
        """
        if filestore is None:
            filestore = self.filestore
        if filestore is None:
            raise Exception("To use save_cache() without providing a filestore, you should provide a filesystem or filestore during instantiation.")

        if self.samples is not None:
            filestore.obj.save(self.samples, "%s.pkl" % samples_file_prefix, is_overwriting=True)
            filestore.obj.save(self.labels, "%s.pkl" % targets_file_prefix, is_overwriting=True)

        # The training and validation subsets are written unconditionally
        filestore.obj.save(self.ts_samples, "%s.TS.pkl" % samples_file_prefix, is_overwriting=True)
        filestore.obj.save(self.ts_labels, "%s.TS.pkl" % targets_file_prefix, is_overwriting=True)

        filestore.obj.save(self.vs_samples, "%s.VS.pkl" % samples_file_prefix, is_overwriting=True)
        filestore.obj.save(self.vs_labels, "%s.VS.pkl" % targets_file_prefix, is_overwriting=True)

        if self.ut_samples is not None:
            filestore.obj.save(self.ut_samples, "%s.UT.pkl" % samples_file_prefix, is_overwriting=True)
            filestore.obj.save(self.ut_labels, "%s.UT.pkl" % targets_file_prefix, is_overwriting=True)

        # Values are cast to plain int before they are written as JSON
        self.card["name"] = self.name
        if self.feature_count is not None:
            self.card["feature_count"] = int(self.feature_count)
        else:
            self.card["feature_count"] = self.feature_count

        if self.random_seed is not None:
            self.card["random_seed"] = int(self.random_seed)
        else:
            self.card["random_seed"] = self.random_seed

        if self.is_classification:
            if self.class_count is not None:
                self.card["class_count"] = int(self.class_count)
            else:
                self.card["class_count"] = self.class_count
            self.card["class_names"] = self.class_names

        print(self.card)
        filestore.json.save(self.card, f"{self.name}_card.json", is_sorted_keys=False)
|
|
325
|
+
# --------------------------------------------------------------------------------------------------------------------
|
|
326
|
+
|
|
327
|
+
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
# ......................................................................................
|
|
2
|
+
# MIT License
|
|
3
|
+
|
|
4
|
+
# Copyright (c) 2019-2025 Pantelis I. Kaplanoglou
|
|
5
|
+
|
|
6
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
7
|
+
# of this software and associated documentation files (the "Software"), to deal
|
|
8
|
+
# in the Software without restriction, including without limitation the rights
|
|
9
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
10
|
+
# copies of the Software, and to permit persons to whom the Software is
|
|
11
|
+
# furnished to do so, subject to the following conditions:
|
|
12
|
+
|
|
13
|
+
# The above copyright notice and this permission notice shall be included in all
|
|
14
|
+
# copies or substantial portions of the Software.
|
|
15
|
+
|
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
17
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
18
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
19
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
20
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
21
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
22
|
+
# SOFTWARE.
|
|
23
|
+
|
|
24
|
+
# ......................................................................................
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
import numpy as np
|
|
28
|
+
from radnn import FileStore
|
|
29
|
+
from .dataset_base import DataSetBase
|
|
30
|
+
|
|
31
|
+
class ImageDataSet(DataSetBase):
    """Dataset of images, optionally built from a source filestore of image files."""

    # --------------------------------------------------------------------------------------------------------------------
    def __init__(self, fs, name=None, variant=None, image_shape=None, random_seed=None, is_classification=True):
        """Create the image dataset.

        Args:
            fs, name, variant, random_seed, is_classification: Forwarded to
                ``DataSetBase.__init__``.
            image_shape: Optional shape of a single image. When given,
                ``feature_count`` is the product of its dimensions.
        """
        super(ImageDataSet, self).__init__(fs, name, variant, random_seed, is_classification)

        self.image_shape = None
        self.feature_count = None
        if image_shape is not None:
            self.image_shape = image_shape
            self.feature_count = int(np.prod(list(image_shape)))

        self.source_fs = None
        self.source_class_subfs_list = None
        self.image_file_list = None

    # --------------------------------------------------------------------------------------------------------------------
    def build(self, source_fs):
        """Prepare the dataset from a source of image files.

        Args:
            source_fs: A filestore, or a path string that is wrapped in a ``FileStore``.

        Returns:
            ImageDataSet: self, for call chaining.
        """
        self.source_fs = source_fs
        # If a path is supplied init the file store
        if isinstance(source_fs, str):
            self.source_fs = FileStore(source_fs)

        if self.is_classification:
            self._determine_classes()
            self._detect_class_subfilestores()

        self._generate_image_file_list()
        # TODO: Image preprocess / resize / save shard

        return self

    # --------------------------------------------------------------------------------------------------------------------
    def _determine_classes(self):
        """Placeholder for class name detection; currently does nothing."""
        if self.class_names is None:
            # TODO: Detect a JSON file that has the class names dictionary on the source filestore
            pass
        # TODO: Enumerate all subfolder in the source filestore

    # --------------------------------------------------------------------------------------------------------------------
    def _detect_class_subfilestores(self):
        """Reset the list of per-class source filestores (detection not implemented yet)."""
        self.source_class_subfs_list = []

    # --------------------------------------------------------------------------------------------------------------------
    def _generate_image_file_list(self):
        """Reset the list of image files (generation not implemented yet)."""
        self.image_file_list = []

    # --------------------------------------------------------------------------------------------------------------------
    def preview_images(self):
        """Show up to 25 training images in a 5x5 grid, titled with their labels."""
        import matplotlib.pyplot as plt

        nSamples = self.ts_samples
        nShape = nSamples.shape
        # Single-channel data uses the gray colormap.
        # assumes channels-last layout: (N, H, W) or (N, H, W, 1) - TODO confirm
        bIsGrayscale = (len(nShape) == 3) or (len(nShape) == 4 and nShape[-1] == 1)
        # Never index past the available samples
        nPreviewCount = min(25, int(nShape[0]))

        # Look at some sample images from dataset
        plt.figure(figsize=(10, 10))
        for i in range(nPreviewCount):
            plt.subplot(5, 5, i + 1)
            nImage = nSamples[i].squeeze().astype(np.uint8)
            if bIsGrayscale:
                plt.imshow(nImage, cmap='gray')
            else:
                plt.imshow(nImage)

            nClassIndex = self.ts_labels[i]
            sClassDescr = str(nClassIndex)
            if isinstance(self.class_names, dict):
                if nClassIndex in self.class_names:
                    sClassName = self.class_names[nClassIndex]
                    sClassDescr += " `" + str(sClassName) + "`"
            elif isinstance(self.class_names, list):
                sClassName = self.class_names[nClassIndex]
                sClassDescr += " `" + str(sClassName) + "` "

            plt.title(f"Label: {sClassDescr}")
            plt.axis('off')
        plt.show()
|
|
100
|
+
# --------------------------------------------------------------------------------------------------------------------
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
# ......................................................................................
|
|
2
|
+
# MIT License
|
|
3
|
+
|
|
4
|
+
# Copyright (c) 2023-2025 Pantelis I. Kaplanoglou
|
|
5
|
+
|
|
6
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
7
|
+
# of this software and associated documentation files (the "Software"), to deal
|
|
8
|
+
# in the Software without restriction, including without limitation the rights
|
|
9
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
10
|
+
# copies of the Software, and to permit persons to whom the Software is
|
|
11
|
+
# furnished to do so, subject to the following conditions:
|
|
12
|
+
|
|
13
|
+
# The above copyright notice and this permission notice shall be included in all
|
|
14
|
+
# copies or substantial portions of the Software.
|
|
15
|
+
|
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
17
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
18
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
19
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
20
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
21
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
22
|
+
# SOFTWARE.
|
|
23
|
+
|
|
24
|
+
# ......................................................................................
|
|
25
|
+
|
|
26
|
+
import numpy as np
|
|
27
|
+
|
|
28
|
+
'''
|
|
29
|
+
Normalization for rank 3 and above tensors using numpy
|
|
30
|
+
'''
|
|
31
|
+
class Normalizer(object):
    """Min-max normalizer for numpy tensors, with optional persistence of the statistics.

    When both ``name`` and ``filestore`` are given, the min/max statistics are
    loaded from and saved to ``"<name>-minmax.pkl"`` in that filestore.
    """

    # --------------------------------------------------------------------------------------------------------------------
    def __init__(self, name=None, filestore=None):
        """Create the normalizer and try to load previously saved statistics.

        Args:
            name: Name used to build the statistics file name.
            filestore: Object exposing ``obj.load`` / ``obj.save``.
        """
        # ................................................................
        # // Fields \\
        self.min = None
        self.max = None
        self._small_e = 1e-7
        self.name = name
        self.filestore = filestore
        # ................................................................
        self.load()

    # --------------------------------------------------------------------------------------------------------------------
    @property
    def small_e(self):
        """The epsilon added to ``max`` when the min/max range is (near) zero."""
        return self._small_e

    # --------------------------------------------------------------------------------------------------------------------
    def _ensure_nonzero_range(self):
        """Shift ``max`` by epsilon when any range is too small to divide by.

        The epsilon is added to every element of ``max`` as soon as one range
        is at or below it.
        """
        if np.any((self.max - self.min) <= self._small_e):
            self.max = self.max + self._small_e

    # --------------------------------------------------------------------------------------------------------------------
    @staticmethod
    def _cast_like(result, reference):
        """Cast ``result`` to the dtype of ``reference`` only when that dtype is floating.

        Casting to an integer dtype would truncate the normalized values, so
        for non-floating inputs the computed floating point result is returned.
        """
        if np.issubdtype(reference.dtype, np.floating):
            return result.astype(reference.dtype)
        return result

    # --------------------------------------------------------------------------------------------------------------------
    def load(self):
        """Load the min/max statistics from the filestore, when name and filestore are set."""
        if (self.name is not None) and (self.filestore is not None):
            dStats = self.filestore.obj.load("%s-minmax.pkl" % self.name)
            if dStats is not None:
                self.min = dStats["min"]
                self.max = dStats["max"]
                # Only guard the range when statistics were actually loaded
                self._ensure_nonzero_range()

    # --------------------------------------------------------------------------------------------------------------------
    def save(self):
        """Save the min/max statistics to the filestore, when name and filestore are set."""
        if (self.name is not None) and (self.filestore is not None):
            dStats = {"min": self.min, "max": self.max}
            self.filestore.obj.save(dStats, "%s-minmax.pkl" % self.name, is_overwriting=True)

    # --------------------------------------------------------------------------------------------------------------------
    def fit(self, data, axis_for_stats=-1, is_recalculating=False, is_verbose=False):
        """Compute the min/max statistics of ``data``.

        Statistics already loaded from the filestore are reused unless
        ``is_recalculating`` is True.

        Args:
            data: Numpy array to compute the statistics on.
            axis_for_stats: Axis that is kept; min/max are reduced over every
                other axis. Negative values count from the last axis. None
                reduces over all axes.
            is_recalculating: Recompute even when cached statistics exist.
            is_verbose: Print the shape of the computed statistics.

        Raises:
            ValueError: When ``axis_for_stats`` is not an axis of ``data``.
        """
        bIsCached = False
        if (self.name is not None) and (self.filestore is not None):
            if self.min is not None:
                bIsCached = True

        if (not bIsCached) or is_recalculating:
            data = data.astype(np.float64)
            nAxes = list(range(len(data.shape)))
            if axis_for_stats is None:
                nAxes = tuple(nAxes)
            else:
                if axis_for_stats < 0:
                    # Accept any negative axis, not only -1
                    axis_for_stats += len(nAxes)

                nAxes.remove(axis_for_stats)
                if len(nAxes) == 1:
                    nAxes = nAxes[0]
                else:
                    nAxes = tuple(nAxes)

            # Calculate min max difference with maximum precision
            self.min = np.min(data, axis=nAxes)
            self.max = np.max(data, axis=nAxes)
            self._ensure_nonzero_range()

            if is_verbose:
                print(" Normalization: min/max shape:%s" % str(self.min.shape))
            self.save()

    # --------------------------------------------------------------------------------------------------------------------
    def fit_transform(self, data, axis_for_stats=-1, is_recalculating=False, is_verbose=False):
        """Fit the statistics on ``data`` and return the normalized data."""
        self.fit(data, axis_for_stats, is_recalculating, is_verbose)
        return self.transform(data)

    # --------------------------------------------------------------------------------------------------------------------
    def normalize(self, data):
        """Return ``(data - min) / (max - min)`` without any dtype cast."""
        return (data - self.min) / (self.max - self.min)

    # --------------------------------------------------------------------------------------------------------------------
    def denormalize(self, data):
        """Return ``data * (max - min) + min`` without any dtype cast."""
        return (data * (self.max - self.min)) + self.min

    # --------------------------------------------------------------------------------------------------------------------
    def transform(self, data):
        """Normalize ``data``.

        Returns:
            The normalized array, in the dtype of ``data`` when that is a
            floating dtype; otherwise in the computed floating dtype.
        """
        nNormalizedData = (data - self.min) / (self.max - self.min)
        return self._cast_like(nNormalizedData, data)

    # --------------------------------------------------------------------------------------------------------------------
    def inverse_transform(self, data):
        """Map normalized ``data`` back to the original value range.

        Returns:
            The denormalized array, in the dtype of ``data`` when that is a
            floating dtype; otherwise in the computed floating dtype.
        """
        nDenormalizedData = (data * (self.max - self.min)) + self.min
        return self._cast_like(nDenormalizedData, data)
|
|
111
|
+
# --------------------------------------------------------------------------------------------------------------------
|