radnn 0.0.7.2__py3-none-any.whl → 0.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- radnn/__init__.py +7 -5
- radnn/core.py +44 -28
- radnn/data/__init__.py +8 -0
- radnn/data/data_feed.py +147 -0
- radnn/data/dataset_base.py +3 -5
- radnn/data/dataset_folder.py +55 -0
- radnn/data/image_dataset.py +0 -2
- radnn/data/image_dataset_files.py +175 -0
- radnn/data/preprocess/normalizer.py +7 -1
- radnn/data/preprocess/standardizer.py +9 -2
- radnn/data/sample_set.py +30 -17
- radnn/data/sequence_dataset.py +0 -2
- radnn/data/subset_type.py +45 -0
- radnn/data/tf_classification_data_feed.py +113 -0
- radnn/errors.py +29 -0
- radnn/evaluation/evaluate_classification.py +7 -3
- radnn/experiment/ml_experiment.py +29 -0
- radnn/experiment/ml_experiment_config.py +61 -32
- radnn/experiment/ml_experiment_env.py +6 -2
- radnn/experiment/ml_experiment_store.py +0 -1
- radnn/images/__init__.py +2 -0
- radnn/images/colors.py +28 -0
- radnn/images/image_processor.py +513 -0
- radnn/learn/learning_algorithm.py +4 -3
- radnn/ml_system.py +59 -18
- radnn/plots/plot_auto_multi_image.py +27 -17
- radnn/plots/plot_confusion_matrix.py +7 -4
- radnn/plots/plot_learning_curve.py +7 -3
- radnn/plots/plot_multi_scatter.py +7 -3
- radnn/plots/plot_roc.py +8 -4
- radnn/plots/plot_voronoi_2d.py +8 -5
- radnn/stats/__init__.py +1 -0
- radnn/stats/descriptive_stats.py +45 -0
- radnn/system/files/__init__.py +1 -0
- radnn/system/files/csvfile.py +8 -5
- radnn/system/files/filelist.py +40 -0
- radnn/system/files/fileobject.py +9 -4
- radnn/system/files/imgfile.py +8 -4
- radnn/system/files/jsonfile.py +8 -4
- radnn/system/files/picklefile.py +8 -4
- radnn/system/files/textfile.py +37 -7
- radnn/system/filestore.py +36 -18
- radnn/system/filesystem.py +8 -3
- radnn/system/hosts/colab_host.py +29 -0
- radnn/system/hosts/linux_host.py +29 -0
- radnn/system/hosts/windows_host.py +39 -1
- radnn/system/tee_logger.py +7 -3
- radnn/system/threads/__init__.py +5 -0
- radnn/system/threads/semaphore_lock.py +58 -0
- radnn/system/threads/thread_context.py +175 -0
- radnn/system/threads/thread_safe_queue.py +163 -0
- radnn/system/threads/thread_safe_string_collection.py +66 -0
- radnn/system/threads/thread_worker.py +68 -0
- radnn/utils.py +96 -2
- {radnn-0.0.7.2.dist-info → radnn-0.0.8.dist-info}/METADATA +1 -1
- radnn-0.0.8.dist-info/RECORD +70 -0
- radnn-0.0.7.2.dist-info/RECORD +0 -53
- {radnn-0.0.7.2.dist-info → radnn-0.0.8.dist-info}/LICENSE.txt +0 -0
- {radnn-0.0.7.2.dist-info → radnn-0.0.8.dist-info}/WHEEL +0 -0
- {radnn-0.0.7.2.dist-info → radnn-0.0.8.dist-info}/top_level.txt +0 -0
radnn/__init__.py
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
|
-
# Version 0.0.3
|
|
2
|
-
# Version 0.0.5
|
|
3
|
-
# Version 0.0.6
|
|
4
|
-
# Version 0.0.7 [2025-02-17]
|
|
5
|
-
|
|
1
|
+
# Version 0.0.3 [2025-01-25]
|
|
2
|
+
# Version 0.0.5 [2025-01-26]
|
|
3
|
+
# Version 0.0.6 [2025-02-04]
|
|
4
|
+
# Version 0.0.7.2 [2025-02-17]
|
|
5
|
+
# Version 0.0.7.3 [2025-02-21]
|
|
6
|
+
# Version 0.0.8 [2025-02-xx]
|
|
7
|
+
__version__ = "0.0.8"
|
|
6
8
|
|
|
7
9
|
from .system import FileStore, FileSystem
|
|
8
10
|
from .ml_system import MLSystem
|
radnn/core.py
CHANGED
|
@@ -1,27 +1,37 @@
|
|
|
1
|
+
# ======================================================================================
|
|
2
|
+
#
|
|
3
|
+
# Rapid Deep Neural Networks
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the MIT License
|
|
6
|
+
# ______________________________________________________________________________________
|
|
7
|
+
# ......................................................................................
|
|
8
|
+
|
|
9
|
+
# Copyright (c) 2018-2025 Pantelis I. Kaplanoglou
|
|
10
|
+
|
|
11
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
12
|
+
# of this software and associated documentation files (the "Software"), to deal
|
|
13
|
+
# in the Software without restriction, including without limitation the rights
|
|
14
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
15
|
+
# copies of the Software, and to permit persons to whom the Software is
|
|
16
|
+
# furnished to do so, subject to the following conditions:
|
|
17
|
+
|
|
18
|
+
# The above copyright notice and this permission notice shall be included in all
|
|
19
|
+
# copies or substantial portions of the Software.
|
|
20
|
+
|
|
21
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
22
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
23
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
24
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
25
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
26
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
27
|
+
# SOFTWARE.
|
|
28
|
+
|
|
29
|
+
# .......................................................................................
|
|
1
30
|
import sys
|
|
2
31
|
import socket
|
|
3
32
|
import platform
|
|
4
33
|
import subprocess
|
|
5
34
|
from datetime import datetime
|
|
6
|
-
import importlib.util
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
# ----------------------------------------------------------------------------------------------------------------------
|
|
10
|
-
def is_opencv_installed():
|
|
11
|
-
return importlib.util.find_spec("cv2") is not None
|
|
12
|
-
# ----------------------------------------------------------------------------------------------------------------------
|
|
13
|
-
def is_tensorflow_installed():
|
|
14
|
-
bIsInstalled = importlib.util.find_spec("tensorflow") is not None
|
|
15
|
-
if not is_tensorflow_installed:
|
|
16
|
-
bIsInstalled = importlib.util.find_spec("tensorflow-gpu") is not None
|
|
17
|
-
return bIsInstalled
|
|
18
|
-
# ----------------------------------------------------------------------------------------------------------------------
|
|
19
|
-
def is_torch_installed():
|
|
20
|
-
return importlib.util.find_spec("torch") is not None
|
|
21
|
-
# ----------------------------------------------------------------------------------------------------------------------
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
35
|
|
|
26
36
|
# ----------------------------------------------------------------------------------------------------------------------
|
|
27
37
|
def system_name() -> str:
|
|
@@ -45,24 +55,27 @@ def shell_command_output(command_string):
|
|
|
45
55
|
|
|
46
56
|
|
|
47
57
|
|
|
48
|
-
|
|
49
|
-
#TODO: macOS support
|
|
50
|
-
|
|
58
|
+
# ======================================================================================================================
|
|
51
59
|
class MLInfrastructure(object):
|
|
52
|
-
#
|
|
60
|
+
# --------------------------------------------------------------------------------------------------------------------
|
|
53
61
|
@classmethod
|
|
54
62
|
def is_linux(cls):
  """Return True when the host is neither Windows, nor Google Colab, nor macOS.

  Linux is detected by elimination, using the sibling predicates of this class.
  """
  # Each predicate has to be *called*: a bare `cls.is_windows` is a bound method object,
  # which is always truthy and would make this method return False on every platform.
  return not (cls.is_windows() or cls.is_colab() or cls.is_macos())
|
|
64
|
+
# --------------------------------------------------------------------------------------------------------------------
|
|
57
65
|
@classmethod
|
|
58
66
|
def is_windows(cls):
|
|
59
67
|
sPlatform = platform.system()
|
|
60
68
|
return (sPlatform == "Windows")
|
|
61
|
-
#
|
|
69
|
+
# --------------------------------------------------------------------------------------------------------------------
|
|
62
70
|
@classmethod
|
|
63
71
|
def is_colab(cls):
|
|
64
72
|
return "google.colab" in sys.modules
|
|
65
|
-
#
|
|
73
|
+
# --------------------------------------------------------------------------------------------------------------------
|
|
74
|
+
@classmethod
|
|
75
|
+
def is_macos(cls):
|
|
76
|
+
sPlatform = platform.system()
|
|
77
|
+
return (sPlatform == "Darwin")
|
|
78
|
+
# --------------------------------------------------------------------------------------------------------------------
|
|
66
79
|
@classmethod
|
|
67
80
|
def host_name(cls, is_using_ip_address=True) -> str:
|
|
68
81
|
sPlatform = platform.system()
|
|
@@ -77,7 +90,10 @@ class MLInfrastructure(object):
|
|
|
77
90
|
else:
|
|
78
91
|
if sPlatform == "Windows":
|
|
79
92
|
sResult = "(windows)-" + sHostName
|
|
93
|
+
elif sPlatform == "Darwin":
|
|
94
|
+
sResult = "(macos)-" + sHostName
|
|
80
95
|
else:
|
|
81
96
|
sResult = "(linux)-" + sHostName
|
|
82
97
|
return sResult
|
|
83
|
-
#
|
|
98
|
+
# --------------------------------------------------------------------------------------------------------------------
|
|
99
|
+
# ======================================================================================================================
|
radnn/data/__init__.py
CHANGED
|
@@ -1,4 +1,12 @@
|
|
|
1
1
|
from .dataset_base import DataSetBase
|
|
2
2
|
from .image_dataset import ImageDataSet
|
|
3
3
|
from .sample_set import SampleSet
|
|
4
|
+
from .data_feed import DataFeed
|
|
5
|
+
from .subset_type import SubsetType
|
|
6
|
+
from .sample_set import SampleSet
|
|
7
|
+
from radnn import mlsys
|
|
8
|
+
if mlsys.is_tensorflow_installed:
|
|
9
|
+
from .tf_classification_data_feed import TFClassificationDataFeed
|
|
10
|
+
|
|
11
|
+
from .image_dataset_files import ImageDataSetFiles
|
|
4
12
|
|
radnn/data/data_feed.py
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
# ......................................................................................
|
|
2
|
+
# MIT License
|
|
3
|
+
|
|
4
|
+
# Copyright (c) 2019-2025 Pantelis I. Kaplanoglou
|
|
5
|
+
|
|
6
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
7
|
+
# of this software and associated documentation files (the "Software"), to deal
|
|
8
|
+
# in the Software without restriction, including without limitation the rights
|
|
9
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
10
|
+
# copies of the Software, and to permit persons to whom the Software is
|
|
11
|
+
# furnished to do so, subject to the following conditions:
|
|
12
|
+
|
|
13
|
+
# The above copyright notice and this permission notice shall be included in all
|
|
14
|
+
# copies or substantial portions of the Software.
|
|
15
|
+
|
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
17
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
18
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
19
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
20
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
21
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
22
|
+
# SOFTWARE.
|
|
23
|
+
|
|
24
|
+
# ......................................................................................
|
|
25
|
+
from .dataset_base import DataSetBase
|
|
26
|
+
from .subset_type import SubsetType
|
|
27
|
+
from radnn.data.preprocess import Normalizer, Standardizer
|
|
28
|
+
|
|
29
|
+
class DataFeed(object):
  """Base class of a fluent data feeding pipeline over one subset of a dataset.

  Every chainable step (`normalize`, `standardize`, `random_shuffle`, `batch`, `augment_*`)
  passes the current `feed` through one of the `build_*` hooks, stores the result back in
  `self.feed` and appends it to `self.pipeline_objects`. The hooks defined here are
  pass-through defaults (`build_iterator` returns None); subclasses override them.
  """
  # --------------------------------------------------------------------------------------------------------------------
  def __init__(self, dataset: DataSetBase, subset_type):
    """Bind the feed to a dataset and a subset, then build the initial iterator.

    :param dataset: the dataset object; its `sample_shape` and `*_sample_count` attributes are read here.
    :param subset_type: a `SubsetType` instance, or a string that is passed to `SubsetType(...)`.
                        Any other value leaves `self.subset_type` as None.
    """
    self.subset_type: SubsetType = None
    if isinstance(subset_type, SubsetType):
      self.subset_type = subset_type
    elif isinstance(subset_type, str):
      self.subset_type = SubsetType(subset_type)

    self.dataset = dataset
    self.feed = None
    self.pipeline_objects = []
    self.method_actions = []
    self.augmentations = []

    self.value_preprocessor = None
    self.padding_offset = None
    self.padding_target = None

    self.input_shape = self.dataset.sample_shape
    self.sample_count_to_shuffle = None
    # The subset type may legitimately be None (unsupported argument type), so it is
    # checked before its flags are read; otherwise the constructor would raise AttributeError.
    if self.subset_type is not None:
      if self.subset_type.is_training_set:
        self.sample_count_to_shuffle = self.dataset.ts_sample_count
      elif self.subset_type.is_validation_set:
        self.sample_count_to_shuffle = self.dataset.vs_sample_count
      elif self.subset_type.is_unknown_test_set:
        self.sample_count_to_shuffle = self.dataset.ut_sample_count
    self.batch_size = None

    self._has_mapped_preprocessing_method = False
    self._is_multiclass = False

    self.feed = self.build_iterator()
    self.pipeline_objects.append(self.feed)
  # --------------------------------------------------------------------------------------------------------------------
  def multiclass(self):
    """Flag the feed as multiclass and return self for chaining."""
    self._is_multiclass = True
    return self
  # --------------------------------------------------------------------------------------------------------------------
  def normalize(self):
    """Attach a `Normalizer` as value preprocessor and map the preprocessing step once.

    The normalizer is fitted on the training samples only when it has no `min` yet.
    """
    self.value_preprocessor = Normalizer(self.dataset.name, self.dataset.filestore)
    if self.value_preprocessor.min is None:
      self.value_preprocessor.fit(self.dataset.ts_samples)
    self.method_actions.append("normalize")
    self.map_preprocessing()
    return self
  # --------------------------------------------------------------------------------------------------------------------
  def map_preprocessing(self):
    """Insert the preprocessing stage into the pipeline, at most once per feed."""
    if not self._has_mapped_preprocessing_method:
      self.feed = self.build_preprocessor(self.feed)
      self.pipeline_objects.append(self.feed)
      self._has_mapped_preprocessing_method = True
    # Returned for consistency with the other chainable steps.
    return self
  # --------------------------------------------------------------------------------------------------------------------
  def standardize(self, axis_for_stats=None):
    """Attach a `Standardizer` as value preprocessor and map the preprocessing step once.

    :param axis_for_stats: forwarded to `Standardizer.fit` when the standardizer has no `mean` yet.
    """
    self.value_preprocessor = Standardizer(self.dataset.name, self.dataset.filestore)
    if self.value_preprocessor.mean is None:
      self.value_preprocessor.fit(self.dataset.ts_samples, axis_for_stats=axis_for_stats)
    self.method_actions.append("standardize")
    self.map_preprocessing()
    return self
  # --------------------------------------------------------------------------------------------------------------------
  def random_shuffle(self):
    """Append the random shuffling stage to the pipeline."""
    self.feed = self.build_random_shuffler(self.feed)
    self.pipeline_objects.append(self.feed)
    return self
  # --------------------------------------------------------------------------------------------------------------------
  def batch(self, batch_size):
    """Store the batch size and append the minibatch stage to the pipeline."""
    self.batch_size = batch_size
    self.feed = self.build_minibatch_maker(self.feed)
    self.pipeline_objects.append(self.feed)
    return self
  # --------------------------------------------------------------------------------------------------------------------
  def augment_crop(self, padding_offset):
    """Append the "random_crop" augmentation stage.

    `padding_target` is set to the first dimension of the dataset sample shape plus `padding_offset`.
    """
    self.padding_offset = padding_offset
    assert self.dataset.sample_shape is not None, "You should define the images input shape on the dataset"
    self.padding_target = self.dataset.sample_shape[0] + self.padding_offset
    self.map_preprocessing()
    self.feed = self.build_augmentation(self.feed, "random_crop")
    self.pipeline_objects.append(self.feed)
    return self
  # --------------------------------------------------------------------------------------------------------------------
  def augment_flip_left_right(self):
    """Append the "random_flip_left_right" augmentation stage."""
    self.map_preprocessing()
    self.feed = self.build_augmentation(self.feed, "random_flip_left_right")
    self.pipeline_objects.append(self.feed)
    return self
  # --------------------------------------------------------------------------------------------------------------------
  def augment_cutout(self):
    """Append the "random_cutout" augmentation stage."""
    self.map_preprocessing()
    self.feed = self.build_augmentation(self.feed, "random_cutout")
    self.pipeline_objects.append(self.feed)
    return self
  # --------------------------------------------------------------------------------------------------------------------

  # Hooks to be overridden by subclasses
  # --------------------------------------------------------------------------------------------------------------------
  def build_iterator(self):
    """Create the initial feed object; the base implementation has none."""
    return None
  # --------------------------------------------------------------------------------------------------------------------
  def build_preprocessor(self, feed):
    """Wrap `feed` with the value preprocessing stage; pass-through by default."""
    return feed
  # --------------------------------------------------------------------------------------------------------------------
  def build_augmentation(self, feed, augmentation_kind):
    """Wrap `feed` with the augmentation named `augmentation_kind`; pass-through by default.

    Defined here so that the `augment_*` steps also work on a feed whose subclass
    does not provide its own implementation.
    """
    return feed
  # --------------------------------------------------------------------------------------------------------------------
  def add_augmentation(self, augmentation_kind):
    """Record `augmentation_kind` in the list of applied method actions."""
    # `method_actions` is a list, so the entry is appended (lists have no `add` method).
    self.method_actions.append(augmentation_kind)
  # --------------------------------------------------------------------------------------------------------------------
  def build_random_shuffler(self, feed):
    """Wrap `feed` with the shuffling stage; pass-through by default."""
    return feed
  # --------------------------------------------------------------------------------------------------------------------
  def build_minibatch_maker(self, feed):
    """Wrap `feed` with the batching stage; pass-through by default."""
    return feed
|
|
143
|
+
# --------------------------------------------------------------------------------------------------------------------
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
|
radnn/data/dataset_base.py
CHANGED
|
@@ -22,8 +22,6 @@
|
|
|
22
22
|
# SOFTWARE.
|
|
23
23
|
|
|
24
24
|
# ......................................................................................
|
|
25
|
-
|
|
26
|
-
|
|
27
25
|
import numpy as np
|
|
28
26
|
import pandas as pd
|
|
29
27
|
from sklearn.model_selection import train_test_split
|
|
@@ -56,7 +54,7 @@ class DataSetBase(object):
|
|
|
56
54
|
elif isinstance(self.fs, FileStore):
|
|
57
55
|
self.filestore = self.fs
|
|
58
56
|
elif isinstance(self.fs, str):
|
|
59
|
-
self.filestore =
|
|
57
|
+
self.filestore = FileStore(self.fs)
|
|
60
58
|
else:
|
|
61
59
|
raise Exception("The parameter fs could be a path, a filestore or a filesystem")
|
|
62
60
|
else:
|
|
@@ -88,6 +86,8 @@ class DataSetBase(object):
|
|
|
88
86
|
self.ut_labels = None
|
|
89
87
|
self.ut_sample_count = None
|
|
90
88
|
|
|
89
|
+
self.sample_shape = None
|
|
90
|
+
|
|
91
91
|
self.card = dict()
|
|
92
92
|
self.card["name"] = name
|
|
93
93
|
# ................................................................
|
|
@@ -319,8 +319,6 @@ class DataSetBase(object):
|
|
|
319
319
|
self.card["class_count"] = self.class_count
|
|
320
320
|
self.card["class_names"] = self.class_names
|
|
321
321
|
|
|
322
|
-
|
|
323
|
-
print(self.card)
|
|
324
322
|
filestore.json.save(self.card, f"{self.name}_card.json", is_sorted_keys=False)
|
|
325
323
|
# --------------------------------------------------------------------------------------------------------------------
|
|
326
324
|
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
from .subset_type import SubsetType
|
|
2
|
+
|
|
3
|
+
class DataSetFolder(object):
  """A dataset folder together with the filestores of its split subfolders, if it has any."""
  # --------------------------------------------------------------------------------------------------------------------
  def __init__(self, folder_name, filestore):
    """Inspect the subfolders of `filestore` and open a sub-filestore for each detected split.

    :param folder_name: the name used when the folder is printed.
    :param filestore: the filestore of the folder; must provide `list_folders` and `subfs`.
    """
    self.folder_name = folder_name
    self.filestore = filestore
    self.filestore_ts = None
    self.filestore_vs = None
    self.filestore_ut = None
    self.split_filestores = []

    self.subfolders = self.filestore.list_folders(is_full_path=False)
    split_info = self.get_split_subfolders(self.subfolders)
    self.is_split = split_info[0]
    if not self.is_split:
      return

    # Training, validation and unknown test stores are opened in that fixed order.
    attribute_names = ("filestore_ts", "filestore_vs", "filestore_ut")
    for attribute_name, subfolder in zip(attribute_names, split_info[1:]):
      if subfolder is None:
        continue
      sub_store = self.filestore.subfs(subfolder, must_exist=True)
      setattr(self, attribute_name, sub_store)
      self.split_filestores.append(sub_store)
  # --------------------------------------------------------------------------------------------------------------------
  def get_split_subfolders(self, folders):
    """Classify folder names by subset type.

    :param folders: an iterable of folder names.
    :return: a tuple (is_split, training_folder, validation_folder, unknown_test_folder),
             where missing folders are None.
    """
    training = None
    validation = None
    unknown_test = None
    found_split = False
    for name in folders:
      kind = SubsetType(name)
      if kind.is_training_set:
        training, found_split = name, True
      elif kind.is_validation_set:
        validation, found_split = name, True
      elif kind.is_unknown_test_set:
        unknown_test, found_split = name, True

    # When confusing terminology is used and 'test' means 'validation'
    if (validation is None) and (unknown_test is not None):
      validation, unknown_test = unknown_test, None

    return found_split, training, validation, unknown_test
  # --------------------------------------------------------------------------------------------------------------------
  def __str__(self):
    """Return the folder name prefixed with './'."""
    return "./" + self.folder_name
  # --------------------------------------------------------------------------------------------------------------------
  def __repr__(self):
    """Same text as `__str__`."""
    return self.__str__()
|
|
55
|
+
# --------------------------------------------------------------------------------------------------------------------
|
radnn/data/image_dataset.py
CHANGED
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
from radnn import FileStore
|
|
2
|
+
from radnn.utils import camel_case
|
|
3
|
+
from radnn.system.files import FileList
|
|
4
|
+
from .dataset_folder import DataSetFolder
|
|
5
|
+
import sys
|
|
6
|
+
from tqdm import tqdm
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
|
|
9
|
+
class ImageDataSetFiles(object):
|
|
10
|
+
# --------------------------------------------------------------------------------------------------------------------
|
|
11
|
+
def __init__(self, images_fs, name="files", is_progress_indicator=True):
|
|
12
|
+
self.images_fs :FileStore = images_fs
|
|
13
|
+
self.name = name
|
|
14
|
+
self.is_progress_indicator = is_progress_indicator
|
|
15
|
+
self.class_names :dict = dict()
|
|
16
|
+
self.class_folders :list = []
|
|
17
|
+
self.files :FileList = dict()
|
|
18
|
+
self.files_ts :FileList = dict()
|
|
19
|
+
self.files_vs :FileList = dict()
|
|
20
|
+
self.files_ut :FileList = dict()
|
|
21
|
+
self.total_file_count = 0
|
|
22
|
+
self.is_split_on_main_folder = False
|
|
23
|
+
self.is_split_in_class_folders = False
|
|
24
|
+
self.run_date_time = None
|
|
25
|
+
|
|
26
|
+
self.detect_class_names_from_folders()
|
|
27
|
+
# --------------------------------------------------------------------------------------------------------------------
|
|
28
|
+
def load(self, extensions="*.jpg; *.png"):
|
|
29
|
+
if not self.load_file_lists():
|
|
30
|
+
self.detect_files(extensions)
|
|
31
|
+
# --------------------------------------------------------------------------------------------------------------------
|
|
32
|
+
def load_file_lists(self):
|
|
33
|
+
bResult = False
|
|
34
|
+
oDict = self.images_fs.obj.load(f"{self.name}-file-info.pkl")
|
|
35
|
+
if oDict is not None:
|
|
36
|
+
self.run_date_time = oDict["RunDateTime"]
|
|
37
|
+
self.class_names = oDict["ClassNames"]
|
|
38
|
+
self.class_folders = oDict["ClassFolders"]
|
|
39
|
+
self.files = oDict["ClassFiles.All"]
|
|
40
|
+
self.files_ts = oDict["ClassFiles.TrainingSet"]
|
|
41
|
+
self.files_vs = oDict["ClassFiles.ValidationSet"]
|
|
42
|
+
self.files_ut = oDict["ClassFiles.UnknownTestSet"]
|
|
43
|
+
self.total_file_count = oDict["TotalFileCount"]
|
|
44
|
+
self.is_split_on_main_folder = oDict["IsSplitOnMainFolder"]
|
|
45
|
+
self.is_split_in_class_folders = oDict["IsSplitInClassFolders"]
|
|
46
|
+
bResult = True
|
|
47
|
+
|
|
48
|
+
return bResult
|
|
49
|
+
# --------------------------------------------------------------------------------------------------------------------
|
|
50
|
+
def save_file_lists(self):
|
|
51
|
+
oDict = dict()
|
|
52
|
+
oDict["RunDateTime"] = self.run_date_time
|
|
53
|
+
oDict["ClassNames"] = self.class_names
|
|
54
|
+
oDict["ClassFolders"] = self.class_folders
|
|
55
|
+
oDict["ClassFiles.All"] = self.files
|
|
56
|
+
oDict["ClassFiles.TrainingSet"] = self.files_ts
|
|
57
|
+
oDict["ClassFiles.ValidationSet"] = self.files_vs
|
|
58
|
+
oDict["ClassFiles.UnknownTestSet"] = self.files_ut
|
|
59
|
+
oDict["TotalFileCount"] = self.total_file_count
|
|
60
|
+
oDict["IsSplitOnMainFolder"] = self.is_split_on_main_folder
|
|
61
|
+
oDict["IsSplitInClassFolders"] = self.is_split_in_class_folders
|
|
62
|
+
self.images_fs.obj.save(oDict, f"{self.name}-file-info.pkl")
|
|
63
|
+
# --------------------------------------------------------------------------------------------------------------------
|
|
64
|
+
def detect_class_names_from_folders(self):
|
|
65
|
+
oClassNamesFS = self.images_fs
|
|
66
|
+
oMainFolder = DataSetFolder("/", self.images_fs)
|
|
67
|
+
oFolders = oMainFolder.subfolders
|
|
68
|
+
|
|
69
|
+
self.is_split_on_main_folder = oMainFolder.is_split
|
|
70
|
+
if self.is_split_on_main_folder:
|
|
71
|
+
# Detect the class names under the training set subfolder
|
|
72
|
+
oClassNamesFS = oMainFolder.filestore_ts
|
|
73
|
+
oFolders = oClassNamesFS.list_folders(is_full_path=False)
|
|
74
|
+
|
|
75
|
+
for nIndex, sFolder in enumerate(oFolders):
|
|
76
|
+
sClassName = camel_case(sFolder)
|
|
77
|
+
self.class_names[nIndex] = sClassName
|
|
78
|
+
oClassFS = oClassNamesFS.subfs(sFolder, must_exist=True)
|
|
79
|
+
oClassFolder = DataSetFolder(sFolder, oClassFS)
|
|
80
|
+
if not self.is_split_on_main_folder:
|
|
81
|
+
if oClassFolder.is_split:
|
|
82
|
+
self.is_split_in_class_folders = True
|
|
83
|
+
self.class_folders.append(oClassFolder)
|
|
84
|
+
|
|
85
|
+
return self.class_folders
|
|
86
|
+
|
|
87
|
+
# --------------------------------------------------------------------------------------------------------------------
|
|
88
|
+
def traverse_sub_folders(self, extensions, progress):
  """Collect the files of every class folder into `self.files`, keyed by class index.

  :param extensions: the extension filter that is forwarded to `filestore.filelist(...)`.
  :param progress: an optional progress indicator (`set_description`, `refresh` and `update`
                   are called on it), or None.
  """
  # Start counting from zero, like traverse_sub_folders_with_split does, so that a repeated
  # scan (or a scan after a cached load) does not add to a previous total.
  self.total_file_count = 0
  for nClassIndex, oClassFolder in enumerate(self.class_folders):
    if progress is not None:
      progress.set_description(f"Finding files for class {self.class_names[nClassIndex]}")
      progress.refresh()
    oClassFiles = oClassFolder.filestore.filelist(extensions)
    self.files[nClassIndex] = oClassFiles
    self.total_file_count += len(oClassFiles)
    if progress is not None:
      progress.update(1)
|
|
97
|
+
# --------------------------------------------------------------------------------------------------------------------
|
|
98
|
+
def traverse_sub_folders_with_split(self, extensions, progress):
  """Collect the files of class folders that are split into subset subfolders.

  Every file is added to `self.files[class_index]`, and additionally to the per-subset
  dictionary (`files_ts`, `files_vs` or `files_ut`) of the split filestore it was found in.

  :param extensions: the extension filter that is forwarded to `filelist(...)`.
  :param progress: an optional progress indicator (`set_description`, `refresh` and `update`
                   are called on it), or None.
  :raises Exception: when a class folder has no split subfolders.
  """
  self.total_file_count = 0
  for nClassIndex, oClassFolder in enumerate(self.class_folders):
    if progress is not None:
      progress.set_description(f"Finding files for class {self.class_names[nClassIndex]}")
      progress.refresh()

    if not oClassFolder.is_split:
      # The message reports the class that is being traversed (nClassIndex). The index of the
      # inner split-store loop is undefined for the first class and unrelated for the rest.
      raise Exception(f"No split subfolders for class {nClassIndex} '{self.class_names[nClassIndex]}',\n"
                      + f"that is stored in {oClassFolder.filestore}\n"
                      + f"All of the classes should have the same split.")

    oClassAllFiles = FileList()
    for oSplitFileStore in oClassFolder.split_filestores:
      if oSplitFileStore is None:
        continue

      # The target dictionary depends only on the split filestore, not on the individual file.
      dSplit = None
      if oSplitFileStore == oClassFolder.filestore_ts:
        dSplit = self.files_ts
      elif oSplitFileStore == oClassFolder.filestore_vs:
        dSplit = self.files_vs
      elif oSplitFileStore == oClassFolder.filestore_ut:
        dSplit = self.files_ut

      oFileList = oSplitFileStore.filelist(extensions)
      for oFile in oFileList.full_paths:
        if dSplit is not None:
          # The per-class list is created lazily, only when a file exists for it.
          dSplit.setdefault(nClassIndex, []).append(oFile)
        oClassAllFiles.append(oFile)

    self.files[nClassIndex] = oClassAllFiles
    self.total_file_count += len(self.files[nClassIndex])
    if progress is not None:
      progress.update(1)

  if progress is not None:
    progress.set_description("Finished")
    progress.refresh()
|
|
137
|
+
# --------------------------------------------------------------------------------------------------------------------
|
|
138
|
+
def detect_files(self, extensions="*.jpg; *.png"):
|
|
139
|
+
oProgress = None
|
|
140
|
+
if len(self.class_folders) > 0:
|
|
141
|
+
if (not self.is_split_on_main_folder) and (not self.is_split_in_class_folders):
|
|
142
|
+
if self.is_progress_indicator:
|
|
143
|
+
oProgress = tqdm(total=len(self.class_folders), ncols=80)
|
|
144
|
+
try:
|
|
145
|
+
self.traverse_sub_folders(extensions, oProgress)
|
|
146
|
+
finally:
|
|
147
|
+
if self.is_progress_indicator:
|
|
148
|
+
oProgress.close()
|
|
149
|
+
|
|
150
|
+
elif (not self.is_split_on_main_folder) and self.is_split_in_class_folders:
|
|
151
|
+
if self.is_progress_indicator:
|
|
152
|
+
oProgress = tqdm(total=len(self.class_folders), ncols=80)
|
|
153
|
+
try:
|
|
154
|
+
self.traverse_sub_folders_with_split(extensions, oProgress)
|
|
155
|
+
finally:
|
|
156
|
+
if self.is_progress_indicator:
|
|
157
|
+
oProgress.close()
|
|
158
|
+
|
|
159
|
+
self.save_file_lists()
|
|
160
|
+
# --------------------------------------------------------------------------------------------------------------------
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
|
|
@@ -22,7 +22,6 @@
|
|
|
22
22
|
# SOFTWARE.
|
|
23
23
|
|
|
24
24
|
# ......................................................................................
|
|
25
|
-
|
|
26
25
|
import numpy as np
|
|
27
26
|
|
|
28
27
|
'''
|
|
@@ -90,6 +89,7 @@ class Normalizer(object):
|
|
|
90
89
|
if is_verbose:
|
|
91
90
|
print(" Normalization: min/max shape:%s" % str(self.min.shape))
|
|
92
91
|
self.save()
|
|
92
|
+
return self
|
|
93
93
|
# --------------------------------------------------------------------------------------------------------------------
|
|
94
94
|
def fit_transform(self, data, axis_for_stats=-1, is_recalculating=False, is_verbose=False):
|
|
95
95
|
self.fit(data, axis_for_stats, is_recalculating, is_verbose)
|
|
@@ -109,3 +109,9 @@ class Normalizer(object):
|
|
|
109
109
|
nDenormalizedData = (data * (self.max - self.min)) + self.min
|
|
110
110
|
return nDenormalizedData.astype(data.dtype)
|
|
111
111
|
# --------------------------------------------------------------------------------------------------------------------
|
|
112
|
+
def __str__(self):
|
|
113
|
+
return f"Normalizer: min={self.min} max={self.max}"
|
|
114
|
+
# --------------------------------------------------------------------------------------------------------------------
|
|
115
|
+
def __repr__(self):
|
|
116
|
+
return self.__str__()
|
|
117
|
+
# --------------------------------------------------------------------------------------------------------------------
|
|
@@ -22,7 +22,6 @@
|
|
|
22
22
|
# SOFTWARE.
|
|
23
23
|
|
|
24
24
|
# ......................................................................................
|
|
25
|
-
|
|
26
25
|
import numpy as np
|
|
27
26
|
|
|
28
27
|
'''
|
|
@@ -52,7 +51,7 @@ class Standardizer(object):
|
|
|
52
51
|
dStats = {"mean": self.mean, "std": self.std}
|
|
53
52
|
self.filestore.obj.save(dStats, "%s-meanstd.pkl" % self.name, is_overwriting=True)
|
|
54
53
|
# --------------------------------------------------------------------------------------------------------------------
|
|
55
|
-
def fit(self, data, axis_for_stats
|
|
54
|
+
def fit(self, data, axis_for_stats=None, is_recalculating=False, is_verbose=False):
|
|
56
55
|
bIsCached = False
|
|
57
56
|
if (self.name is not None) and (self.filestore is not None):
|
|
58
57
|
if self.mean is not None:
|
|
@@ -79,6 +78,8 @@ class Standardizer(object):
|
|
|
79
78
|
if is_verbose:
|
|
80
79
|
print(" Standardization: mean/std shape:%s" % str(self.mean.shape))
|
|
81
80
|
self.save()
|
|
81
|
+
|
|
82
|
+
return self
|
|
82
83
|
# --------------------------------------------------------------------------------------------------------------------
|
|
83
84
|
def fit_transform(self, data, axis_for_stats=-1, is_recalculating=False, is_verbose=False):
|
|
84
85
|
self.fit(data, axis_for_stats, is_recalculating, is_verbose)
|
|
@@ -98,3 +99,9 @@ class Standardizer(object):
|
|
|
98
99
|
nNonStandardizedData = (data * self.std) + self.mean
|
|
99
100
|
return nNonStandardizedData.astype(data.dtype)
|
|
100
101
|
# --------------------------------------------------------------------------------------------------------------------
|
|
102
|
+
def __str__(self):
|
|
103
|
+
return f"Standardizer: mean={self.mean} std={self.std}"
|
|
104
|
+
# --------------------------------------------------------------------------------------------------------------------
|
|
105
|
+
def __repr__(self):
|
|
106
|
+
return self.__str__()
|
|
107
|
+
# --------------------------------------------------------------------------------------------------------------------
|