radnn 0.0.8__py3-none-any.whl → 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- radnn/__init__.py +5 -5
- radnn/benchmark/__init__.py +1 -0
- radnn/benchmark/latency.py +55 -0
- radnn/core.py +146 -2
- radnn/data/__init__.py +5 -10
- radnn/data/dataset_base.py +100 -260
- radnn/data/dataset_base_legacy.py +280 -0
- radnn/data/errors.py +32 -0
- radnn/data/sample_preprocessor.py +58 -0
- radnn/data/sample_set.py +203 -90
- radnn/data/sample_set_kind.py +126 -0
- radnn/data/sequence_dataset.py +25 -30
- radnn/data/structs/__init__.py +1 -0
- radnn/data/structs/tree.py +322 -0
- radnn/data_beta/__init__.py +12 -0
- radnn/{data → data_beta}/data_feed.py +1 -1
- radnn/data_beta/dataset_base.py +337 -0
- radnn/data_beta/sample_set.py +166 -0
- radnn/data_beta/sequence_dataset.py +134 -0
- radnn/data_beta/structures/__init__.py +2 -0
- radnn/data_beta/structures/dictionary.py +41 -0
- radnn/{data → data_beta}/tf_classification_data_feed.py +5 -2
- radnn/errors.py +10 -2
- radnn/experiment/__init__.py +2 -0
- radnn/experiment/identification.py +7 -0
- radnn/experiment/ml_experiment.py +7 -2
- radnn/experiment/ml_experiment_log.py +47 -0
- radnn/images/image_processor.py +4 -1
- radnn/learn/__init__.py +0 -7
- radnn/learn/keras/__init__.py +4 -0
- radnn/learn/{state → keras}/keras_best_state_saver.py +5 -1
- radnn/learn/{learning_algorithm.py → keras/keras_learning_algorithm.py} +5 -9
- radnn/learn/{keras_learning_rate_scheduler.py → keras/keras_learning_rate_scheduler.py} +4 -1
- radnn/learn/{keras_optimization_algorithm.py → keras/keras_optimization_combo.py} +7 -3
- radnn/learn/torch/__init__.py +3 -0
- radnn/learn/torch/ml_model_freezer.py +330 -0
- radnn/learn/torch/ml_trainer.py +461 -0
- radnn/learn/torch/staircase_lr_scheduler.py +21 -0
- radnn/ml_system.py +68 -52
- radnn/models/__init__.py +5 -0
- radnn/models/cnn/__init__.py +0 -0
- radnn/models/cnn/cnn_stem_setup.py +35 -0
- radnn/models/model_factory.py +85 -0
- radnn/models/model_hyperparams.py +128 -0
- radnn/models/model_info.py +91 -0
- radnn/plots/plot_learning_curve.py +19 -8
- radnn/system/__init__.py +1 -0
- radnn/system/files/__init__.py +1 -1
- radnn/system/files/csvfile.py +37 -5
- radnn/system/files/filelist.py +30 -0
- radnn/system/files/fileobject.py +11 -1
- radnn/system/files/imgfile.py +1 -1
- radnn/system/files/jsonfile.py +37 -9
- radnn/system/files/picklefile.py +3 -3
- radnn/system/files/textfile.py +39 -10
- radnn/system/files/zipfile.py +96 -0
- radnn/system/filestore.py +147 -47
- radnn/system/filesystem.py +3 -3
- radnn/test/__init__.py +1 -0
- radnn/test/tensor_hash.py +130 -0
- radnn/utils.py +16 -2
- radnn-0.1.0.dist-info/METADATA +30 -0
- radnn-0.1.0.dist-info/RECORD +99 -0
- {radnn-0.0.8.dist-info → radnn-0.1.0.dist-info}/WHEEL +1 -1
- {radnn-0.0.8.dist-info → radnn-0.1.0.dist-info/licenses}/LICENSE.txt +1 -1
- radnn/learn/state/__init__.py +0 -4
- radnn-0.0.8.dist-info/METADATA +0 -58
- radnn-0.0.8.dist-info/RECORD +0 -70
- /radnn/{data → data_beta}/dataset_folder.py +0 -0
- /radnn/{data → data_beta}/image_dataset.py +0 -0
- /radnn/{data → data_beta}/image_dataset_files.py +0 -0
- /radnn/{data → data_beta}/preprocess/__init__.py +0 -0
- /radnn/{data → data_beta}/preprocess/normalizer.py +0 -0
- /radnn/{data → data_beta}/preprocess/standardizer.py +0 -0
- /radnn/{data → data_beta}/subset_type.py +0 -0
- {radnn-0.0.8.dist-info → radnn-0.1.0.dist-info}/top_level.txt +0 -0
radnn/__init__.py
CHANGED
|
@@ -3,14 +3,14 @@
|
|
|
3
3
|
# Version 0.0.6 [2025-02-04]
|
|
4
4
|
# Version 0.0.7.2 [2025-02-17]
|
|
5
5
|
# Version 0.0.7.3 [2025-02-21]
|
|
6
|
-
# Version 0.0.8 [2025-02-
|
|
7
|
-
|
|
6
|
+
# Version 0.0.8 [2025-02-25]
|
|
7
|
+
# Version 0.0.9 [2025-04-15]
|
|
8
|
+
# Version 0.1.0 [2026-01-06]
|
|
9
|
+
# Version 0.1.1 [2025-01-xx]
|
|
10
|
+
__version__ = "0.1.0"
|
|
8
11
|
|
|
9
12
|
from .system import FileStore, FileSystem
|
|
10
13
|
from .ml_system import MLSystem
|
|
11
14
|
from .ml_system import mlsys
|
|
12
15
|
from .utils import print_tensor
|
|
13
16
|
from .errors import Errors
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .latency import timed_method, mlbench
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import time
|
|
3
|
+
import functools
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class LatencyBenchmark(dict):
|
|
7
|
+
def __init__(self):
|
|
8
|
+
self.enabled = False
|
|
9
|
+
|
|
10
|
+
def stats(self, key):
|
|
11
|
+
nSeries = np.asarray(self.get(key, [0.0]), np.float32)
|
|
12
|
+
nMean = np.mean(nSeries)
|
|
13
|
+
nStd = np.std(nSeries)
|
|
14
|
+
nMax = np.max(nSeries)
|
|
15
|
+
nMin = np.min(nSeries)
|
|
16
|
+
return nMin, nMax, nMean, nStd
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
mlbench = LatencyBenchmark()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def timed_method(name: str | None = None):
|
|
24
|
+
"""
|
|
25
|
+
Decorator to measure elapsed time using a high-resolution timer.
|
|
26
|
+
|
|
27
|
+
Usage:
|
|
28
|
+
@timeit()
|
|
29
|
+
def f(...):
|
|
30
|
+
|
|
31
|
+
@timeit("custom_name")
|
|
32
|
+
def g(...):
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
def decorator(func):
|
|
36
|
+
label = name or func.__qualname__
|
|
37
|
+
|
|
38
|
+
@functools.wraps(func)
|
|
39
|
+
def wrapper(*args, **kwargs):
|
|
40
|
+
if not mlbench.enabled:
|
|
41
|
+
return func(*args, **kwargs)
|
|
42
|
+
else:
|
|
43
|
+
start = time.perf_counter()
|
|
44
|
+
try:
|
|
45
|
+
return func(*args, **kwargs)
|
|
46
|
+
finally:
|
|
47
|
+
end = time.perf_counter()
|
|
48
|
+
elapsed = end - start
|
|
49
|
+
if not label in mlbench:
|
|
50
|
+
mlbench[label] = []
|
|
51
|
+
mlbench[label].append(elapsed * 1e3)
|
|
52
|
+
|
|
53
|
+
return wrapper
|
|
54
|
+
|
|
55
|
+
return decorator
|
radnn/core.py
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
# ______________________________________________________________________________________
|
|
7
7
|
# ......................................................................................
|
|
8
8
|
|
|
9
|
-
# Copyright (c) 2018-
|
|
9
|
+
# Copyright (c) 2018-2026 Pantelis I. Kaplanoglou
|
|
10
10
|
|
|
11
11
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
12
12
|
# of this software and associated documentation files (the "Software"), to deal
|
|
@@ -30,8 +30,23 @@
|
|
|
30
30
|
import sys
|
|
31
31
|
import socket
|
|
32
32
|
import platform
|
|
33
|
+
import psutil
|
|
33
34
|
import subprocess
|
|
34
35
|
from datetime import datetime
|
|
36
|
+
import importlib
|
|
37
|
+
import importlib.util
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class RequiredLibs(object):
|
|
41
|
+
def __init__(self):
|
|
42
|
+
self.is_tensorflow_installed = importlib.util.find_spec("tensorflow") is not None
|
|
43
|
+
if not self.is_tensorflow_installed:
|
|
44
|
+
self.is_tensorflow_installed = importlib.util.find_spec("tensorflow-gpu") is not None
|
|
45
|
+
self.is_torch_installed = importlib.util.find_spec("torch") is not None
|
|
46
|
+
self.is_opencv_installed = importlib.util.find_spec("cv2") is not None
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
|
|
35
50
|
|
|
36
51
|
# ----------------------------------------------------------------------------------------------------------------------
|
|
37
52
|
def system_name() -> str:
|
|
@@ -96,4 +111,133 @@ class MLInfrastructure(object):
|
|
|
96
111
|
sResult = "(linux)-" + sHostName
|
|
97
112
|
return sResult
|
|
98
113
|
# --------------------------------------------------------------------------------------------------------------------
|
|
99
|
-
# ======================================================================================================================
|
|
114
|
+
# ======================================================================================================================
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
# ======================================================================================================================
|
|
119
|
+
class HardwareDevice(object):
|
|
120
|
+
def __init__(self, name):
|
|
121
|
+
self.name = name
|
|
122
|
+
|
|
123
|
+
def __str__(self):
|
|
124
|
+
return self.name
|
|
125
|
+
|
|
126
|
+
def __repr__(self):
|
|
127
|
+
return self.__str__()
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
# ======================================================================================================================
|
|
131
|
+
class CPU(HardwareDevice):
|
|
132
|
+
# --------------------------------------------------------------------------------------------------------------------
|
|
133
|
+
def __init__(self, name):
|
|
134
|
+
super(CPU, self).__init__(name)
|
|
135
|
+
self._cpuid()
|
|
136
|
+
# --------------------------------------------------------------------------------------------------------------------
|
|
137
|
+
def _cpuid(self):
|
|
138
|
+
'''
|
|
139
|
+
CPU Identification for both Windows and Linux
|
|
140
|
+
'''
|
|
141
|
+
sPlatform = platform.system()
|
|
142
|
+
|
|
143
|
+
if sPlatform == "Windows":
|
|
144
|
+
oCPUs = subprocess.check_output(
|
|
145
|
+
["powershell", "-Command",
|
|
146
|
+
"(Get-CimInstance Win32_Processor | Select-Object -ExpandProperty Name) -join \"\n\""],
|
|
147
|
+
text=True
|
|
148
|
+
).strip().splitlines()
|
|
149
|
+
|
|
150
|
+
self.name = ""
|
|
151
|
+
for sCPU in oCPUs:
|
|
152
|
+
self.name += f', {sCPU}'
|
|
153
|
+
|
|
154
|
+
elif sPlatform == "Darwin":
|
|
155
|
+
pass #MacOS
|
|
156
|
+
else:
|
|
157
|
+
self.name = ""
|
|
158
|
+
with open("/proc/cpuinfo") as f:
|
|
159
|
+
for line in f:
|
|
160
|
+
line = line.strip()
|
|
161
|
+
if line.startswith("model name"):
|
|
162
|
+
self.name += f', {line.split(":", 1)[1].strip()}'
|
|
163
|
+
if self.name.startswith(", "):
|
|
164
|
+
self.name = self.name[2:]
|
|
165
|
+
# --------------------------------------------------------------------------------------------------------------------
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
# ======================================================================================================================
|
|
169
|
+
class NeuralProcessingUnit(HardwareDevice):
|
|
170
|
+
def __init__(self, name):
|
|
171
|
+
super(NeuralProcessingUnit, self).__init__(name)
|
|
172
|
+
self.compute_capability = None
|
|
173
|
+
self.vram_in_gb = None
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
# ======================================================================================================================
|
|
177
|
+
class AIGridInfo(HardwareDevice):
|
|
178
|
+
# --------------------------------------------------------------------------------------------------------------------
|
|
179
|
+
def __init__(self, name=None):
|
|
180
|
+
self.name = name
|
|
181
|
+
if self.name is None:
|
|
182
|
+
self.name = socket.gethostname()
|
|
183
|
+
self.cpu = CPU(platform.processor())
|
|
184
|
+
|
|
185
|
+
mem = psutil.virtual_memory()
|
|
186
|
+
total_bytes = mem.total
|
|
187
|
+
self.ram_in_gb = round(total_bytes / (1024 ** 3))
|
|
188
|
+
|
|
189
|
+
self.devices = []
|
|
190
|
+
# --------------------------------------------------------------------------------------------------------------------
|
|
191
|
+
@property
|
|
192
|
+
def device(self):
|
|
193
|
+
if len(self.devices) > 0:
|
|
194
|
+
return self.devices[0]
|
|
195
|
+
else:
|
|
196
|
+
return self.cpu
|
|
197
|
+
# --------------------------------------------------------------------------------------------------------------------
|
|
198
|
+
def discover_devices(self, framework_type: str = "torch"):
|
|
199
|
+
'''
|
|
200
|
+
Detects the AI accelerators using the framework libraries
|
|
201
|
+
:param framework_type: The framework that is used "torch" or "tensorflow"
|
|
202
|
+
:return:
|
|
203
|
+
'''
|
|
204
|
+
self.cpu = CPU(platform.processor())
|
|
205
|
+
|
|
206
|
+
if framework_type == "torch":
|
|
207
|
+
import torch
|
|
208
|
+
device_count = torch.cuda.device_count()
|
|
209
|
+
for i in range(device_count):
|
|
210
|
+
oUnit = NeuralProcessingUnit(torch.cuda.get_device_name(i))
|
|
211
|
+
oUnit.compute_capability = torch.cuda.get_device_capability(i)
|
|
212
|
+
oUnit.vram_in_gb = round(torch.cuda.get_device_properties(i).total_memory / (1024 ** 3))
|
|
213
|
+
self.devices.append(oUnit)
|
|
214
|
+
elif framework_type == "tensorflow":
|
|
215
|
+
import tensorflow as tf
|
|
216
|
+
gpus = tf.config.list_physical_devices("GPU")
|
|
217
|
+
for gpu in gpus:
|
|
218
|
+
details = tf.config.experimental.get_device_details(gpu)
|
|
219
|
+
oUnit = NeuralProcessingUnit(details["device_name"])
|
|
220
|
+
oUnit.compute_capability = details["compute_capability"]
|
|
221
|
+
print(details)
|
|
222
|
+
return self
|
|
223
|
+
# --------------------------------------------------------------------------------------------------------------------
|
|
224
|
+
def __str__(self):
|
|
225
|
+
sResult = f'{"|"*24}\n'
|
|
226
|
+
sResult += f"|| [{self.name[:16]:^16}] ||\n"
|
|
227
|
+
sResult = f'{"|"*24}\n'
|
|
228
|
+
sResult += f" |__ CPU: {self.cpu}\n"
|
|
229
|
+
sResult += f" |__ RAM: {self.ram_in_gb} GB\n"
|
|
230
|
+
sResult += f" |__ NPUs\n"
|
|
231
|
+
for oDevice in self.devices:
|
|
232
|
+
if isinstance(oDevice, NeuralProcessingUnit):
|
|
233
|
+
sResult += f'{" "*5} |__ {oDevice.name} {oDevice.vram_in_gb} GB\n'
|
|
234
|
+
else:
|
|
235
|
+
sResult += f'{" "*5} |__ {oDevice} \n'
|
|
236
|
+
return sResult
|
|
237
|
+
# --------------------------------------------------------------------------------------------------------------------
|
|
238
|
+
def __repr__(self):
|
|
239
|
+
return self.__str__()
|
|
240
|
+
# --------------------------------------------------------------------------------------------------------------------
|
|
241
|
+
|
|
242
|
+
# ======================================================================================================================
|
|
243
|
+
|
radnn/data/__init__.py
CHANGED
|
@@ -1,12 +1,7 @@
|
|
|
1
|
-
from .dataset_base import DataSetBase
|
|
2
|
-
from .
|
|
1
|
+
from .dataset_base import DataSetBase, DataSetCallbacks
|
|
2
|
+
from .dataset_base_legacy import CDataSetBase
|
|
3
|
+
from .sequence_dataset import SequenceDataSet
|
|
3
4
|
from .sample_set import SampleSet
|
|
4
|
-
from .
|
|
5
|
-
from .
|
|
6
|
-
from .sample_set import SampleSet
|
|
7
|
-
from radnn import mlsys
|
|
8
|
-
if mlsys.is_tensorflow_installed:
|
|
9
|
-
from .tf_classification_data_feed import TFClassificationDataFeed
|
|
10
|
-
|
|
11
|
-
from .image_dataset_files import ImageDataSetFiles
|
|
5
|
+
from .sample_set_kind import SampleSetKind
|
|
6
|
+
from .sample_preprocessor import SamplePreprocessor
|
|
12
7
|
|
radnn/data/dataset_base.py
CHANGED
|
@@ -22,112 +22,105 @@
|
|
|
22
22
|
# SOFTWARE.
|
|
23
23
|
|
|
24
24
|
# ......................................................................................
|
|
25
|
+
import os
|
|
25
26
|
import numpy as np
|
|
26
27
|
import pandas as pd
|
|
27
|
-
from
|
|
28
|
-
from
|
|
28
|
+
from abc import ABC, abstractmethod
|
|
29
|
+
from .sample_set import SampleSet
|
|
30
|
+
from .sample_set_kind import SampleSetKind
|
|
31
|
+
from .sample_preprocessor import SamplePreprocessor, VoidPreprocessor
|
|
32
|
+
from .errors import *
|
|
33
|
+
from radnn import FileStore
|
|
29
34
|
|
|
35
|
+
# ======================================================================================================================
|
|
36
|
+
class DataSetCallbacks(object):
|
|
37
|
+
# --------------------------------------------------------------------------------------------------------------------
|
|
38
|
+
def __init__(self, lazy_loader=None, random_seeder=None):
|
|
39
|
+
self.lazy_loader = lazy_loader
|
|
40
|
+
self.random_seeder = random_seeder
|
|
41
|
+
# --------------------------------------------------------------------------------------------------------------------
|
|
42
|
+
def lazy_load(self):
|
|
43
|
+
self.lazy_loader()
|
|
44
|
+
# --------------------------------------------------------------------------------------------------------------------
|
|
45
|
+
def initialize_random_seed(self, seed: int):
|
|
46
|
+
self.random_seeder(seed)
|
|
47
|
+
# --------------------------------------------------------------------------------------------------------------------
|
|
30
48
|
|
|
31
|
-
|
|
49
|
+
|
|
50
|
+
# ======================================================================================================================
|
|
51
|
+
class DataSetBase(ABC):
|
|
32
52
|
# --------------------------------------------------------------------------------------------------------------------
|
|
33
53
|
# Constructor
|
|
34
|
-
def __init__(self,
|
|
54
|
+
def __init__(self, name: str, variant: str|None=None, file_store=None, random_seed: int | None=None, callbacks: DataSetCallbacks | None = None):
|
|
35
55
|
# ..................// Instance Fields \\.........................
|
|
36
|
-
self.
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
self.
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
self.filestore = self.fs.datasets.subfs(name.upper())
|
|
54
|
-
elif isinstance(self.fs, FileStore):
|
|
55
|
-
self.filestore = self.fs
|
|
56
|
-
elif isinstance(self.fs, str):
|
|
57
|
-
self.filestore = FileStore(self.fs)
|
|
58
|
-
else:
|
|
59
|
-
raise Exception("The parameter fs could be a path, a filestore or a filesystem")
|
|
60
|
-
else:
|
|
61
|
-
raise Exception("Could not determine the filestore for the dataset")
|
|
62
|
-
|
|
63
|
-
self.random_seed = random_seed
|
|
64
|
-
self.is_classification = is_classification
|
|
65
|
-
|
|
66
|
-
self.feature_count = None
|
|
67
|
-
self.class_count = None
|
|
68
|
-
self.class_names = None
|
|
69
|
-
self.sample_count = None
|
|
70
|
-
|
|
71
|
-
self.samples = None
|
|
72
|
-
self.labels = None
|
|
73
|
-
|
|
74
|
-
self.ts_sample_ids = None
|
|
75
|
-
self.ts_samples = None
|
|
76
|
-
self.ts_labels = None
|
|
77
|
-
self.ts_sample_count = 0
|
|
78
|
-
|
|
79
|
-
self.vs_sample_ids = None
|
|
80
|
-
self.vs_samples = None
|
|
81
|
-
self.vs_labels = None
|
|
82
|
-
self.vs_sample_count = 0
|
|
83
|
-
|
|
84
|
-
self.ut_sample_ids = None
|
|
85
|
-
self.ut_samples = None
|
|
86
|
-
self.ut_labels = None
|
|
87
|
-
self.ut_sample_count = None
|
|
88
|
-
|
|
89
|
-
self.sample_shape = None
|
|
90
|
-
|
|
91
|
-
self.card = dict()
|
|
92
|
-
self.card["name"] = name
|
|
56
|
+
self.fs: FileStore|None = file_store
|
|
57
|
+
if (file_store is not None) and isinstance(file_store, str):
|
|
58
|
+
if not os.path.exists(file_store):
|
|
59
|
+
raise Exception(ERR_DATASET_FOLDER_NOT_FOUND % file_store)
|
|
60
|
+
self.fs = FileStore(file_store)
|
|
61
|
+
assert self.fs is not None, ERR_DATASET_MUST_PROVIDE_LOCAL_FILESTORE
|
|
62
|
+
|
|
63
|
+
self.name = name
|
|
64
|
+
self.variant = variant
|
|
65
|
+
self.random_seed = random_seed
|
|
66
|
+
self.callbacks: DataSetCallbacks = callbacks
|
|
67
|
+
|
|
68
|
+
self.hparams :dict|None = None
|
|
69
|
+
self.ts: SampleSet|None = None
|
|
70
|
+
self.vs: SampleSet|None = None
|
|
71
|
+
self.ut: SampleSet|None = None
|
|
72
|
+
self.preprocessor: SamplePreprocessor = VoidPreprocessor(self)
|
|
93
73
|
# ................................................................
|
|
94
|
-
if self.random_seed is not None:
|
|
95
|
-
|
|
74
|
+
if (self.random_seed is not None):
|
|
75
|
+
assert self.callbacks is not None, ERR_NO_CALLBACKS
|
|
76
|
+
assert self.callbacks.random_seeder is not None, ERR_NO_RANDOM_SEED_INITIALIZER_CALLBACK
|
|
77
|
+
self.callbacks.initialize_random_seed(self.random_seed)
|
|
96
78
|
|
|
97
79
|
# --------------------------------------------------------------------------------------------------------------------
|
|
98
|
-
|
|
99
|
-
|
|
80
|
+
@property
|
|
81
|
+
def filesystem_folder(self):
|
|
82
|
+
return self.fs.absolute_path
|
|
100
83
|
# --------------------------------------------------------------------------------------------------------------------
|
|
101
|
-
|
|
102
|
-
|
|
84
|
+
@abstractmethod
|
|
85
|
+
def do_read_hyperparams(self):
|
|
86
|
+
pass # must implement concrete method
|
|
103
87
|
# --------------------------------------------------------------------------------------------------------------------
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
return self
|
|
88
|
+
@abstractmethod
|
|
89
|
+
def do_import_data(self):
|
|
90
|
+
pass # must implement concrete method
|
|
91
|
+
# --------------------------------------------------------------------------------------------------------------------
|
|
92
|
+
@abstractmethod
|
|
93
|
+
def do_prepare_data(self):
|
|
94
|
+
pass # could optionally override
|
|
95
|
+
# --------------------------------------------------------------------------------------------------------------------
|
|
96
|
+
@abstractmethod
|
|
97
|
+
def do_create_sample_sets(self):
|
|
98
|
+
pass # must implement concrete method
|
|
116
99
|
# --------------------------------------------------------------------------------------------------------------------
|
|
117
|
-
def
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
self.
|
|
100
|
+
def prepare(self, hyperparams: dict|None = None):
|
|
101
|
+
self.hparams = hyperparams
|
|
102
|
+
if self.hparams is not None:
|
|
103
|
+
self.do_read_hyperparams()
|
|
121
104
|
|
|
122
|
-
if self.
|
|
123
|
-
self.
|
|
124
|
-
|
|
105
|
+
if (self.callbacks is not None):
|
|
106
|
+
if self.callbacks.lazy_loader is not None:
|
|
107
|
+
self.callbacks.lazy_loader()
|
|
108
|
+
self.do_import_data()
|
|
109
|
+
self.do_prepare_data()
|
|
110
|
+
|
|
111
|
+
self.ts = None
|
|
112
|
+
self.vs = None
|
|
113
|
+
self.us = None
|
|
114
|
+
self.do_create_sample_sets()
|
|
125
115
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
116
|
+
assert self.ts is not None, ERR_SUBSET_MUST_HAVE_TS
|
|
117
|
+
assert self.ts.info.kind == SampleSetKind.TRAINING_SET.value, ERR_SUBSET_INVALID_SETUP
|
|
118
|
+
if self.vs is not None:
|
|
119
|
+
assert self.ts.info.kind == SampleSetKind.TRAINING_SET.value, ERR_SUBSET_INVALID_SETUP
|
|
120
|
+
if self.us is not None:
|
|
121
|
+
assert self.ts.info.kind == SampleSetKind.TRAINING_SET.value, ERR_SUBSET_INVALID_SETUP
|
|
129
122
|
# --------------------------------------------------------------------------------------------------------------------
|
|
130
|
-
def assign(self, data,
|
|
123
|
+
def assign(self, data, label_columns: range):
|
|
131
124
|
if isinstance(data, tuple):
|
|
132
125
|
self.samples, self.labels = data
|
|
133
126
|
elif isinstance(data, np.ndarray):
|
|
@@ -144,182 +137,29 @@ class DataSetBase(object):
|
|
|
144
137
|
else:
|
|
145
138
|
nData = data.to_numpy()
|
|
146
139
|
|
|
147
|
-
if
|
|
140
|
+
if label_columns is None:
|
|
148
141
|
self.samples = nData
|
|
149
142
|
else:
|
|
150
|
-
if
|
|
151
|
-
if
|
|
152
|
-
self.labels = nData[:,
|
|
153
|
-
self.samples = nData[:,
|
|
143
|
+
if label_columns.start >= 0:
|
|
144
|
+
if label_columns.stop is None:
|
|
145
|
+
self.labels = nData[:, label_columns.start]
|
|
146
|
+
self.samples = nData[:, label_columns.start + 1:]
|
|
154
147
|
else:
|
|
155
|
-
self.labels = nData[:,
|
|
156
|
-
self.samples = nData[:,
|
|
148
|
+
self.labels = nData[:, label_columns.start:label_columns.stop + 1]
|
|
149
|
+
self.samples = nData[:, label_columns.stop + 1:]
|
|
157
150
|
else:
|
|
158
|
-
self.samples = nData[:, :
|
|
159
|
-
self.labels = nData[:,
|
|
160
|
-
return self
|
|
161
|
-
# --------------------------------------------------------------------------------------------------------------------
|
|
162
|
-
def assign_training_set(self, samples, labels):
|
|
163
|
-
self.ts_samples = samples
|
|
164
|
-
self.ts_labels = labels
|
|
165
|
-
self.count_samples()
|
|
166
|
-
self.ts_sample_ids = np.arange(0, self.ts_sample_count)
|
|
167
|
-
|
|
168
|
-
# Feature count is calculated on samples that are flattened as vectors
|
|
169
|
-
if self.feature_count is None:
|
|
170
|
-
self.feature_count = np.prod(self.ts_samples.shape[1:])
|
|
171
|
-
|
|
172
|
-
if self.class_count is None:
|
|
173
|
-
if self.is_classification:
|
|
174
|
-
self.class_count = len(np.unique(self.ts_labels))
|
|
175
|
-
else:
|
|
176
|
-
self.class_count = 0
|
|
177
|
-
return self
|
|
178
|
-
# --------------------------------------------------------------------------------------------------------------------
|
|
179
|
-
def assign_validation_set(self, samples, labels):
|
|
180
|
-
self.vs_samples = samples
|
|
181
|
-
self.vs_labels = labels
|
|
182
|
-
self.count_samples()
|
|
183
|
-
self.vs_sample_ids = np.arange(0, self.vs_sample_count)
|
|
184
|
-
|
|
151
|
+
self.samples = nData[:, :label_columns.start]
|
|
152
|
+
self.labels = nData[:, label_columns.start:]
|
|
185
153
|
return self
|
|
186
154
|
# --------------------------------------------------------------------------------------------------------------------
|
|
187
|
-
def assign_unknown_test_set(self, samples, labels):
|
|
188
|
-
self.ut_samples = samples
|
|
189
|
-
self.ut_labels = labels
|
|
190
|
-
self.count_samples()
|
|
191
|
-
self.ut_sample_ids = np.arange(0, self.ut_sample_count)
|
|
192
|
-
|
|
193
|
-
return self
|
|
194
|
-
# --------------------------------------------------------------------------------------------------------------------
|
|
195
|
-
def infox(self):
|
|
196
|
-
self.print_info()
|
|
197
|
-
# --------------------------------------------------------------------------------------------------------------------
|
|
198
155
|
def print_info(self):
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
if self.is_classification:
|
|
202
|
-
print(" |__ ClassCount:", self.class_count)
|
|
203
|
-
if self.class_names is not None:
|
|
204
|
-
print(" |__ Classes:", self.class_names)
|
|
205
|
-
|
|
206
|
-
if self.ts_samples is not None:
|
|
207
|
-
print(" |__ Training set samples : %d shape:%s" % (self.ts_sample_count, self.ts_samples.shape))
|
|
208
|
-
if self.ts_labels is not None:
|
|
209
|
-
print(" |__ Training set targets : %d shape:%s" % (self.ts_sample_count, self.ts_labels.shape))
|
|
210
|
-
|
|
211
|
-
if self.vs_samples is not None:
|
|
212
|
-
print(" |__ Validation set samples: %d shape:%s" % (self.vs_sample_count, self.vs_samples.shape))
|
|
213
|
-
if self.vs_labels is not None:
|
|
214
|
-
print(" |__ Validation set targets: %d shape:%s" % (self.vs_sample_count, self.vs_labels.shape))
|
|
215
|
-
|
|
216
|
-
if self.ut_samples is not None:
|
|
217
|
-
print(" |__ MemoryTest set samples : %d shape:%s" % (self.ut_sample_count, self.ut_samples.shape))
|
|
218
|
-
if self.ut_labels is not None:
|
|
219
|
-
print(" |__ MemoryTest set targets : %d shape:%s" % (self.ut_sample_count, self.ut_labels.shape))
|
|
220
|
-
# --------------------------------------------------------------------------------------------------------------------
|
|
221
|
-
def split(self, training_samples_pc, random_seed=None):
|
|
222
|
-
if random_seed is None:
|
|
223
|
-
random_seed = self.random_seed
|
|
224
|
-
|
|
225
|
-
nTSSamples, nVSSamples, nTSTargets, nVSTargets = train_test_split(self.samples, self.labels
|
|
226
|
-
, test_size=1.0 - training_samples_pc
|
|
227
|
-
, random_state=random_seed
|
|
228
|
-
, shuffle=True
|
|
229
|
-
, stratify=self.labels
|
|
230
|
-
)
|
|
231
|
-
self.assign_training_set(nTSSamples, nTSTargets)
|
|
232
|
-
self.assign_validation_set(nVSSamples, nVSTargets)
|
|
233
|
-
self.count_samples()
|
|
234
|
-
return self
|
|
235
|
-
# --------------------------------------------------------------------------------------------------------------------
|
|
236
|
-
def has_cache(self, samples_file_prefix="Samples"):
|
|
237
|
-
return self.filestore.exists("%s.pkl" % samples_file_prefix) or self.filestore.exists("%s.TS.pkl" % samples_file_prefix)
|
|
238
|
-
# --------------------------------------------------------------------------------------------------------------------
|
|
239
|
-
def load_cache(self, filestore: FileStore = None, samples_file_prefix="Samples", targets_file_prefix="Labels", is_verbose=False):
|
|
240
|
-
if filestore is None:
|
|
241
|
-
filestore = self.filestore
|
|
242
|
-
if filestore is None:
|
|
243
|
-
raise Exception("To use load_cache() without providing a filestore, you should provide a filesystem or filestore during instantiation.")
|
|
244
|
-
|
|
245
|
-
bResult = filestore.exists("%s.pkl" % samples_file_prefix) or filestore.exists("%s.TS.pkl" % samples_file_prefix)
|
|
246
|
-
|
|
247
|
-
if bResult:
|
|
248
|
-
if is_verbose:
|
|
249
|
-
print("Loading known data set ...")
|
|
250
|
-
|
|
251
|
-
dInfo = filestore.json.load(f"{self.name}_info.json")
|
|
252
|
-
if dInfo is not None:
|
|
253
|
-
if "class_names" in dInfo: self.class_names = dInfo["class_names"]
|
|
254
|
-
if "feature_count" in dInfo: self.feature_count = dInfo["feature_count"]
|
|
255
|
-
if "class_count" in dInfo:
|
|
256
|
-
self.is_classification = True
|
|
257
|
-
self.class_count = dInfo["class_count"]
|
|
258
|
-
|
|
259
|
-
self.samples = filestore.obj.load("%s.pkl" % samples_file_prefix)
|
|
260
|
-
self.labels = filestore.obj.load("%s.pkl" % targets_file_prefix)
|
|
261
|
-
|
|
262
|
-
if is_verbose:
|
|
263
|
-
print("Loading training set ...")
|
|
264
|
-
nTSSamples = filestore.obj.load("%s.TS.pkl" % samples_file_prefix)
|
|
265
|
-
nTSTargets = filestore.obj.load("%s.TS.pkl" % targets_file_prefix)
|
|
266
|
-
self.assign_training_set(nTSSamples, nTSTargets)
|
|
267
|
-
|
|
268
|
-
if is_verbose:
|
|
269
|
-
print("Loading validation set ...")
|
|
270
|
-
nVSSamples = filestore.obj.load("%s.VS.pkl" % samples_file_prefix)
|
|
271
|
-
nVSTargets = filestore.obj.load("%s.VS.pkl" % targets_file_prefix)
|
|
272
|
-
self.assign_validation_set(nVSSamples, nVSTargets)
|
|
273
|
-
|
|
274
|
-
if is_verbose:
|
|
275
|
-
print("Loading unknown test data set ...")
|
|
276
|
-
nUTSamples = filestore.obj.load("%s.UT.pkl" % samples_file_prefix)
|
|
277
|
-
if nUTSamples is not None:
|
|
278
|
-
nUTTargets = filestore.obj.load("%s.UT.pkl" % targets_file_prefix)
|
|
279
|
-
self.assign_unknown_test_set(nUTSamples, nUTTargets)
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
return bResult
|
|
283
|
-
# --------------------------------------------------------------------------------------------------------------------
|
|
284
|
-
def save_cache(self, filestore: FileStore = None, samples_file_prefix="Samples", targets_file_prefix="Labels"):
|
|
285
|
-
if filestore is None:
|
|
286
|
-
filestore = self.filestore
|
|
287
|
-
if filestore is None:
|
|
288
|
-
raise Exception("To use save_cache() without providing a filestore, you should provide a filesystem or filestore during instantiation.")
|
|
289
|
-
|
|
290
|
-
if self.samples is not None:
|
|
291
|
-
filestore.obj.save(self.samples, "%s.pkl" % samples_file_prefix, is_overwriting=True)
|
|
292
|
-
filestore.obj.save(self.labels, "%s.pkl" % targets_file_prefix, is_overwriting=True)
|
|
293
|
-
|
|
294
|
-
filestore.obj.save(self.ts_samples, "%s.TS.pkl" % samples_file_prefix, is_overwriting=True)
|
|
295
|
-
filestore.obj.save(self.ts_labels, "%s.TS.pkl" % targets_file_prefix, is_overwriting=True)
|
|
296
|
-
|
|
297
|
-
filestore.obj.save(self.vs_samples, "%s.VS.pkl" % samples_file_prefix, is_overwriting=True)
|
|
298
|
-
filestore.obj.save(self.vs_labels, "%s.VS.pkl" % targets_file_prefix, is_overwriting=True)
|
|
299
|
-
|
|
300
|
-
if self.ut_samples is not None:
|
|
301
|
-
filestore.obj.save(self.ut_samples, "%s.UT.pkl" % samples_file_prefix, is_overwriting=True)
|
|
302
|
-
filestore.obj.save(self.ut_labels, "%s.UT.pkl" % targets_file_prefix, is_overwriting=True)
|
|
303
|
-
|
|
304
|
-
self.card["name"] = self.name
|
|
305
|
-
if self.feature_count is not None:
|
|
306
|
-
self.card["feature_count"] = int(self.feature_count)
|
|
156
|
+
if self.variant is not None:
|
|
157
|
+
print(f"Dataset [{self.name}] {self.variant}")
|
|
307
158
|
else:
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
if self.
|
|
311
|
-
self.
|
|
312
|
-
|
|
313
|
-
self.
|
|
314
|
-
|
|
315
|
-
if self.is_classification:
|
|
316
|
-
if self.class_count is not None:
|
|
317
|
-
self.card["class_count"] = int(self.class_count)
|
|
318
|
-
else:
|
|
319
|
-
self.card["class_count"] = self.class_count
|
|
320
|
-
self.card["class_names"] = self.class_names
|
|
321
|
-
|
|
322
|
-
filestore.json.save(self.card, f"{self.name}_card.json", is_sorted_keys=False)
|
|
159
|
+
print(f"Dataset [{self.name}]")
|
|
160
|
+
self.ts.print_info()
|
|
161
|
+
if self.vs is not None:
|
|
162
|
+
self.vs.print_info()
|
|
163
|
+
if self.ut is not None:
|
|
164
|
+
self.ut.print_info()
|
|
323
165
|
# --------------------------------------------------------------------------------------------------------------------
|
|
324
|
-
|
|
325
|
-
|