radnn 0.0.8__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. radnn/__init__.py +5 -5
  2. radnn/benchmark/__init__.py +1 -0
  3. radnn/benchmark/latency.py +55 -0
  4. radnn/core.py +146 -2
  5. radnn/data/__init__.py +5 -10
  6. radnn/data/dataset_base.py +100 -260
  7. radnn/data/dataset_base_legacy.py +280 -0
  8. radnn/data/errors.py +32 -0
  9. radnn/data/sample_preprocessor.py +58 -0
  10. radnn/data/sample_set.py +203 -90
  11. radnn/data/sample_set_kind.py +126 -0
  12. radnn/data/sequence_dataset.py +25 -30
  13. radnn/data/structs/__init__.py +1 -0
  14. radnn/data/structs/tree.py +322 -0
  15. radnn/data_beta/__init__.py +12 -0
  16. radnn/{data → data_beta}/data_feed.py +1 -1
  17. radnn/data_beta/dataset_base.py +337 -0
  18. radnn/data_beta/sample_set.py +166 -0
  19. radnn/data_beta/sequence_dataset.py +134 -0
  20. radnn/data_beta/structures/__init__.py +2 -0
  21. radnn/data_beta/structures/dictionary.py +41 -0
  22. radnn/{data → data_beta}/tf_classification_data_feed.py +5 -2
  23. radnn/errors.py +10 -2
  24. radnn/experiment/__init__.py +2 -0
  25. radnn/experiment/identification.py +7 -0
  26. radnn/experiment/ml_experiment.py +7 -2
  27. radnn/experiment/ml_experiment_log.py +47 -0
  28. radnn/images/image_processor.py +4 -1
  29. radnn/learn/__init__.py +0 -7
  30. radnn/learn/keras/__init__.py +4 -0
  31. radnn/learn/{state → keras}/keras_best_state_saver.py +5 -1
  32. radnn/learn/{learning_algorithm.py → keras/keras_learning_algorithm.py} +5 -9
  33. radnn/learn/{keras_learning_rate_scheduler.py → keras/keras_learning_rate_scheduler.py} +4 -1
  34. radnn/learn/{keras_optimization_algorithm.py → keras/keras_optimization_combo.py} +7 -3
  35. radnn/learn/torch/__init__.py +3 -0
  36. radnn/learn/torch/ml_model_freezer.py +330 -0
  37. radnn/learn/torch/ml_trainer.py +461 -0
  38. radnn/learn/torch/staircase_lr_scheduler.py +21 -0
  39. radnn/ml_system.py +68 -52
  40. radnn/models/__init__.py +5 -0
  41. radnn/models/cnn/__init__.py +0 -0
  42. radnn/models/cnn/cnn_stem_setup.py +35 -0
  43. radnn/models/model_factory.py +85 -0
  44. radnn/models/model_hyperparams.py +128 -0
  45. radnn/models/model_info.py +91 -0
  46. radnn/plots/plot_learning_curve.py +19 -8
  47. radnn/system/__init__.py +1 -0
  48. radnn/system/files/__init__.py +1 -1
  49. radnn/system/files/csvfile.py +37 -5
  50. radnn/system/files/filelist.py +30 -0
  51. radnn/system/files/fileobject.py +11 -1
  52. radnn/system/files/imgfile.py +1 -1
  53. radnn/system/files/jsonfile.py +37 -9
  54. radnn/system/files/picklefile.py +3 -3
  55. radnn/system/files/textfile.py +39 -10
  56. radnn/system/files/zipfile.py +96 -0
  57. radnn/system/filestore.py +147 -47
  58. radnn/system/filesystem.py +3 -3
  59. radnn/test/__init__.py +1 -0
  60. radnn/test/tensor_hash.py +130 -0
  61. radnn/utils.py +16 -2
  62. radnn-0.1.0.dist-info/METADATA +30 -0
  63. radnn-0.1.0.dist-info/RECORD +99 -0
  64. {radnn-0.0.8.dist-info → radnn-0.1.0.dist-info}/WHEEL +1 -1
  65. {radnn-0.0.8.dist-info → radnn-0.1.0.dist-info/licenses}/LICENSE.txt +1 -1
  66. radnn/learn/state/__init__.py +0 -4
  67. radnn-0.0.8.dist-info/METADATA +0 -58
  68. radnn-0.0.8.dist-info/RECORD +0 -70
  69. /radnn/{data → data_beta}/dataset_folder.py +0 -0
  70. /radnn/{data → data_beta}/image_dataset.py +0 -0
  71. /radnn/{data → data_beta}/image_dataset_files.py +0 -0
  72. /radnn/{data → data_beta}/preprocess/__init__.py +0 -0
  73. /radnn/{data → data_beta}/preprocess/normalizer.py +0 -0
  74. /radnn/{data → data_beta}/preprocess/standardizer.py +0 -0
  75. /radnn/{data → data_beta}/subset_type.py +0 -0
  76. {radnn-0.0.8.dist-info → radnn-0.1.0.dist-info}/top_level.txt +0 -0
radnn/__init__.py CHANGED
@@ -3,14 +3,14 @@
3
3
  # Version 0.0.6 [2025-02-04]
4
4
  # Version 0.0.7.2 [2025-02-17]
5
5
  # Version 0.0.7.3 [2025-02-21]
6
- # Version 0.0.8 [2025-02-xx]
7
- __version__ = "0.0.8"
6
+ # Version 0.0.8 [2025-02-25]
7
+ # Version 0.0.9 [2025-04-15]
8
+ # Version 0.1.0 [2026-01-06]
9
+ # Version 0.1.1 [2026-01-xx]
10
+ __version__ = "0.1.0"
8
11
 
9
12
  from .system import FileStore, FileSystem
10
13
  from .ml_system import MLSystem
11
14
  from .ml_system import mlsys
12
15
  from .utils import print_tensor
13
16
  from .errors import Errors
14
-
15
-
16
-
@@ -0,0 +1 @@
1
+ from .latency import timed_method, mlbench
@@ -0,0 +1,55 @@
1
+ import numpy as np
2
+ import time
3
+ import functools
4
+
5
+
6
+ class LatencyBenchmark(dict):
7
+ def __init__(self):
8
+ self.enabled = False
9
+
10
+ def stats(self, key):
11
+ nSeries = np.asarray(self.get(key, [0.0]), np.float32)
12
+ nMean = np.mean(nSeries)
13
+ nStd = np.std(nSeries)
14
+ nMax = np.max(nSeries)
15
+ nMin = np.min(nSeries)
16
+ return nMin, nMax, nMean, nStd
17
+
18
+
19
+
20
+ mlbench = LatencyBenchmark()
21
+
22
+
23
+ def timed_method(name: str | None = None):
24
+ """
25
+ Decorator to measure elapsed time using a high-resolution timer.
26
+
27
+ Usage:
28
+ @timeit()
29
+ def f(...):
30
+
31
+ @timeit("custom_name")
32
+ def g(...):
33
+ """
34
+
35
+ def decorator(func):
36
+ label = name or func.__qualname__
37
+
38
+ @functools.wraps(func)
39
+ def wrapper(*args, **kwargs):
40
+ if not mlbench.enabled:
41
+ return func(*args, **kwargs)
42
+ else:
43
+ start = time.perf_counter()
44
+ try:
45
+ return func(*args, **kwargs)
46
+ finally:
47
+ end = time.perf_counter()
48
+ elapsed = end - start
49
+ if not label in mlbench:
50
+ mlbench[label] = []
51
+ mlbench[label].append(elapsed * 1e3)
52
+
53
+ return wrapper
54
+
55
+ return decorator
radnn/core.py CHANGED
@@ -6,7 +6,7 @@
6
6
  # ______________________________________________________________________________________
7
7
  # ......................................................................................
8
8
 
9
- # Copyright (c) 2018-2025 Pantelis I. Kaplanoglou
9
+ # Copyright (c) 2018-2026 Pantelis I. Kaplanoglou
10
10
 
11
11
  # Permission is hereby granted, free of charge, to any person obtaining a copy
12
12
  # of this software and associated documentation files (the "Software"), to deal
@@ -30,8 +30,23 @@
30
30
  import sys
31
31
  import socket
32
32
  import platform
33
+ import psutil
33
34
  import subprocess
34
35
  from datetime import datetime
36
+ import importlib
37
+ import importlib.util
38
+
39
+
40
class RequiredLibs(object):
  """Probes which optional ML/vision libraries are importable, without importing them."""

  def __init__(self):
    # find_spec() checks availability without paying the actual import cost.
    bHasTF = importlib.util.find_spec("tensorflow") is not None
    if not bHasTF:
      # NOTE(review): "tensorflow-gpu" is a pip distribution name, not an
      # importable module, so this fallback likely always yields None — confirm.
      bHasTF = importlib.util.find_spec("tensorflow-gpu") is not None
    self.is_tensorflow_installed = bHasTF
    self.is_torch_installed = importlib.util.find_spec("torch") is not None
    self.is_opencv_installed = importlib.util.find_spec("cv2") is not None
47
+
48
+
49
+
35
50
 
36
51
  # ----------------------------------------------------------------------------------------------------------------------
37
52
  def system_name() -> str:
@@ -96,4 +111,133 @@ class MLInfrastructure(object):
96
111
  sResult = "(linux)-" + sHostName
97
112
  return sResult
98
113
  # --------------------------------------------------------------------------------------------------------------------
99
- # ======================================================================================================================
114
+ # ======================================================================================================================
115
+
116
+
117
+
118
+ # ======================================================================================================================
119
+ class HardwareDevice(object):
120
+ def __init__(self, name):
121
+ self.name = name
122
+
123
+ def __str__(self):
124
+ return self.name
125
+
126
+ def __repr__(self):
127
+ return self.__str__()
128
+
129
+
130
+ # ======================================================================================================================
131
+ class CPU(HardwareDevice):
132
+ # --------------------------------------------------------------------------------------------------------------------
133
+ def __init__(self, name):
134
+ super(CPU, self).__init__(name)
135
+ self._cpuid()
136
+ # --------------------------------------------------------------------------------------------------------------------
137
+ def _cpuid(self):
138
+ '''
139
+ CPU Identification for both Windows and Linux
140
+ '''
141
+ sPlatform = platform.system()
142
+
143
+ if sPlatform == "Windows":
144
+ oCPUs = subprocess.check_output(
145
+ ["powershell", "-Command",
146
+ "(Get-CimInstance Win32_Processor | Select-Object -ExpandProperty Name) -join \"\n\""],
147
+ text=True
148
+ ).strip().splitlines()
149
+
150
+ self.name = ""
151
+ for sCPU in oCPUs:
152
+ self.name += f', {sCPU}'
153
+
154
+ elif sPlatform == "Darwin":
155
+ pass #MacOS
156
+ else:
157
+ self.name = ""
158
+ with open("/proc/cpuinfo") as f:
159
+ for line in f:
160
+ line = line.strip()
161
+ if line.startswith("model name"):
162
+ self.name += f', {line.split(":", 1)[1].strip()}'
163
+ if self.name.startswith(", "):
164
+ self.name = self.name[2:]
165
+ # --------------------------------------------------------------------------------------------------------------------
166
+
167
+
168
+ # ======================================================================================================================
169
+ class NeuralProcessingUnit(HardwareDevice):
170
+ def __init__(self, name):
171
+ super(NeuralProcessingUnit, self).__init__(name)
172
+ self.compute_capability = None
173
+ self.vram_in_gb = None
174
+
175
+
176
+ # ======================================================================================================================
177
+ class AIGridInfo(HardwareDevice):
178
+ # --------------------------------------------------------------------------------------------------------------------
179
+ def __init__(self, name=None):
180
+ self.name = name
181
+ if self.name is None:
182
+ self.name = socket.gethostname()
183
+ self.cpu = CPU(platform.processor())
184
+
185
+ mem = psutil.virtual_memory()
186
+ total_bytes = mem.total
187
+ self.ram_in_gb = round(total_bytes / (1024 ** 3))
188
+
189
+ self.devices = []
190
+ # --------------------------------------------------------------------------------------------------------------------
191
+ @property
192
+ def device(self):
193
+ if len(self.devices) > 0:
194
+ return self.devices[0]
195
+ else:
196
+ return self.cpu
197
+ # --------------------------------------------------------------------------------------------------------------------
198
+ def discover_devices(self, framework_type: str = "torch"):
199
+ '''
200
+ Detects the AI accelerators using the framework libraries
201
+ :param framework_type: The framework that is used "torch" or "tensorflow"
202
+ :return:
203
+ '''
204
+ self.cpu = CPU(platform.processor())
205
+
206
+ if framework_type == "torch":
207
+ import torch
208
+ device_count = torch.cuda.device_count()
209
+ for i in range(device_count):
210
+ oUnit = NeuralProcessingUnit(torch.cuda.get_device_name(i))
211
+ oUnit.compute_capability = torch.cuda.get_device_capability(i)
212
+ oUnit.vram_in_gb = round(torch.cuda.get_device_properties(i).total_memory / (1024 ** 3))
213
+ self.devices.append(oUnit)
214
+ elif framework_type == "tensorflow":
215
+ import tensorflow as tf
216
+ gpus = tf.config.list_physical_devices("GPU")
217
+ for gpu in gpus:
218
+ details = tf.config.experimental.get_device_details(gpu)
219
+ oUnit = NeuralProcessingUnit(details["device_name"])
220
+ oUnit.compute_capability = details["compute_capability"]
221
+ print(details)
222
+ return self
223
+ # --------------------------------------------------------------------------------------------------------------------
224
+ def __str__(self):
225
+ sResult = f'{"|"*24}\n'
226
+ sResult += f"|| [{self.name[:16]:^16}] ||\n"
227
+ sResult = f'{"|"*24}\n'
228
+ sResult += f" |__ CPU: {self.cpu}\n"
229
+ sResult += f" |__ RAM: {self.ram_in_gb} GB\n"
230
+ sResult += f" |__ NPUs\n"
231
+ for oDevice in self.devices:
232
+ if isinstance(oDevice, NeuralProcessingUnit):
233
+ sResult += f'{" "*5} |__ {oDevice.name} {oDevice.vram_in_gb} GB\n'
234
+ else:
235
+ sResult += f'{" "*5} |__ {oDevice} \n'
236
+ return sResult
237
+ # --------------------------------------------------------------------------------------------------------------------
238
+ def __repr__(self):
239
+ return self.__str__()
240
+ # --------------------------------------------------------------------------------------------------------------------
241
+
242
+ # ======================================================================================================================
243
+
radnn/data/__init__.py CHANGED
@@ -1,12 +1,7 @@
1
- from .dataset_base import DataSetBase
2
- from .image_dataset import ImageDataSet
1
+ from .dataset_base import DataSetBase, DataSetCallbacks
2
+ from .dataset_base_legacy import CDataSetBase
3
+ from .sequence_dataset import SequenceDataSet
3
4
  from .sample_set import SampleSet
4
- from .data_feed import DataFeed
5
- from .subset_type import SubsetType
6
- from .sample_set import SampleSet
7
- from radnn import mlsys
8
- if mlsys.is_tensorflow_installed:
9
- from .tf_classification_data_feed import TFClassificationDataFeed
10
-
11
- from .image_dataset_files import ImageDataSetFiles
5
+ from .sample_set_kind import SampleSetKind
6
+ from .sample_preprocessor import SamplePreprocessor
12
7
 
@@ -22,112 +22,105 @@
22
22
  # SOFTWARE.
23
23
 
24
24
  # ......................................................................................
25
+ import os
25
26
  import numpy as np
26
27
  import pandas as pd
27
- from sklearn.model_selection import train_test_split
28
- from radnn import FileSystem, FileStore, MLSystem, Errors
28
+ from abc import ABC, abstractmethod
29
+ from .sample_set import SampleSet
30
+ from .sample_set_kind import SampleSetKind
31
+ from .sample_preprocessor import SamplePreprocessor, VoidPreprocessor
32
+ from .errors import *
33
+ from radnn import FileStore
29
34
 
35
+ # ======================================================================================================================
36
class DataSetCallbacks(object):
  """Optional host-application hooks used by datasets (lazy loading, RNG seeding)."""
  # --------------------------------------------------------------------------------------------------------------------
  def __init__(self, lazy_loader=None, random_seeder=None):
    # Both hooks are plain callables supplied by the host; either may be None.
    self.lazy_loader = lazy_loader
    self.random_seeder = random_seeder
  # --------------------------------------------------------------------------------------------------------------------
  def lazy_load(self):
    # Invokes the host-supplied loader; callers must ensure it is set.
    self.lazy_loader()
  # --------------------------------------------------------------------------------------------------------------------
  def initialize_random_seed(self, seed: int):
    # Forwards the seed to the host-supplied seeder callable.
    self.random_seeder(seed)
  # --------------------------------------------------------------------------------------------------------------------
30
48
 
31
- class DataSetBase(object):
49
+
50
+ # ======================================================================================================================
51
+ class DataSetBase(ABC):
32
52
  # --------------------------------------------------------------------------------------------------------------------
33
53
  # Constructor
34
- def __init__(self, fs=None, name=None, variant=None, random_seed=None, is_classification=False):
54
+ def __init__(self, name: str, variant: str|None=None, file_store=None, random_seed: int | None=None, callbacks: DataSetCallbacks | None = None):
35
55
  # ..................// Instance Fields \\.........................
36
- self.name = name
37
- self.fs = fs
38
- self.variant = variant
39
- self.ts = None
40
- self.vs = None
41
- self.ut = None
42
-
43
- if self.fs is None:
44
- if MLSystem.Instance().filesys is not None:
45
- self.fs = MLSystem.Instance().filesys
46
- else:
47
- raise Exception(Errors.MLSYS_NO_FILESYS)
48
-
49
- if self.fs is not None:
50
- if isinstance(self.fs, FileSystem):
51
- if variant is not None:
52
- name = name + "_" + variant
53
- self.filestore = self.fs.datasets.subfs(name.upper())
54
- elif isinstance(self.fs, FileStore):
55
- self.filestore = self.fs
56
- elif isinstance(self.fs, str):
57
- self.filestore = FileStore(self.fs)
58
- else:
59
- raise Exception("The parameter fs could be a path, a filestore or a filesystem")
60
- else:
61
- raise Exception("Could not determine the filestore for the dataset")
62
-
63
- self.random_seed = random_seed
64
- self.is_classification = is_classification
65
-
66
- self.feature_count = None
67
- self.class_count = None
68
- self.class_names = None
69
- self.sample_count = None
70
-
71
- self.samples = None
72
- self.labels = None
73
-
74
- self.ts_sample_ids = None
75
- self.ts_samples = None
76
- self.ts_labels = None
77
- self.ts_sample_count = 0
78
-
79
- self.vs_sample_ids = None
80
- self.vs_samples = None
81
- self.vs_labels = None
82
- self.vs_sample_count = 0
83
-
84
- self.ut_sample_ids = None
85
- self.ut_samples = None
86
- self.ut_labels = None
87
- self.ut_sample_count = None
88
-
89
- self.sample_shape = None
90
-
91
- self.card = dict()
92
- self.card["name"] = name
56
+ self.fs: FileStore|None = file_store
57
+ if (file_store is not None) and isinstance(file_store, str):
58
+ if not os.path.exists(file_store):
59
+ raise Exception(ERR_DATASET_FOLDER_NOT_FOUND % file_store)
60
+ self.fs = FileStore(file_store)
61
+ assert self.fs is not None, ERR_DATASET_MUST_PROVIDE_LOCAL_FILESTORE
62
+
63
+ self.name = name
64
+ self.variant = variant
65
+ self.random_seed = random_seed
66
+ self.callbacks: DataSetCallbacks = callbacks
67
+
68
+ self.hparams :dict|None = None
69
+ self.ts: SampleSet|None = None
70
+ self.vs: SampleSet|None = None
71
+ self.ut: SampleSet|None = None
72
+ self.preprocessor: SamplePreprocessor = VoidPreprocessor(self)
93
73
  # ................................................................
94
- if self.random_seed is not None:
95
- MLSystem.Instance().random_seed_all(self.random_seed)
74
+ if (self.random_seed is not None):
75
+ assert self.callbacks is not None, ERR_NO_CALLBACKS
76
+ assert self.callbacks.random_seeder is not None, ERR_NO_RANDOM_SEED_INITIALIZER_CALLBACK
77
+ self.callbacks.initialize_random_seed(self.random_seed)
96
78
 
97
79
  # --------------------------------------------------------------------------------------------------------------------
98
- def open(self):
99
- pass
80
+ @property
81
+ def filesystem_folder(self):
82
+ return self.fs.absolute_path
100
83
  # --------------------------------------------------------------------------------------------------------------------
101
- def close(self):
102
- pass
84
+ @abstractmethod
85
+ def do_read_hyperparams(self):
86
+ pass # must implement concrete method
103
87
  # --------------------------------------------------------------------------------------------------------------------
104
- def for_classification(self, class_count, class_names=None):
105
- self.is_classification = True
106
- self.class_count = class_count
107
- if class_names is not None:
108
- # We assume class_names is a dictionary, in other cases we turn it into a dictionary
109
- if isinstance(class_names, set) or isinstance(class_names, list):
110
- dClassNames = dict()
111
- for nIndex, sClassName in enumerate(class_names):
112
- dClassNames[nIndex] = sClassName
113
- class_names = dClassNames
114
- self.class_names = class_names
115
- return self
88
+ @abstractmethod
89
+ def do_import_data(self):
90
+ pass # must implement concrete method
91
+ # --------------------------------------------------------------------------------------------------------------------
92
+ @abstractmethod
93
+ def do_prepare_data(self):
94
+ pass # could optionally override
95
+ # --------------------------------------------------------------------------------------------------------------------
96
+ @abstractmethod
97
+ def do_create_sample_sets(self):
98
+ pass # must implement concrete method
116
99
  # --------------------------------------------------------------------------------------------------------------------
117
- def count_samples(self):
118
- if self.ts_samples is not None:
119
- self.ts_sample_count = int(self.ts_samples.shape[0])
120
- self.sample_count = self.ts_sample_count + self.vs_sample_count
100
+ def prepare(self, hyperparams: dict|None = None):
101
+ self.hparams = hyperparams
102
+ if self.hparams is not None:
103
+ self.do_read_hyperparams()
121
104
 
122
- if self.vs_samples is not None:
123
- self.vs_sample_count = int(self.vs_samples.shape[0])
124
- self.sample_count = self.ts_sample_count + self.vs_sample_count
105
+ if (self.callbacks is not None):
106
+ if self.callbacks.lazy_loader is not None:
107
+ self.callbacks.lazy_loader()
108
+ self.do_import_data()
109
+ self.do_prepare_data()
110
+
111
+ self.ts = None
112
+ self.vs = None
113
+ self.us = None
114
+ self.do_create_sample_sets()
125
115
 
126
- # The test set samples are not included in the available sample count
127
- if self.ut_samples is not None:
128
- self.ut_sample_count = int(self.ut_samples.shape[0])
116
+ assert self.ts is not None, ERR_SUBSET_MUST_HAVE_TS
117
+ assert self.ts.info.kind == SampleSetKind.TRAINING_SET.value, ERR_SUBSET_INVALID_SETUP
118
+ if self.vs is not None:
119
+ assert self.ts.info.kind == SampleSetKind.TRAINING_SET.value, ERR_SUBSET_INVALID_SETUP
120
+ if self.us is not None:
121
+ assert self.ts.info.kind == SampleSetKind.TRAINING_SET.value, ERR_SUBSET_INVALID_SETUP
129
122
  # --------------------------------------------------------------------------------------------------------------------
130
- def assign(self, data, label_start_column=None, label_end_column=None):
123
+ def assign(self, data, label_columns: range):
131
124
  if isinstance(data, tuple):
132
125
  self.samples, self.labels = data
133
126
  elif isinstance(data, np.ndarray):
@@ -144,182 +137,29 @@ class DataSetBase(object):
144
137
  else:
145
138
  nData = data.to_numpy()
146
139
 
147
- if label_start_column is None:
140
+ if label_columns is None:
148
141
  self.samples = nData
149
142
  else:
150
- if label_start_column >= 0:
151
- if label_end_column is None:
152
- self.labels = nData[:, label_start_column]
153
- self.samples = nData[:, label_start_column + 1:]
143
+ if label_columns.start >= 0:
144
+ if label_columns.stop is None:
145
+ self.labels = nData[:, label_columns.start]
146
+ self.samples = nData[:, label_columns.start + 1:]
154
147
  else:
155
- self.labels = nData[:, label_start_column:label_end_column + 1]
156
- self.samples = nData[:, label_end_column + 1:]
148
+ self.labels = nData[:, label_columns.start:label_columns.stop + 1]
149
+ self.samples = nData[:, label_columns.stop + 1:]
157
150
  else:
158
- self.samples = nData[:, :label_start_column]
159
- self.labels = nData[:, label_start_column:]
160
- return self
161
- # --------------------------------------------------------------------------------------------------------------------
162
- def assign_training_set(self, samples, labels):
163
- self.ts_samples = samples
164
- self.ts_labels = labels
165
- self.count_samples()
166
- self.ts_sample_ids = np.arange(0, self.ts_sample_count)
167
-
168
- # Feature count is calculated on samples that are flattened as vectors
169
- if self.feature_count is None:
170
- self.feature_count = np.prod(self.ts_samples.shape[1:])
171
-
172
- if self.class_count is None:
173
- if self.is_classification:
174
- self.class_count = len(np.unique(self.ts_labels))
175
- else:
176
- self.class_count = 0
177
- return self
178
- # --------------------------------------------------------------------------------------------------------------------
179
- def assign_validation_set(self, samples, labels):
180
- self.vs_samples = samples
181
- self.vs_labels = labels
182
- self.count_samples()
183
- self.vs_sample_ids = np.arange(0, self.vs_sample_count)
184
-
151
+ self.samples = nData[:, :label_columns.start]
152
+ self.labels = nData[:, label_columns.start:]
185
153
  return self
186
154
  # --------------------------------------------------------------------------------------------------------------------
187
- def assign_unknown_test_set(self, samples, labels):
188
- self.ut_samples = samples
189
- self.ut_labels = labels
190
- self.count_samples()
191
- self.ut_sample_ids = np.arange(0, self.ut_sample_count)
192
-
193
- return self
194
- # --------------------------------------------------------------------------------------------------------------------
195
- def infox(self):
196
- self.print_info()
197
- # --------------------------------------------------------------------------------------------------------------------
198
155
  def print_info(self):
199
- print("Dataset [%s]" % self.name)
200
- print(" |__ FeatureCount:", self.feature_count)
201
- if self.is_classification:
202
- print(" |__ ClassCount:", self.class_count)
203
- if self.class_names is not None:
204
- print(" |__ Classes:", self.class_names)
205
-
206
- if self.ts_samples is not None:
207
- print(" |__ Training set samples : %d shape:%s" % (self.ts_sample_count, self.ts_samples.shape))
208
- if self.ts_labels is not None:
209
- print(" |__ Training set targets : %d shape:%s" % (self.ts_sample_count, self.ts_labels.shape))
210
-
211
- if self.vs_samples is not None:
212
- print(" |__ Validation set samples: %d shape:%s" % (self.vs_sample_count, self.vs_samples.shape))
213
- if self.vs_labels is not None:
214
- print(" |__ Validation set targets: %d shape:%s" % (self.vs_sample_count, self.vs_labels.shape))
215
-
216
- if self.ut_samples is not None:
217
- print(" |__ MemoryTest set samples : %d shape:%s" % (self.ut_sample_count, self.ut_samples.shape))
218
- if self.ut_labels is not None:
219
- print(" |__ MemoryTest set targets : %d shape:%s" % (self.ut_sample_count, self.ut_labels.shape))
220
- # --------------------------------------------------------------------------------------------------------------------
221
- def split(self, training_samples_pc, random_seed=None):
222
- if random_seed is None:
223
- random_seed = self.random_seed
224
-
225
- nTSSamples, nVSSamples, nTSTargets, nVSTargets = train_test_split(self.samples, self.labels
226
- , test_size=1.0 - training_samples_pc
227
- , random_state=random_seed
228
- , shuffle=True
229
- , stratify=self.labels
230
- )
231
- self.assign_training_set(nTSSamples, nTSTargets)
232
- self.assign_validation_set(nVSSamples, nVSTargets)
233
- self.count_samples()
234
- return self
235
- # --------------------------------------------------------------------------------------------------------------------
236
- def has_cache(self, samples_file_prefix="Samples"):
237
- return self.filestore.exists("%s.pkl" % samples_file_prefix) or self.filestore.exists("%s.TS.pkl" % samples_file_prefix)
238
- # --------------------------------------------------------------------------------------------------------------------
239
- def load_cache(self, filestore: FileStore = None, samples_file_prefix="Samples", targets_file_prefix="Labels", is_verbose=False):
240
- if filestore is None:
241
- filestore = self.filestore
242
- if filestore is None:
243
- raise Exception("To use load_cache() without providing a filestore, you should provide a filesystem or filestore during instantiation.")
244
-
245
- bResult = filestore.exists("%s.pkl" % samples_file_prefix) or filestore.exists("%s.TS.pkl" % samples_file_prefix)
246
-
247
- if bResult:
248
- if is_verbose:
249
- print("Loading known data set ...")
250
-
251
- dInfo = filestore.json.load(f"{self.name}_info.json")
252
- if dInfo is not None:
253
- if "class_names" in dInfo: self.class_names = dInfo["class_names"]
254
- if "feature_count" in dInfo: self.feature_count = dInfo["feature_count"]
255
- if "class_count" in dInfo:
256
- self.is_classification = True
257
- self.class_count = dInfo["class_count"]
258
-
259
- self.samples = filestore.obj.load("%s.pkl" % samples_file_prefix)
260
- self.labels = filestore.obj.load("%s.pkl" % targets_file_prefix)
261
-
262
- if is_verbose:
263
- print("Loading training set ...")
264
- nTSSamples = filestore.obj.load("%s.TS.pkl" % samples_file_prefix)
265
- nTSTargets = filestore.obj.load("%s.TS.pkl" % targets_file_prefix)
266
- self.assign_training_set(nTSSamples, nTSTargets)
267
-
268
- if is_verbose:
269
- print("Loading validation set ...")
270
- nVSSamples = filestore.obj.load("%s.VS.pkl" % samples_file_prefix)
271
- nVSTargets = filestore.obj.load("%s.VS.pkl" % targets_file_prefix)
272
- self.assign_validation_set(nVSSamples, nVSTargets)
273
-
274
- if is_verbose:
275
- print("Loading unknown test data set ...")
276
- nUTSamples = filestore.obj.load("%s.UT.pkl" % samples_file_prefix)
277
- if nUTSamples is not None:
278
- nUTTargets = filestore.obj.load("%s.UT.pkl" % targets_file_prefix)
279
- self.assign_unknown_test_set(nUTSamples, nUTTargets)
280
-
281
-
282
- return bResult
283
- # --------------------------------------------------------------------------------------------------------------------
284
- def save_cache(self, filestore: FileStore = None, samples_file_prefix="Samples", targets_file_prefix="Labels"):
285
- if filestore is None:
286
- filestore = self.filestore
287
- if filestore is None:
288
- raise Exception("To use save_cache() without providing a filestore, you should provide a filesystem or filestore during instantiation.")
289
-
290
- if self.samples is not None:
291
- filestore.obj.save(self.samples, "%s.pkl" % samples_file_prefix, is_overwriting=True)
292
- filestore.obj.save(self.labels, "%s.pkl" % targets_file_prefix, is_overwriting=True)
293
-
294
- filestore.obj.save(self.ts_samples, "%s.TS.pkl" % samples_file_prefix, is_overwriting=True)
295
- filestore.obj.save(self.ts_labels, "%s.TS.pkl" % targets_file_prefix, is_overwriting=True)
296
-
297
- filestore.obj.save(self.vs_samples, "%s.VS.pkl" % samples_file_prefix, is_overwriting=True)
298
- filestore.obj.save(self.vs_labels, "%s.VS.pkl" % targets_file_prefix, is_overwriting=True)
299
-
300
- if self.ut_samples is not None:
301
- filestore.obj.save(self.ut_samples, "%s.UT.pkl" % samples_file_prefix, is_overwriting=True)
302
- filestore.obj.save(self.ut_labels, "%s.UT.pkl" % targets_file_prefix, is_overwriting=True)
303
-
304
- self.card["name"] = self.name
305
- if self.feature_count is not None:
306
- self.card["feature_count"] = int(self.feature_count)
156
+ if self.variant is not None:
157
+ print(f"Dataset [{self.name}] {self.variant}")
307
158
  else:
308
- self.card["feature_count"] = self.feature_count
309
-
310
- if self.random_seed is not None:
311
- self.card["random_seed"] = int(self.random_seed)
312
- else:
313
- self.card["random_seed"] = self.random_seed
314
-
315
- if self.is_classification:
316
- if self.class_count is not None:
317
- self.card["class_count"] = int(self.class_count)
318
- else:
319
- self.card["class_count"] = self.class_count
320
- self.card["class_names"] = self.class_names
321
-
322
- filestore.json.save(self.card, f"{self.name}_card.json", is_sorted_keys=False)
159
+ print(f"Dataset [{self.name}]")
160
+ self.ts.print_info()
161
+ if self.vs is not None:
162
+ self.vs.print_info()
163
+ if self.ut is not None:
164
+ self.ut.print_info()
323
165
  # --------------------------------------------------------------------------------------------------------------------
324
-
325
-