radnn 0.0.9__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. radnn/__init__.py +4 -5
  2. radnn/benchmark/__init__.py +1 -0
  3. radnn/benchmark/latency.py +55 -0
  4. radnn/core.py +146 -2
  5. radnn/data/__init__.py +5 -10
  6. radnn/data/dataset_base.py +100 -272
  7. radnn/data/dataset_base_legacy.py +280 -0
  8. radnn/data/errors.py +32 -0
  9. radnn/data/sample_preprocessor.py +58 -0
  10. radnn/data/sample_set.py +203 -90
  11. radnn/data/sample_set_kind.py +126 -0
  12. radnn/data/sequence_dataset.py +25 -30
  13. radnn/data/structs/__init__.py +1 -0
  14. radnn/data/structs/tree.py +322 -0
  15. radnn/data_beta/__init__.py +12 -0
  16. radnn/{data → data_beta}/data_feed.py +1 -1
  17. radnn/data_beta/dataset_base.py +337 -0
  18. radnn/data_beta/sample_set.py +166 -0
  19. radnn/data_beta/sequence_dataset.py +134 -0
  20. radnn/data_beta/structures/__init__.py +2 -0
  21. radnn/data_beta/structures/dictionary.py +41 -0
  22. radnn/{data → data_beta}/tf_classification_data_feed.py +5 -2
  23. radnn/errors.py +10 -2
  24. radnn/experiment/__init__.py +2 -0
  25. radnn/experiment/identification.py +7 -0
  26. radnn/experiment/ml_experiment.py +7 -2
  27. radnn/experiment/ml_experiment_log.py +47 -0
  28. radnn/images/image_processor.py +4 -1
  29. radnn/learn/__init__.py +0 -7
  30. radnn/learn/keras/__init__.py +4 -0
  31. radnn/learn/{state → keras}/keras_best_state_saver.py +5 -1
  32. radnn/learn/{learning_algorithm.py → keras/keras_learning_algorithm.py} +5 -9
  33. radnn/learn/{keras_learning_rate_scheduler.py → keras/keras_learning_rate_scheduler.py} +4 -1
  34. radnn/learn/{keras_optimization_algorithm.py → keras/keras_optimization_combo.py} +7 -3
  35. radnn/learn/torch/__init__.py +3 -0
  36. radnn/learn/torch/ml_model_freezer.py +330 -0
  37. radnn/learn/torch/ml_trainer.py +465 -0
  38. radnn/learn/torch/staircase_lr_scheduler.py +21 -0
  39. radnn/ml_system.py +68 -52
  40. radnn/models/__init__.py +5 -0
  41. radnn/models/cnn/__init__.py +0 -0
  42. radnn/models/cnn/cnn_stem_setup.py +35 -0
  43. radnn/models/model_factory.py +85 -0
  44. radnn/models/model_hyperparams.py +128 -0
  45. radnn/models/model_info.py +91 -0
  46. radnn/plots/plot_learning_curve.py +19 -8
  47. radnn/system/__init__.py +1 -0
  48. radnn/system/files/__init__.py +1 -1
  49. radnn/system/files/csvfile.py +37 -5
  50. radnn/system/files/filelist.py +30 -0
  51. radnn/system/files/fileobject.py +11 -1
  52. radnn/system/files/imgfile.py +1 -1
  53. radnn/system/files/jsonfile.py +34 -9
  54. radnn/system/files/picklefile.py +3 -3
  55. radnn/system/files/textfile.py +48 -16
  56. radnn/system/files/zipfile.py +96 -0
  57. radnn/system/filestore.py +147 -47
  58. radnn/system/filesystem.py +3 -3
  59. radnn/test/__init__.py +1 -0
  60. radnn/test/tensor_hash.py +130 -0
  61. radnn/utils.py +16 -2
  62. {radnn-0.0.9.dist-info → radnn-0.1.1.dist-info}/METADATA +5 -11
  63. radnn-0.1.1.dist-info/RECORD +99 -0
  64. {radnn-0.0.9.dist-info → radnn-0.1.1.dist-info}/WHEEL +1 -1
  65. {radnn-0.0.9.dist-info → radnn-0.1.1.dist-info}/licenses/LICENSE.txt +1 -1
  66. radnn/learn/state/__init__.py +0 -4
  67. radnn-0.0.9.dist-info/RECORD +0 -70
  68. /radnn/{data → data_beta}/dataset_folder.py +0 -0
  69. /radnn/{data → data_beta}/image_dataset.py +0 -0
  70. /radnn/{data → data_beta}/image_dataset_files.py +0 -0
  71. /radnn/{data → data_beta}/preprocess/__init__.py +0 -0
  72. /radnn/{data → data_beta}/preprocess/normalizer.py +0 -0
  73. /radnn/{data → data_beta}/preprocess/standardizer.py +0 -0
  74. /radnn/{data → data_beta}/subset_type.py +0 -0
  75. {radnn-0.0.9.dist-info → radnn-0.1.1.dist-info}/top_level.txt +0 -0
radnn/data/structs/tree.py (new file, +322 lines)
@@ -0,0 +1,322 @@
+ # ......................................................................................
+ # MIT License
+
+ # Copyright (c) 2019-2026 Pantelis I. Kaplanoglou
+
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
+ # of this software and associated documentation files (the "Software"), to deal
+ # in the Software without restriction, including without limitation the rights
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ # copies of the Software, and to permit persons to whom the Software is
+ # furnished to do so, subject to the following conditions:
+
+ # The above copyright notice and this permission notice shall be included in all
+ # copies or substantial portions of the Software.
+
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ # SOFTWARE.
+
+ # ......................................................................................
+
+ # This implementation is ported from the Java/C# trees that have been developed by me
+ # for the lesson CS215 "Data Structures & Algorithms" at Anatolia American University.
+
+ import numpy as np
+
+ from typing import Type, Any, Callable, Optional, Iterable, Union, List
+
+
+ class TreeNodeList(list):
+   """
+   Python version of CTreeNodeList<T>:
+   - inherits from list
+   - enforces uniqueness and max_branching_factor in append_node
+   """
+
+   def __init__(self, max_branching_factor: int = np.inf):
+     super().__init__()
+     self._max_branching_factor = max_branching_factor
+
+   @property
+   def item_count(self) -> int:
+     return len(self)
+
+   @property
+   def max_branching_factor(self) -> int:
+     return self._max_branching_factor
+
+   @max_branching_factor.setter
+   def max_branching_factor(self, value: int) -> None:
+     self._max_branching_factor = int(value)
+
+   def contains(self, node: Type["TreeNode"]) -> bool:
+     return node in self
+
+   def append_node(self, node: Type["TreeNode"]) -> None:
+     if node is None:
+       return
+     if len(self) >= self._max_branching_factor:
+       return
+     if node not in self:
+       super().append(node)
+
+   def remove_node(self, node: Type["TreeNode"]) -> None:
+     if node in self:
+       super().remove(node)
+
+   def __str__(self) -> str:
+     lines = []
+     for n in self:
+       # C# used "[{Value}]".PadRight(16) + " " + Path
+       v = getattr(n, "value", None)
+       left = f"[{v}]"
+       left = left + (" " * max(0, 16 - len(left)))
+       lines.append(f"{left} {n.path}")
+     return "\r\n".join(lines)
+
+
+ class TreeNode(object):
+   def __init__(self):
+
+     self.name: str = ""
+     self.index: int = -1
+     self.value: Any = None
+     self._parent: TreeNode | None = None
+     self._children: TreeNodeList = TreeNodeList()
+
+   # -------------------------
+   # Properties (C#-like)
+   # -------------------------
+   @property
+   def children(self) -> TreeNodeList:
+     return self._children
+
+   @property
+   def parent(self) -> Type["TreeNode"] | None:
+     return self._parent
+
+   @parent.setter
+   def parent(self, new_parent: Type["TreeNode"] | None):
+     # Remove from current parent
+     if self._parent is not None:
+       self._parent.children.remove_node(self)
+
+     self._parent = new_parent
+
+     # Add to new parent
+     if self._parent is not None:
+       self._parent.children.append_node(self)
+
+   @property
+   def is_root(self) -> bool:
+     return self._parent is None
+
+   @property
+   def is_leaf(self) -> bool:
+     return self._children.item_count == 0
+
+   @property
+   def child_count(self) -> int:
+     return len(self._children)
+
+   def __getitem__(self, index: int) -> Type["TreeNode"] | None:
+     try:
+       return self._children[index]
+     except IndexError:
+       return None
+
+   @property
+   def level(self) -> int:
+     if self._parent is None:
+       return 0
+     return self._parent.level + 1
+
+   @property
+   def root(self) -> Type["TreeNode"]:
+     if self._parent is None:
+       return self
+     return self._parent.root
+
+   @property
+   def path(self) -> str:
+     if self._parent is None:
+       return "/"
+     if self._parent.is_root:
+       return self._parent.path + self.name
+     return self._parent.path + "/" + self.name
+
+   def new_child(self, node_name_or_id: Union[str, int, None] = None) -> Type["TreeNode"]:
+     child = TreeNode()
+     child.index = len(self._children) + 1
+     if node_name_or_id is None:
+       child.name = str(child.index)
+     else:
+       child.name = str(node_name_or_id)
+     child.parent = self
+     return child
+
+   def add_child(self, child_node: Type["TreeNode"]) -> int:
+     child_node.parent = self
+     return self._children.item_count - 1
+
+   def remove_child(self, child_or_name: Union[Type["TreeNode"], str]) -> None:
+     if isinstance(child_or_name, TreeNode):
+       child_or_name.parent = None
+     else:
+       name = str(child_or_name)
+       for c in list(self.children):
+         if c.name == name:
+           c.parent = None
+           break
+
+   def delete(self) -> None:
+     # Postorder delete: delete children first
+     for c in list(self.children):
+       c.delete()
+
+     # Then remove self from parent
+     if self._parent is not None:
+       self._parent.remove_child(self)
+
+   def __eq__(self, other: Type["TreeNode"]) -> bool:
+     return self.name == other.name
+
+   def __str__(self) -> str:
+     return self.path
+
+
+
+
+ class TreeNodeQueue(list):
+   @property
+   def is_empty(self) -> bool:
+     return len(self) == 0
+
+   def enqueue(self, item: TreeNode) -> None:
+     self.append(item)
+
+   def peek(self) -> Optional[TreeNode]:
+     return self[0] if len(self) == 0 else None
+
+   def dequeue(self) -> Optional[TreeNode]:
+     return self.pop[0] if len(self) == 0 else None
+
+
+
+ class Tree:
+   def __init__(self, root: Optional[TreeNode] = None):
+     self.root: TreeNode = root if root is not None else TreeNode()
+     self._node_list: Optional[TreeNodeList] = None
+     #self.comparison_by: Optional[Callable[[Any, Any], int]] = None #TODO
+   '''
+   def compare(self, this_item: Any, other_item: Any) -> int:
+     """
+     Closest Python equivalent of C# compare(T,T):
+     - If comparison_by provided, use it.
+     - Else try normal Python comparisons.
+     - If not comparable, return 1 (same default as C# code's nResult=1).
+     """
+     if self.comparison_by is not None:
+       return int(self.comparison_by(this_item, other_item))
+
+     try:
+       if this_item == other_item:
+         return 0
+       # Python doesn't have CompareTo; approximate:
+       return -1 if this_item < other_item else 1
+     except Exception:
+       return 1
+   '''
+
+   def clear(self) -> None:
+     self.root.delete()
+     self.root = TreeNode()
+
+   def _recurse_preorder_append(self, current: TreeNode, depth: int) -> None:
+     self._node_list.append_node(current)
+     for child in current.children:
+       self._recurse_preorder_append(child, depth + 1)
+
+   def _recurse_postorder_append(self, current: TreeNode, depth: int) -> None:
+     for child in current.children:
+       self._recurse_postorder_append(child, depth + 1)
+     self._node_list.append_node(current)
+
+   def traverse_depth_first(self, is_preorder: bool = True) -> TreeNodeList:
+     self._node_list = TreeNodeList()
+     if is_preorder:
+       self._recurse_preorder_append(self.root, 0)
+     else:
+       self._recurse_postorder_append(self.root, 0)
+     return self._node_list
+
+   def traverse_breadth_first(self) -> TreeNodeList:
+     node_list = TreeNodeList()
+     q = TreeNodeQueue()
+     q.enqueue(self.root)
+
+     while not q.is_empty:
+       node = q.dequeue()
+       if node is None:
+         continue
+       node_list.append_node(node)
+       for child in node.children:
+         q.enqueue(child)
+
+     return node_list
+
+   def _recurse_follow_path(self, path_names: list, current: TreeNode, depth: int) -> Optional[TreeNode]:
+     next_name = path_names.pop(0)
+     if next_name is None:
+       return None
+
+     for child in current.children:
+       if child.name == next_name:
+         if len(path_names) == 0:
+           return child
+         return self._recurse_follow_path(path_names, child, depth + 1)
+
+     return None
+
+   def follow(self, path: str) -> Optional[TreeNode]:
+     # Split by '/'
+     parts = path.split("/")
+
+     q = list()
+     for p in parts:
+       q.append(p)
+
+     # In an empty tree the result will be the root node
+     result: Optional[TreeNode] = self.root
+     if not q.is_empty:
+       q.pop(0)  # remove "" representing root when path starts with "/"
+       result = self._recurse_follow_path(q, self.root, 1)
+
+     return result
+
+   def _indent(self, depth: int) -> str:
+     if depth - 1 >= 0:
+       return " " * ((depth - 1) * 4)
+     return ""
+
+   def _recurse_node_description(self, current: Optional[TreeNode], depth: int) -> str:
+     if current is None or not current.is_root:
+       prefix = self._indent(depth) + "|__ "
+     else:
+       prefix = ">"
+
+     if current is None:
+       return prefix
+
+     s = prefix + current.name
+     for child in current.children:
+       s += "\r\n" + self._recurse_node_description(child, depth + 1)
+     return s
+
+   def __str__(self) -> str:
+     return self._recurse_node_description(self.root, 0)
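
The hunk above adds a general-purpose tree (TreeNodeList, TreeNode, TreeNodeQueue, Tree) under radnn/data/structs. Below is a minimal usage sketch based only on the methods shown in the hunk, assuming the classes are imported directly from the new module path; the node names are invented for illustration and nothing in the sketch ships with the package.

# Illustrative only: build a small hierarchy and walk it depth-first.
from radnn.data.structs.tree import Tree

tree = Tree()                                # creates an unnamed root node
datasets = tree.root.new_child("datasets")   # named child directly under the root
datasets.new_child("mnist")
datasets.new_child("cifar10")

print(tree)                                  # ASCII outline: ">", "|__ datasets", "    |__ mnist", ...
for node in tree.traverse_depth_first(is_preorder=True):
  print(node.level, node.path)               # e.g. 2 /datasets/mnist
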
radnn/data_beta/__init__.py (new file, +12 lines)
@@ -0,0 +1,12 @@
+ from .dataset_base import DataSetBase
+ from .image_dataset import ImageDataSet
+ from .sample_set import SampleSet
+ from .data_feed import DataFeed
+ from .subset_type import SubsetType
+ from .sample_set import SampleSet
+ from radnn import mlsys
+ if mlsys.is_tensorflow_installed:
+   from .tf_classification_data_feed import TFClassificationDataFeed
+
+ from .image_dataset_files import ImageDataSetFiles
+
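
The new data_beta package only exposes the TensorFlow-specific feed when the backend is present, via the mlsys.is_tensorflow_installed guard above. A sketch of mirroring that guard from user code, assuming the same names are re-exported by radnn.data_beta as in the hunk (feed_class is an illustrative variable, not part of the package):

from radnn import mlsys
from radnn.data_beta import DataFeed

if mlsys.is_tensorflow_installed:
  from radnn.data_beta import TFClassificationDataFeed
  feed_class = TFClassificationDataFeed   # TF-backed feed when TensorFlow is available
else:
  feed_class = DataFeed                   # generic feed otherwise
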
radnn/{data → data_beta}/data_feed.py
@@ -24,7 +24,7 @@
  # ......................................................................................
  from .dataset_base import DataSetBase
  from .subset_type import SubsetType
- from radnn.data.preprocess import Normalizer, Standardizer
+ from radnn.data_beta.preprocess import Normalizer, Standardizer

  class DataFeed(object):
    def __init__(self, dataset: DataSetBase, subset_type):
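
Besides the move into data_beta, the only change to data_feed.py is this import path, since the preprocess subpackage moved with it. Code that imported the preprocessors from the old location would need the same one-line update (a sketch, assuming such downstream code exists):

# radnn 0.0.9 layout:
# from radnn.data.preprocess import Normalizer, Standardizer
# radnn 0.1.1 layout, matching the hunk above:
from radnn.data_beta.preprocess import Normalizer, Standardizer
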
radnn/data_beta/dataset_base.py (new file, +337 lines)
@@ -0,0 +1,337 @@
+ # ......................................................................................
+ # MIT License
+
+ # Copyright (c) 2019-2025 Pantelis I. Kaplanoglou
+
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
+ # of this software and associated documentation files (the "Software"), to deal
+ # in the Software without restriction, including without limitation the rights
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ # copies of the Software, and to permit persons to whom the Software is
+ # furnished to do so, subject to the following conditions:
+
+ # The above copyright notice and this permission notice shall be included in all
+ # copies or substantial portions of the Software.
+
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ # SOFTWARE.
+
+ # ......................................................................................
+ import numpy as np
+ import pandas as pd
+ from sklearn.model_selection import train_test_split
+ from radnn import FileSystem, FileStore, MLSystem, Errors
+
+
+ class DataSetBase(object):
+   # --------------------------------------------------------------------------------------------------------------------
+   # Constructor
+   def __init__(self, fs=None, name=None, variant=None, random_seed=None, is_classification=False):
+     # ..................// Instance Fields \\.........................
+     self.name = name
+     self.fs = fs
+     self.variant = variant
+     self.ts = None
+     self.vs = None
+     self.ut = None
+
+     if self.fs is None:
+       if MLSystem.Instance().filesys is not None:
+         self.fs = MLSystem.Instance().filesys
+       else:
+         raise Exception(Errors.MLSYS_NO_FILESYS)
+
+     if self.fs is not None:
+       if isinstance(self.fs, FileSystem):
+         if variant is not None:
+           name = name + "_" + variant
+         self.filestore = self.fs.datasets.subfs(name.upper())
+       elif isinstance(self.fs, FileStore):
+         self.filestore = self.fs
+       elif isinstance(self.fs, str):
+         self.filestore = FileStore(self.fs)
+       else:
+         raise Exception("The parameter fs could be a path, a filestore or a filesystem")
+     else:
+       raise Exception("Could not determine the filestore for the dataset")
+
+     self.random_seed = random_seed
+     self.is_classification = is_classification
+
+     self.feature_count = None
+     self.class_count = None
+     self.class_names = None
+     self.sample_count = None
+
+     self.samples = None
+     self.labels = None
+
+     self.ts_sample_ids = None
+     self.ts_samples = None
+     self.ts_labels = None
+     self.ts_sample_count = 0
+
+     self.vs_sample_ids = None
+     self.vs_samples = None
+     self.vs_labels = None
+     self.vs_sample_count = 0
+
+     self.ut_sample_ids = None
+     self.ut_samples = None
+     self.ut_labels = None
+     self.ut_sample_count = None
+
+     self.sample_shape = None
+
+     self.card = dict()
+     self.card["name"] = name
+     # ................................................................
+     if self.random_seed is not None:
+       MLSystem.Instance().random_seed_all(self.random_seed)
+
+   # --------------------------------------------------------------------------------------------------------------------
+   def open(self):
+     pass
+   # --------------------------------------------------------------------------------------------------------------------
+   def close(self):
+     pass
+   # --------------------------------------------------------------------------------------------------------------------
+   def for_classification(self, class_count, class_names=None):
+     self.is_classification = True
+     self.class_count = class_count
+     if class_names is not None:
+       # We assume class_names is a dictionary, in other cases we turn it into a dictionary
+       if isinstance(class_names, set) or isinstance(class_names, list):
+         dClassNames = dict()
+         for nIndex, sClassName in enumerate(class_names):
+           dClassNames[nIndex] = sClassName
+         class_names = dClassNames
+       self.class_names = class_names
+     return self
+   # --------------------------------------------------------------------------------------------------------------------
+   def count_samples(self):
+     if self.ts_samples is not None:
+       self.ts_sample_count = int(self.ts_samples.shape[0])
+       self.sample_count = self.ts_sample_count + self.vs_sample_count
+
+     if self.vs_samples is not None:
+       self.vs_sample_count = int(self.vs_samples.shape[0])
+       self.sample_count = self.ts_sample_count + self.vs_sample_count
+
+     # The test set samples are not included in the available sample count
+     if self.ut_samples is not None:
+       self.ut_sample_count = int(self.ut_samples.shape[0])
+   # --------------------------------------------------------------------------------------------------------------------
+   def assign(self, data, label_start_column=None, label_end_column=None):
+     if isinstance(data, tuple):
+       self.samples, self.labels = data
+     elif isinstance(data, np.ndarray):
+       self.samples = data
+     elif isinstance(data, dict):
+       if ("samples" in dict) and ("labels" in dict):
+         self.samples = data["samples"]
+         self.labels = data["labels"]
+       else:
+         pass  # Support other formats
+     elif isinstance(data, pd.DataFrame):
+       if isinstance(data.columns, pd.Index):
+         nData = data.iloc[1:].to_numpy()
+       else:
+         nData = data.to_numpy()
+
+       if label_start_column is None:
+         self.samples = nData
+       else:
+         if label_start_column >= 0:
+           if label_end_column is None:
+             self.labels = nData[:, label_start_column]
+             self.samples = nData[:, label_start_column + 1:]
+           else:
+             self.labels = nData[:, label_start_column:label_end_column + 1]
+             self.samples = nData[:, label_end_column + 1:]
+         else:
+           self.samples = nData[:, :label_start_column]
+           self.labels = nData[:, label_start_column:]
+     return self
+   # --------------------------------------------------------------------------------------------------------------------
+   def assign_training_set(self, samples, labels):
+     self.ts_samples = samples
+     self.ts_labels = labels
+     self.count_samples()
+     self.ts_sample_ids = np.arange(0, self.ts_sample_count)
+
+     # Feature count is calculated on samples that are flattened as vectors
+     if self.feature_count is None:
+       self.feature_count = np.prod(self.ts_samples.shape[1:])
+
+     if self.class_count is None:
+       if self.is_classification:
+         self.class_count = len(np.unique(self.ts_labels))
+       else:
+         self.class_count = 0
+     return self
+   # --------------------------------------------------------------------------------------------------------------------
+   def assign_validation_set(self, samples, labels):
+     self.vs_samples = samples
+     self.vs_labels = labels
+     self.count_samples()
+     self.vs_sample_ids = np.arange(0, self.vs_sample_count)
+
+     return self
+   # --------------------------------------------------------------------------------------------------------------------
+   def assign_unknown_test_set(self, samples, labels):
+     self.ut_samples = samples
+     self.ut_labels = labels
+     self.count_samples()
+     self.ut_sample_ids = np.arange(0, self.ut_sample_count)
+
+     return self
+   # --------------------------------------------------------------------------------------------------------------------
+   def infox(self):
+     self.print_info()
+   # --------------------------------------------------------------------------------------------------------------------
+   def print_info(self):
+     print("Dataset [%s]" % self.name)
+     print(" |__ FeatureCount:", self.feature_count)
+     if self.is_classification:
+       print(" |__ ClassCount:", self.class_count)
+       if self.class_names is not None:
+         print(" |__ Classes:", self.class_names)
+
+     if self.ts_samples is not None:
+       print(" |__ Training set samples : %d shape:%s" % (self.ts_sample_count, self.ts_samples.shape))
+     if self.ts_labels is not None:
+       print(" |__ Training set targets : %d shape:%s" % (self.ts_sample_count, self.ts_labels.shape))
+
+     if self.vs_samples is not None:
+       print(" |__ Validation set samples: %d shape:%s" % (self.vs_sample_count, self.vs_samples.shape))
+     if self.vs_labels is not None:
+       print(" |__ Validation set targets: %d shape:%s" % (self.vs_sample_count, self.vs_labels.shape))
+
+     if self.ut_samples is not None:
+       print(" |__ MemoryTest set samples : %d shape:%s" % (self.ut_sample_count, self.ut_samples.shape))
+     if self.ut_labels is not None:
+       print(" |__ MemoryTest set targets : %d shape:%s" % (self.ut_sample_count, self.ut_labels.shape))
+   # --------------------------------------------------------------------------------------------------------------------
+   def split(self, training_samples_pc, random_seed=None):
+     if random_seed is None:
+       random_seed = self.random_seed
+
+     nTSSamples, nVSSamples, nTSTargets, nVSTargets = train_test_split(self.samples, self.labels
+                                                                       , test_size=1.0 - training_samples_pc
+                                                                       , random_state=random_seed
+                                                                       , shuffle=True
+                                                                       , stratify=self.labels
+                                                                       )
+     self.assign_training_set(nTSSamples, nTSTargets)
+     self.assign_validation_set(nVSSamples, nVSTargets)
+     self.count_samples()
+     return self
+   # --------------------------------------------------------------------------------------------------------------------
+   def has_cache(self, samples_file_prefix="Samples"):
+     return self.filestore.exists("%s.pkl" % samples_file_prefix) or self.filestore.exists("%s.TS.pkl" % samples_file_prefix)
+   # --------------------------------------------------------------------------------------------------------------------
+   def load_cache(self, filestore: FileStore = None, samples_file_prefix="Samples", targets_file_prefix="Labels", ids_file_prefix="Ids", is_verbose=False):
+     if filestore is None:
+       filestore = self.filestore
+     if filestore is None:
+       raise Exception("To use load_cache() without providing a filestore, you should provide a filesystem or filestore during instantiation.")
+
+     bResult = filestore.exists("%s.pkl" % samples_file_prefix) or filestore.exists("%s.TS.pkl" % samples_file_prefix)
+
+     if bResult:
+       if is_verbose:
+         print("Loading known data set ...")
+
+       dInfo = filestore.json.load(f"{self.name}_info.json")
+       if dInfo is not None:
+         if "class_names" in dInfo: self.class_names = dInfo["class_names"]
+         if "feature_count" in dInfo: self.feature_count = dInfo["feature_count"]
+         if "class_count" in dInfo:
+           self.is_classification = True
+           self.class_count = dInfo["class_count"]
+
+       self.samples = filestore.obj.load("%s.pkl" % samples_file_prefix)
+       self.labels = filestore.obj.load("%s.pkl" % targets_file_prefix)
+
+       if is_verbose:
+         print("Loading training set ...")
+       nTSSamples = filestore.obj.load("%s.TS.pkl" % samples_file_prefix)
+       nTSTargets = filestore.obj.load("%s.TS.pkl" % targets_file_prefix)
+       self.assign_training_set(nTSSamples, nTSTargets)
+       nTSIDs = filestore.obj.load("%s.TS.pkl" % ids_file_prefix)
+       if nTSIDs is not None:
+         self.ts_sample_ids = nTSIDs
+
+       if is_verbose:
+         print("Loading validation set ...")
+       nVSSamples = filestore.obj.load("%s.VS.pkl" % samples_file_prefix)
+       nVSTargets = filestore.obj.load("%s.VS.pkl" % targets_file_prefix)
+       self.assign_validation_set(nVSSamples, nVSTargets)
+       nVSIds = filestore.obj.load("%s.VS.pkl" % ids_file_prefix)
+       if nVSIds is not None:
+         self.vs_sample_ids = nVSIds
+
+       if is_verbose:
+         print("Loading unknown test data set ...")
+       nUTSamples = filestore.obj.load("%s.UT.pkl" % samples_file_prefix)
+       if nUTSamples is not None:
+         nUTTargets = filestore.obj.load("%s.UT.pkl" % targets_file_prefix)
+         self.assign_unknown_test_set(nUTSamples, nUTTargets)
+         nUTIds = filestore.obj.load("%s.UT.pkl" % ids_file_prefix)
+         if nUTIds is not None:
+           self.ut_sample_ids = nUTIds
+
+
+     return bResult
+   # --------------------------------------------------------------------------------------------------------------------
+   def save_cache(self, filestore: FileStore = None, samples_file_prefix="Samples", targets_file_prefix="Labels", ids_file_prefix="Ids"):
+     if filestore is None:
+       filestore = self.filestore
+     if filestore is None:
+       raise Exception("To use save_cache() without providing a filestore, you should provide a filesystem or filestore during instantiation.")
+
+     if self.samples is not None:
+       filestore.obj.save(self.samples, "%s.pkl" % samples_file_prefix, is_overwriting=True)
+       filestore.obj.save(self.labels, "%s.pkl" % targets_file_prefix, is_overwriting=True)
+
+     filestore.obj.save(self.ts_samples, "%s.TS.pkl" % samples_file_prefix, is_overwriting=True)
+     filestore.obj.save(self.ts_labels, "%s.TS.pkl" % targets_file_prefix, is_overwriting=True)
+     filestore.obj.save(self.ts_sample_ids, "%s.TS.pkl" % ids_file_prefix, is_overwriting=True)
+
+     filestore.obj.save(self.vs_samples, "%s.VS.pkl" % samples_file_prefix, is_overwriting=True)
+     filestore.obj.save(self.vs_labels, "%s.VS.pkl" % targets_file_prefix, is_overwriting=True)
+     filestore.obj.save(self.vs_sample_ids, "%s.VS.pkl" % ids_file_prefix, is_overwriting=True)
+
+     if self.ut_samples is not None:
+       filestore.obj.save(self.ut_samples, "%s.UT.pkl" % samples_file_prefix, is_overwriting=True)
+       filestore.obj.save(self.ut_labels, "%s.UT.pkl" % targets_file_prefix, is_overwriting=True)
+       filestore.obj.save(self.ut_sample_ids, "%s.UT.pkl" % ids_file_prefix, is_overwriting=True)
+
+     self.card["name"] = self.name
+     if self.feature_count is not None:
+       self.card["feature_count"] = int(self.feature_count)
+     else:
+       self.card["feature_count"] = self.feature_count
+
+     if self.random_seed is not None:
+       self.card["random_seed"] = int(self.random_seed)
+     else:
+       self.card["random_seed"] = self.random_seed
+
+     if self.is_classification:
+       if self.class_count is not None:
+         self.card["class_count"] = int(self.class_count)
+       else:
+         self.card["class_count"] = self.class_count
+       self.card["class_names"] = self.class_names
+
+     filestore.json.save(self.card, f"{self.name}_card.json", is_sorted_keys=False)
+   # --------------------------------------------------------------------------------------------------------------------
+
+
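
Taken together, the methods in this hunk form an assign → split → cache workflow for the new data_beta.DataSetBase. Below is a minimal end-to-end sketch based only on the code shown above; it assumes fs may be a plain directory path (the str branch of the constructor) and that the FileStore obj/json accessors behave as load_cache()/save_cache() expect, and the path, array shapes, and class names are invented for illustration.

import numpy as np
from radnn.data_beta.dataset_base import DataSetBase

X = np.random.rand(100, 4)              # 100 samples, 4 features (toy data)
y = np.random.randint(0, 3, size=100)   # 3 classes (toy labels)

ds = DataSetBase(fs="/tmp/radnn-demo", name="toy", is_classification=True)
ds.for_classification(class_count=3, class_names=["a", "b", "c"])
ds.assign((X, y))                       # tuple form: (samples, labels)
ds.split(training_samples_pc=0.8)       # stratified 80/20 train/validation split via sklearn
ds.print_info()
ds.save_cache()                         # writes Samples*.pkl, Labels*.pkl, Ids*.pkl and the toy_card.json card
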