radnn 0.0.9__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- radnn/__init__.py +4 -5
- radnn/benchmark/__init__.py +1 -0
- radnn/benchmark/latency.py +55 -0
- radnn/core.py +146 -2
- radnn/data/__init__.py +5 -10
- radnn/data/dataset_base.py +100 -272
- radnn/data/dataset_base_legacy.py +280 -0
- radnn/data/errors.py +32 -0
- radnn/data/sample_preprocessor.py +58 -0
- radnn/data/sample_set.py +203 -90
- radnn/data/sample_set_kind.py +126 -0
- radnn/data/sequence_dataset.py +25 -30
- radnn/data/structs/__init__.py +1 -0
- radnn/data/structs/tree.py +322 -0
- radnn/data_beta/__init__.py +12 -0
- radnn/{data → data_beta}/data_feed.py +1 -1
- radnn/data_beta/dataset_base.py +337 -0
- radnn/data_beta/sample_set.py +166 -0
- radnn/data_beta/sequence_dataset.py +134 -0
- radnn/data_beta/structures/__init__.py +2 -0
- radnn/data_beta/structures/dictionary.py +41 -0
- radnn/{data → data_beta}/tf_classification_data_feed.py +5 -2
- radnn/errors.py +10 -2
- radnn/experiment/__init__.py +2 -0
- radnn/experiment/identification.py +7 -0
- radnn/experiment/ml_experiment.py +7 -2
- radnn/experiment/ml_experiment_log.py +47 -0
- radnn/images/image_processor.py +4 -1
- radnn/learn/__init__.py +0 -7
- radnn/learn/keras/__init__.py +4 -0
- radnn/learn/{state → keras}/keras_best_state_saver.py +5 -1
- radnn/learn/{learning_algorithm.py → keras/keras_learning_algorithm.py} +5 -9
- radnn/learn/{keras_learning_rate_scheduler.py → keras/keras_learning_rate_scheduler.py} +4 -1
- radnn/learn/{keras_optimization_algorithm.py → keras/keras_optimization_combo.py} +7 -3
- radnn/learn/torch/__init__.py +3 -0
- radnn/learn/torch/ml_model_freezer.py +330 -0
- radnn/learn/torch/ml_trainer.py +465 -0
- radnn/learn/torch/staircase_lr_scheduler.py +21 -0
- radnn/ml_system.py +68 -52
- radnn/models/__init__.py +5 -0
- radnn/models/cnn/__init__.py +0 -0
- radnn/models/cnn/cnn_stem_setup.py +35 -0
- radnn/models/model_factory.py +85 -0
- radnn/models/model_hyperparams.py +128 -0
- radnn/models/model_info.py +91 -0
- radnn/plots/plot_learning_curve.py +19 -8
- radnn/system/__init__.py +1 -0
- radnn/system/files/__init__.py +1 -1
- radnn/system/files/csvfile.py +37 -5
- radnn/system/files/filelist.py +30 -0
- radnn/system/files/fileobject.py +11 -1
- radnn/system/files/imgfile.py +1 -1
- radnn/system/files/jsonfile.py +34 -9
- radnn/system/files/picklefile.py +3 -3
- radnn/system/files/textfile.py +48 -16
- radnn/system/files/zipfile.py +96 -0
- radnn/system/filestore.py +147 -47
- radnn/system/filesystem.py +3 -3
- radnn/test/__init__.py +1 -0
- radnn/test/tensor_hash.py +130 -0
- radnn/utils.py +16 -2
- {radnn-0.0.9.dist-info → radnn-0.1.1.dist-info}/METADATA +5 -11
- radnn-0.1.1.dist-info/RECORD +99 -0
- {radnn-0.0.9.dist-info → radnn-0.1.1.dist-info}/WHEEL +1 -1
- {radnn-0.0.9.dist-info → radnn-0.1.1.dist-info}/licenses/LICENSE.txt +1 -1
- radnn/learn/state/__init__.py +0 -4
- radnn-0.0.9.dist-info/RECORD +0 -70
- /radnn/{data → data_beta}/dataset_folder.py +0 -0
- /radnn/{data → data_beta}/image_dataset.py +0 -0
- /radnn/{data → data_beta}/image_dataset_files.py +0 -0
- /radnn/{data → data_beta}/preprocess/__init__.py +0 -0
- /radnn/{data → data_beta}/preprocess/normalizer.py +0 -0
- /radnn/{data → data_beta}/preprocess/standardizer.py +0 -0
- /radnn/{data → data_beta}/subset_type.py +0 -0
- {radnn-0.0.9.dist-info → radnn-0.1.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,322 @@
|
|
|
1
|
+
# ......................................................................................
|
|
2
|
+
# MIT License
|
|
3
|
+
|
|
4
|
+
# Copyright (c) 2019-2026 Pantelis I. Kaplanoglou
|
|
5
|
+
|
|
6
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
7
|
+
# of this software and associated documentation files (the "Software"), to deal
|
|
8
|
+
# in the Software without restriction, including without limitation the rights
|
|
9
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
10
|
+
# copies of the Software, and to permit persons to whom the Software is
|
|
11
|
+
# furnished to do so, subject to the following conditions:
|
|
12
|
+
|
|
13
|
+
# The above copyright notice and this permission notice shall be included in all
|
|
14
|
+
# copies or substantial portions of the Software.
|
|
15
|
+
|
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
17
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
18
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
19
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
20
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
21
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
22
|
+
# SOFTWARE.
|
|
23
|
+
|
|
24
|
+
# ......................................................................................
|
|
25
|
+
|
|
26
|
+
# This implementation is ported from the Java/C# trees that have been developed by me
|
|
27
|
+
# for the lesson CS215 "Data Structures & Algorithms" at Anatolia American University.
|
|
28
|
+
|
|
29
|
+
import numpy as np
|
|
30
|
+
|
|
31
|
+
from typing import Type, Any, Callable, Optional, Iterable, Union, List
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class TreeNodeList(list):
    """
    Python version of CTreeNodeList<T>:
      - inherits from list
      - enforces uniqueness and max_branching_factor in append_node

    Uniqueness is decided with the ``in`` operator, i.e. with the ``__eq__``
    of the stored objects, so two objects that compare equal count as the
    same entry. The plain ``list`` methods (``append``, ``extend``, ...)
    remain available and bypass both checks.
    """

    def __init__(self, max_branching_factor: Union[int, float] = np.inf):
        """
        :param max_branching_factor: maximum number of entries accepted by
            append_node(); ``np.inf`` (the default) means unbounded.
        """
        super().__init__()
        self._max_branching_factor = max_branching_factor

    @property
    def item_count(self) -> int:
        """Number of nodes currently stored."""
        return len(self)

    @property
    def max_branching_factor(self) -> Union[int, float]:
        """Upper bound on the entries accepted by append_node()."""
        return self._max_branching_factor

    @max_branching_factor.setter
    def max_branching_factor(self, value: Union[int, float]) -> None:
        # int(np.inf) raises OverflowError, which made it impossible to switch
        # a bounded list back to the unbounded default; keep infinity as-is.
        self._max_branching_factor = value if value == np.inf else int(value)

    def contains(self, node: "TreeNode") -> bool:
        """Return True when an entry equal to `node` is stored."""
        return node in self

    def append_node(self, node: "TreeNode") -> None:
        """
        Append `node` unless it is None, the list is full, or an equal entry
        is already present. Rejections are silent.
        """
        if node is None:
            return
        if len(self) >= self._max_branching_factor:
            return
        if node not in self:
            super().append(node)

    def remove_node(self, node: "TreeNode") -> None:
        """Remove the first entry equal to `node`; do nothing when absent."""
        if node in self:
            super().remove(node)

    def __str__(self) -> str:
        """One line per node: "[value]" padded to 16 characters, then the path."""
        lines = []
        for n in self:
            # C# used "[{Value}]".PadRight(16) + " " + Path
            v = getattr(n, "value", None)
            left = f"[{v}]".ljust(16)
            lines.append(f"{left} {n.path}")
        return "\r\n".join(lines)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class TreeNode(object):
    """
    Node of a general tree. Every node has a name, an index, a payload value,
    a reference to its parent and a TreeNodeList with its children.

    Nodes compare equal when their names are equal (see __eq__); because
    __eq__ is overridden the class is not hashable.
    """

    def __init__(self):
        self.name: str = ""
        self.index: int = -1
        self.value: Any = None
        self._parent: Optional["TreeNode"] = None
        self._children: "TreeNodeList" = TreeNodeList()

    # -------------------------
    # Properties (C#-like)
    # -------------------------
    @property
    def children(self) -> "TreeNodeList":
        """The list of child nodes."""
        return self._children

    @property
    def parent(self) -> Optional["TreeNode"]:
        """The parent node, or None for a root / detached node."""
        return self._parent

    @parent.setter
    def parent(self, new_parent: Optional["TreeNode"]) -> None:
        """
        Move this node under `new_parent` (or detach it when None).

        The node is removed from its current parent by identity, not by
        equality: equality compares names only, so an equality-based removal
        could take out a different sibling that merely shares the name.

        If the new parent's child list refuses the node (it is full, or it
        already holds a child with an equal name) the node stays detached
        instead of pointing at a parent that does not list it.
        """
        # Remove from current parent
        if self._parent is not None:
            siblings = self._parent.children
            for position, sibling in enumerate(siblings):
                if sibling is self:
                    del siblings[position]
                    break

        self._parent = None

        # Add to new parent, and only record it when the list accepted us
        if new_parent is not None:
            new_parent.children.append_node(self)
            if any(child is self for child in new_parent.children):
                self._parent = new_parent

    @property
    def is_root(self) -> bool:
        """True when the node has no parent."""
        return self._parent is None

    @property
    def is_leaf(self) -> bool:
        """True when the node has no children."""
        return self._children.item_count == 0

    @property
    def child_count(self) -> int:
        """Number of direct children."""
        return len(self._children)

    def __getitem__(self, index: int) -> Optional["TreeNode"]:
        """Return the child at `index`, or None when the index is out of range."""
        try:
            return self._children[index]
        except IndexError:
            return None

    @property
    def level(self) -> int:
        """Depth of the node: 0 for a root, parent level + 1 otherwise."""
        if self._parent is None:
            return 0
        return self._parent.level + 1

    @property
    def root(self) -> "TreeNode":
        """The topmost ancestor (the node itself when it has no parent)."""
        if self._parent is None:
            return self
        return self._parent.root

    @property
    def path(self) -> str:
        """
        Slash-separated path built from the names of the ancestors.
        A root is "/", a child of the root is "/<name>".
        """
        if self._parent is None:
            return "/"
        if self._parent.is_root:
            # The root path already ends with "/", do not double it
            return self._parent.path + self.name
        return self._parent.path + "/" + self.name

    def new_child(self, node_name_or_id: Union[str, int, None] = None) -> "TreeNode":
        """
        Create a TreeNode, name it and attach it to this node.

        :param node_name_or_id: name of the child; when None the 1-based
            position (current child count + 1) is used as the name.
        :return: the new node. It is detached (parent is None) when the child
            list refused it.
        """
        child = TreeNode()
        child.index = len(self._children) + 1
        if node_name_or_id is None:
            child.name = str(child.index)
        else:
            child.name = str(node_name_or_id)
        child.parent = self
        return child

    def add_child(self, child_node: "TreeNode") -> int:
        """
        Attach an existing node to this node.

        :return: position of the node in the child list, or -1 when the child
            list refused it.
        """
        child_node.parent = self
        for position, child in enumerate(self._children):
            if child is child_node:
                return position
        return -1

    def remove_child(self, child_or_name: Union["TreeNode", str]) -> None:
        """
        Detach a child, given either the node itself or its name.
        Nodes that are not children of this node are left untouched.
        """
        if isinstance(child_or_name, TreeNode):
            # Only detach nodes that really hang under this node
            if child_or_name._parent is self:
                child_or_name.parent = None
        else:
            name = str(child_or_name)
            # Iterate over a copy: detaching mutates self.children
            for c in list(self.children):
                if c.name == name:
                    c.parent = None
                    break

    def delete(self) -> None:
        """Detach the whole subtree, children first (postorder)."""
        # Postorder delete: delete children first
        for c in list(self.children):
            c.delete()

        # Then remove self from parent
        if self._parent is not None:
            self._parent.remove_child(self)

    def __eq__(self, other: object) -> bool:
        """Nodes are equal when their names are equal."""
        if not isinstance(other, TreeNode):
            # Let Python fall back to its default handling instead of
            # failing on other.name for None, strings, numbers, ...
            return NotImplemented
        return self.name == other.name

    # Overriding __eq__ already disables hashing; stated explicitly because
    # the name used for equality is mutable.
    __hash__ = None

    def __str__(self) -> str:
        return self.path
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
class TreeNodeQueue(list):
    """FIFO queue of tree nodes on top of a list: the front is index 0."""

    @property
    def is_empty(self) -> bool:
        """True when the queue holds no items."""
        return len(self) == 0

    def enqueue(self, item: "TreeNode") -> None:
        """Add `item` at the back of the queue."""
        self.append(item)

    def peek(self) -> Optional["TreeNode"]:
        """Return the front item without removing it, or None when empty."""
        # The emptiness test was inverted: it indexed an empty list and
        # returned None for every non-empty queue.
        return self[0] if len(self) > 0 else None

    def dequeue(self) -> Optional["TreeNode"]:
        """Remove and return the front item, or None when empty."""
        # pop is a method and has to be called; subscripting it (pop[0])
        # raises TypeError. The emptiness test was inverted here as well.
        return self.pop(0) if len(self) > 0 else None
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
class Tree:
    """
    A general tree, identified by its root node, with depth-first and
    breadth-first traversals, path lookup and a text rendering.
    """

    def __init__(self, root: Optional["TreeNode"] = None):
        """
        :param root: the root node; a new empty TreeNode is created when None.
        """
        self.root: "TreeNode" = root if root is not None else TreeNode()
        self._node_list: Optional["TreeNodeList"] = None
        # TODO: optional `comparison_by` callable plus a compare(this_item, other_item)
        #       helper returning -1/0/1 (port of the C# compare(T, T); 1 when the
        #       items are not comparable).

    def clear(self) -> None:
        """Delete every node and start over with a fresh root."""
        self.root.delete()
        self.root = TreeNode()

    def _recurse_preorder_append(self, current: "TreeNode", depth: int) -> None:
        """Collect `current` first and then its subtrees."""
        # list.append is used on purpose: append_node() drops entries that
        # compare equal, and nodes compare by name, so same-named nodes of
        # different branches would vanish from the traversal.
        self._node_list.append(current)
        for child in current.children:
            self._recurse_preorder_append(child, depth + 1)

    def _recurse_postorder_append(self, current: "TreeNode", depth: int) -> None:
        """Collect the subtrees of `current` first and then the node itself."""
        for child in current.children:
            self._recurse_postorder_append(child, depth + 1)
        self._node_list.append(current)

    def traverse_depth_first(self, is_preorder: bool = True) -> "TreeNodeList":
        """
        Return all nodes in depth-first order.

        :param is_preorder: True for preorder (parent before children),
            False for postorder (children before parent).
        """
        self._node_list = TreeNodeList()
        if is_preorder:
            self._recurse_preorder_append(self.root, 0)
        else:
            self._recurse_postorder_append(self.root, 0)
        return self._node_list

    def traverse_breadth_first(self) -> "TreeNodeList":
        """Return all nodes level by level, starting from the root."""
        node_list = TreeNodeList()
        level = [] if self.root is None else [self.root]
        while level:
            # Every node is visited exactly once, so no uniqueness filtering
            # is needed (and name-based filtering would lose nodes).
            node_list.extend(level)
            level = [child for node in level for child in node.children]
        return node_list

    def _recurse_follow_path(self, path_names: list, current: "TreeNode", depth: int) -> Optional["TreeNode"]:
        """
        Consume the first name of `path_names` (the list is modified), find
        the child of `current` with that name and continue below it.

        :return: the node reached by the last name, or None when a name has
            no matching child or `path_names` is empty.
        """
        if not path_names:
            return None
        next_name = path_names.pop(0)

        for child in current.children:
            if child.name == next_name:
                if len(path_names) == 0:
                    return child
                return self._recurse_follow_path(path_names, child, depth + 1)

        return None

    def follow(self, path: str) -> Optional["TreeNode"]:
        """
        Resolve a slash-separated path, starting at the root.

        Empty segments (leading, trailing or doubled "/") are ignored, so
        "/a/b", "a/b" and "/a/b/" name the same node; "" and "/" resolve to
        the root itself.

        :return: the node, or None when some segment has no matching child.
        """
        names = [part for part in path.split("/") if part]
        if not names:
            return self.root
        return self._recurse_follow_path(names, self.root, 1)

    def _indent(self, depth: int) -> str:
        """Indentation for a node at `depth`: four spaces per level below 1."""
        if depth - 1 >= 0:
            return " " * ((depth - 1) * 4)
        return ""

    def _recurse_node_description(self, current: Optional["TreeNode"], depth: int) -> str:
        """Render `current` and its subtree, one node per line."""
        if current is None or not current.is_root:
            prefix = self._indent(depth) + "|__ "
        else:
            prefix = ">"

        if current is None:
            return prefix

        lines = [prefix + current.name]
        lines.extend(self._recurse_node_description(child, depth + 1) for child in current.children)
        return "\r\n".join(lines)

    def __str__(self) -> str:
        return self._recurse_node_description(self.root, 0)
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
from .dataset_base import DataSetBase
|
|
2
|
+
from .image_dataset import ImageDataSet
|
|
3
|
+
from .sample_set import SampleSet
|
|
4
|
+
from .data_feed import DataFeed
|
|
5
|
+
from .subset_type import SubsetType
|
|
6
|
+
from .sample_set import SampleSet
|
|
7
|
+
from radnn import mlsys
|
|
8
|
+
if mlsys.is_tensorflow_installed:
|
|
9
|
+
from .tf_classification_data_feed import TFClassificationDataFeed
|
|
10
|
+
|
|
11
|
+
from .image_dataset_files import ImageDataSetFiles
|
|
12
|
+
|
|
@@ -24,7 +24,7 @@
|
|
|
24
24
|
# ......................................................................................
|
|
25
25
|
from .dataset_base import DataSetBase
|
|
26
26
|
from .subset_type import SubsetType
|
|
27
|
-
from radnn.
|
|
27
|
+
from radnn.data_beta.preprocess import Normalizer, Standardizer
|
|
28
28
|
|
|
29
29
|
class DataFeed(object):
|
|
30
30
|
def __init__(self, dataset: DataSetBase, subset_type):
|
|
@@ -0,0 +1,337 @@
|
|
|
1
|
+
# ......................................................................................
|
|
2
|
+
# MIT License
|
|
3
|
+
|
|
4
|
+
# Copyright (c) 2019-2025 Pantelis I. Kaplanoglou
|
|
5
|
+
|
|
6
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
7
|
+
# of this software and associated documentation files (the "Software"), to deal
|
|
8
|
+
# in the Software without restriction, including without limitation the rights
|
|
9
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
10
|
+
# copies of the Software, and to permit persons to whom the Software is
|
|
11
|
+
# furnished to do so, subject to the following conditions:
|
|
12
|
+
|
|
13
|
+
# The above copyright notice and this permission notice shall be included in all
|
|
14
|
+
# copies or substantial portions of the Software.
|
|
15
|
+
|
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
17
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
18
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
19
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
20
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
21
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
22
|
+
# SOFTWARE.
|
|
23
|
+
|
|
24
|
+
# ......................................................................................
|
|
25
|
+
import numpy as np
|
|
26
|
+
import pandas as pd
|
|
27
|
+
from sklearn.model_selection import train_test_split
|
|
28
|
+
from radnn import FileSystem, FileStore, MLSystem, Errors
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class DataSetBase(object):
    """
    Base class of a dataset that keeps all samples/labels plus three splits:
    the training set (``ts_*``), the validation set (``vs_*``) and the
    unknown test set (``ut_*``). Splits can be cached to / restored from a
    file store as pickles, together with a JSON card describing the dataset.
    """

    # --------------------------------------------------------------------------------------------------------------------
    # Constructor
    def __init__(self, fs=None, name=None, variant=None, random_seed=None, is_classification=False):
        """
        :param fs: a FileSystem, a FileStore or a path string. When None the
            file system of the MLSystem singleton is used.
        :param name: dataset name. With a FileSystem the store is the
            ``datasets`` sub-store named after the upper-cased name.
        :param variant: optional suffix appended to the name ("name_variant")
            when the store is derived from a FileSystem.
        :param random_seed: when given, seeds everything through MLSystem.
        :param is_classification: True when the labels are class ids.
        :raises Exception: when no file system is available or `fs` has an
            unsupported type.
        """
        # ..................// Instance Fields \\.........................
        self.name = name
        self.fs = fs
        self.variant = variant
        self.ts = None
        self.vs = None
        self.ut = None

        if self.fs is None:
            if MLSystem.Instance().filesys is not None:
                self.fs = MLSystem.Instance().filesys
            else:
                raise Exception(Errors.MLSYS_NO_FILESYS)

        if self.fs is not None:
            if isinstance(self.fs, FileSystem):
                if variant is not None:
                    name = name + "_" + variant
                self.filestore = self.fs.datasets.subfs(name.upper())
            elif isinstance(self.fs, FileStore):
                self.filestore = self.fs
            elif isinstance(self.fs, str):
                self.filestore = FileStore(self.fs)
            else:
                raise Exception("The parameter fs could be a path, a filestore or a filesystem")
        else:
            raise Exception("Could not determine the filestore for the dataset")

        self.random_seed = random_seed
        self.is_classification = is_classification

        self.feature_count = None
        self.class_count = None
        self.class_names = None
        self.sample_count = None

        self.samples = None
        self.labels = None

        self.ts_sample_ids = None
        self.ts_samples = None
        self.ts_labels = None
        self.ts_sample_count = 0

        self.vs_sample_ids = None
        self.vs_samples = None
        self.vs_labels = None
        self.vs_sample_count = 0

        self.ut_sample_ids = None
        self.ut_samples = None
        self.ut_labels = None
        self.ut_sample_count = None

        self.sample_shape = None

        self.card = dict()
        # NOTE: at this point `name` carries the variant suffix when the
        # store was derived from a FileSystem.
        self.card["name"] = name
        # ................................................................
        if self.random_seed is not None:
            MLSystem.Instance().random_seed_all(self.random_seed)

    # --------------------------------------------------------------------------------------------------------------------
    def open(self):
        """Hook for subclasses; does nothing here."""
        pass

    # --------------------------------------------------------------------------------------------------------------------
    def close(self):
        """Hook for subclasses; does nothing here."""
        pass

    # --------------------------------------------------------------------------------------------------------------------
    def for_classification(self, class_count, class_names=None):
        """
        Mark the dataset as a classification dataset.

        :param class_count: number of classes.
        :param class_names: dictionary of class names; a list or set is
            converted to a dictionary keyed by enumeration index.
        :return: self, for call chaining.
        """
        self.is_classification = True
        self.class_count = class_count
        if class_names is not None:
            # We assume class_names is a dictionary, in other cases we turn it into a dictionary
            if isinstance(class_names, (set, list)):
                class_names = dict(enumerate(class_names))
        self.class_names = class_names
        return self

    # --------------------------------------------------------------------------------------------------------------------
    def count_samples(self):
        """Refresh the per-split sample counts from the first axis of each split."""
        if self.ts_samples is not None:
            self.ts_sample_count = int(self.ts_samples.shape[0])
            self.sample_count = self.ts_sample_count + self.vs_sample_count

        if self.vs_samples is not None:
            self.vs_sample_count = int(self.vs_samples.shape[0])
            self.sample_count = self.ts_sample_count + self.vs_sample_count

        # The test set samples are not included in the available sample count
        if self.ut_samples is not None:
            self.ut_sample_count = int(self.ut_samples.shape[0])

    # --------------------------------------------------------------------------------------------------------------------
    def assign(self, data, label_start_column=None, label_end_column=None):
        """
        Set the samples (and labels) of the whole dataset.

        :param data: one of
            - a tuple ``(samples, labels)``,
            - a numpy array holding the samples,
            - a dict with the keys "samples" and "labels",
            - a pandas DataFrame whose label columns are selected with
              `label_start_column` / `label_end_column`.
        :param label_start_column: DataFrame only. None: everything is a
            sample. >= 0: labels start at that column and the samples are the
            columns after the labels. < 0: labels are the trailing columns
            from that position, samples everything before.
        :param label_end_column: DataFrame only; last label column
            (inclusive) when the labels span several leading columns.
        :return: self, for call chaining.
        """
        if isinstance(data, tuple):
            self.samples, self.labels = data
        elif isinstance(data, np.ndarray):
            self.samples = data
        elif isinstance(data, dict):
            # The keys have to be looked up in the argument; testing them
            # against the builtin `dict` type raises TypeError.
            if ("samples" in data) and ("labels" in data):
                self.samples = data["samples"]
                self.labels = data["labels"]
            else:
                pass  # Support other formats
        elif isinstance(data, pd.DataFrame):
            # NOTE(review): DataFrame.columns is always a pd.Index, so the
            # first row is always skipped and the else branch is never taken.
            # Confirm that dropping the first row is intended.
            if isinstance(data.columns, pd.Index):
                nData = data.iloc[1:].to_numpy()
            else:
                nData = data.to_numpy()

            if label_start_column is None:
                self.samples = nData
            else:
                if label_start_column >= 0:
                    if label_end_column is None:
                        self.labels = nData[:, label_start_column]
                        self.samples = nData[:, label_start_column + 1:]
                    else:
                        self.labels = nData[:, label_start_column:label_end_column + 1]
                        self.samples = nData[:, label_end_column + 1:]
                else:
                    self.samples = nData[:, :label_start_column]
                    self.labels = nData[:, label_start_column:]
        return self

    # --------------------------------------------------------------------------------------------------------------------
    def assign_training_set(self, samples, labels):
        """
        Set the training split, number its samples 0..count-1 and derive the
        feature count and class count when they are still unknown.

        :return: self, for call chaining.
        """
        self.ts_samples = samples
        self.ts_labels = labels
        self.count_samples()
        self.ts_sample_ids = np.arange(0, self.ts_sample_count)

        # Feature count is calculated on samples that are flattened as vectors
        if self.feature_count is None:
            self.feature_count = np.prod(self.ts_samples.shape[1:])

        if self.class_count is None:
            if self.is_classification:
                self.class_count = len(np.unique(self.ts_labels))
            else:
                self.class_count = 0
        return self

    # --------------------------------------------------------------------------------------------------------------------
    def assign_validation_set(self, samples, labels):
        """Set the validation split and number its samples 0..count-1."""
        self.vs_samples = samples
        self.vs_labels = labels
        self.count_samples()
        self.vs_sample_ids = np.arange(0, self.vs_sample_count)

        return self

    # --------------------------------------------------------------------------------------------------------------------
    def assign_unknown_test_set(self, samples, labels):
        """Set the unknown test split and number its samples 0..count-1."""
        self.ut_samples = samples
        self.ut_labels = labels
        self.count_samples()
        self.ut_sample_ids = np.arange(0, self.ut_sample_count)

        return self

    # --------------------------------------------------------------------------------------------------------------------
    def infox(self):
        """Alias of print_info()."""
        self.print_info()

    # --------------------------------------------------------------------------------------------------------------------
    def print_info(self):
        """Print the dataset name, its feature/class info and the size of each split."""
        print("Dataset [%s]" % self.name)
        print(" |__ FeatureCount:", self.feature_count)
        if self.is_classification:
            print(" |__ ClassCount:", self.class_count)
            if self.class_names is not None:
                print(" |__ Classes:", self.class_names)

        if self.ts_samples is not None:
            print(" |__ Training set samples : %d shape:%s" % (self.ts_sample_count, self.ts_samples.shape))
        if self.ts_labels is not None:
            print(" |__ Training set targets : %d shape:%s" % (self.ts_sample_count, self.ts_labels.shape))

        if self.vs_samples is not None:
            print(" |__ Validation set samples: %d shape:%s" % (self.vs_sample_count, self.vs_samples.shape))
        if self.vs_labels is not None:
            print(" |__ Validation set targets: %d shape:%s" % (self.vs_sample_count, self.vs_labels.shape))

        if self.ut_samples is not None:
            print(" |__ MemoryTest set samples : %d shape:%s" % (self.ut_sample_count, self.ut_samples.shape))
        if self.ut_labels is not None:
            print(" |__ MemoryTest set targets : %d shape:%s" % (self.ut_sample_count, self.ut_labels.shape))

    # --------------------------------------------------------------------------------------------------------------------
    def split(self, training_samples_pc, random_seed=None):
        """
        Shuffle and split samples/labels into the training and validation
        sets, stratified on the labels.

        :param training_samples_pc: fraction of the samples that goes to the
            training set (the rest becomes the validation set).
        :param random_seed: seed of the split; defaults to the dataset seed.
        :return: self, for call chaining.
        """
        if random_seed is None:
            random_seed = self.random_seed

        nTSSamples, nVSSamples, nTSTargets, nVSTargets = train_test_split(
            self.samples,
            self.labels,
            test_size=1.0 - training_samples_pc,
            random_state=random_seed,
            shuffle=True,
            stratify=self.labels,
        )
        self.assign_training_set(nTSSamples, nTSTargets)
        self.assign_validation_set(nVSSamples, nVSTargets)
        self.count_samples()
        return self

    # --------------------------------------------------------------------------------------------------------------------
    def has_cache(self, samples_file_prefix="Samples"):
        """True when the store holds the unsplit samples or the training samples."""
        return self.filestore.exists("%s.pkl" % samples_file_prefix) or self.filestore.exists("%s.TS.pkl" % samples_file_prefix)

    # --------------------------------------------------------------------------------------------------------------------
    def load_cache(self, filestore: "FileStore" = None, samples_file_prefix="Samples", targets_file_prefix="Labels", ids_file_prefix="Ids", is_verbose=False):
        """
        Restore the dataset from the pickles written by save_cache().

        The dataset description is read from ``<name>_card.json`` (the file
        save_cache() writes); ``<name>_info.json`` is used when no card
        exists. A split is assigned only when its samples were found, since
        the cache counts as present when just the unsplit samples exist.
        NOTE(review): this relies on the store's load() returning None for a
        missing file, as the existing None checks suggest - confirm.

        :param filestore: store to read from; defaults to the dataset's own.
        :param samples_file_prefix: file name prefix of the sample pickles.
        :param targets_file_prefix: file name prefix of the label pickles.
        :param ids_file_prefix: file name prefix of the sample id pickles.
        :param is_verbose: print a progress line per split.
        :return: True when a cache was found.
        :raises Exception: when no file store is available.
        """
        if filestore is None:
            filestore = self.filestore
        if filestore is None:
            raise Exception("To use load_cache() without providing a filestore, you should provide a filesystem or filestore during instantiation.")

        bResult = filestore.exists("%s.pkl" % samples_file_prefix) or filestore.exists("%s.TS.pkl" % samples_file_prefix)

        if bResult:
            if is_verbose:
                print("Loading known data set ...")

            # save_cache() writes "<name>_card.json"; only "<name>_info.json"
            # used to be read, so a saved description was never restored.
            sCardFileName = f"{self.name}_card.json"
            if not filestore.exists(sCardFileName):
                sCardFileName = f"{self.name}_info.json"
            dInfo = filestore.json.load(sCardFileName)
            if dInfo is not None:
                if "class_names" in dInfo:
                    self.class_names = dInfo["class_names"]
                if "feature_count" in dInfo:
                    self.feature_count = dInfo["feature_count"]
                if "class_count" in dInfo:
                    self.is_classification = True
                    self.class_count = dInfo["class_count"]

            self.samples = filestore.obj.load("%s.pkl" % samples_file_prefix)
            self.labels = filestore.obj.load("%s.pkl" % targets_file_prefix)

            if is_verbose:
                print("Loading training set ...")
            nTSSamples = filestore.obj.load("%s.TS.pkl" % samples_file_prefix)
            if nTSSamples is not None:
                nTSTargets = filestore.obj.load("%s.TS.pkl" % targets_file_prefix)
                self.assign_training_set(nTSSamples, nTSTargets)
                nTSIDs = filestore.obj.load("%s.TS.pkl" % ids_file_prefix)
                if nTSIDs is not None:
                    self.ts_sample_ids = nTSIDs

            if is_verbose:
                print("Loading validation set ...")
            nVSSamples = filestore.obj.load("%s.VS.pkl" % samples_file_prefix)
            if nVSSamples is not None:
                nVSTargets = filestore.obj.load("%s.VS.pkl" % targets_file_prefix)
                self.assign_validation_set(nVSSamples, nVSTargets)
                nVSIds = filestore.obj.load("%s.VS.pkl" % ids_file_prefix)
                if nVSIds is not None:
                    self.vs_sample_ids = nVSIds

            if is_verbose:
                print("Loading unknown test data set ...")
            nUTSamples = filestore.obj.load("%s.UT.pkl" % samples_file_prefix)
            if nUTSamples is not None:
                nUTTargets = filestore.obj.load("%s.UT.pkl" % targets_file_prefix)
                self.assign_unknown_test_set(nUTSamples, nUTTargets)
                nUTIds = filestore.obj.load("%s.UT.pkl" % ids_file_prefix)
                if nUTIds is not None:
                    self.ut_sample_ids = nUTIds

        return bResult

    # --------------------------------------------------------------------------------------------------------------------
    def save_cache(self, filestore: "FileStore" = None, samples_file_prefix="Samples", targets_file_prefix="Labels", ids_file_prefix="Ids"):
        """
        Write the samples, labels and ids of every split as pickles and the
        dataset card as ``<name>_card.json``. Existing files are overwritten.

        :param filestore: store to write to; defaults to the dataset's own.
        :param samples_file_prefix: file name prefix of the sample pickles.
        :param targets_file_prefix: file name prefix of the label pickles.
        :param ids_file_prefix: file name prefix of the sample id pickles.
        :raises Exception: when no file store is available.
        """
        if filestore is None:
            filestore = self.filestore
        if filestore is None:
            raise Exception("To use save_cache() without providing a filestore, you should provide a filesystem or filestore during instantiation.")

        if self.samples is not None:
            filestore.obj.save(self.samples, "%s.pkl" % samples_file_prefix, is_overwriting=True)
            filestore.obj.save(self.labels, "%s.pkl" % targets_file_prefix, is_overwriting=True)

        filestore.obj.save(self.ts_samples, "%s.TS.pkl" % samples_file_prefix, is_overwriting=True)
        filestore.obj.save(self.ts_labels, "%s.TS.pkl" % targets_file_prefix, is_overwriting=True)
        filestore.obj.save(self.ts_sample_ids, "%s.TS.pkl" % ids_file_prefix, is_overwriting=True)

        filestore.obj.save(self.vs_samples, "%s.VS.pkl" % samples_file_prefix, is_overwriting=True)
        filestore.obj.save(self.vs_labels, "%s.VS.pkl" % targets_file_prefix, is_overwriting=True)
        filestore.obj.save(self.vs_sample_ids, "%s.VS.pkl" % ids_file_prefix, is_overwriting=True)

        if self.ut_samples is not None:
            filestore.obj.save(self.ut_samples, "%s.UT.pkl" % samples_file_prefix, is_overwriting=True)
            filestore.obj.save(self.ut_labels, "%s.UT.pkl" % targets_file_prefix, is_overwriting=True)
            filestore.obj.save(self.ut_sample_ids, "%s.UT.pkl" % ids_file_prefix, is_overwriting=True)

        # Counts may be numpy integers; the card stores plain ints (or None)
        self.card["name"] = self.name
        self.card["feature_count"] = None if self.feature_count is None else int(self.feature_count)
        self.card["random_seed"] = None if self.random_seed is None else int(self.random_seed)

        if self.is_classification:
            self.card["class_count"] = None if self.class_count is None else int(self.class_count)
            self.card["class_names"] = self.class_names

        filestore.json.save(self.card, f"{self.name}_card.json", is_sorted_keys=False)
|
|
335
|
+
# --------------------------------------------------------------------------------------------------------------------
|
|
336
|
+
|
|
337
|
+
|