radnn 0.1.4__tar.gz → 0.1.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125) hide show
  1. {radnn-0.1.4/src/radnn.egg-info → radnn-0.1.6}/PKG-INFO +1 -1
  2. {radnn-0.1.4 → radnn-0.1.6}/pyproject.toml +1 -1
  3. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/__init__.py +4 -1
  4. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/__init__.py +2 -2
  5. radnn-0.1.6/src/radnn/data/constants.py +8 -0
  6. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/custom_data_set.py +44 -29
  7. radnn-0.1.6/src/radnn/data/dataset_base.py +263 -0
  8. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/dataset_base_legacy.py +1 -1
  9. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/errors.py +11 -6
  10. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/sample_preprocessor.py +3 -0
  11. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/sample_set.py +50 -31
  12. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/sample_set_kind.py +21 -5
  13. radnn-0.1.6/src/radnn/data/sample_set_simple.py +94 -0
  14. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/sequence_dataset.py +4 -4
  15. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/experiment/ml_experiment.py +2 -2
  16. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/experiment/ml_experiment_log.py +25 -19
  17. radnn-0.1.6/src/radnn/learn/constants.py +24 -0
  18. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/learn/torch/ml_model_freezer.py +1 -1
  19. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/plots/__init__.py +3 -2
  20. radnn-0.1.6/src/radnn/plots/plot_histogram_of_classes.py +65 -0
  21. radnn-0.1.6/src/radnn/plots/plot_legacy.py +103 -0
  22. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/plots/plot_roc.py +1 -0
  23. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/hosts/windows_host.py +1 -1
  24. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/utils.py +7 -4
  25. {radnn-0.1.4 → radnn-0.1.6/src/radnn.egg-info}/PKG-INFO +1 -1
  26. {radnn-0.1.4 → radnn-0.1.6}/src/radnn.egg-info/SOURCES.txt +3 -0
  27. radnn-0.1.4/src/radnn/data/dataset_base.py +0 -165
  28. radnn-0.1.4/src/radnn/data/sample_set_simple.py +0 -33
  29. radnn-0.1.4/src/radnn/plots/plot_histogram_of_classes.py +0 -143
  30. {radnn-0.1.4 → radnn-0.1.6}/LICENSE.txt +0 -0
  31. {radnn-0.1.4 → radnn-0.1.6}/setup.cfg +0 -0
  32. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/benchmark/__init__.py +0 -0
  33. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/benchmark/latency.py +0 -0
  34. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/benchmark/vram.py +0 -0
  35. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/core.py +0 -0
  36. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/data_hyperparams.py +0 -0
  37. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/dataset_factory.py +0 -0
  38. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/structs/__init__.py +0 -0
  39. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data/structs/tree.py +0 -0
  40. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/__init__.py +0 -0
  41. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/data_feed.py +0 -0
  42. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/dataset_base.py +0 -0
  43. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/dataset_folder.py +0 -0
  44. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/image_dataset.py +0 -0
  45. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/image_dataset_files.py +0 -0
  46. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/preprocess/__init__.py +0 -0
  47. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/preprocess/normalizer.py +0 -0
  48. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/preprocess/standardizer.py +0 -0
  49. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/sample_set.py +0 -0
  50. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/sequence_dataset.py +0 -0
  51. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/structures/__init__.py +0 -0
  52. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/structures/dictionary.py +0 -0
  53. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/subset_type.py +0 -0
  54. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/data_beta/tf_classification_data_feed.py +0 -0
  55. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/errors.py +0 -0
  56. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/evaluation/__init__.py +0 -0
  57. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/evaluation/evaluate_classification.py +0 -0
  58. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/experiment/__init__.py +0 -0
  59. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/experiment/identification.py +0 -0
  60. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/experiment/ml_experiment_config.py +0 -0
  61. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/experiment/ml_experiment_env.py +0 -0
  62. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/experiment/ml_experiment_store.py +0 -0
  63. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/images/__init__.py +0 -0
  64. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/images/colors.py +0 -0
  65. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/images/image_processor.py +0 -0
  66. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/images/transforms.py +0 -0
  67. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/learn/__init__.py +0 -0
  68. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/learn/keras/__init__.py +0 -0
  69. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/learn/keras/keras_best_state_saver.py +0 -0
  70. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/learn/keras/keras_learning_algorithm.py +0 -0
  71. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/learn/keras/keras_learning_rate_scheduler.py +0 -0
  72. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/learn/keras/keras_optimization_combo.py +0 -0
  73. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/learn/torch/__init__.py +0 -0
  74. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/learn/torch/gradient_descent_method.py +0 -0
  75. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/learn/torch/losses/__init__.py +0 -0
  76. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/learn/torch/losses/rmse.py +0 -0
  77. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/learn/torch/lr_schedulers.py +0 -0
  78. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/learn/torch/ml_trainer.py +0 -0
  79. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/ml_system.py +0 -0
  80. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/models/__init__.py +0 -0
  81. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/models/cnn/__init__.py +0 -0
  82. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/models/cnn/cnn_stem_setup.py +0 -0
  83. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/models/model_factory.py +0 -0
  84. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/models/model_hyperparams.py +0 -0
  85. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/models/model_info.py +0 -0
  86. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/models/torch/__init__.py +0 -0
  87. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/models/torch/model_utils.py +0 -0
  88. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/models/torch/torch_model_build_adapter.py +0 -0
  89. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/plots/plot_auto_multi_image.py +0 -0
  90. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/plots/plot_confusion_matrix.py +0 -0
  91. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/plots/plot_function.py +0 -0
  92. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/plots/plot_learning_curve.py +0 -0
  93. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/plots/plot_multi_scatter.py +0 -0
  94. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/plots/plot_visualize_dataset2d.py +0 -0
  95. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/plots/plot_voronoi_2d.py +0 -0
  96. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/stats/__init__.py +0 -0
  97. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/stats/descriptive_stats.py +0 -0
  98. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/__init__.py +0 -0
  99. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/files/__init__.py +0 -0
  100. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/files/csvfile.py +0 -0
  101. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/files/filelist.py +0 -0
  102. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/files/fileobject.py +0 -0
  103. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/files/imgfile.py +0 -0
  104. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/files/jsonfile.py +0 -0
  105. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/files/picklefile.py +0 -0
  106. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/files/textfile.py +0 -0
  107. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/files/zipfile.py +0 -0
  108. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/filestore.py +0 -0
  109. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/filesystem.py +0 -0
  110. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/hosts/__init__.py +0 -0
  111. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/hosts/colab_host.py +0 -0
  112. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/hosts/linux_host.py +0 -0
  113. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/log.py +0 -0
  114. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/tee_logger.py +0 -0
  115. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/threads/__init__.py +0 -0
  116. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/threads/semaphore_lock.py +0 -0
  117. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/threads/thread_context.py +0 -0
  118. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/threads/thread_safe_queue.py +0 -0
  119. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/threads/thread_safe_string_collection.py +0 -0
  120. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/system/threads/thread_worker.py +0 -0
  121. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/test/__init__.py +0 -0
  122. {radnn-0.1.4 → radnn-0.1.6}/src/radnn/test/tensor_hash.py +0 -0
  123. {radnn-0.1.4 → radnn-0.1.6}/src/radnn.egg-info/dependency_links.txt +0 -0
  124. {radnn-0.1.4 → radnn-0.1.6}/src/radnn.egg-info/requires.txt +0 -0
  125. {radnn-0.1.4 → radnn-0.1.6}/src/radnn.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: radnn
3
- Version: 0.1.4
3
+ Version: 0.1.6
4
4
  Summary: Rapid Deep Neural Networks
5
5
  Author-email: "Pantelis I. Kaplanoglou" <pikaplanoglou@ihu.gr>
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "radnn"
3
- version = "0.1.4"
3
+ version = "0.1.6"
4
4
  description = "Rapid Deep Neural Networks"
5
5
  readme = "README.md"
6
6
  authors = [
@@ -8,10 +8,13 @@
8
8
  # Version 0.1.0 [2026-01-07]
9
9
  # Version 0.1.1 [2025-01-08]
10
10
  # Version 0.1.4 [2025-01-26]
11
- __version__ = "0.1.4"
11
+ # Version 0.1.5 [2025-02-02]
12
+ # Version 0.1.6 [2025-02-03]
13
+ __version__ = "0.1.6"
12
14
 
13
15
  from .system import FileStore, FileSystem
14
16
  from .ml_system import MLSystem
15
17
  from .ml_system import mlsys
16
18
  from .utils import print_tensor, order_str
17
19
  from .errors import Errors
20
+ from .learn.constants import MLTask
@@ -6,5 +6,5 @@ from .sample_set_kind import SampleSetKind
6
6
  from .sample_preprocessor import SamplePreprocessor
7
7
 
8
8
  from .dataset_factory import DatasetFactory, DatasetBuildAdapter
9
-
10
- from .custom_data_set import LegacyDataSet
9
+ from .constants import DataPreprocessingKind
10
+ from .custom_data_set import LegacyDataSet
@@ -0,0 +1,8 @@
1
+ from enum import Enum
2
+
3
+ # =========================================================================================================================
4
class DataPreprocessingKind(Enum):
    """Kinds of feature preprocessing that can be requested when a dataset is split.

    The integer values are part of the public contract and must stay stable.
    """
    # Selects min-max normalization of the samples.
    MIN_MAX_NORMALIZE = 0
    # Selects standardization of the samples.
    STANDARDIZE = 1
7
+ # =========================================================================================================================
8
+
@@ -23,9 +23,13 @@
23
23
 
24
24
  # ......................................................................................
25
25
 
26
- from sklearn.model_selection import train_test_split # import a standalone procedure function from the pacakge
26
+ from sklearn.model_selection import train_test_split # import a standalone procedure toyfunction from the pacakge
27
+ from sklearn.preprocessing import StandardScaler, MinMaxScaler
27
28
  from radnn import mlsys
28
29
  from radnn.data.sample_set_simple import SampleSet
30
+ from .constants import DataPreprocessingKind
31
+ from enum import Enum
32
+
29
33
 
30
34
  # =========================================================================================================================
31
35
  class LegacyDataSet(object):
@@ -41,27 +45,49 @@ class LegacyDataSet(object):
41
45
 
42
46
  self.samples = None
43
47
  self.labels = None
44
-
48
+ self.preprocessor = None
49
+ self.mean = None
50
+ self.std = None
51
+
45
52
  # training set object
46
53
  self.ts: SampleSet | None = None
47
54
  # validation set object
48
55
  self.vs: SampleSet | None = None
49
56
  # unknown test set object
50
- self.ut: SampleSet | None = None
57
+ self.us: SampleSet | None = None
51
58
  # ................................................................
52
59
  if self.random_seed is not None:
53
60
  mlsys.random_seed_all(self.random_seed)
54
-
61
+ # --------------------------------------------------------------------------------------
62
def split(self, p_nValidationSamplesPC=0.10, preprocessing: DataPreprocessingKind | None = DataPreprocessingKind.STANDARDIZE):
    """Split `self.samples` / `self.labels` into a training and a validation subset.

    The split uses `train_test_split` with a fixed `random_state` of 2021. When a
    preprocessing kind is requested, the scaler is fitted on the training samples
    only and then applied to both subsets.

    Args:
        p_nValidationSamplesPC: Value passed to `train_test_split` as `test_size`.
        preprocessing: The scaler to fit, or None to leave the samples untouched.
    """
    oTS_Samples, oVS_Samples, oTS_Labels, oVS_Labels = train_test_split(
        self.samples, self.labels,
        test_size=p_nValidationSamplesPC,
        random_state=2021)

    if preprocessing == DataPreprocessingKind.MIN_MAX_NORMALIZE:
        self.preprocessor = MinMaxScaler().fit(oTS_Samples)
    elif preprocessing == DataPreprocessingKind.STANDARDIZE:
        self.preprocessor = StandardScaler().fit(oTS_Samples)
    else:
        # Drop any scaler left over from a previous call, so that asking for
        # "no preprocessing" really leaves the samples untransformed.
        self.preprocessor = None

    if self.preprocessor is not None:
        oTS_Samples = self.preprocessor.transform(oTS_Samples)
        oVS_Samples = self.preprocessor.transform(oVS_Samples)

    # (Re)creating the subsets of the dataset after the splits have been created
    self.ts = SampleSet(self, oTS_Samples, oTS_Labels)
    self.vs = SampleSet(self, oVS_Samples, oVS_Labels)

    print("%d samples in the Training Set" % self.ts.sample_count)
    print("%d samples in the Validation Set" % self.vs.sample_count)
55
81
  # --------------------------------------------------------------------------------------
56
82
  # Backwards Compatibility
57
83
  @property
58
84
  def TSSamples(self):
59
85
  return self.ts.samples
60
-
86
+
61
87
  @property
62
88
  def ts_samples(self):
63
89
  return self.ts_samples
64
-
90
+
65
91
  @property
66
92
  def TSLabels(self):
67
93
  return self.ts.labels
@@ -69,7 +95,7 @@ class LegacyDataSet(object):
69
95
  @property
70
96
  def ts_labels(self):
71
97
  return self.ts.labels
72
-
98
+
73
99
  @property
74
100
  def TSSampleCount(self):
75
101
  return self.ts.sample_count
@@ -77,40 +103,42 @@ class LegacyDataSet(object):
77
103
  @property
78
104
  def ts_sample_count(self):
79
105
  return self.ts.sample_count
80
-
106
+
81
107
  def VSSamples(self):
82
108
  return self.vs.samples
83
-
109
+
84
110
  @property
85
111
  def vs_samples(self):
86
112
  return self.vs.samples
87
-
113
+
88
114
  @property
89
115
  def VSLabels(self):
90
116
  return self.vs.labels
91
-
117
+
92
118
  @property
93
119
  def vs_labels(self):
94
120
  return self.vs.labels
95
-
121
+
96
122
  @property
97
123
  def VSSampleCount(self):
98
124
  return self.vs.sample_count
99
-
125
+
100
126
  @property
101
127
  def vs_sample_count(self):
102
128
  return self.vs.sample_count
103
-
129
+
104
130
  @property
105
131
  def FeatureCount(self):
106
132
  return self.feature_count
133
+
107
134
  @property
108
135
  def ClassCount(self):
109
136
  return self.class_count
137
+
110
138
  @property
111
139
  def ClassCount(self):
112
140
  return self.class_count
113
-
141
+
114
142
  @property
115
143
  def SampleCount(self):
116
144
  return self.sample_count
@@ -118,21 +146,8 @@ class LegacyDataSet(object):
118
146
  @property
119
147
  def Samples(self):
120
148
  return self.samples
121
-
149
+
122
150
  @property
123
151
  def Labels(self):
124
152
  return self.labels
125
- # --------------------------------------------------------------------------------------
126
- def split(self, p_nValidationSamplesPC=0.10):
127
- oTS_Samples, oVS_Samples, oTS_Labels, oVS_Labels = train_test_split(self.samples, self.labels ,
128
- test_size=p_nValidationSamplesPC,
129
- random_state=2021)
130
-
131
- # (Re)creating the subsets of the dataset after the splits have been created
132
- self.ts = SampleSet(self, oTS_Samples, oTS_Labels)
133
- self.vs = SampleSet(self, oVS_Samples, oVS_Labels)
134
-
135
- print("%d samples in the Training Set" % self.ts.sample_count)
136
- print("%d samples in the Validation Set" % self.vs.sample_count)
137
- # --------------------------------------------------------------------------------------
138
153
  # =========================================================================================================================
@@ -0,0 +1,263 @@
1
+ # ......................................................................................
2
+ # MIT License
3
+
4
+ # Copyright (c) 2019-2025 Pantelis I. Kaplanoglou
5
+
6
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ # of this software and associated documentation files (the "Software"), to deal
8
+ # in the Software without restriction, including without limitation the rights
9
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ # copies of the Software, and to permit persons to whom the Software is
11
+ # furnished to do so, subject to the following conditions:
12
+
13
+ # The above copyright notice and this permission notice shall be included in all
14
+ # copies or substantial portions of the Software.
15
+
16
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ # SOFTWARE.
23
+
24
+ # ......................................................................................
25
+ import os
26
+ import numpy as np
27
+ import pandas as pd
28
+ from abc import ABC, abstractmethod
29
+ from .sample_set_simple import SampleSet
30
+ from .sample_set_kind import SampleSetKind
31
+ from .sample_preprocessor import SamplePreprocessor, VoidPreprocessor
32
+ from .errors import *
33
+ from radnn import FileStore
34
+ from radnn import mlsys
35
+ from .constants import DataPreprocessingKind
36
+ from sklearn.model_selection import train_test_split
37
+ from sklearn.preprocessing import MinMaxScaler, StandardScaler
38
+
39
+ # ======================================================================================================================
40
class DataSetCallbacks(object):
    """Holder for the optional callbacks that a dataset invokes.

    Both callbacks default to None; callers that may not have set one should
    check `_lazy_download` / `_random_seed` before invoking the wrappers.
    """
    # --------------------------------------------------------------------------------------------------------------------
    def __init__(self, download_method=None, seed_method=None):
        """Store the callbacks.

        Args:
            download_method: Callable invoked as `download_method(fs)`, or None.
            seed_method: Callable invoked as `seed_method(seed)`, or None.
        """
        self._lazy_download = download_method
        self._random_seed = seed_method
    # --------------------------------------------------------------------------------------------------------------------
    def lazy_download(self, fs):
        """Invoke the download callback with the given file store.

        Raises:
            TypeError: If no download callback has been assigned.
        """
        if self._lazy_download is None:
            # Same exception type as calling None, but with an actionable message.
            raise TypeError("No download callback has been assigned (download_method is None).")
        self._lazy_download(fs)
    # --------------------------------------------------------------------------------------------------------------------
    def random_seed(self, seed: int):
        """Invoke the random seed callback with the given seed.

        Raises:
            TypeError: If no random seed callback has been assigned.
        """
        if self._random_seed is None:
            raise TypeError("No random seed callback has been assigned (seed_method is None).")
        self._random_seed(seed)
51
+ # --------------------------------------------------------------------------------------------------------------------
52
+ # ======================================================================================================================s
53
+
54
+
55
+
56
+
57
+
58
+ # ======================================================================================================================
59
class DataSetBase(ABC):
    """Abstract base class of a dataset that is bound to a local file store.

    Subclasses implement `load_data()`. The base class keeps the full sample,
    label and id collections (`all_samples`, `all_labels`, `all_ids`) and the
    training / validation / unknown-test subsets (`ts`, `vs`, `us`).
    """
    # --------------------------------------------------------------------------------------------------------------------
    # Constructor
    def __init__(self, name: str, variant: str | None = None, file_store=None, random_seed: int | None = None,
                 callbacks: DataSetCallbacks | None = None):
        """Create the dataset and resolve its local file store.

        Args:
            name: Dataset name; used (upper-cased) in `dataset_code`.
            variant: Optional variant name appended to `dataset_code`.
            file_store: A `FileStore`, a path to an existing folder, or None to
                use the dataset sub-store of `mlsys.filesys`.
            random_seed: Seed applied in `prepare()`, or None for no seeding.
            callbacks: Optional download / random seed callbacks.
        """
        # ..................// Instance Fields \\.........................
        self.name = name
        self.variant = variant
        self.fs = None
        self._determine_local_filestore(file_store)
        assert self.fs is not None, ERR_DS_MUST_PROVIDE_LOCAL_FILESTORE
        self.random_seed = random_seed

        self.all_ids = None
        self.all_samples = None
        self.all_labels = None

        self.feature_count = None
        self.class_count = None
        self.sample_count = None

        self.callbacks: DataSetCallbacks | None = callbacks

        self.hprm: dict | None = None
        self.ts: SampleSet | None = None
        self.vs: SampleSet | None = None
        self.us: SampleSet | None = None
        self.preprocessor: SamplePreprocessor = VoidPreprocessor(self)

        self.is_split = False
        # ................................................................

    # --------------------------------------------------------------------------------------
    def split(self, validation_samples_pc=0.10,
              preprocessing: DataPreprocessingKind | None = DataPreprocessingKind.STANDARDIZE,
              random_seed: int = 2021):
        """Split `all_samples` / `all_labels` into the training and validation subsets.

        The scaler, when requested, is fitted on the training samples only and
        then applied to both subsets. After the call `self.preprocessor` holds
        the fitted scaler, or None when no preprocessing was requested.

        Args:
            validation_samples_pc: Value passed to `train_test_split` as `test_size`.
            preprocessing: The scaler to fit, or None for no preprocessing.
            random_seed: Value passed to `train_test_split` as `random_state`.

        Returns:
            This dataset, to allow call chaining.
        """
        nTS_Samples, nVS_Samples, nTS_Labels, nVS_Labels = train_test_split(
            self.all_samples, self.all_labels,
            test_size=validation_samples_pc,
            random_state=random_seed)

        if preprocessing == DataPreprocessingKind.MIN_MAX_NORMALIZE:
            self.preprocessor = MinMaxScaler().fit(nTS_Samples)
        elif preprocessing == DataPreprocessingKind.STANDARDIZE:
            self.preprocessor = StandardScaler().fit(nTS_Samples)
        else:
            self.preprocessor = None

        if self.preprocessor is not None:
            nTS_Samples = self.preprocessor.transform(nTS_Samples)
            nVS_Samples = self.preprocessor.transform(nVS_Samples)

        # (Re)creating the subsets of the dataset after the splits have been created
        self.ts = SampleSet(self, nTS_Samples, nTS_Labels, kind=SampleSetKind.TRAINING_SET)
        if preprocessing == DataPreprocessingKind.STANDARDIZE:
            # Keep the statistics of the fitted standard scaler on the training set.
            self.ts.mean = self.preprocessor.mean_
            self.ts.std = self.preprocessor.scale_

        self.vs = SampleSet(self, nVS_Samples, nVS_Labels, kind=SampleSetKind.VALIDATION_SET)

        self.is_split = True
        return self
    # --------------------------------------------------------------------------------------------------------------------
    @property
    def dataset_code(self):
        """Upper-cased dataset name, followed by `_` and the upper-cased variant when one is set."""
        sUniqueName = f"{self.name.upper()}"
        if self.variant is not None:
            sUniqueName += f"_{self.variant.upper()}"
        return sUniqueName
    # --------------------------------------------------------------------------------------------------------------------
    def _determine_local_filestore(self, file_store):
        """Resolve `self.fs` from a `FileStore`, a folder path, or the system default.

        `self.fs` is left unchanged when `file_store` is neither a `FileStore`
        nor a string.

        Raises:
            Exception: If `file_store` is a path that does not exist.
        """
        # NOTE(review): the else branch is paired with the outer `is not None`
        # check (fallback to the system file store when nothing was given) —
        # confirm against the upstream indentation.
        if file_store is not None:
            if isinstance(file_store, FileStore):
                self.fs = file_store
            elif isinstance(file_store, str):
                if not os.path.exists(file_store):
                    raise Exception(ERR_DS_FOLDER_NOT_FOUND % file_store)
                self.fs = FileStore(file_store)
        else:
            assert mlsys.filesys is not None, ERR_MLSYS_FILESYS_NOT_INITIALIZED

            self.fs: FileStore = mlsys.filesys.datasets.subfs(self.dataset_code)
    # --------------------------------------------------------------------------------------------------------------------
    @property
    def filesystem_folder(self):
        """The `absolute_path` of the dataset's file store."""
        return self.fs.absolute_path
    # --------------------------------------------------------------------------------------------------------------------
    def read_hyperparams(self):
        """Hook called by `prepare()` when hyperparameters were given. Optionally override."""
        pass  # Optionally override
    # --------------------------------------------------------------------------------------------------------------------
    @abstractmethod
    def load_data(self):
        """Hook called by `prepare()` to load the dataset. Must be implemented by subclasses."""
        pass  # Must implement
    # --------------------------------------------------------------------------------------------------------------------
    @staticmethod
    def _cache_suffix(is_vector_samples, is_last_axis_features):
        """Return the file name suffix shared by `load_cache()` and `save_cache()`."""
        if is_vector_samples:
            return "-vec"
        if not is_last_axis_features:
            return "-torch"
        return ""
    # --------------------------------------------------------------------------------------------------------------------
    def load_cache(self, is_vector_samples=True, is_last_axis_features=True):
        """Load the cached training / validation samples and labels from the file store.

        Returns:
            The tuple (training samples, validation samples, training labels,
            validation labels), each being whatever `self.fs.obj.load` returns.
        """
        sSuffix = self._cache_suffix(is_vector_samples, is_last_axis_features)

        nTSSamples = self.fs.obj.load(f"{self.dataset_code}-TS-Samples{sSuffix}.pkl")
        nVSSamples = self.fs.obj.load(f"{self.dataset_code}-VS-Samples{sSuffix}.pkl")

        nTSLabels = self.fs.obj.load(f"{self.dataset_code}-TS-Labels{sSuffix}.pkl")
        nVSLabels = self.fs.obj.load(f"{self.dataset_code}-VS-Labels{sSuffix}.pkl")

        return nTSSamples, nVSSamples, nTSLabels, nVSLabels
    # --------------------------------------------------------------------------------------------------------------------
    def save_cache(self, ts_samples, vs_samples, ts_labels, vs_labels, is_vector_samples=True, is_last_axis_features=True):
        """Save the training / validation samples and labels under the names that `load_cache()` reads."""
        sSuffix = self._cache_suffix(is_vector_samples, is_last_axis_features)

        self.fs.obj.save(ts_samples, f"{self.dataset_code}-TS-Samples{sSuffix}.pkl")
        self.fs.obj.save(vs_samples, f"{self.dataset_code}-VS-Samples{sSuffix}.pkl")

        self.fs.obj.save(ts_labels, f"{self.dataset_code}-TS-Labels{sSuffix}.pkl")
        self.fs.obj.save(vs_labels, f"{self.dataset_code}-VS-Labels{sSuffix}.pkl")
    # --------------------------------------------------------------------------------------------------------------------
    def prepare(self, hyperparams: dict | None = None):
        """Run the preparation sequence: hyperparameters, download, seeding, loading, validation.

        Args:
            hyperparams: Optional hyperparameters, stored in `self.hprm`.

        Returns:
            This dataset, to allow call chaining.
        """
        self.hprm = hyperparams

        # VIRTUAL CALL: Reads the hyperparameters into instance variables
        if self.hprm is not None:
            self.read_hyperparams()

        if self.callbacks is not None:
            # A download callback is mandatory whenever callbacks are supplied;
            # report its absence with the download-specific message.
            assert self.callbacks._lazy_download is not None, ERR_DS_CALLBACKS_NO_LAZY_DOWNLOADER
            self.callbacks.lazy_download(self.fs)

        if self.random_seed is not None:
            bIsInitRandomSeed = False
            if (self.callbacks is not None) and (self.callbacks._random_seed is not None):
                self.callbacks.random_seed(self.random_seed)
                bIsInitRandomSeed = True
            if not bIsInitRandomSeed:
                # No framework-specific seeding callback: fall back to the system-wide one.
                mlsys.random_seed_all(self.random_seed)

        self.ts = None
        self.vs = None
        self.us = None

        # VIRTUAL CALL: Imports the dataset from the source local/remote filestore to the local cache.
        self.load_data()

        # NOTE(review): all subset checks are placed under `is_split` — confirm
        # against the upstream indentation.
        if self.is_split:
            assert self.ts is not None, ERR_DS_SUBSET_MUST_HAVE_TS
            assert self.ts.kind == SampleSetKind.TRAINING_SET, ERR_DS_SUBSET_INVALID_SETUP
            if self.vs is not None:
                assert self.vs.kind == SampleSetKind.VALIDATION_SET, ERR_DS_SUBSET_INVALID_SETUP

            if self.us is not None:
                assert self.us.kind == SampleSetKind.UNKNOWN_TEST_SET, ERR_DS_SUBSET_INVALID_SETUP

        return self
    # --------------------------------------------------------------------------------------------------------------------
    def assign(self, data, label_columns: range | None = None):
        """Assign the full sample / label / id collections from one of several containers.

        Args:
            data: One of
                - a tuple `(samples, labels)`,
                - a numpy array holding the samples,
                - a dict with any of the keys "samples", "labels", "ids",
                - a pandas DataFrame, split into labels and samples by `label_columns`.
            label_columns: Only used for a DataFrame. None keeps every column as
                a sample feature. Otherwise only `.start` and `.stop` are read:
                a non-negative start takes the columns `start..stop` (inclusive)
                as labels and the columns after `stop` as samples; a negative
                start takes the trailing columns from `start` on as labels and
                the columns before them as samples.

        Returns:
            This dataset, to allow call chaining.
        """
        self.all_samples, self.all_labels, self.all_ids = None, None, None
        if isinstance(data, tuple):
            self.all_samples, self.all_labels = data
        elif isinstance(data, np.ndarray):
            self.all_samples = data
        elif isinstance(data, dict):
            # Membership must be tested on the given dictionary, not on the `dict` type.
            if "samples" in data:
                self.all_samples = data["samples"]
            if "labels" in data:
                self.all_labels = data["labels"]
            if "ids" in data:
                self.all_ids = data["ids"]
        elif isinstance(data, pd.DataFrame):
            # NOTE(review): `data.columns` is always a pd.Index, so the first row
            # is always dropped here — confirm this is intended; `to_numpy()`
            # does not include the column header.
            if isinstance(data.columns, pd.Index):
                nData = data.iloc[1:].to_numpy()
            else:
                nData = data.to_numpy()

            if label_columns is None:
                self.all_samples = nData
            elif label_columns.start >= 0:
                if label_columns.stop is None:
                    # Open-ended selection (e.g. a slice): a single label column.
                    self.all_labels = nData[:, label_columns.start]
                    self.all_samples = nData[:, label_columns.start + 1:]
                else:
                    self.all_labels = nData[:, label_columns.start:label_columns.stop + 1]
                    self.all_samples = nData[:, label_columns.stop + 1:]
            else:
                self.all_samples = nData[:, :label_columns.start]
                self.all_labels = nData[:, label_columns.start:]

        if (self.all_ids is None) and (self.all_samples is not None):
            # Default ids are 1-based sequence numbers, one per sample.
            self.all_ids = np.arange(1, len(self.all_samples) + 1)

        return self
    # --------------------------------------------------------------------------------------------------------------------
    def print_info(self):
        """Print the dataset code followed by the info of every subset that exists."""
        print(f"Dataset [{self.dataset_code}]")
        if self.ts is not None:
            self.ts.print_info()
        if self.vs is not None:
            self.vs.print_info()
        if self.us is not None:
            self.us.print_info()
263
+ # --------------------------------------------------------------------------------------------------------------------
@@ -25,7 +25,7 @@
25
25
 
26
26
 
27
27
  import numpy as np
28
- from sklearn.model_selection import train_test_split # import a standalone procedure function from the pacakge
28
+ from sklearn.model_selection import train_test_split # import a standalone procedure toyfunction from the pacakge
29
29
 
30
30
 
31
31
  # =========================================================================================================================
@@ -22,11 +22,16 @@
22
22
  # SOFTWARE.
23
23
 
24
24
  # ......................................................................................
25
+
26
+ ERR_MLSYS_FILESYS_NOT_INITIALIZED = "The filesystem for the Machine Learning host system has not been initialized."
27
+
28
+
25
29
  ERR_NO_CALLBACKS = "You should assign callbacks to the dataset perform proper random seed initialization for your framework."
26
- ERR_NO_RANDOM_SEED_INITIALIZER_CALLBACK = "Callback method for random seed initialization has not been defined."
30
+ ERR_DS_NO_RANDOM_SEED_INITIALIZER_CALLBACK = "Callback method for random seed initialization has not been defined."
31
+ ERR_DS_CALLBACKS_NO_LAZY_DOWNLOADER = "Callback method for downloading the dataset has not been defined."
27
32
 
28
- ERR_SUBSET_MUST_HAVE_TS = "A dataset must have at least a training subset."
29
- ERR_SUBSET_INVALID_SETUP = "Invalid sample subset setup. Please use one of the valid kinds: 'training/train/ts', 'validation/val/vs', 'testing/test/us'."
30
- ERR_SUBSET_MUST_HAVE_SAMPLES = "The subset has no samples, check the implementation of your dataset class."
31
- ERR_DATASET_FOLDER_NOT_FOUND = "The dataset was not found under the folder %s"
32
- ERR_DATASET_MUST_PROVIDE_LOCAL_FILESTORE = "You must provide a local filestore/path for the dataset"
33
+ ERR_DS_SUBSET_MUST_HAVE_TS = "A dataset must have at least a training subset."
34
+ ERR_DS_SUBSET_INVALID_SETUP = "Invalid sample subset setup. Please use one of the valid kinds: 'training/train/ts', 'validation/val/vs', 'testing/test/us'."
35
+ ERR_DS_SUBSET_MUST_HAVE_SAMPLES = "The subset has no samples, check the implementation of your dataset class."
36
+ ERR_DS_FOLDER_NOT_FOUND = "The dataset was not found under the folder %s"
37
+ ERR_DS_MUST_PROVIDE_LOCAL_FILESTORE = "You must provide a local filestore/path for the dataset"
@@ -47,6 +47,9 @@ class SamplePreprocessor(ABC):
47
47
  pass
48
48
  # --------------------------------------------------------------------------------------------------------------------
49
49
 
50
+
51
+
52
+
50
53
  # ======================================================================================================================
51
54
  class VoidPreprocessor(SamplePreprocessor):
52
55
  # --------------------------------------------------------------------------------------------------------------------