sdg-core-lib 0.1.0 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sdg_core_lib/NumericDataset.py +150 -0
- sdg_core_lib/__init__.py +0 -0
- sdg_core_lib/browser.py +73 -0
- sdg_core_lib/data_generator/__init__.py +0 -0
- sdg_core_lib/data_generator/model_factory.py +72 -0
- sdg_core_lib/data_generator/models/ModelInfo.py +42 -0
- sdg_core_lib/data_generator/models/TrainingInfo.py +40 -0
- sdg_core_lib/data_generator/models/UnspecializedModel.py +106 -0
- sdg_core_lib/data_generator/models/__init__.py +0 -0
- sdg_core_lib/data_generator/models/keras/KerasBaseVAE.py +172 -0
- sdg_core_lib/data_generator/models/keras/VAE.py +61 -0
- sdg_core_lib/data_generator/models/keras/__init__.py +0 -0
- sdg_core_lib/data_generator/models/keras/implementation/TabularVAE.py +96 -0
- sdg_core_lib/data_generator/models/keras/implementation/TimeSeriesVAE.py +156 -0
- sdg_core_lib/data_generator/models/keras/implementation/__init__.py +0 -0
- sdg_core_lib/evaluate/Metrics.py +48 -0
- sdg_core_lib/evaluate/TabularComparison.py +276 -0
- sdg_core_lib/evaluate/__init__.py +0 -0
- sdg_core_lib/job.py +56 -0
- sdg_core_lib/post_process/FunctionApplier.py +14 -0
- sdg_core_lib/post_process/__init__.py +0 -0
- sdg_core_lib/post_process/function_factory.py +41 -0
- sdg_core_lib/post_process/functions/FunctionInfo.py +25 -0
- sdg_core_lib/post_process/functions/FunctionResult.py +15 -0
- sdg_core_lib/post_process/functions/Parameter.py +33 -0
- sdg_core_lib/post_process/functions/UnspecializedFunction.py +42 -0
- sdg_core_lib/post_process/functions/__init__.py +0 -0
- sdg_core_lib/post_process/functions/distribution_evaluator/__init__.py +0 -0
- sdg_core_lib/post_process/functions/distribution_evaluator/implementation/NormalTester.py +65 -0
- sdg_core_lib/post_process/functions/distribution_evaluator/implementation/__init__.py +0 -0
- sdg_core_lib/post_process/functions/filter/IntervalThreshold.py +32 -0
- sdg_core_lib/post_process/functions/filter/MonoThreshold.py +28 -0
- sdg_core_lib/post_process/functions/filter/__init__.py +0 -0
- sdg_core_lib/post_process/functions/filter/implementation/InnerThreshold.py +43 -0
- sdg_core_lib/post_process/functions/filter/implementation/LowerThreshold.py +32 -0
- sdg_core_lib/post_process/functions/filter/implementation/OuterThreshold.py +42 -0
- sdg_core_lib/post_process/functions/filter/implementation/UpperThreshold.py +32 -0
- sdg_core_lib/post_process/functions/filter/implementation/__init__.py +0 -0
- sdg_core_lib/preprocess/__init__.py +0 -0
- sdg_core_lib/preprocess/scale.py +51 -0
- sdg_core_lib/test/__init__.py +0 -0
- sdg_core_lib/test/data_generator/__init__.py +0 -0
- sdg_core_lib/test/data_generator/models/__init__.py +0 -0
- sdg_core_lib/test/data_generator/models/keras/__init__.py +0 -0
- sdg_core_lib/test/data_generator/models/keras/implementation/__init__.py +0 -0
- sdg_core_lib/test/data_generator/models/keras/implementation/test_TabularVAE.py +120 -0
- sdg_core_lib/test/data_generator/models/keras/implementation/test_TimeSeriesVAE.py +110 -0
- sdg_core_lib/test/data_generator/models/keras/test_KerasBaseVAE.py +74 -0
- sdg_core_lib/test/data_generator/models/test_ModelInfo.py +27 -0
- sdg_core_lib/test/data_generator/models/test_TrainingInfo.py +30 -0
- sdg_core_lib/test/data_generator/models/test_UnspecializedModel.py +32 -0
- sdg_core_lib/test/data_generator/test_model_factory.py +52 -0
- sdg_core_lib/test/evaluate/__init__.py +0 -0
- sdg_core_lib/test/evaluate/test_Metrics.py +62 -0
- sdg_core_lib/test/evaluate/test_TabularComparisonEvaluator.py +75 -0
- sdg_core_lib/test/infer_test.json +168 -0
- sdg_core_lib/test/infer_test_nodata.json +77 -0
- sdg_core_lib/test/infer_test_nodata_wrong.json +11 -0
- sdg_core_lib/test/post_process/__init__.py +0 -0
- sdg_core_lib/test/post_process/functions/__init__.py +0 -0
- sdg_core_lib/test/post_process/functions/distribution_evaluator/__init__.py +0 -0
- sdg_core_lib/test/post_process/functions/distribution_evaluator/implementation/__init__.py +0 -0
- sdg_core_lib/test/post_process/functions/distribution_evaluator/implementation/test_NormalTester.py +55 -0
- sdg_core_lib/test/post_process/functions/filters/__init__.py +0 -0
- sdg_core_lib/test/post_process/functions/filters/implementation/__init__.py +0 -0
- sdg_core_lib/test/post_process/functions/filters/implementation/test_InnerThreshold.py +30 -0
- sdg_core_lib/test/pre_process/__init__.py +0 -0
- sdg_core_lib/test/pre_process/test_scaling.py +55 -0
- sdg_core_lib/test/test_browser.py +11 -0
- sdg_core_lib/test/test_dataset.py +149 -0
- sdg_core_lib/test/test_job.py +128 -0
- sdg_core_lib/test/train_test.json +166 -0
- sdg_core_lib/test/train_test_2.json +9 -0
- sdg_core_lib-0.1.0.dist-info/METADATA +9 -0
- sdg_core_lib-0.1.0.dist-info/RECORD +77 -0
- sdg_core_lib-0.1.0.dist-info/WHEEL +4 -0
- sdg_core_lib-0.1.0.dist-info/entry_points.txt +3 -0
sdg_core_lib/test/infer_test.json
ADDED
@@ -0,0 +1,168 @@
+{
+  "dataset": [
+    {
+      "column_data": [
+        13.71,
+        13.4,
+        13.27,
+        13.17,
+        14.13
+      ],
+      "column_name": "alcohol",
+      "column_type": "continuous",
+      "column_datatype": "float64"
+    },
+    {
+      "column_data": [
+        5.65,
+        3.91,
+        4.28,
+        2.59,
+        4.1
+      ],
+      "column_name": "malic_acid",
+      "column_type": "continuous",
+      "column_datatype": "float64"
+    },
+    {
+      "column_data": [
+        2.45,
+        2.48,
+        2.26,
+        2.37,
+        2.74
+      ],
+      "column_name": "ash",
+      "column_type": "continuous",
+      "column_datatype": "float64"
+    },
+    {
+      "column_data": [
+        20.5,
+        23.0,
+        20.0,
+        20.0,
+        24.5
+      ],
+      "column_name": "acl",
+      "column_type": "continuous",
+      "column_datatype": "float64"
+    },
+    {
+      "column_data": [
+        95,
+        102,
+        120,
+        120,
+        96
+      ],
+      "column_name": "Mmg",
+      "column_type": "continuous",
+      "column_datatype": "int64"
+    },
+    {
+      "column_data": [
+        1.68,
+        1.8,
+        1.59,
+        1.65,
+        2.05
+      ],
+      "column_name": "phenols",
+      "column_type": "continuous",
+      "column_datatype": "float64"
+    },
+    {
+      "column_data": [
+        0.61,
+        0.75,
+        0.69,
+        0.68,
+        0.76
+      ],
+      "column_name": "flavanoids",
+      "column_type": "continuous",
+      "column_datatype": "float64"
+    },
+    {
+      "column_data": [
+        0.52,
+        0.43,
+        0.43,
+        0.53,
+        0.56
+      ],
+      "column_name": "nonflavanoid_phenols",
+      "column_type": "continuous",
+      "column_datatype": "float64"
+    },
+    {
+      "column_data": [
+        1.06,
+        1.41,
+        1.35,
+        1.46,
+        1.35
+      ],
+      "column_name": "proanth",
+      "column_type": "continuous",
+      "column_datatype": "float64"
+    },
+    {
+      "column_data": [
+        7.7,
+        7.3,
+        10.2,
+        9.3,
+        9.2
+      ],
+      "column_name": "color_int",
+      "column_type": "continuous",
+      "column_datatype": "float64"
+    },
+    {
+      "column_data": [
+        0.64,
+        0.7,
+        0.59,
+        0.6,
+        0.61
+      ],
+      "column_name": "hue",
+      "column_type": "continuous",
+      "column_datatype": "float64"
+    },
+    {
+      "column_data": [
+        3.92,
+        3.4,
+        3.17,
+        3.45,
+        2.93
+      ],
+      "column_name": "od",
+      "column_type": "continuous",
+      "column_datatype": "float64"
+    },
+    {
+      "column_data": [
+        740,
+        750,
+        835,
+        840,
+        560
+      ],
+      "column_name": "prolin",
+      "column_type": "continuous",
+      "column_datatype": "int64"
+    }
+  ],
+  "functions_id" : [],
+  "model": {
+    "algorithm_name": "sdg_core_lib.data_generator.models.keras.implementation.TabularVAE.TabularVAE",
+    "model_name": "Test-T_VAE",
+    "image": "C:\\Users\\giumartine\\PycharmProjects\\SyntheticGenerator\\synthetic-data-generator\\src\\generator\\server\\saved_models\\trained_models\\Test-T_VAETabularVAE20250319.150458",
+    "input_shape": "(13,)"
+  },
+  "n_rows": 1000
+}
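
The payload above is column-oriented: each entry carries column_name, column_type, column_datatype and the raw column_data, plus a model block and the number of rows to generate. Below is a minimal sketch of turning such a payload into a pandas DataFrame for inspection; it is illustrative only and assumes the file path, since the library's own loader (NumericDataset) is not reproduced here.

# Illustrative only: rebuild a DataFrame from the column-oriented payload shown
# in infer_test.json. This is not the library's own loader.
import json
import pandas as pd

with open("infer_test.json") as fh:  # path is an assumption
    payload = json.load(fh)

# One DataFrame column per "dataset" entry, cast to the declared dtype.
frame = pd.DataFrame(
    {col["column_name"]: col["column_data"] for col in payload["dataset"]}
)
frame = frame.astype(
    {col["column_name"]: col["column_datatype"] for col in payload["dataset"]}
)

print(frame.shape)        # (5, 13) for the fixture above
print(payload["n_rows"])  # number of synthetic rows requested: 1000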
sdg_core_lib/test/infer_test_nodata.json
ADDED
@@ -0,0 +1,77 @@
+{
+  "behavious_id" : [],
+  "model": {
+    "algorithm_name": "sdg_core_lib.data_generator.models.keras.implementation.TabularVAE.TabularVAE",
+    "model_name": "Test-T_VAE",
+    "image": "./sdg_core_lib/test/outputs/",
+    "input_shape": "(13,)",
+    "training_data_info": [
+      {
+        "column_name": "alcohol",
+        "column_type": "continuous",
+        "column_datatype": "float64"
+      },
+      {
+        "column_name": "malic_acid",
+        "column_type": "continuous",
+        "column_datatype": "float64"
+      },
+      {
+        "column_name": "ash",
+        "column_type": "continuous",
+        "column_datatype": "float64"
+      },
+      {
+        "column_name": "acl",
+        "column_type": "continuous",
+        "column_datatype": "float64"
+      },
+      {
+        "column_name": "Mmg",
+        "column_type": "continuous",
+        "column_datatype": "float64"
+      },
+      {
+        "column_name": "phenols",
+        "column_type": "continuous",
+        "column_datatype": "float64"
+      },
+      {
+        "column_name": "flavanoids",
+        "column_type": "continuous",
+        "column_datatype": "float64"
+      },
+      {
+        "column_name": "nonflavanoid_phenols",
+        "column_type": "continuous",
+        "column_datatype": "float64"
+      },
+      {
+        "column_name": "proanth",
+        "column_type": "continuous",
+        "column_datatype": "float64"
+      },
+      {
+        "column_name": "color_int",
+        "column_type": "continuous",
+        "column_datatype": "float64"
+      },
+      {
+        "column_name": "hue",
+        "column_type": "continuous",
+        "column_datatype": "float64"
+      },
+      {
+        "column_name": "od",
+        "column_type": "continuous",
+        "column_datatype": "float64"
+      },
+      {
+        "column_name": "prolin",
+        "column_type": "continuous",
+        "column_datatype": "float64"
+      }
+    ]
+  },
+  "n_rows": 1000
+}
sdg_core_lib/test/infer_test_nodata_wrong.json
ADDED
@@ -0,0 +1,11 @@
+{
+  "behavious_id" : [],
+  "model": {
+    "algorithm_name": "sdg_core_lib.data_generator.models.keras.implementation.TabularVAE.TabularVAE",
+    "model_name": "Test-T_VAE",
+    "image": "./sdg_core_lib/test/outputs/",
+    "input_shape": "(13,)",
+    "training_data_info": []
+  },
+  "n_rows": 1000
+}
sdg_core_lib/test/post_process/__init__.py
File without changes
sdg_core_lib/test/post_process/functions/__init__.py
File without changes
sdg_core_lib/test/post_process/functions/distribution_evaluator/__init__.py
File without changes
sdg_core_lib/test/post_process/functions/distribution_evaluator/implementation/__init__.py
File without changes
sdg_core_lib/test/post_process/functions/distribution_evaluator/implementation/test_NormalTester.py
ADDED
@@ -0,0 +1,55 @@
+import pytest
+import numpy as np
+
+from sdg_core_lib.post_process.functions.distribution_evaluator.implementation.NormalTester import (
+    NormalTester,
+)
+
+
+@pytest.fixture
+def correct_instance():
+    params = [
+        {"name": "mean", "value": "0.0", "parameter_type": "float"},
+        {"name": "standard_deviation", "value": "1.0", "parameter_type": "float"},
+    ]
+    return NormalTester(parameters=params)
+
+
+def test_check_parameters(correct_instance):
+    param_mapping = {param.name: param for param in correct_instance.parameters}
+    param_names = param_mapping.keys()
+    assert param_mapping["mean"].value == 0.0
+    assert isinstance(param_mapping["mean"].value, float)
+    assert param_mapping["standard_deviation"].value == 1.0
+    assert isinstance(param_mapping["standard_deviation"].value, float)
+    assert "mean" in param_names
+    assert "standard_deviation" in param_names
+
+
+def test_compute(correct_instance):
+    data = np.random.normal(correct_instance.mean, correct_instance.std, 10000)
+    compute_data, indexes = correct_instance._compute(data)
+    assert data.shape == (10000,)
+    assert indexes.shape == (10000,)
+    assert np.all(compute_data == data)
+    assert np.all(indexes == np.array(range(len(data))))
+
+
+def test_evaluate(correct_instance):
+    correct_data = np.random.normal(correct_instance.mean, correct_instance.std, 10000)
+    assert correct_instance._evaluate(correct_data)
+
+
+def test_evaluate_wrong(correct_instance):
+    wrong_data = np.random.normal(5, 1, 10000)
+    wrong_data_2 = np.random.normal(0, 10, 10000)
+    assert not correct_instance._evaluate(wrong_data)
+    assert not correct_instance._evaluate(wrong_data_2)
+
+
+def test_get_result(correct_instance):
+    data_correct = np.random.normal(correct_instance.mean, correct_instance.std, 10000)
+    results = correct_instance.get_results(data_correct)
+    assert results["results"].shape == (10000,)
+    assert results["indexes"].shape == (10000,)
+    assert results["evaluation_results"]
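
The tests above rely on NormalTester parsing its mean and standard_deviation parameters and judging whether a sample plausibly comes from that distribution. As a minimal sketch of the same idea, here is one common way to perform such a check with a Kolmogorov-Smirnov test; the function name and the use of scipy are assumptions, not NormalTester's actual _evaluate logic, which this diff does not show.

# Sketch only: decide whether a sample looks like N(mean, std) via a KS test.
import numpy as np
from scipy import stats

def looks_normal(data: np.ndarray, mean: float, std: float, alpha: float = 0.05) -> bool:
    """Return True if the KS test does not reject N(mean, std) at level alpha."""
    _, p_value = stats.kstest(data, "norm", args=(mean, std))
    return p_value > alpha

sample = np.random.normal(0.0, 1.0, 10_000)
assert looks_normal(sample, mean=0.0, std=1.0)                              # matches
assert not looks_normal(np.random.normal(5, 1, 10_000), mean=0.0, std=1.0)  # shifted mean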
sdg_core_lib/test/post_process/functions/filters/__init__.py
File without changes
sdg_core_lib/test/post_process/functions/filters/implementation/__init__.py
File without changes
sdg_core_lib/test/post_process/functions/filters/implementation/test_InnerThreshold.py
ADDED
@@ -0,0 +1,30 @@
+import pytest
+
+from sdg_core_lib.post_process.functions.filter.implementation.InnerThreshold import (
+    InnerThreshold,
+)
+
+
+@pytest.fixture
+def correct_instance():
+    params = [
+        {"name": "upper_bound", "value": "50.0", "parameter_type": "float"},
+        {"name": "lower_bound", "value": "10.0", "parameter_type": "float"},
+        {"name": "upper_strict", "value": "True", "parameter_type": "bool"},
+        {"name": "lower_strict", "value": "False", "parameter_type": "bool"},
+    ]
+    return InnerThreshold(parameters=params)
+
+
+def test_check_parameters(correct_instance):
+    param_mapping = {param.name: param for param in correct_instance.parameters}
+    param_names = param_mapping.keys()
+    assert param_mapping["upper_bound"].value > param_mapping["lower_bound"].value
+    assert isinstance(param_mapping["upper_bound"].value, float)
+    assert isinstance(param_mapping["lower_bound"].value, float)
+    assert isinstance(param_mapping["upper_strict"].value, bool)
+    assert isinstance(param_mapping["lower_strict"].value, bool)
+    assert "upper_bound" in param_names
+    assert "lower_bound" in param_names
+    assert "upper_strict" in param_names
+    assert "lower_strict" in param_names
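
This test only exercises parameter parsing, so the filtering rule itself is not visible in the diff. The following is a hypothetical sketch of what an inner-threshold filter with these four parameters could look like, using a NumPy boolean mask; the function name and the strict/non-strict semantics are assumptions, not InnerThreshold's actual code.

# Hypothetical: keep values inside (lower_bound, upper_bound), honouring strictness.
import numpy as np

def inner_threshold_mask(values, lower_bound, upper_bound, lower_strict, upper_strict):
    above = values > lower_bound if lower_strict else values >= lower_bound
    below = values < upper_bound if upper_strict else values <= upper_bound
    return above & below

data = np.array([5.0, 10.0, 25.0, 50.0, 60.0])
mask = inner_threshold_mask(data, 10.0, 50.0, lower_strict=False, upper_strict=True)
print(data[mask])  # [10. 25.] with these bounds and flags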
sdg_core_lib/test/pre_process/__init__.py
File without changes
sdg_core_lib/test/pre_process/test_scaling.py
ADDED
@@ -0,0 +1,55 @@
+import pytest
+import numpy as np
+
+from sdg_core_lib.preprocess.scale import (
+    standardize_simple_tabular_time_series,
+    standardize_simple_tabular_input,
+)
+
+
+@pytest.fixture()
+def correct_tabular_input():
+    return np.linspace(-10, 10, 100).reshape(10, 10)
+
+
+@pytest.fixture()
+def correct_time_series_input():
+    return np.linspace(-10, 10, 1000).reshape(10, 2, 50)
+
+
+def test_correct_tabular_scaling(correct_tabular_input):
+    scaler, standardized_train_data, standardized_test_data = (
+        standardize_simple_tabular_input(
+            train_data=correct_tabular_input, test_data=correct_tabular_input
+        )
+    )
+
+    assert type(standardized_train_data) is np.ndarray
+    assert standardized_train_data.shape == correct_tabular_input.shape
+    assert standardized_test_data.shape == correct_tabular_input.shape
+
+
+def test_incorrect_tabular_scaling(correct_time_series_input):
+    with pytest.raises(ValueError) as exception_info:
+        _, _, _ = standardize_simple_tabular_input(train_data=correct_time_series_input)
+    assert exception_info.type is ValueError
+
+
+def test_correct_time_series_scaling(correct_time_series_input):
+    scaler, standardized_train_data, standardized_test_data = (
+        standardize_simple_tabular_time_series(
+            train_data=correct_time_series_input, test_data=correct_time_series_input
+        )
+    )
+
+    assert type(standardized_train_data) is np.ndarray
+    assert standardized_train_data.shape == correct_time_series_input.shape
+    assert standardized_test_data.shape == correct_time_series_input.shape
+
+
+def test_incorrect_time_series_scaling(correct_tabular_input):
+    with pytest.raises(ValueError) as exception_info:
+        _, _, _ = standardize_simple_tabular_time_series(
+            train_data=correct_tabular_input
+        )
+    assert exception_info.type is ValueError
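
These tests pin down a contract: the helpers return a (scaler, train, test) triple, accept 2-D arrays for the tabular case and 3-D arrays for the time-series case, and raise ValueError otherwise. The following is a minimal sketch of such a helper built on scikit-learn's StandardScaler; the function name and implementation are assumptions and are not sdg_core_lib.preprocess.scale's actual code.

# Sketch of the (scaler, train, test) contract for 2-D tabular input.
import numpy as np
from sklearn.preprocessing import StandardScaler

def standardize_tabular(train_data, test_data=None):
    if train_data.ndim != 2:
        raise ValueError("tabular standardization expects a 2-D array")
    scaler = StandardScaler().fit(train_data)
    scaled_train = scaler.transform(train_data)
    scaled_test = scaler.transform(test_data) if test_data is not None else None
    return scaler, scaled_train, scaled_test

scaler, train, test = standardize_tabular(np.linspace(-10, 10, 100).reshape(10, 10))
print(train.shape)  # (10, 10), zero mean and unit variance per column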
sdg_core_lib/test/test_browser.py
ADDED
@@ -0,0 +1,11 @@
+from sdg_core_lib.browser import browse_algorithms, browse_functions
+
+
+def test_browse_algorithms():
+    for desc in browse_algorithms():
+        assert desc is not None
+
+
+def test_browse_functions():
+    for desc in browse_functions():
+        assert desc is not None
sdg_core_lib/test/test_dataset.py
ADDED
@@ -0,0 +1,149 @@
+import pandas as pd
+import pytest
+import numpy as np
+
+from sdg_core_lib.NumericDataset import NumericDataset
+
+
+@pytest.fixture
+def correct_dataset():
+    return [
+        {
+            "column_name": "A",
+            "column_type": "continuous",
+            "column_datatype": "float64",
+            "column_data": [1.0, 2.0, 3.0, 4.0, 5.0],
+        },
+        {
+            "column_name": "B",
+            "column_type": "categorical",
+            "column_datatype": "object",
+            "column_data": ["a", "b", "c", "d", "e"],
+        },
+        {
+            "column_name": "C",
+            "column_type": "continuous",
+            "column_datatype": "int64",
+            "column_data": [1, 2, 3, 4, 5],
+        },
+        {
+            "column_name": "D",
+            "column_type": "test",
+            "column_datatype": "int64",
+            "column_data": [1, 2, 3, 4, 5],
+        },
+    ]
+
+
+@pytest.fixture
+def complex_dataset():
+    return [
+        {
+            "column_name": "A",
+            "column_type": "time_series",
+            "column_datatype": "float64",
+            "column_data": [[1.0, 2.0, 3.0, 4.0, 5.0], [1.0, 2.0, 3.0, 4.0, 5.0]],
+        }
+    ]
+
+
+@pytest.fixture
+def error_dataset():
+    return [
+        {
+            "column_name": "A",
+            "column_type": "categorical",
+            "column_datatype": "string",
+            "column_data": ["a", "b", "c", "d", "e"],
+        }
+    ]
+
+
+@pytest.fixture
+def empty_dataset():
+    return []
+
+
+def test_initialization(correct_dataset):
+    dataset = NumericDataset(correct_dataset)
+    assert len(dataset.columns) == 4
+    assert len(dataset.categorical_columns) == 1
+    assert len(dataset.continuous_columns) == 2
+    assert len(dataset.unrecognized_columns) == 1
+    assert dataset.get_numpy_data(dataset.dataframe).shape == (5, 4)
+
+
+def test_dataset_complexity(complex_dataset):
+    dataset = NumericDataset(complex_dataset)
+    print(np.array(dataset.dataframe.to_numpy().tolist()).shape)
+    assert len(dataset.columns) == 1
+    assert len(dataset.categorical_columns) == 0
+    assert len(dataset.continuous_columns) == 0
+    assert len(dataset.unrecognized_columns) == 1
+    assert dataset.get_numpy_data(dataset.dataframe).shape == (2, 1, 5)
+
+
+def test_error_initialization(error_dataset):
+    with pytest.raises(TypeError) as exception_info:
+        _ = NumericDataset(error_dataset)
+    assert exception_info.type is TypeError
+
+
+def test_parse_tabular_data_json(correct_dataset):
+    dataset = NumericDataset(correct_dataset)
+    print(dataset.dataframe["A"].dtype)
+    list_dict = dataset.parse_tabular_data_json()
+    assert len(list_dict) == len(dataset.columns)
+    assert list_dict[0]["column_name"] == "A"
+    assert list_dict[0]["column_type"] == "continuous"
+    assert list_dict[0]["column_datatype"] == "float64"
+    assert list_dict[0]["column_data"] == [1, 2, 3, 4, 5]
+    assert list_dict[1]["column_name"] == "B"
+    assert list_dict[1]["column_type"] == "categorical"
+    assert list_dict[1]["column_datatype"] == "object"
+    assert list_dict[1]["column_data"] == ["a", "b", "c", "d", "e"]
+    assert list_dict[2]["column_name"] == "C"
+    assert list_dict[2]["column_type"] == "continuous"
+    assert list_dict[2]["column_datatype"] == "int64"
+    assert list_dict[2]["column_data"] == [1, 2, 3, 4, 5]
+    assert list_dict[3]["column_name"] == "D"
+    assert list_dict[3]["column_type"] == "none"
+    assert list_dict[3]["column_datatype"] == "int64"
+    assert list_dict[3]["column_data"] == [1, 2, 3, 4, 5]
+
+
+def test_parse_data_to_registry(correct_dataset):
+    dataset = NumericDataset(correct_dataset)
+    feature_list = dataset.parse_data_to_registry()
+    assert len(feature_list) == len(dataset.columns)
+    assert feature_list[0]["feature_name"] == "A"
+    assert feature_list[0]["feature_position"] == 0
+    assert feature_list[0]["is_categorical"] is False
+    assert feature_list[0]["type"] == "float64"
+    assert feature_list[1]["feature_name"] == "B"
+    assert feature_list[1]["feature_position"] == 1
+    assert feature_list[1]["is_categorical"] is True
+    assert feature_list[1]["type"] == "object"
+    assert feature_list[2]["feature_name"] == "C"
+    assert feature_list[2]["feature_position"] == 2
+    assert feature_list[2]["is_categorical"] is False
+    assert feature_list[2]["type"] == "int64"
+    assert feature_list[3]["feature_name"] == "D"
+    assert feature_list[3]["feature_position"] == 3
+    assert feature_list[3]["is_categorical"] is False
+    assert feature_list[3]["type"] == "int64"
+
+
+def test_get_data(correct_dataset):
+    dataset = NumericDataset(correct_dataset)
+    dataframe, columns, continuous_columns, categorical_columns = dataset.get_data()
+    assert type(dataframe) is pd.DataFrame
+    assert columns == ["A", "B", "C", "D"]
+    assert continuous_columns == ["A", "C"]
+    assert categorical_columns == ["B"]
+
+
+def test_empty_dataset(empty_dataset):
+    with pytest.raises(ValueError) as exception_info:
+        _ = NumericDataset(empty_dataset)
+    assert exception_info.type is ValueError