sdg-core-lib 0.1.0 (py3-none-any.whl)

This diff shows the content of publicly released versions of this package as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
Files changed (77)
  1. sdg_core_lib/NumericDataset.py +150 -0
  2. sdg_core_lib/__init__.py +0 -0
  3. sdg_core_lib/browser.py +73 -0
  4. sdg_core_lib/data_generator/__init__.py +0 -0
  5. sdg_core_lib/data_generator/model_factory.py +72 -0
  6. sdg_core_lib/data_generator/models/ModelInfo.py +42 -0
  7. sdg_core_lib/data_generator/models/TrainingInfo.py +40 -0
  8. sdg_core_lib/data_generator/models/UnspecializedModel.py +106 -0
  9. sdg_core_lib/data_generator/models/__init__.py +0 -0
  10. sdg_core_lib/data_generator/models/keras/KerasBaseVAE.py +172 -0
  11. sdg_core_lib/data_generator/models/keras/VAE.py +61 -0
  12. sdg_core_lib/data_generator/models/keras/__init__.py +0 -0
  13. sdg_core_lib/data_generator/models/keras/implementation/TabularVAE.py +96 -0
  14. sdg_core_lib/data_generator/models/keras/implementation/TimeSeriesVAE.py +156 -0
  15. sdg_core_lib/data_generator/models/keras/implementation/__init__.py +0 -0
  16. sdg_core_lib/evaluate/Metrics.py +48 -0
  17. sdg_core_lib/evaluate/TabularComparison.py +276 -0
  18. sdg_core_lib/evaluate/__init__.py +0 -0
  19. sdg_core_lib/job.py +56 -0
  20. sdg_core_lib/post_process/FunctionApplier.py +14 -0
  21. sdg_core_lib/post_process/__init__.py +0 -0
  22. sdg_core_lib/post_process/function_factory.py +41 -0
  23. sdg_core_lib/post_process/functions/FunctionInfo.py +25 -0
  24. sdg_core_lib/post_process/functions/FunctionResult.py +15 -0
  25. sdg_core_lib/post_process/functions/Parameter.py +33 -0
  26. sdg_core_lib/post_process/functions/UnspecializedFunction.py +42 -0
  27. sdg_core_lib/post_process/functions/__init__.py +0 -0
  28. sdg_core_lib/post_process/functions/distribution_evaluator/__init__.py +0 -0
  29. sdg_core_lib/post_process/functions/distribution_evaluator/implementation/NormalTester.py +65 -0
  30. sdg_core_lib/post_process/functions/distribution_evaluator/implementation/__init__.py +0 -0
  31. sdg_core_lib/post_process/functions/filter/IntervalThreshold.py +32 -0
  32. sdg_core_lib/post_process/functions/filter/MonoThreshold.py +28 -0
  33. sdg_core_lib/post_process/functions/filter/__init__.py +0 -0
  34. sdg_core_lib/post_process/functions/filter/implementation/InnerThreshold.py +43 -0
  35. sdg_core_lib/post_process/functions/filter/implementation/LowerThreshold.py +32 -0
  36. sdg_core_lib/post_process/functions/filter/implementation/OuterThreshold.py +42 -0
  37. sdg_core_lib/post_process/functions/filter/implementation/UpperThreshold.py +32 -0
  38. sdg_core_lib/post_process/functions/filter/implementation/__init__.py +0 -0
  39. sdg_core_lib/preprocess/__init__.py +0 -0
  40. sdg_core_lib/preprocess/scale.py +51 -0
  41. sdg_core_lib/test/__init__.py +0 -0
  42. sdg_core_lib/test/data_generator/__init__.py +0 -0
  43. sdg_core_lib/test/data_generator/models/__init__.py +0 -0
  44. sdg_core_lib/test/data_generator/models/keras/__init__.py +0 -0
  45. sdg_core_lib/test/data_generator/models/keras/implementation/__init__.py +0 -0
  46. sdg_core_lib/test/data_generator/models/keras/implementation/test_TabularVAE.py +120 -0
  47. sdg_core_lib/test/data_generator/models/keras/implementation/test_TimeSeriesVAE.py +110 -0
  48. sdg_core_lib/test/data_generator/models/keras/test_KerasBaseVAE.py +74 -0
  49. sdg_core_lib/test/data_generator/models/test_ModelInfo.py +27 -0
  50. sdg_core_lib/test/data_generator/models/test_TrainingInfo.py +30 -0
  51. sdg_core_lib/test/data_generator/models/test_UnspecializedModel.py +32 -0
  52. sdg_core_lib/test/data_generator/test_model_factory.py +52 -0
  53. sdg_core_lib/test/evaluate/__init__.py +0 -0
  54. sdg_core_lib/test/evaluate/test_Metrics.py +62 -0
  55. sdg_core_lib/test/evaluate/test_TabularComparisonEvaluator.py +75 -0
  56. sdg_core_lib/test/infer_test.json +168 -0
  57. sdg_core_lib/test/infer_test_nodata.json +77 -0
  58. sdg_core_lib/test/infer_test_nodata_wrong.json +11 -0
  59. sdg_core_lib/test/post_process/__init__.py +0 -0
  60. sdg_core_lib/test/post_process/functions/__init__.py +0 -0
  61. sdg_core_lib/test/post_process/functions/distribution_evaluator/__init__.py +0 -0
  62. sdg_core_lib/test/post_process/functions/distribution_evaluator/implementation/__init__.py +0 -0
  63. sdg_core_lib/test/post_process/functions/distribution_evaluator/implementation/test_NormalTester.py +55 -0
  64. sdg_core_lib/test/post_process/functions/filters/__init__.py +0 -0
  65. sdg_core_lib/test/post_process/functions/filters/implementation/__init__.py +0 -0
  66. sdg_core_lib/test/post_process/functions/filters/implementation/test_InnerThreshold.py +30 -0
  67. sdg_core_lib/test/pre_process/__init__.py +0 -0
  68. sdg_core_lib/test/pre_process/test_scaling.py +55 -0
  69. sdg_core_lib/test/test_browser.py +11 -0
  70. sdg_core_lib/test/test_dataset.py +149 -0
  71. sdg_core_lib/test/test_job.py +128 -0
  72. sdg_core_lib/test/train_test.json +166 -0
  73. sdg_core_lib/test/train_test_2.json +9 -0
  74. sdg_core_lib-0.1.0.dist-info/METADATA +9 -0
  75. sdg_core_lib-0.1.0.dist-info/RECORD +77 -0
  76. sdg_core_lib-0.1.0.dist-info/WHEEL +4 -0
  77. sdg_core_lib-0.1.0.dist-info/entry_points.txt +3 -0
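For orientation, the tests bundled in this wheel exercise the package's top-level entry points (sdg_core_lib.browser and sdg_core_lib.NumericDataset; their test files appear in the diffs below). The following is a minimal usage sketch based only on what those tests show; the exact objects yielded by browse_algorithms() and browse_functions() are not documented here, so treat their use as an assumption rather than a reference.

from sdg_core_lib.browser import browse_algorithms, browse_functions
from sdg_core_lib.NumericDataset import NumericDataset

# List the generator algorithms and post-process functions shipped with this release.
for description in browse_algorithms():
    print(description)
for description in browse_functions():
    print(description)

# Wrap column records (the same layout used by the JSON fixtures below) in a NumericDataset.
columns = [
    {
        "column_name": "alcohol",
        "column_type": "continuous",
        "column_datatype": "float64",
        "column_data": [13.71, 13.4, 13.27, 13.17, 14.13],
    }
]
dataset = NumericDataset(columns)
dataframe, names, continuous, categorical = dataset.get_data()
print(dataframe.head())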
sdg_core_lib/test/infer_test.json
@@ -0,0 +1,168 @@
+ {
+   "dataset": [
+     {
+       "column_data": [
+         13.71,
+         13.4,
+         13.27,
+         13.17,
+         14.13
+       ],
+       "column_name": "alcohol",
+       "column_type": "continuous",
+       "column_datatype": "float64"
+     },
+     {
+       "column_data": [
+         5.65,
+         3.91,
+         4.28,
+         2.59,
+         4.1
+       ],
+       "column_name": "malic_acid",
+       "column_type": "continuous",
+       "column_datatype": "float64"
+     },
+     {
+       "column_data": [
+         2.45,
+         2.48,
+         2.26,
+         2.37,
+         2.74
+       ],
+       "column_name": "ash",
+       "column_type": "continuous",
+       "column_datatype": "float64"
+     },
+     {
+       "column_data": [
+         20.5,
+         23.0,
+         20.0,
+         20.0,
+         24.5
+       ],
+       "column_name": "acl",
+       "column_type": "continuous",
+       "column_datatype": "float64"
+     },
+     {
+       "column_data": [
+         95,
+         102,
+         120,
+         120,
+         96
+       ],
+       "column_name": "Mmg",
+       "column_type": "continuous",
+       "column_datatype": "int64"
+     },
+     {
+       "column_data": [
+         1.68,
+         1.8,
+         1.59,
+         1.65,
+         2.05
+       ],
+       "column_name": "phenols",
+       "column_type": "continuous",
+       "column_datatype": "float64"
+     },
+     {
+       "column_data": [
+         0.61,
+         0.75,
+         0.69,
+         0.68,
+         0.76
+       ],
+       "column_name": "flavanoids",
+       "column_type": "continuous",
+       "column_datatype": "float64"
+     },
+     {
+       "column_data": [
+         0.52,
+         0.43,
+         0.43,
+         0.53,
+         0.56
+       ],
+       "column_name": "nonflavanoid_phenols",
+       "column_type": "continuous",
+       "column_datatype": "float64"
+     },
+     {
+       "column_data": [
+         1.06,
+         1.41,
+         1.35,
+         1.46,
+         1.35
+       ],
+       "column_name": "proanth",
+       "column_type": "continuous",
+       "column_datatype": "float64"
+     },
+     {
+       "column_data": [
+         7.7,
+         7.3,
+         10.2,
+         9.3,
+         9.2
+       ],
+       "column_name": "color_int",
+       "column_type": "continuous",
+       "column_datatype": "float64"
+     },
+     {
+       "column_data": [
+         0.64,
+         0.7,
+         0.59,
+         0.6,
+         0.61
+       ],
+       "column_name": "hue",
+       "column_type": "continuous",
+       "column_datatype": "float64"
+     },
+     {
+       "column_data": [
+         3.92,
+         3.4,
+         3.17,
+         3.45,
+         2.93
+       ],
+       "column_name": "od",
+       "column_type": "continuous",
+       "column_datatype": "float64"
+     },
+     {
+       "column_data": [
+         740,
+         750,
+         835,
+         840,
+         560
+       ],
+       "column_name": "prolin",
+       "column_type": "continuous",
+       "column_datatype": "int64"
+     }
+   ],
+   "functions_id" : [],
+   "model": {
+     "algorithm_name": "sdg_core_lib.data_generator.models.keras.implementation.TabularVAE.TabularVAE",
+     "model_name": "Test-T_VAE",
+     "image": "C:\\Users\\giumartine\\PycharmProjects\\SyntheticGenerator\\synthetic-data-generator\\src\\generator\\server\\saved_models\\trained_models\\Test-T_VAETabularVAE20250319.150458",
+     "input_shape": "(13,)"
+   },
+   "n_rows": 1000
+ }
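The fixture above (sdg_core_lib/test/infer_test.json) is shaped as a generation request: per-column records under "dataset", a "model" descriptor carrying a stringified "input_shape", and an "n_rows" target. A hypothetical standalone snippet for inspecting such a payload with plain json/pandas follows; it is not an sdg_core_lib API, just a sketch of how the fields fit together.

import ast
import json

import pandas as pd

# Load the request fixture shipped with the package.
with open("sdg_core_lib/test/infer_test.json") as fp:
    request = json.load(fp)

# Rebuild a DataFrame from the per-column records under "dataset".
frame = pd.DataFrame(
    {col["column_name"]: col["column_data"] for col in request["dataset"]}
)

# The model block declares the expected feature count as a stringified tuple, e.g. "(13,)".
input_shape = ast.literal_eval(request["model"]["input_shape"])
assert frame.shape[1] == input_shape[0]
print(frame.dtypes)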
@@ -0,0 +1,77 @@
sdg_core_lib/test/infer_test_nodata.json
+ {
+   "behavious_id" : [],
+   "model": {
+     "algorithm_name": "sdg_core_lib.data_generator.models.keras.implementation.TabularVAE.TabularVAE",
+     "model_name": "Test-T_VAE",
+     "image": "./sdg_core_lib/test/outputs/",
+     "input_shape": "(13,)",
+     "training_data_info": [
+       {
+         "column_name": "alcohol",
+         "column_type": "continuous",
+         "column_datatype": "float64"
+       },
+       {
+         "column_name": "malic_acid",
+         "column_type": "continuous",
+         "column_datatype": "float64"
+       },
+       {
+         "column_name": "ash",
+         "column_type": "continuous",
+         "column_datatype": "float64"
+       },
+       {
+         "column_name": "acl",
+         "column_type": "continuous",
+         "column_datatype": "float64"
+       },
+       {
+         "column_name": "Mmg",
+         "column_type": "continuous",
+         "column_datatype": "float64"
+       },
+       {
+         "column_name": "phenols",
+         "column_type": "continuous",
+         "column_datatype": "float64"
+       },
+       {
+         "column_name": "flavanoids",
+         "column_type": "continuous",
+         "column_datatype": "float64"
+       },
+       {
+         "column_name": "nonflavanoid_phenols",
+         "column_type": "continuous",
+         "column_datatype": "float64"
+       },
+       {
+         "column_name": "proanth",
+         "column_type": "continuous",
+         "column_datatype": "float64"
+       },
+       {
+         "column_name": "color_int",
+         "column_type": "continuous",
+         "column_datatype": "float64"
+       },
+       {
+         "column_name": "hue",
+         "column_type": "continuous",
+         "column_datatype": "float64"
+       },
+       {
+         "column_name": "od",
+         "column_type": "continuous",
+         "column_datatype": "float64"
+       },
+       {
+         "column_name": "prolin",
+         "column_type": "continuous",
+         "column_datatype": "float64"
+       }
+     ]
+   },
+   "n_rows": 1000
+ }
@@ -0,0 +1,11 @@
sdg_core_lib/test/infer_test_nodata_wrong.json
+ {
+   "behavious_id" : [],
+   "model": {
+     "algorithm_name": "sdg_core_lib.data_generator.models.keras.implementation.TabularVAE.TabularVAE",
+     "model_name": "Test-T_VAE",
+     "image": "./sdg_core_lib/test/outputs/",
+     "input_shape": "(13,)",
+     "training_data_info": []
+   },
+   "n_rows": 1000
+ }
File without changes
File without changes
@@ -0,0 +1,55 @@
sdg_core_lib/test/post_process/functions/distribution_evaluator/implementation/test_NormalTester.py
+ import pytest
+ import numpy as np
+
+ from sdg_core_lib.post_process.functions.distribution_evaluator.implementation.NormalTester import (
+     NormalTester,
+ )
+
+
+ @pytest.fixture
+ def correct_instance():
+     params = [
+         {"name": "mean", "value": "0.0", "parameter_type": "float"},
+         {"name": "standard_deviation", "value": "1.0", "parameter_type": "float"},
+     ]
+     return NormalTester(parameters=params)
+
+
+ def test_check_parameters(correct_instance):
+     param_mapping = {param.name: param for param in correct_instance.parameters}
+     param_names = param_mapping.keys()
+     assert param_mapping["mean"].value == 0.0
+     assert isinstance(param_mapping["mean"].value, float)
+     assert param_mapping["standard_deviation"].value == 1.0
+     assert isinstance(param_mapping["standard_deviation"].value, float)
+     assert "mean" in param_names
+     assert "standard_deviation" in param_names
+
+
+ def test_compute(correct_instance):
+     data = np.random.normal(correct_instance.mean, correct_instance.std, 10000)
+     compute_data, indexes = correct_instance._compute(data)
+     assert data.shape == (10000,)
+     assert indexes.shape == (10000,)
+     assert np.all(compute_data == data)
+     assert np.all(indexes == np.array(range(len(data))))
+
+
+ def test_evaluate(correct_instance):
+     correct_data = np.random.normal(correct_instance.mean, correct_instance.std, 10000)
+     assert correct_instance._evaluate(correct_data)
+
+
+ def test_evaluate_wrong(correct_instance):
+     wrong_data = np.random.normal(5, 1, 10000)
+     wrong_data_2 = np.random.normal(0, 10, 10000)
+     assert not correct_instance._evaluate(wrong_data)
+     assert not correct_instance._evaluate(wrong_data_2)
+
+
+ def test_get_result(correct_instance):
+     data_correct = np.random.normal(correct_instance.mean, correct_instance.std, 10000)
+     results = correct_instance.get_results(data_correct)
+     assert results["results"].shape == (10000,)
+     assert results["indexes"].shape == (10000,)
+     assert results["evaluation_results"]
@@ -0,0 +1,30 @@
sdg_core_lib/test/post_process/functions/filters/implementation/test_InnerThreshold.py
+ import pytest
+
+ from sdg_core_lib.post_process.functions.filter.implementation.InnerThreshold import (
+     InnerThreshold,
+ )
+
+
+ @pytest.fixture
+ def correct_instance():
+     params = [
+         {"name": "upper_bound", "value": "50.0", "parameter_type": "float"},
+         {"name": "lower_bound", "value": "10.0", "parameter_type": "float"},
+         {"name": "upper_strict", "value": "True", "parameter_type": "bool"},
+         {"name": "lower_strict", "value": "False", "parameter_type": "bool"},
+     ]
+     return InnerThreshold(parameters=params)
+
+
+ def test_check_parameters(correct_instance):
+     param_mapping = {param.name: param for param in correct_instance.parameters}
+     param_names = param_mapping.keys()
+     assert param_mapping["upper_bound"].value > param_mapping["lower_bound"].value
+     assert isinstance(param_mapping["upper_bound"].value, float)
+     assert isinstance(param_mapping["lower_bound"].value, float)
+     assert isinstance(param_mapping["upper_strict"].value, bool)
+     assert isinstance(param_mapping["lower_strict"].value, bool)
+     assert "upper_bound" in param_names
+     assert "lower_bound" in param_names
+     assert "upper_strict" in param_names
+     assert "lower_strict" in param_names
File without changes
@@ -0,0 +1,55 @@
sdg_core_lib/test/pre_process/test_scaling.py
+ import pytest
+ import numpy as np
+
+ from sdg_core_lib.preprocess.scale import (
+     standardize_simple_tabular_time_series,
+     standardize_simple_tabular_input,
+ )
+
+
+ @pytest.fixture()
+ def correct_tabular_input():
+     return np.linspace(-10, 10, 100).reshape(10, 10)
+
+
+ @pytest.fixture()
+ def correct_time_series_input():
+     return np.linspace(-10, 10, 1000).reshape(10, 2, 50)
+
+
+ def test_correct_tabular_scaling(correct_tabular_input):
+     scaler, standardized_train_data, standardized_test_data = (
+         standardize_simple_tabular_input(
+             train_data=correct_tabular_input, test_data=correct_tabular_input
+         )
+     )
+
+     assert type(standardized_train_data) is np.ndarray
+     assert standardized_train_data.shape == correct_tabular_input.shape
+     assert standardized_test_data.shape == correct_tabular_input.shape
+
+
+ def test_incorrect_tabular_scaling(correct_time_series_input):
+     with pytest.raises(ValueError) as exception_info:
+         _, _, _ = standardize_simple_tabular_input(train_data=correct_time_series_input)
+     assert exception_info.type is ValueError
+
+
+ def test_correct_time_series_scaling(correct_time_series_input):
+     scaler, standardized_train_data, standardized_test_data = (
+         standardize_simple_tabular_time_series(
+             train_data=correct_time_series_input, test_data=correct_time_series_input
+         )
+     )
+
+     assert type(standardized_train_data) is np.ndarray
+     assert standardized_train_data.shape == correct_time_series_input.shape
+     assert standardized_test_data.shape == correct_time_series_input.shape
+
+
+ def test_incorrect_time_series_scaling(correct_tabular_input):
+     with pytest.raises(ValueError) as exception_info:
+         _, _, _ = standardize_simple_tabular_time_series(
+             train_data=correct_tabular_input
+         )
+     assert exception_info.type is ValueError
@@ -0,0 +1,11 @@
sdg_core_lib/test/test_browser.py
+ from sdg_core_lib.browser import browse_algorithms, browse_functions
+
+
+ def test_browse_algorithms():
+     for desc in browse_algorithms():
+         assert desc is not None
+
+
+ def test_browse_functions():
+     for desc in browse_functions():
+         assert desc is not None
@@ -0,0 +1,149 @@
sdg_core_lib/test/test_dataset.py
+ import pandas as pd
+ import pytest
+ import numpy as np
+
+ from sdg_core_lib.NumericDataset import NumericDataset
+
+
+ @pytest.fixture
+ def correct_dataset():
+     return [
+         {
+             "column_name": "A",
+             "column_type": "continuous",
+             "column_datatype": "float64",
+             "column_data": [1.0, 2.0, 3.0, 4.0, 5.0],
+         },
+         {
+             "column_name": "B",
+             "column_type": "categorical",
+             "column_datatype": "object",
+             "column_data": ["a", "b", "c", "d", "e"],
+         },
+         {
+             "column_name": "C",
+             "column_type": "continuous",
+             "column_datatype": "int64",
+             "column_data": [1, 2, 3, 4, 5],
+         },
+         {
+             "column_name": "D",
+             "column_type": "test",
+             "column_datatype": "int64",
+             "column_data": [1, 2, 3, 4, 5],
+         },
+     ]
+
+
+ @pytest.fixture
+ def complex_dataset():
+     return [
+         {
+             "column_name": "A",
+             "column_type": "time_series",
+             "column_datatype": "float64",
+             "column_data": [[1.0, 2.0, 3.0, 4.0, 5.0], [1.0, 2.0, 3.0, 4.0, 5.0]],
+         }
+     ]
+
+
+ @pytest.fixture
+ def error_dataset():
+     return [
+         {
+             "column_name": "A",
+             "column_type": "categorical",
+             "column_datatype": "string",
+             "column_data": ["a", "b", "c", "d", "e"],
+         }
+     ]
+
+
+ @pytest.fixture
+ def empty_dataset():
+     return []
+
+
+ def test_initialization(correct_dataset):
+     dataset = NumericDataset(correct_dataset)
+     assert len(dataset.columns) == 4
+     assert len(dataset.categorical_columns) == 1
+     assert len(dataset.continuous_columns) == 2
+     assert len(dataset.unrecognized_columns) == 1
+     assert dataset.get_numpy_data(dataset.dataframe).shape == (5, 4)
+
+
+ def test_dataset_complexity(complex_dataset):
+     dataset = NumericDataset(complex_dataset)
+     print(np.array(dataset.dataframe.to_numpy().tolist()).shape)
+     assert len(dataset.columns) == 1
+     assert len(dataset.categorical_columns) == 0
+     assert len(dataset.continuous_columns) == 0
+     assert len(dataset.unrecognized_columns) == 1
+     assert dataset.get_numpy_data(dataset.dataframe).shape == (2, 1, 5)
+
+
+ def test_error_initialization(error_dataset):
+     with pytest.raises(TypeError) as exception_info:
+         _ = NumericDataset(error_dataset)
+     assert exception_info.type is TypeError
+
+
+ def test_parse_tabular_data_json(correct_dataset):
+     dataset = NumericDataset(correct_dataset)
+     print(dataset.dataframe["A"].dtype)
+     list_dict = dataset.parse_tabular_data_json()
+     assert len(list_dict) == len(dataset.columns)
+     assert list_dict[0]["column_name"] == "A"
+     assert list_dict[0]["column_type"] == "continuous"
+     assert list_dict[0]["column_datatype"] == "float64"
+     assert list_dict[0]["column_data"] == [1, 2, 3, 4, 5]
+     assert list_dict[1]["column_name"] == "B"
+     assert list_dict[1]["column_type"] == "categorical"
+     assert list_dict[1]["column_datatype"] == "object"
+     assert list_dict[1]["column_data"] == ["a", "b", "c", "d", "e"]
+     assert list_dict[2]["column_name"] == "C"
+     assert list_dict[2]["column_type"] == "continuous"
+     assert list_dict[2]["column_datatype"] == "int64"
+     assert list_dict[2]["column_data"] == [1, 2, 3, 4, 5]
+     assert list_dict[3]["column_name"] == "D"
+     assert list_dict[3]["column_type"] == "none"
+     assert list_dict[3]["column_datatype"] == "int64"
+     assert list_dict[3]["column_data"] == [1, 2, 3, 4, 5]
+
+
+ def test_parse_data_to_registry(correct_dataset):
+     dataset = NumericDataset(correct_dataset)
+     feature_list = dataset.parse_data_to_registry()
+     assert len(feature_list) == len(dataset.columns)
+     assert feature_list[0]["feature_name"] == "A"
+     assert feature_list[0]["feature_position"] == 0
+     assert feature_list[0]["is_categorical"] is False
+     assert feature_list[0]["type"] == "float64"
+     assert feature_list[1]["feature_name"] == "B"
+     assert feature_list[1]["feature_position"] == 1
+     assert feature_list[1]["is_categorical"] is True
+     assert feature_list[1]["type"] == "object"
+     assert feature_list[2]["feature_name"] == "C"
+     assert feature_list[2]["feature_position"] == 2
+     assert feature_list[2]["is_categorical"] is False
+     assert feature_list[2]["type"] == "int64"
+     assert feature_list[3]["feature_name"] == "D"
+     assert feature_list[3]["feature_position"] == 3
+     assert feature_list[3]["is_categorical"] is False
+     assert feature_list[3]["type"] == "int64"
+
+
+ def test_get_data(correct_dataset):
+     dataset = NumericDataset(correct_dataset)
+     dataframe, columns, continuous_columns, categorical_columns = dataset.get_data()
+     assert type(dataframe) is pd.DataFrame
+     assert columns == ["A", "B", "C", "D"]
+     assert continuous_columns == ["A", "C"]
+     assert categorical_columns == ["B"]
+
+
+ def test_empty_dataset(empty_dataset):
+     with pytest.raises(ValueError) as exception_info:
+         _ = NumericDataset(empty_dataset)
+     assert exception_info.type is ValueError