sdg-core-lib 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. sdg_core_lib/NumericDataset.py +150 -0
  2. sdg_core_lib/__init__.py +0 -0
  3. sdg_core_lib/browser.py +73 -0
  4. sdg_core_lib/data_generator/__init__.py +0 -0
  5. sdg_core_lib/data_generator/model_factory.py +72 -0
  6. sdg_core_lib/data_generator/models/ModelInfo.py +42 -0
  7. sdg_core_lib/data_generator/models/TrainingInfo.py +40 -0
  8. sdg_core_lib/data_generator/models/UnspecializedModel.py +106 -0
  9. sdg_core_lib/data_generator/models/__init__.py +0 -0
  10. sdg_core_lib/data_generator/models/keras/KerasBaseVAE.py +172 -0
  11. sdg_core_lib/data_generator/models/keras/VAE.py +61 -0
  12. sdg_core_lib/data_generator/models/keras/__init__.py +0 -0
  13. sdg_core_lib/data_generator/models/keras/implementation/TabularVAE.py +96 -0
  14. sdg_core_lib/data_generator/models/keras/implementation/TimeSeriesVAE.py +156 -0
  15. sdg_core_lib/data_generator/models/keras/implementation/__init__.py +0 -0
  16. sdg_core_lib/evaluate/Metrics.py +48 -0
  17. sdg_core_lib/evaluate/TabularComparison.py +276 -0
  18. sdg_core_lib/evaluate/__init__.py +0 -0
  19. sdg_core_lib/job.py +56 -0
  20. sdg_core_lib/post_process/FunctionApplier.py +14 -0
  21. sdg_core_lib/post_process/__init__.py +0 -0
  22. sdg_core_lib/post_process/function_factory.py +41 -0
  23. sdg_core_lib/post_process/functions/FunctionInfo.py +25 -0
  24. sdg_core_lib/post_process/functions/FunctionResult.py +15 -0
  25. sdg_core_lib/post_process/functions/Parameter.py +33 -0
  26. sdg_core_lib/post_process/functions/UnspecializedFunction.py +42 -0
  27. sdg_core_lib/post_process/functions/__init__.py +0 -0
  28. sdg_core_lib/post_process/functions/distribution_evaluator/__init__.py +0 -0
  29. sdg_core_lib/post_process/functions/distribution_evaluator/implementation/NormalTester.py +65 -0
  30. sdg_core_lib/post_process/functions/distribution_evaluator/implementation/__init__.py +0 -0
  31. sdg_core_lib/post_process/functions/filter/IntervalThreshold.py +32 -0
  32. sdg_core_lib/post_process/functions/filter/MonoThreshold.py +28 -0
  33. sdg_core_lib/post_process/functions/filter/__init__.py +0 -0
  34. sdg_core_lib/post_process/functions/filter/implementation/InnerThreshold.py +43 -0
  35. sdg_core_lib/post_process/functions/filter/implementation/LowerThreshold.py +32 -0
  36. sdg_core_lib/post_process/functions/filter/implementation/OuterThreshold.py +42 -0
  37. sdg_core_lib/post_process/functions/filter/implementation/UpperThreshold.py +32 -0
  38. sdg_core_lib/post_process/functions/filter/implementation/__init__.py +0 -0
  39. sdg_core_lib/preprocess/__init__.py +0 -0
  40. sdg_core_lib/preprocess/scale.py +51 -0
  41. sdg_core_lib/test/__init__.py +0 -0
  42. sdg_core_lib/test/data_generator/__init__.py +0 -0
  43. sdg_core_lib/test/data_generator/models/__init__.py +0 -0
  44. sdg_core_lib/test/data_generator/models/keras/__init__.py +0 -0
  45. sdg_core_lib/test/data_generator/models/keras/implementation/__init__.py +0 -0
  46. sdg_core_lib/test/data_generator/models/keras/implementation/test_TabularVAE.py +120 -0
  47. sdg_core_lib/test/data_generator/models/keras/implementation/test_TimeSeriesVAE.py +110 -0
  48. sdg_core_lib/test/data_generator/models/keras/test_KerasBaseVAE.py +74 -0
  49. sdg_core_lib/test/data_generator/models/test_ModelInfo.py +27 -0
  50. sdg_core_lib/test/data_generator/models/test_TrainingInfo.py +30 -0
  51. sdg_core_lib/test/data_generator/models/test_UnspecializedModel.py +32 -0
  52. sdg_core_lib/test/data_generator/test_model_factory.py +52 -0
  53. sdg_core_lib/test/evaluate/__init__.py +0 -0
  54. sdg_core_lib/test/evaluate/test_Metrics.py +62 -0
  55. sdg_core_lib/test/evaluate/test_TabularComparisonEvaluator.py +75 -0
  56. sdg_core_lib/test/infer_test.json +168 -0
  57. sdg_core_lib/test/infer_test_nodata.json +77 -0
  58. sdg_core_lib/test/infer_test_nodata_wrong.json +11 -0
  59. sdg_core_lib/test/post_process/__init__.py +0 -0
  60. sdg_core_lib/test/post_process/functions/__init__.py +0 -0
  61. sdg_core_lib/test/post_process/functions/distribution_evaluator/__init__.py +0 -0
  62. sdg_core_lib/test/post_process/functions/distribution_evaluator/implementation/__init__.py +0 -0
  63. sdg_core_lib/test/post_process/functions/distribution_evaluator/implementation/test_NormalTester.py +55 -0
  64. sdg_core_lib/test/post_process/functions/filters/__init__.py +0 -0
  65. sdg_core_lib/test/post_process/functions/filters/implementation/__init__.py +0 -0
  66. sdg_core_lib/test/post_process/functions/filters/implementation/test_InnerThreshold.py +30 -0
  67. sdg_core_lib/test/pre_process/__init__.py +0 -0
  68. sdg_core_lib/test/pre_process/test_scaling.py +55 -0
  69. sdg_core_lib/test/test_browser.py +11 -0
  70. sdg_core_lib/test/test_dataset.py +149 -0
  71. sdg_core_lib/test/test_job.py +128 -0
  72. sdg_core_lib/test/train_test.json +166 -0
  73. sdg_core_lib/test/train_test_2.json +9 -0
  74. sdg_core_lib-0.1.0.dist-info/METADATA +9 -0
  75. sdg_core_lib-0.1.0.dist-info/RECORD +77 -0
  76. sdg_core_lib-0.1.0.dist-info/WHEEL +4 -0
  77. sdg_core_lib-0.1.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,120 @@
1
+ import numpy as np
2
+ import pytest
3
+ import os
4
+ import shutil
5
+ from sklearn.preprocessing import StandardScaler
6
+
7
+ from sdg_core_lib.NumericDataset import NumericDataset
8
+ from sdg_core_lib.data_generator.models.TrainingInfo import TrainingInfo
9
+ from sdg_core_lib.data_generator.models.keras.VAE import VAE
10
+ from sdg_core_lib.data_generator.models.keras.implementation.TabularVAE import (
11
+ TabularVAE,
12
+ )
13
+
14
+
15
@pytest.fixture()
def data():
    """Provide a NumericDataset holding one continuous float64 column "A"."""
    column_a = {
        "column_name": "A",
        "column_type": "continuous",
        "column_datatype": "float64",
        "column_data": [1.0, 2.0, 3.0, 4.0, 5.0],
    }
    return NumericDataset([column_a])
27
+
28
+
29
@pytest.fixture()
def model_data_no_load():
    """Constructor kwargs for a model with input shape "(13,)" and no load path."""
    return dict(
        metadata={"example_key": "example_value"},
        model_name="example_model",
        input_shape="(13,)",
        load_path=None,
        epochs=1,
    )
38
+
39
+
40
@pytest.fixture()
def model_data_correct_train():
    """Constructor kwargs for a model with input shape "(1,)" and no load path."""
    return dict(
        metadata={"example_key": "example_value"},
        model_name="example_model",
        input_shape="(1,)",
        load_path=None,
        epochs=1,
    )
49
+
50
+
51
def test_instantiate(model_data_no_load):
    """A newly constructed TabularVAE reflects its kwargs and has no scaler yet."""
    vae = TabularVAE(**model_data_no_load)

    # Values taken straight from the constructor arguments.
    assert vae.model_name == model_data_no_load["model_name"]
    assert vae._load_path is None
    assert vae.input_shape == (13,)
    assert vae._epochs == 1

    # Internal state right after construction.
    assert type(vae._model) is VAE
    assert vae._scaler is None
59
+
60
+
61
def test_preprocess(model_data_no_load, data):
    """_pre_process() sets a StandardScaler on the model and returns an ndarray."""
    vae = TabularVAE(**model_data_no_load)
    assert vae._scaler is None

    scaled = vae._pre_process(data)

    scaler = vae._scaler
    assert scaler is not None and type(scaler) is StandardScaler
    assert type(scaled) is np.ndarray
67
+
68
+
69
def test_self_description(model_data_no_load):
    """self_describe() reports the algorithm metadata and the accepted datatypes."""
    description = TabularVAE(**model_data_no_load).self_describe()
    assert description is not None

    algorithm = description["algorithm"]
    assert (
        algorithm["name"]
        == "sdg_core_lib.data_generator.models.keras.implementation.TabularVAE.TabularVAE"
    )
    assert algorithm["default_loss_function"] == "ELBO LOSS"
    assert algorithm["description"] == "A Variational Autoencoder for data generation"

    expected_datatypes = [
        {"type": type_name, "is_categorical": False}
        for type_name in ("float32", "int32", "int64")
    ]
    assert description["datatypes"] == expected_datatypes
87
+
88
+
89
def test_save(model_data_no_load, tmp_path):
    """save() writes the encoder, decoder and scaler files into the target directory.

    The target directory lives under pytest's per-test ``tmp_path`` instead
    of a fixed ``./test_model`` in the current working directory, so a
    failing assertion can no longer leave a stale directory behind that
    makes the next run's ``os.mkdir`` fail with ``FileExistsError``.
    """
    model = TabularVAE(**model_data_no_load)
    model_path = os.path.join(str(tmp_path), "test_model")
    os.mkdir(model_path)
    try:
        model.save(model_path)
        for file_name in ("encoder.keras", "decoder.keras", "scaler.skops"):
            assert os.path.isfile(os.path.join(model_path, file_name))
    finally:
        # Explicit cleanup is kept, but now also runs when an assertion fails.
        shutil.rmtree(model_path, ignore_errors=True)
98
+
99
+
100
def test_train_wrong(model_data_no_load, data):
    """Training the "(13,)"-shaped model on the one-column dataset must raise ValueError."""
    vae = TabularVAE(**model_data_no_load)
    with pytest.raises(ValueError) as raised:
        vae.train(data)
    assert raised.type is ValueError
105
+
106
+
107
def test_train_correct(model_data_correct_train, data):
    """train() populates both the scaler and the training info on the model."""
    vae = TabularVAE(**model_data_correct_train)

    # Nothing is populated before training.
    assert vae.training_info is None
    assert vae._scaler is None

    vae.train(data)

    assert type(vae._scaler) is StandardScaler
    assert type(vae.training_info) is TrainingInfo
114
+
115
+
116
def test_infer(model_data_correct_train, data):
    """infer(n) returns an array shaped (n, *input_shape)."""
    n_rows = 2
    vae = TabularVAE(**model_data_correct_train)
    generated = vae.infer(n_rows)
    expected_shape = (n_rows,) + tuple(vae.input_shape)
    assert generated.shape == expected_shape
@@ -0,0 +1,110 @@
1
+ import numpy as np
2
+ import pytest
3
+ import os
4
+ import shutil
5
+ from sklearn.preprocessing import MinMaxScaler
6
+
7
+ from sdg_core_lib.NumericDataset import NumericDataset
8
+ from sdg_core_lib.data_generator.models.TrainingInfo import TrainingInfo
9
+ from sdg_core_lib.data_generator.models.keras.VAE import VAE
10
+ from sdg_core_lib.data_generator.models.keras.implementation.TimeSeriesVAE import (
11
+ TimeSeriesVAE,
12
+ )
13
+
14
+
15
@pytest.fixture()
def model_data_correct_train():
    """Constructor kwargs for a model with input shape "(2, 51)" and no load path."""
    return dict(
        metadata={"example_key": "example_value"},
        model_name="example_model",
        input_shape="(2, 51)",
        load_path=None,
        epochs=1,
    )
24
+
25
+
26
@pytest.fixture()
def data():
    """Provide a NumericDataset with two time-series columns, "A" and "B".

    Each column gets its own freshly generated list: 1020 evenly spaced
    values in [-10, 10] reshaped into rows of 51 samples.
    """
    columns = [
        {
            "column_name": name,
            "column_type": "time_series",
            "column_datatype": "float64",
            "column_data": np.linspace(-10, 10, 1020).reshape(-1, 51).tolist(),
        }
        for name in ("A", "B")
    ]
    return NumericDataset(columns)
44
+
45
+
46
def test_instantiate(model_data_correct_train):
    """A newly constructed TimeSeriesVAE reflects its kwargs and has no scaler yet."""
    vae = TimeSeriesVAE(**model_data_correct_train)

    # Values taken straight from the constructor arguments.
    assert vae.model_name == model_data_correct_train["model_name"]
    assert vae._load_path is None
    assert vae.input_shape == (2, 51)
    assert vae._epochs == 1

    # Internal state right after construction.
    assert type(vae._model) is VAE
    assert vae._scaler is None
54
+
55
+
56
def test_preprocess(model_data_correct_train, data):
    """_pre_process() sets a MinMaxScaler and returns an ndarray of unchanged shape."""
    vae = TimeSeriesVAE(**model_data_correct_train)
    assert vae._scaler is None

    scaled = vae._pre_process(data)

    scaler = vae._scaler
    assert scaler is not None and type(scaler) is MinMaxScaler
    assert type(scaled) is np.ndarray

    # The output keeps the dataset's shape and matches the model input
    # shape once the leading sample axis is dropped.
    raw_shape = data.get_numpy_data(data.dataframe).shape
    assert scaled.shape == raw_shape
    assert scaled.shape[1:] == vae.input_shape
64
+
65
+
66
def test_train_correct(model_data_correct_train, data):
    """train() populates both the scaler and the training info on the model."""
    vae = TimeSeriesVAE(**model_data_correct_train)

    # Nothing is populated before training.
    assert vae.training_info is None
    assert vae._scaler is None

    vae.train(data)

    assert type(vae._scaler) is MinMaxScaler
    assert type(vae.training_info) is TrainingInfo
73
+
74
+
75
def test_save(model_data_correct_train, tmp_path):
    """save() writes the encoder, decoder and scaler files into the target directory.

    The target directory lives under pytest's per-test ``tmp_path`` instead
    of a fixed ``./test_model`` in the current working directory, so a
    failing assertion can no longer leave a stale directory behind that
    makes the next run's ``os.mkdir`` fail with ``FileExistsError``.
    """
    model = TimeSeriesVAE(**model_data_correct_train)
    model_path = os.path.join(str(tmp_path), "test_model")
    os.mkdir(model_path)
    try:
        model.save(model_path)
        for file_name in ("encoder.keras", "decoder.keras", "scaler.skops"):
            assert os.path.isfile(os.path.join(model_path, file_name))
    finally:
        # Explicit cleanup is kept, but now also runs when an assertion fails.
        shutil.rmtree(model_path, ignore_errors=True)
84
+
85
+
86
def test_self_description(model_data_correct_train):
    """self_describe() reports the algorithm metadata and the accepted datatypes."""
    description = TimeSeriesVAE(**model_data_correct_train).self_describe()
    assert description is not None

    algorithm = description["algorithm"]
    assert (
        algorithm["name"]
        == "sdg_core_lib.data_generator.models.keras.implementation.TimeSeriesVAE.TimeSeriesVAE"
    )
    assert algorithm["default_loss_function"] == "ELBO LOSS"
    assert (
        algorithm["description"]
        == "A Beta-Variational Autoencoder for time series generation"
    )

    expected_datatypes = [
        {"type": type_name, "is_categorical": False}
        for type_name in ("float32", "int32", "int64")
    ]
    assert description["datatypes"] == expected_datatypes
104
+
105
+
106
def test_infer(model_data_correct_train, data):
    """infer(n) returns an array shaped (n, *input_shape)."""
    n_rows = 2
    vae = TimeSeriesVAE(**model_data_correct_train)
    generated = vae.infer(n_rows)
    expected_shape = (n_rows,) + tuple(vae.input_shape)
    assert generated.shape == expected_shape
@@ -0,0 +1,74 @@
1
+ import pytest
2
+
3
+ from sdg_core_lib.NumericDataset import NumericDataset
4
+ from sdg_core_lib.data_generator.models.keras.KerasBaseVAE import KerasBaseVAE
5
+
6
+
7
@pytest.fixture()
def model():
    """Provide a bare KerasBaseVAE with input shape "(13,)" and latent_dim 2."""
    init_kwargs = {
        "metadata": {},
        "model_name": "Test-T_VAE",
        "input_shape": "(13,)",
        "load_path": None,
        "latent_dim": 2,
    }
    return KerasBaseVAE(**init_kwargs)
16
+
17
+
18
@pytest.fixture()
def correct_dataset():
    """Provide a NumericDataset holding one continuous float64 column "A"."""
    column_a = {
        "column_name": "A",
        "column_type": "continuous",
        "column_datatype": "float64",
        "column_data": [1.0, 2.0, 3.0, 4.0, 5.0],
    }
    return NumericDataset([column_a])
29
+
30
+
31
def test_instantiate(model):
    """The base VAE holds no Keras model and _instantiate() raises NotImplementedError."""
    assert model._model is None
    with pytest.raises(NotImplementedError) as raised:
        model._instantiate()
    assert raised.type is NotImplementedError
36
+
37
+
38
def test_load_files(model):
    """_load_files() is expected to raise ValueError for an empty path."""
    empty_path = ""
    with pytest.raises(ValueError) as raised:
        model._load_files(empty_path)
    assert raised.type is ValueError
43
+
44
+
45
def test_set_hyperparameters(model):
    """Unknown hyperparameter names leave the model untouched; known ones are stored."""
    # Unrecognised names: every tracked hyperparameter stays unset.
    model.set_hyperparameters(**{"wrong": 0.01, "test": 32, "foobar": 10})
    for attribute in ("_learning_rate", "_batch_size", "_epochs"):
        assert getattr(model, attribute) is None

    # Recognised names: each value lands on the matching private attribute.
    accepted = {"learning_rate": 0.01, "batch_size": 32, "epochs": 10}
    model.set_hyperparameters(**accepted)
    for name, value in accepted.items():
        assert getattr(model, "_" + name) == value
57
+
58
+
59
def test_train_not_initialized(model, correct_dataset):
    """train() on the base VAE fixture is expected to raise NotImplementedError."""
    with pytest.raises(NotImplementedError) as raised:
        model.train(correct_dataset)
    assert raised.type is NotImplementedError
63
+
64
+
65
def test_train_wrong_data(model):
    """train() called with a plain list is expected to raise NotImplementedError."""
    not_a_dataset = [1, 2, 3]
    with pytest.raises(NotImplementedError) as raised:
        model.train(not_a_dataset)
    assert raised.type is NotImplementedError
69
+
70
+
71
def test_infer(model):
    """infer() on the base VAE fixture is expected to raise AttributeError."""
    n_rows = 2
    with pytest.raises(AttributeError) as raised:
        model.infer(n_rows)
    assert raised.type is AttributeError
@@ -0,0 +1,27 @@
1
+ import pytest
2
+
3
+ from sdg_core_lib.data_generator.models.ModelInfo import ModelInfo, AllowedData
4
+
5
+
6
@pytest.fixture()
def model_info():
    """Provide a ModelInfo named "Test" that allows int64 and float32 data."""
    allowed = [AllowedData("int64", False), AllowedData("float32", False)]
    return ModelInfo(
        default_loss_function="Test Loss Function",
        description="This is a test model",
        allowed_data=allowed,
        name="Test",
    )
14
+
15
+
16
def test_get_data(model_info):
    """get_model_info() exposes the algorithm section and both datatype entries."""
    info = model_info.get_model_info()
    assert info is not None

    algorithm = info["algorithm"]
    assert algorithm is not None
    assert algorithm["default_loss_function"] == "Test Loss Function"
    assert algorithm["description"] == "This is a test model"
    assert algorithm["name"] == "Test"

    datatypes = info["datatypes"]
    assert len(datatypes) == 2
    # Entries are checked in the order the fixture declared them.
    for index, expected_type in enumerate(("int64", "float32")):
        assert datatypes[index]["type"] == expected_type
        assert not datatypes[index]["is_categorical"]
@@ -0,0 +1,30 @@
1
+ import pytest
2
+ from sdg_core_lib.data_generator.models.TrainingInfo import TrainingInfo
3
+
4
+
5
@pytest.fixture
def training_info():
    """Provide a TrainingInfo with fixed loss, sample-count and loss-value fields."""
    fields = {
        "loss_fn": "mse",
        "train_samples": 100,
        "train_loss": 0.05,
        "validation_samples": 20,
        "validation_loss": 0.03,
    }
    return TrainingInfo(**fields)
14
+
15
+
16
def test_training_info_to_dict(training_info):
    """to_dict() returns the fields under their serialized key names."""
    expected = dict(
        loss_function="mse",
        train_samples=100,
        train_loss=0.05,
        val_samples=20,
        val_loss=0.03,
    )
    assert training_info.to_dict() == expected
24
+
25
+
26
def test_training_info_to_json(training_info):
    """to_json() returns the exact serialized string, including key order and spacing."""
    expected_json = '{"loss_function": "mse", "train_samples": 100, "train_loss": 0.05, "val_samples": 20, "val_loss": 0.03}'
    serialized = training_info.to_json()
    assert serialized == expected_json
@@ -0,0 +1,32 @@
1
+ import pytest
2
+
3
+ from sdg_core_lib.data_generator.models.UnspecializedModel import UnspecializedModel
4
+
5
+
6
@pytest.fixture(autouse=False)
def unspecialized_model():
    """Build an UnspecializedModel directly from the base class.

    NOTE(review): no test visible in this module requests this fixture, and
    test_initialize expects the very same constructor call to raise
    TypeError. Confirm whether the fixture is still needed.
    """
    init_kwargs = {
        "metadata": {},
        "model_name": "Test-T_VAE",
        "input_shape": "(13,)",
        "load_path": None,
    }
    return UnspecializedModel(**init_kwargs)
11
+
12
+
13
def test_initialize():
    """Constructing UnspecializedModel directly is expected to raise TypeError."""
    init_kwargs = {
        "metadata": {},
        "model_name": "Test-T_VAE",
        "input_shape": "(13,)",
        "load_path": None,
    }
    with pytest.raises(TypeError) as raised:
        _ = UnspecializedModel(**init_kwargs)
    assert raised.type is TypeError
19
+
20
+
21
def test_parse_stringed_input_shape():
    """_parse_stringed_input_shape() accepts several bracket styles and trailing commas."""
    parse = UnspecializedModel._parse_stringed_input_shape

    # Four spellings of the same one-dimensional shape.
    for spelling in ("(10,)", "(10)", "[10,]", "{10,}"):
        assert parse(spelling) == (10,)

    # Two dimensions with a trailing comma.
    assert parse("(13,10,)") == (13, 10)
@@ -0,0 +1,52 @@
1
+ import pytest
2
+
3
+ from sdg_core_lib.data_generator.model_factory import dynamic_import, model_factory
4
+ from sdg_core_lib.data_generator.models.keras.implementation.TabularVAE import (
5
+ TabularVAE,
6
+ )
7
+
8
+
9
@pytest.fixture()
def class_name():
    """Fully qualified dotted path of the TabularVAE class."""
    dotted_path = (
        "sdg_core_lib.data_generator.models.keras.implementation.TabularVAE.TabularVAE"
    )
    return dotted_path
14
+
15
+
16
@pytest.fixture()
def shapeless_model():
    """Model description for model_factory that carries no "input_shape" key."""
    return dict(
        algorithm_name="sdg_core_lib.data_generator.models.keras.implementation.TabularVAE.TabularVAE",
        model_name="Test-T_VAE",
    )
22
+
23
+
24
@pytest.fixture()
def shape_model():
    """Model description for model_factory that includes input shape "(13,)"."""
    return dict(
        algorithm_name="sdg_core_lib.data_generator.models.keras.implementation.TabularVAE.TabularVAE",
        model_name="Test-T_VAE",
        input_shape="(13,)",
    )
31
+
32
+
33
def test_dynamic_import(class_name):
    """dynamic_import() resolves the dotted path to the TabularVAE class object."""
    resolved = dynamic_import(class_name)
    assert resolved is not None
    assert resolved is TabularVAE
37
+
38
+
39
def test_model_factory_empty(shapeless_model):
    """model_factory() builds a TabularVAE when input_shape is passed separately.

    The model name is compared with ``==``. String identity (``is``) only
    holds if the factory happens to hand back the very same object, which
    is an implementation detail rather than the property under test.
    """
    model = model_factory(shapeless_model, input_shape="(13,)")
    assert type(model) is TabularVAE
    assert model.input_shape == (13,)
    assert model._model is not None
    assert model.model_name == shapeless_model["model_name"]
45
+
46
+
47
def test_model_factory_full(shape_model):
    """model_factory() builds a TabularVAE from a description that includes input_shape.

    The model name is compared with ``==``. String identity (``is``) only
    holds if the factory happens to hand back the very same object, which
    is an implementation detail rather than the property under test.
    """
    model = model_factory(shape_model)
    assert type(model) is TabularVAE
    assert model.input_shape == (13,)
    assert model._model is not None
    assert model.model_name == shape_model["model_name"]
File without changes
@@ -0,0 +1,62 @@
1
+ from sdg_core_lib.evaluate.Metrics import (
2
+ Metric,
3
+ StatisticalMetric,
4
+ AdherenceMetric,
5
+ NoveltyMetric,
6
+ MetricReport,
7
+ )
8
+
9
+
10
def test_metric_init():
    """Metric stores the title, unit of measure and value it was built with."""
    title, unit, value = "title", "unit measure", 1.0
    metric = Metric(title, unit, value)
    assert metric.title == title
    assert metric.unit_measure == unit
    assert metric.value == value
15
+
16
+
17
def test_statistical_metric_init():
    """StatisticalMetric stores its arguments and reports type "statistical_metrics"."""
    title, unit, value = "title", "unit measure", 1.0
    metric = StatisticalMetric(title, unit, value)
    assert metric.title == title
    assert metric.unit_measure == unit
    assert metric.value == value
    assert metric.type == "statistical_metrics"
23
+
24
+
25
def test_adherence_metric_init():
    """AdherenceMetric stores its arguments and reports type "adherence_metrics"."""
    title, unit, value = "title", "unit measure", 1.0
    metric = AdherenceMetric(title, unit, value)
    assert metric.title == title
    assert metric.unit_measure == unit
    assert metric.value == value
    assert metric.type == "adherence_metrics"
31
+
32
+
33
def test_novelty_metric_init():
    """NoveltyMetric stores its arguments and reports type "novelty_metrics"."""
    title, unit, value = "title", "unit measure", 1.0
    metric = NoveltyMetric(title, unit, value)
    assert metric.title == title
    assert metric.unit_measure == unit
    assert metric.value == value
    assert metric.type == "novelty_metrics"
39
+
40
+
41
def test_metric_report_init():
    """A new MetricReport starts with an empty report mapping."""
    empty_report = MetricReport()
    assert empty_report.report == {}
44
+
45
+
46
def test_metric_report_add_metric():
    """add_metric() files a StatisticalMetric under "statistical_metrics"."""
    report = MetricReport()
    report.add_metric(StatisticalMetric("title", "unit measure", 1.0))
    stored = report.report["statistical_metrics"]
    assert len(stored) == 1
51
+
52
+
53
def test_metric_report_to_json():
    """to_json() serializes an added metric to its title, unit and value."""
    report = MetricReport()
    report.add_metric(StatisticalMetric("title", "unit measure", 1.0))

    first_entry = report.to_json()["statistical_metrics"][0]

    expected_entry = dict(title="title", unit_measure="unit measure", value=1.0)
    assert first_entry == expected_entry
@@ -0,0 +1,75 @@
1
+ import pytest
2
+ import pandas as pd
3
+
4
+ from sdg_core_lib.evaluate.TabularComparison import TabularComparisonEvaluator
5
+
6
+
7
@pytest.fixture()
def real_data():
    """Provide the "real" DataFrame: four integer columns a-d with three rows each."""
    columns = {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [10, 11, 12]}
    return pd.DataFrame(columns)
12
+
13
+
14
@pytest.fixture()
def synthetic_data():
    """Provide the "synthetic" DataFrame: four integer columns a-d with three rows each."""
    columns = {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9], "d": [10, 11, 12]}
    return pd.DataFrame(columns)
19
+
20
+
21
@pytest.fixture()
def evaluator_correct(real_data, synthetic_data):
    """Provide an evaluator treating a/b as numerical and c/d as categorical columns."""
    numerical_columns = ["a", "b"]
    categorical_columns = ["c", "d"]
    return TabularComparisonEvaluator(
        real_data, synthetic_data, numerical_columns, categorical_columns
    )
24
+
25
+
26
def test_init(evaluator_correct, real_data, synthetic_data):
    """The evaluator keeps the column lists and both data frames it was given."""
    evaluator = evaluator_correct
    assert evaluator._numerical_columns == ["a", "b"]
    assert evaluator._categorical_columns == ["c", "d"]
    assert evaluator._real_data.equals(real_data)
    assert evaluator._synthetic_data.equals(synthetic_data)
31
+
32
+
33
def test_evaluate(evaluator_correct):
    """compute() returns the three metric groups with the expected entries.

    Each condition is asserted on its own, so a failure points at the
    exact check (title or value) rather than at a compound ``and``
    expression. The leftover debug ``print`` of the report is dropped;
    pytest already shows the compared values when an assertion fails.
    """
    report = evaluator_correct.compute()
    for section in ("statistical_metrics", "adherence_metrics", "novelty_metrics"):
        assert section in report

    statistical_titles = [metric["title"] for metric in report["statistical_metrics"]]
    assert "Total Statistical Compliance" in statistical_titles
    assert "Categorical Features Cramer's V" in statistical_titles
    assert "Numerical Features Wasserstein Distance" in statistical_titles

    # Adherence metrics: one value per column of the matching kind.
    adherence_metrics = report["adherence_metrics"]
    assert len(adherence_metrics[0]["value"]) == len(
        evaluator_correct._categorical_columns
    )
    assert (
        adherence_metrics[0]["title"]
        == "Synthetic Categories Adherence to Real Categories"
    )
    assert len(adherence_metrics[1]["value"]) == len(
        evaluator_correct._numerical_columns
    )
    assert (
        adherence_metrics[1]["title"]
        == "Synthetic Numerical Min-Max Boundaries Adherence"
    )

    # Novelty metrics: values are checked against the 0-100 range.
    novelty_metrics = report["novelty_metrics"]
    assert 0 <= novelty_metrics[0]["value"] <= 100
    assert novelty_metrics[0]["title"] == "Unique Synthetic Data"
    assert 0 <= novelty_metrics[1]["value"] <= 100
    assert novelty_metrics[1]["title"] == "New Synthetic Data"
65
+
66
+
67
def test_evaluate_cramer_v_distance(evaluator_correct):
    """_evaluate_cramer_v_distance() returns a value within [0, 1].

    The leftover debug ``print`` is removed; pytest reports the offending
    value itself if the range assertion fails.
    """
    cramer_v = evaluator_correct._evaluate_cramer_v_distance()
    assert 0 <= cramer_v <= 1
71
+
72
+
73
def test_evaluate_wasserstein_distance(evaluator_correct):
    """_evaluate_wasserstein_distance() returns a value within [0, 1]."""
    distance = evaluator_correct._evaluate_wasserstein_distance()
    assert 0 <= distance <= 1