sdg-core-lib 0.1.8.dev10__tar.gz → 0.1.9.dev0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/PKG-INFO +1 -1
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/pyproject.toml +1 -1
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/evaluate/base_evaluator.py +1 -1
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/evaluate/tables.py +1 -1
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/evaluate/time_series.py +1 -1
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/job.py +39 -27
- sdg_core_lib-0.1.8.dev10/src/sdg_core_lib/post_process/FunctionApplier.py → sdg_core_lib-0.1.9.dev0/src/sdg_core_lib/post_process/TabularFunctionApplier.py +9 -2
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/README.md +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/__init__.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/browser.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/commons.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/data_generator/__init__.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/data_generator/models/GANs/CTGANComponents.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/data_generator/models/GANs/__init__.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/data_generator/models/GANs/implementation/CTGAN.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/data_generator/models/GANs/implementation/__init__.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/data_generator/models/ModelInfo.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/data_generator/models/TrainingInfo.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/data_generator/models/UnspecializedModel.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/data_generator/models/VAEs/KerasBaseVAE.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/data_generator/models/VAEs/VAE.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/data_generator/models/VAEs/__init__.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/data_generator/models/VAEs/implementation/AutoTabularVAE.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/data_generator/models/VAEs/implementation/TabularVAE.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/data_generator/models/VAEs/implementation/TimeSeriesVAE.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/data_generator/models/VAEs/implementation/__init__.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/data_generator/models/__init__.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/dataset/__init__.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/dataset/columns.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/dataset/datasets.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/dataset/validation_schema.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/evaluate/__init__.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/evaluate/metrics.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/mappings.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/__init__.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/function_factory.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/function_utils.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/Parameter.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/UnspecializedFunction.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/__init__.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/distribution_evaluator/__init__.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/distribution_evaluator/implementation/NormalTester.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/distribution_evaluator/implementation/__init__.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/filter/IntervalThreshold.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/filter/MonoThreshold.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/filter/__init__.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/filter/implementation/InnerThreshold.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/filter/implementation/LowerThreshold.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/filter/implementation/OuterThreshold.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/filter/implementation/UpperThreshold.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/filter/implementation/__init__.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/generation/__init__.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/generation/implementation/LinearFunction.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/generation/implementation/NormalDistributionSample.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/generation/implementation/QuadraticFunction.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/generation/implementation/SinusoidalFunction.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/generation/implementation/__init__.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/modification/__init__.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/modification/implementation/BurstNoiseAdder.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/modification/implementation/WhiteNoiseAdder.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/modification/implementation/__init__.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/preprocess/__init__.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/preprocess/base_processor.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/preprocess/strategies/__init__.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/preprocess/strategies/base_strategy.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/preprocess/strategies/ctgan_strategy.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/preprocess/strategies/steps.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/preprocess/strategies/vae_strategy.py +0 -0
- {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/preprocess/table_processor.py +0 -0
|
@@ -34,7 +34,7 @@ class TabularComparisonEvaluator(BaseEvaluator):
|
|
|
34
34
|
raise TypeError("synthetic_data must be a Table")
|
|
35
35
|
super().__init__(real_data, synthetic_data)
|
|
36
36
|
|
|
37
|
-
def compute(self):
|
|
37
|
+
def compute(self) -> dict:
|
|
38
38
|
numerical_columns = self._real_data.get_numeric_columns()
|
|
39
39
|
categorical_columns = self._real_data.get_categorical_columns()
|
|
40
40
|
if len(numerical_columns) < 1 and len(categorical_columns) < 1:
|
{sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/evaluate/time_series.py
RENAMED
|
@@ -29,7 +29,7 @@ class TimeSeriesComparisonEvaluator(TabularComparisonEvaluator):
|
|
|
29
29
|
raise ValueError("synthetic_data must be a TimeSeries")
|
|
30
30
|
super().__init__(real_data, synthetic_data)
|
|
31
31
|
|
|
32
|
-
def compute(self):
|
|
32
|
+
def compute(self) -> dict:
|
|
33
33
|
numerical_columns = self._real_data.get_numeric_columns()
|
|
34
34
|
categorical_columns = self._real_data.get_categorical_columns()
|
|
35
35
|
if len(numerical_columns) < 1 and len(categorical_columns) < 1:
|
|
@@ -2,7 +2,7 @@ from typing import Optional, Type
|
|
|
2
2
|
|
|
3
3
|
from sdg_core_lib.data_generator.models.UnspecializedModel import UnspecializedModel
|
|
4
4
|
from sdg_core_lib.dataset.datasets import Dataset
|
|
5
|
-
from sdg_core_lib.post_process.FunctionApplier import FunctionApplier
|
|
5
|
+
from sdg_core_lib.post_process.TabularFunctionApplier import TabularFunctionApplier
|
|
6
6
|
from sdg_core_lib.preprocess.base_processor import Processor
|
|
7
7
|
from sdg_core_lib.mappings import (
|
|
8
8
|
DatasetMapping,
|
|
@@ -36,7 +36,7 @@ class Job:
|
|
|
36
36
|
self.__dataset = dataset if dataset is not None else {}
|
|
37
37
|
self.__n_rows = n_rows
|
|
38
38
|
self.__save_filepath = save_filepath
|
|
39
|
-
self.__functions = functions
|
|
39
|
+
self.__functions = functions if functions else []
|
|
40
40
|
dataset_type = self.__dataset.get("dataset_type", "")
|
|
41
41
|
self.__dataset_mapping = self._get_dataset_mapping(dataset_type)
|
|
42
42
|
self.__dataset_class = self.__dataset_mapping.get_dataset_class()
|
|
@@ -92,6 +92,36 @@ class Job:
|
|
|
92
92
|
|
|
93
93
|
return processor
|
|
94
94
|
|
|
95
|
+
def _infer_and_evaluate(
|
|
96
|
+
self,
|
|
97
|
+
data: Dataset,
|
|
98
|
+
preprocessed_data: Dataset,
|
|
99
|
+
processor: Processor,
|
|
100
|
+
model: UnspecializedModel,
|
|
101
|
+
) -> tuple[dict, list[dict]]:
|
|
102
|
+
|
|
103
|
+
predicted_data = model.infer(self.__n_rows)
|
|
104
|
+
synthetic_data = preprocessed_data.clone(predicted_data)
|
|
105
|
+
synthetic_data = synthetic_data.postprocess(processor)
|
|
106
|
+
function_generator = TabularFunctionApplier(
|
|
107
|
+
self.__functions, self.__n_rows, from_scratch=False
|
|
108
|
+
)
|
|
109
|
+
try:
|
|
110
|
+
filtered_synthetic_data = function_generator.apply_all(synthetic_data)
|
|
111
|
+
except TypeError:
|
|
112
|
+
filtered_synthetic_data = synthetic_data
|
|
113
|
+
|
|
114
|
+
report = {"available": "false"}
|
|
115
|
+
if data is not None:
|
|
116
|
+
evaluator = self.__evaluator_class(
|
|
117
|
+
real_data=data,
|
|
118
|
+
synthetic_data=synthetic_data,
|
|
119
|
+
)
|
|
120
|
+
report = evaluator.compute()
|
|
121
|
+
|
|
122
|
+
results = filtered_synthetic_data.to_json()
|
|
123
|
+
return report, results
|
|
124
|
+
|
|
95
125
|
def train(self) -> tuple[list[dict], dict, UnspecializedModel, list[dict]]:
|
|
96
126
|
"""
|
|
97
127
|
Runs a pre-defined training job.
|
|
@@ -117,16 +147,9 @@ class Job:
|
|
|
117
147
|
model.train(data=preprocessed_data.get_computing_data())
|
|
118
148
|
model.save(self.__save_filepath)
|
|
119
149
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
synthetic_data = synthetic_data.postprocess(processor)
|
|
123
|
-
|
|
124
|
-
evaluator = self.__evaluator_class(
|
|
125
|
-
real_data=data,
|
|
126
|
-
synthetic_data=synthetic_data,
|
|
150
|
+
report, results = self._infer_and_evaluate(
|
|
151
|
+
data, preprocessed_data, processor, model
|
|
127
152
|
)
|
|
128
|
-
report = evaluator.compute()
|
|
129
|
-
results = synthetic_data.to_json()
|
|
130
153
|
|
|
131
154
|
return results, report, model, preprocess_schema
|
|
132
155
|
|
|
@@ -143,33 +166,22 @@ class Job:
|
|
|
143
166
|
preprocessed_data = data.preprocess(processor)
|
|
144
167
|
|
|
145
168
|
model = self._model_factory(preprocessed_data, is_new_model=False)
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
report = {"available": "false"}
|
|
151
|
-
if data is not None:
|
|
152
|
-
evaluator = self.__evaluator_class(
|
|
153
|
-
real_data=data,
|
|
154
|
-
synthetic_data=synthetic_data,
|
|
155
|
-
)
|
|
156
|
-
report = evaluator.compute()
|
|
157
|
-
|
|
158
|
-
results = synthetic_data.to_json()
|
|
169
|
+
report, results = self._infer_and_evaluate(
|
|
170
|
+
data, preprocessed_data, processor, model
|
|
171
|
+
)
|
|
159
172
|
|
|
160
173
|
return results, report
|
|
161
174
|
|
|
162
175
|
def generate_from_functions(self, dataset: Optional[Dataset] = None):
|
|
163
176
|
"""
|
|
164
177
|
Generate a dataset from a list of functions.
|
|
165
|
-
:param n_rows: number of rows to generate
|
|
166
178
|
:param dataset: a Dataset object
|
|
167
|
-
:return: a dataset in
|
|
179
|
+
:return: a dataset in JSON format
|
|
168
180
|
"""
|
|
169
181
|
from_scratch = False
|
|
170
182
|
if dataset is None:
|
|
171
183
|
from_scratch = True
|
|
172
|
-
function_generator = FunctionApplier(
|
|
184
|
+
function_generator = TabularFunctionApplier(
|
|
173
185
|
self.__functions, self.__n_rows, from_scratch=from_scratch
|
|
174
186
|
)
|
|
175
187
|
dataset = function_generator.apply_all(dataset)
|
|
@@ -5,7 +5,12 @@ from loguru import logger
|
|
|
5
5
|
from typing import Optional
|
|
6
6
|
|
|
7
7
|
|
|
8
|
-
class FunctionApplier:
|
|
8
|
+
class TabularFunctionApplier:
|
|
9
|
+
"""
|
|
10
|
+
A class to apply functions to datasets.
|
|
11
|
+
WARNING: this class is under construction. New dataset types will be supported in the future.
|
|
12
|
+
"""
|
|
13
|
+
|
|
9
14
|
def __init__(
|
|
10
15
|
self, function_feature_dict: list[dict], n_rows: int, from_scratch: bool = False
|
|
11
16
|
):
|
|
@@ -126,7 +131,9 @@ class FunctionApplier:
|
|
|
126
131
|
Raises:
|
|
127
132
|
ValueError: If data compatibility issues arise
|
|
128
133
|
"""
|
|
129
|
-
if not
|
|
134
|
+
if not type(dataset) is Table:
|
|
135
|
+
# TODO: support other dataset types
|
|
136
|
+
logger.error("Only Table datasets are currently supported")
|
|
130
137
|
raise TypeError("Only Table datasets are currently supported")
|
|
131
138
|
|
|
132
139
|
json_structure = dataset.to_json()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/data_generator/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/dataset/validation_schema.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/function_utils.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/preprocess/__init__.py
RENAMED
|
File without changes
|
{sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/preprocess/base_processor.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/preprocess/strategies/steps.py
RENAMED
|
File without changes
|
|
File without changes
|
{sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/preprocess/table_processor.py
RENAMED
|
File without changes
|