sdg-core-lib 0.1.8.dev10__tar.gz → 0.1.9.dev0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/PKG-INFO +1 -1
  2. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/pyproject.toml +1 -1
  3. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/evaluate/base_evaluator.py +1 -1
  4. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/evaluate/tables.py +1 -1
  5. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/evaluate/time_series.py +1 -1
  6. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/job.py +39 -27
  7. sdg_core_lib-0.1.8.dev10/src/sdg_core_lib/post_process/FunctionApplier.py → sdg_core_lib-0.1.9.dev0/src/sdg_core_lib/post_process/TabularFunctionApplier.py +9 -2
  8. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/README.md +0 -0
  9. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/__init__.py +0 -0
  10. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/browser.py +0 -0
  11. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/commons.py +0 -0
  12. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/data_generator/__init__.py +0 -0
  13. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/data_generator/models/GANs/CTGANComponents.py +0 -0
  14. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/data_generator/models/GANs/__init__.py +0 -0
  15. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/data_generator/models/GANs/implementation/CTGAN.py +0 -0
  16. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/data_generator/models/GANs/implementation/__init__.py +0 -0
  17. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/data_generator/models/ModelInfo.py +0 -0
  18. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/data_generator/models/TrainingInfo.py +0 -0
  19. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/data_generator/models/UnspecializedModel.py +0 -0
  20. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/data_generator/models/VAEs/KerasBaseVAE.py +0 -0
  21. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/data_generator/models/VAEs/VAE.py +0 -0
  22. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/data_generator/models/VAEs/__init__.py +0 -0
  23. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/data_generator/models/VAEs/implementation/AutoTabularVAE.py +0 -0
  24. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/data_generator/models/VAEs/implementation/TabularVAE.py +0 -0
  25. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/data_generator/models/VAEs/implementation/TimeSeriesVAE.py +0 -0
  26. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/data_generator/models/VAEs/implementation/__init__.py +0 -0
  27. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/data_generator/models/__init__.py +0 -0
  28. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/dataset/__init__.py +0 -0
  29. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/dataset/columns.py +0 -0
  30. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/dataset/datasets.py +0 -0
  31. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/dataset/validation_schema.py +0 -0
  32. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/evaluate/__init__.py +0 -0
  33. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/evaluate/metrics.py +0 -0
  34. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/mappings.py +0 -0
  35. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/__init__.py +0 -0
  36. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/function_factory.py +0 -0
  37. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/function_utils.py +0 -0
  38. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/Parameter.py +0 -0
  39. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/UnspecializedFunction.py +0 -0
  40. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/__init__.py +0 -0
  41. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/distribution_evaluator/__init__.py +0 -0
  42. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/distribution_evaluator/implementation/NormalTester.py +0 -0
  43. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/distribution_evaluator/implementation/__init__.py +0 -0
  44. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/filter/IntervalThreshold.py +0 -0
  45. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/filter/MonoThreshold.py +0 -0
  46. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/filter/__init__.py +0 -0
  47. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/filter/implementation/InnerThreshold.py +0 -0
  48. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/filter/implementation/LowerThreshold.py +0 -0
  49. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/filter/implementation/OuterThreshold.py +0 -0
  50. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/filter/implementation/UpperThreshold.py +0 -0
  51. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/filter/implementation/__init__.py +0 -0
  52. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/generation/__init__.py +0 -0
  53. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/generation/implementation/LinearFunction.py +0 -0
  54. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/generation/implementation/NormalDistributionSample.py +0 -0
  55. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/generation/implementation/QuadraticFunction.py +0 -0
  56. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/generation/implementation/SinusoidalFunction.py +0 -0
  57. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/generation/implementation/__init__.py +0 -0
  58. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/modification/__init__.py +0 -0
  59. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/modification/implementation/BurstNoiseAdder.py +0 -0
  60. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/modification/implementation/WhiteNoiseAdder.py +0 -0
  61. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/post_process/functions/modification/implementation/__init__.py +0 -0
  62. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/preprocess/__init__.py +0 -0
  63. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/preprocess/base_processor.py +0 -0
  64. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/preprocess/strategies/__init__.py +0 -0
  65. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/preprocess/strategies/base_strategy.py +0 -0
  66. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/preprocess/strategies/ctgan_strategy.py +0 -0
  67. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/preprocess/strategies/steps.py +0 -0
  68. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/preprocess/strategies/vae_strategy.py +0 -0
  69. {sdg_core_lib-0.1.8.dev10 → sdg_core_lib-0.1.9.dev0}/src/sdg_core_lib/preprocess/table_processor.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sdg-core-lib
3
- Version: 0.1.8.dev10
3
+ Version: 0.1.9.dev0
4
4
  Summary: Add your description here
5
5
  Author: emiliocimino
6
6
  Author-email: emiliocimino <emilio.cimino@outlook.it>
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "sdg-core-lib"
3
- version = "0.1.8.dev10"
3
+ version = "0.1.9.dev0"
4
4
  description = "Add your description here"
5
5
  license = "AGPL-3.0"
6
6
  readme = "README.md"
@@ -10,5 +10,5 @@ class BaseEvaluator(ABC):
10
10
  self.report = MetricReport()
11
11
 
12
12
  @abstractmethod
13
- def compute(self):
13
+ def compute(self) -> dict:
14
14
  raise NotImplementedError
@@ -34,7 +34,7 @@ class TabularComparisonEvaluator(BaseEvaluator):
34
34
  raise TypeError("synthetic_data must be a Table")
35
35
  super().__init__(real_data, synthetic_data)
36
36
 
37
- def compute(self):
37
+ def compute(self) -> dict:
38
38
  numerical_columns = self._real_data.get_numeric_columns()
39
39
  categorical_columns = self._real_data.get_categorical_columns()
40
40
  if len(numerical_columns) < 1 and len(categorical_columns) < 1:
@@ -29,7 +29,7 @@ class TimeSeriesComparisonEvaluator(TabularComparisonEvaluator):
29
29
  raise ValueError("synthetic_data must be a TimeSeries")
30
30
  super().__init__(real_data, synthetic_data)
31
31
 
32
- def compute(self):
32
+ def compute(self) -> dict:
33
33
  numerical_columns = self._real_data.get_numeric_columns()
34
34
  categorical_columns = self._real_data.get_categorical_columns()
35
35
  if len(numerical_columns) < 1 and len(categorical_columns) < 1:
@@ -2,7 +2,7 @@ from typing import Optional, Type
2
2
 
3
3
  from sdg_core_lib.data_generator.models.UnspecializedModel import UnspecializedModel
4
4
  from sdg_core_lib.dataset.datasets import Dataset
5
- from sdg_core_lib.post_process.FunctionApplier import FunctionApplier
5
+ from sdg_core_lib.post_process.TabularFunctionApplier import TabularFunctionApplier
6
6
  from sdg_core_lib.preprocess.base_processor import Processor
7
7
  from sdg_core_lib.mappings import (
8
8
  DatasetMapping,
@@ -36,7 +36,7 @@ class Job:
36
36
  self.__dataset = dataset if dataset is not None else {}
37
37
  self.__n_rows = n_rows
38
38
  self.__save_filepath = save_filepath
39
- self.__functions = functions
39
+ self.__functions = functions if functions else []
40
40
  dataset_type = self.__dataset.get("dataset_type", "")
41
41
  self.__dataset_mapping = self._get_dataset_mapping(dataset_type)
42
42
  self.__dataset_class = self.__dataset_mapping.get_dataset_class()
@@ -92,6 +92,36 @@ class Job:
92
92
 
93
93
  return processor
94
94
 
95
+ def _infer_and_evaluate(
96
+ self,
97
+ data: Dataset,
98
+ preprocessed_data: Dataset,
99
+ processor: Processor,
100
+ model: UnspecializedModel,
101
+ ) -> tuple[dict, list[dict]]:
102
+
103
+ predicted_data = model.infer(self.__n_rows)
104
+ synthetic_data = preprocessed_data.clone(predicted_data)
105
+ synthetic_data = synthetic_data.postprocess(processor)
106
+ function_generator = TabularFunctionApplier(
107
+ self.__functions, self.__n_rows, from_scratch=False
108
+ )
109
+ try:
110
+ filtered_synthetic_data = function_generator.apply_all(synthetic_data)
111
+ except TypeError:
112
+ filtered_synthetic_data = synthetic_data
113
+
114
+ report = {"available": "false"}
115
+ if data is not None:
116
+ evaluator = self.__evaluator_class(
117
+ real_data=data,
118
+ synthetic_data=synthetic_data,
119
+ )
120
+ report = evaluator.compute()
121
+
122
+ results = filtered_synthetic_data.to_json()
123
+ return report, results
124
+
95
125
  def train(self) -> tuple[list[dict], dict, UnspecializedModel, list[dict]]:
96
126
  """
97
127
  Runs a pre-defined training job.
@@ -117,16 +147,9 @@ class Job:
117
147
  model.train(data=preprocessed_data.get_computing_data())
118
148
  model.save(self.__save_filepath)
119
149
 
120
- predicted_data = model.infer(self.__n_rows)
121
- synthetic_data = preprocessed_data.clone(predicted_data)
122
- synthetic_data = synthetic_data.postprocess(processor)
123
-
124
- evaluator = self.__evaluator_class(
125
- real_data=data,
126
- synthetic_data=synthetic_data,
150
+ report, results = self._infer_and_evaluate(
151
+ data, preprocessed_data, processor, model
127
152
  )
128
- report = evaluator.compute()
129
- results = synthetic_data.to_json()
130
153
 
131
154
  return results, report, model, preprocess_schema
132
155
 
@@ -143,33 +166,22 @@ class Job:
143
166
  preprocessed_data = data.preprocess(processor)
144
167
 
145
168
  model = self._model_factory(preprocessed_data, is_new_model=False)
146
- predicted_data = model.infer(self.__n_rows)
147
- synthetic_data = preprocessed_data.clone(predicted_data)
148
- synthetic_data = synthetic_data.postprocess(processor)
149
-
150
- report = {"available": "false"}
151
- if data is not None:
152
- evaluator = self.__evaluator_class(
153
- real_data=data,
154
- synthetic_data=synthetic_data,
155
- )
156
- report = evaluator.compute()
157
-
158
- results = synthetic_data.to_json()
169
+ report, results = self._infer_and_evaluate(
170
+ data, preprocessed_data, processor, model
171
+ )
159
172
 
160
173
  return results, report
161
174
 
162
175
  def generate_from_functions(self, dataset: Optional[Dataset] = None):
163
176
  """
164
177
  Generate a dataset from a list of functions.
165
- :param n_rows: number of rows to generate
166
178
  :param dataset: a Dataset object
167
- :return: a dataset in json format
179
+ :return: a dataset in JSON format
168
180
  """
169
181
  from_scratch = False
170
182
  if dataset is None:
171
183
  from_scratch = True
172
- function_generator = FunctionApplier(
184
+ function_generator = TabularFunctionApplier(
173
185
  self.__functions, self.__n_rows, from_scratch=from_scratch
174
186
  )
175
187
  dataset = function_generator.apply_all(dataset)
@@ -5,7 +5,12 @@ from loguru import logger
5
5
  from typing import Optional
6
6
 
7
7
 
8
- class FunctionApplier:
8
+ class TabularFunctionApplier:
9
+ """
10
+ A class to apply functions to datasets.
11
+ WARNING: this class is under construction. New dataset types will be supported in the future.
12
+ """
13
+
9
14
  def __init__(
10
15
  self, function_feature_dict: list[dict], n_rows: int, from_scratch: bool = False
11
16
  ):
@@ -126,7 +131,9 @@ class FunctionApplier:
126
131
  Raises:
127
132
  ValueError: If data compatibility issues arise
128
133
  """
129
- if not isinstance(dataset, Table):
134
+ if not type(dataset) is Table:
135
+ # TODO: support other dataset types
136
+ logger.error("Only Table datasets are currently supported")
130
137
  raise TypeError("Only Table datasets are currently supported")
131
138
 
132
139
  json_structure = dataset.to_json()