sdg-core-lib 0.1.9.dev2__tar.gz → 0.1.9.dev4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/PKG-INFO +1 -1
  2. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/pyproject.toml +1 -1
  3. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/job.py +3 -1
  4. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/TabularFunctionApplier.py +46 -2
  5. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/filter/implementation/InnerThreshold.py +4 -3
  6. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/filter/implementation/LowerThreshold.py +3 -2
  7. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/filter/implementation/OuterThreshold.py +4 -4
  8. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/filter/implementation/UpperThreshold.py +3 -2
  9. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/README.md +0 -0
  10. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/__init__.py +0 -0
  11. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/browser.py +0 -0
  12. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/commons.py +0 -0
  13. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/data_generator/__init__.py +0 -0
  14. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/data_generator/models/GANs/CTGANComponents.py +0 -0
  15. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/data_generator/models/GANs/__init__.py +0 -0
  16. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/data_generator/models/GANs/implementation/CTGAN.py +0 -0
  17. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/data_generator/models/GANs/implementation/__init__.py +0 -0
  18. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/data_generator/models/ModelInfo.py +0 -0
  19. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/data_generator/models/TrainingInfo.py +0 -0
  20. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/data_generator/models/UnspecializedModel.py +0 -0
  21. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/data_generator/models/VAEs/KerasBaseVAE.py +0 -0
  22. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/data_generator/models/VAEs/VAE.py +0 -0
  23. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/data_generator/models/VAEs/__init__.py +0 -0
  24. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/data_generator/models/VAEs/implementation/AutoTabularVAE.py +0 -0
  25. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/data_generator/models/VAEs/implementation/TabularVAE.py +0 -0
  26. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/data_generator/models/VAEs/implementation/TimeSeriesVAE.py +0 -0
  27. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/data_generator/models/VAEs/implementation/__init__.py +0 -0
  28. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/data_generator/models/__init__.py +0 -0
  29. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/dataset/__init__.py +0 -0
  30. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/dataset/columns.py +0 -0
  31. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/dataset/datasets.py +0 -0
  32. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/dataset/validation_schema.py +0 -0
  33. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/evaluate/__init__.py +0 -0
  34. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/evaluate/base_evaluator.py +0 -0
  35. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/evaluate/metrics.py +0 -0
  36. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/evaluate/tables.py +0 -0
  37. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/evaluate/time_series.py +0 -0
  38. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/mappings.py +0 -0
  39. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/__init__.py +0 -0
  40. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/function_factory.py +0 -0
  41. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/function_utils.py +0 -0
  42. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/Parameter.py +0 -0
  43. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/UnspecializedFunction.py +0 -0
  44. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/__init__.py +0 -0
  45. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/distribution_evaluator/__init__.py +0 -0
  46. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/distribution_evaluator/implementation/NormalTester.py +0 -0
  47. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/distribution_evaluator/implementation/__init__.py +0 -0
  48. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/filter/IntervalThreshold.py +0 -0
  49. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/filter/MonoThreshold.py +0 -0
  50. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/filter/__init__.py +0 -0
  51. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/filter/implementation/__init__.py +0 -0
  52. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/generation/__init__.py +0 -0
  53. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/generation/implementation/LinearFunction.py +0 -0
  54. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/generation/implementation/NormalDistributionSample.py +0 -0
  55. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/generation/implementation/QuadraticFunction.py +0 -0
  56. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/generation/implementation/SinusoidalFunction.py +0 -0
  57. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/generation/implementation/__init__.py +0 -0
  58. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/modification/__init__.py +0 -0
  59. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/modification/implementation/BurstNoiseAdder.py +0 -0
  60. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/modification/implementation/WhiteNoiseAdder.py +0 -0
  61. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/modification/implementation/__init__.py +0 -0
  62. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/preprocess/__init__.py +0 -0
  63. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/preprocess/base_processor.py +0 -0
  64. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/preprocess/strategies/__init__.py +0 -0
  65. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/preprocess/strategies/base_strategy.py +0 -0
  66. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/preprocess/strategies/ctgan_strategy.py +0 -0
  67. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/preprocess/strategies/steps.py +0 -0
  68. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/preprocess/strategies/vae_strategy.py +0 -0
  69. {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/preprocess/table_processor.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sdg-core-lib
3
- Version: 0.1.9.dev2
3
+ Version: 0.1.9.dev4
4
4
  Summary: Add your description here
5
5
  Author: emiliocimino
6
6
  Author-email: emiliocimino <emilio.cimino@outlook.it>
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "sdg-core-lib"
3
- version = "0.1.9.dev2"
3
+ version = "0.1.9.dev4"
4
4
  description = "Add your description here"
5
5
  license = "AGPL-3.0"
6
6
  readme = "README.md"
@@ -1,4 +1,5 @@
1
1
  from typing import Optional, Type
2
+ from loguru import logger
2
3
 
3
4
  from sdg_core_lib.data_generator.models.UnspecializedModel import UnspecializedModel
4
5
  from sdg_core_lib.dataset.datasets import Dataset
@@ -108,7 +109,8 @@ class Job:
108
109
  )
109
110
  try:
110
111
  filtered_synthetic_data = function_generator.apply_all(synthetic_data)
111
- except TypeError:
112
+ except (TypeError, ValueError) as e:
113
+ logger.error(f"Unable to apply functions to data: {e}")
112
114
  filtered_synthetic_data = synthetic_data
113
115
 
114
116
  report = {"available": "false"}
@@ -21,17 +21,36 @@ class TabularFunctionApplier:
21
21
 
22
22
  def _initialize(self):
23
23
  feature_function_mapping = {}
24
+ logger.info(
25
+ f"Initializing with function_feature_dict: {self.function_feature_dict}"
26
+ )
27
+
24
28
  for item in self.function_feature_dict:
25
29
  feature = item["feature"]
30
+ logger.debug(f"Processing function for feature: {feature}")
31
+
26
32
  if feature not in feature_function_mapping.keys():
27
33
  feature_function_mapping[feature] = []
28
34
 
29
- feature_function_mapping[feature].append(function_factory(item))
35
+ try:
36
+ function_instance = function_factory(item)
37
+ logger.debug(
38
+ f"Successfully created function: {function_instance.__class__.__name__}"
39
+ )
40
+ feature_function_mapping[feature].append(function_instance)
41
+ except Exception as e:
42
+ logger.error(f"Failed to create function for feature {feature}: {e}")
43
+ logger.error(f"Function item: {item}")
44
+ raise
30
45
 
31
46
  for feature, functions in feature_function_mapping.items():
32
47
  functions.sort(key=lambda x: x.priority.value, reverse=True)
48
+ logger.debug(f"Sorted {len(functions)} functions for feature {feature}")
33
49
 
34
50
  self.function_feature_mapping = feature_function_mapping
51
+ logger.info(
52
+ f"Final function mapping: {list(self.function_feature_mapping.keys())}"
53
+ )
35
54
 
36
55
  def apply_all(self, dataset: Optional[Dataset] = None) -> Dataset:
37
56
  """
@@ -141,14 +160,32 @@ class TabularFunctionApplier:
141
160
  data_array = []
142
161
  unmapped_features = []
143
162
 
163
+ logger.info(
164
+ f"Available features in dataset: {[f['column_name'] for f in json_structure]}"
165
+ )
166
+ logger.info(f"Function mapping: {list(self.function_feature_mapping.keys())}")
167
+
144
168
  for feature in json_structure:
145
169
  feature_name = feature["column_name"]
170
+ logger.debug(f"Processing feature: {feature_name}")
146
171
 
147
172
  if feature_name in self.function_feature_mapping:
173
+ logger.info(f"Found functions for feature: {feature_name}")
148
174
  functions = self.function_feature_mapping[feature_name]
149
- self._validate_function_sequence(functions, from_scratch=False)
175
+ logger.debug(
176
+ f"Functions to apply: {[f.__class__.__name__ for f in functions]}"
177
+ )
178
+
179
+ try:
180
+ self._validate_function_sequence(functions, from_scratch=False)
181
+ except Exception as e:
182
+ logger.error(f"Validation failed for {feature_name}: {e}")
183
+ raise
150
184
 
151
185
  feature_data = np.array(feature["column_data"])
186
+ logger.debug(
187
+ f"Original data shape: {feature_data.shape}, dtype: {feature_data.dtype}"
188
+ )
152
189
  original_shape = feature_data.shape
153
190
 
154
191
  for function in functions:
@@ -159,9 +196,15 @@ class TabularFunctionApplier:
159
196
  continue
160
197
 
161
198
  try:
199
+ logger.debug(
200
+ f"Applying {function.__class__.__name__} to {feature_name}"
201
+ )
162
202
  feature_data, indexes, success = function.apply(
163
203
  n_rows=self.n_rows, data=feature_data
164
204
  )
205
+ logger.debug(
206
+ f"Function result: success={success}, indexes_sum={indexes.sum()}"
207
+ )
165
208
  if not success:
166
209
  logger.warning(
167
210
  f"Function {function.__class__.__name__} failed to apply successfully"
@@ -182,6 +225,7 @@ class TabularFunctionApplier:
182
225
  modified_features.add(feature_name)
183
226
  else:
184
227
  # Preserve unmapped features
228
+ logger.debug(f"No function mapping for feature: {feature_name}")
185
229
  data_array.append(np.array(feature["column_data"]))
186
230
  unmapped_features.append(feature_name)
187
231
 
@@ -7,7 +7,7 @@ from sdg_core_lib.post_process.functions.filter.IntervalThreshold import (
7
7
 
8
8
 
9
9
  class InnerThreshold(IntervalThreshold):
10
- description = "Filters data between a given interval"
10
+ description = "Keeps data in a given interval"
11
11
 
12
12
  def __init__(self, parameters: list[Parameter]):
13
13
  super().__init__(parameters)
@@ -26,5 +26,6 @@ class InnerThreshold(IntervalThreshold):
26
26
  lower_indexes = np.less(data, self.upper_bound)
27
27
 
28
28
  final_indexes = lower_indexes & upper_indexes
29
- data[final_indexes] = np.nan
30
- return data, final_indexes, True
29
+ removed_indexes = ~final_indexes
30
+ data[removed_indexes] = np.nan
31
+ return data, removed_indexes, True
@@ -18,5 +18,6 @@ class LowerThreshold(MonoThreshold):
18
18
  else:
19
19
  indexes = np.greater(data, self.value)
20
20
 
21
- data[indexes] = np.nan
22
- return data, indexes, True
21
+ removed_indexes = indexes
22
+ data[removed_indexes] = np.nan
23
+ return data, removed_indexes, True
@@ -7,7 +7,7 @@ from sdg_core_lib.post_process.functions.filter.IntervalThreshold import (
7
7
 
8
8
 
9
9
  class OuterThreshold(IntervalThreshold):
10
- description = "Filters data outside a given interval"
10
+ description = "Keeps data outside a given interval"
11
11
 
12
12
  def __init__(self, parameters: list[Parameter]):
13
13
  super().__init__(parameters)
@@ -25,6 +25,6 @@ class OuterThreshold(IntervalThreshold):
25
25
  else:
26
26
  lower_indexes = np.less(data, self.lower_bound)
27
27
  final_indexes = lower_indexes | upper_indexes
28
-
29
- data[final_indexes] = np.nan
30
- return data, final_indexes, True
28
+ removed_indexes = ~final_indexes
29
+ data[removed_indexes] = np.nan
30
+ return data, removed_indexes, True
@@ -18,5 +18,6 @@ class UpperThreshold(MonoThreshold):
18
18
  else:
19
19
  indexes = np.less(data, self.value)
20
20
 
21
- data[indexes] = np.nan
22
- return data, indexes, True
21
+ removed_indexes = indexes
22
+ data[removed_indexes] = np.nan
23
+ return data, removed_indexes, True