sdg-core-lib 0.1.9.dev2__tar.gz → 0.1.9.dev4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/PKG-INFO +1 -1
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/pyproject.toml +1 -1
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/job.py +3 -1
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/TabularFunctionApplier.py +46 -2
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/filter/implementation/InnerThreshold.py +4 -3
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/filter/implementation/LowerThreshold.py +3 -2
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/filter/implementation/OuterThreshold.py +4 -4
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/filter/implementation/UpperThreshold.py +3 -2
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/README.md +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/__init__.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/browser.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/commons.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/data_generator/__init__.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/data_generator/models/GANs/CTGANComponents.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/data_generator/models/GANs/__init__.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/data_generator/models/GANs/implementation/CTGAN.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/data_generator/models/GANs/implementation/__init__.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/data_generator/models/ModelInfo.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/data_generator/models/TrainingInfo.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/data_generator/models/UnspecializedModel.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/data_generator/models/VAEs/KerasBaseVAE.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/data_generator/models/VAEs/VAE.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/data_generator/models/VAEs/__init__.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/data_generator/models/VAEs/implementation/AutoTabularVAE.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/data_generator/models/VAEs/implementation/TabularVAE.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/data_generator/models/VAEs/implementation/TimeSeriesVAE.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/data_generator/models/VAEs/implementation/__init__.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/data_generator/models/__init__.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/dataset/__init__.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/dataset/columns.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/dataset/datasets.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/dataset/validation_schema.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/evaluate/__init__.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/evaluate/base_evaluator.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/evaluate/metrics.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/evaluate/tables.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/evaluate/time_series.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/mappings.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/__init__.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/function_factory.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/function_utils.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/Parameter.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/UnspecializedFunction.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/__init__.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/distribution_evaluator/__init__.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/distribution_evaluator/implementation/NormalTester.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/distribution_evaluator/implementation/__init__.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/filter/IntervalThreshold.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/filter/MonoThreshold.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/filter/__init__.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/filter/implementation/__init__.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/generation/__init__.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/generation/implementation/LinearFunction.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/generation/implementation/NormalDistributionSample.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/generation/implementation/QuadraticFunction.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/generation/implementation/SinusoidalFunction.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/generation/implementation/__init__.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/modification/__init__.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/modification/implementation/BurstNoiseAdder.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/modification/implementation/WhiteNoiseAdder.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/functions/modification/implementation/__init__.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/preprocess/__init__.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/preprocess/base_processor.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/preprocess/strategies/__init__.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/preprocess/strategies/base_strategy.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/preprocess/strategies/ctgan_strategy.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/preprocess/strategies/steps.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/preprocess/strategies/vae_strategy.py +0 -0
- {sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/preprocess/table_processor.py +0 -0
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from typing import Optional, Type
|
|
2
|
+
from loguru import logger
|
|
2
3
|
|
|
3
4
|
from sdg_core_lib.data_generator.models.UnspecializedModel import UnspecializedModel
|
|
4
5
|
from sdg_core_lib.dataset.datasets import Dataset
|
|
@@ -108,7 +109,8 @@ class Job:
|
|
|
108
109
|
)
|
|
109
110
|
try:
|
|
110
111
|
filtered_synthetic_data = function_generator.apply_all(synthetic_data)
|
|
111
|
-
except TypeError:
|
|
112
|
+
except (TypeError, ValueError) as e:
|
|
113
|
+
logger.error(f"Unable to apply functions to data: {e}")
|
|
112
114
|
filtered_synthetic_data = synthetic_data
|
|
113
115
|
|
|
114
116
|
report = {"available": "false"}
|
|
@@ -21,17 +21,36 @@ class TabularFunctionApplier:
|
|
|
21
21
|
|
|
22
22
|
def _initialize(self):
|
|
23
23
|
feature_function_mapping = {}
|
|
24
|
+
logger.info(
|
|
25
|
+
f"Initializing with function_feature_dict: {self.function_feature_dict}"
|
|
26
|
+
)
|
|
27
|
+
|
|
24
28
|
for item in self.function_feature_dict:
|
|
25
29
|
feature = item["feature"]
|
|
30
|
+
logger.debug(f"Processing function for feature: {feature}")
|
|
31
|
+
|
|
26
32
|
if feature not in feature_function_mapping.keys():
|
|
27
33
|
feature_function_mapping[feature] = []
|
|
28
34
|
|
|
29
|
-
|
|
35
|
+
try:
|
|
36
|
+
function_instance = function_factory(item)
|
|
37
|
+
logger.debug(
|
|
38
|
+
f"Successfully created function: {function_instance.__class__.__name__}"
|
|
39
|
+
)
|
|
40
|
+
feature_function_mapping[feature].append(function_instance)
|
|
41
|
+
except Exception as e:
|
|
42
|
+
logger.error(f"Failed to create function for feature {feature}: {e}")
|
|
43
|
+
logger.error(f"Function item: {item}")
|
|
44
|
+
raise
|
|
30
45
|
|
|
31
46
|
for feature, functions in feature_function_mapping.items():
|
|
32
47
|
functions.sort(key=lambda x: x.priority.value, reverse=True)
|
|
48
|
+
logger.debug(f"Sorted {len(functions)} functions for feature {feature}")
|
|
33
49
|
|
|
34
50
|
self.function_feature_mapping = feature_function_mapping
|
|
51
|
+
logger.info(
|
|
52
|
+
f"Final function mapping: {list(self.function_feature_mapping.keys())}"
|
|
53
|
+
)
|
|
35
54
|
|
|
36
55
|
def apply_all(self, dataset: Optional[Dataset] = None) -> Dataset:
|
|
37
56
|
"""
|
|
@@ -141,14 +160,32 @@ class TabularFunctionApplier:
|
|
|
141
160
|
data_array = []
|
|
142
161
|
unmapped_features = []
|
|
143
162
|
|
|
163
|
+
logger.info(
|
|
164
|
+
f"Available features in dataset: {[f['column_name'] for f in json_structure]}"
|
|
165
|
+
)
|
|
166
|
+
logger.info(f"Function mapping: {list(self.function_feature_mapping.keys())}")
|
|
167
|
+
|
|
144
168
|
for feature in json_structure:
|
|
145
169
|
feature_name = feature["column_name"]
|
|
170
|
+
logger.debug(f"Processing feature: {feature_name}")
|
|
146
171
|
|
|
147
172
|
if feature_name in self.function_feature_mapping:
|
|
173
|
+
logger.info(f"Found functions for feature: {feature_name}")
|
|
148
174
|
functions = self.function_feature_mapping[feature_name]
|
|
149
|
-
|
|
175
|
+
logger.debug(
|
|
176
|
+
f"Functions to apply: {[f.__class__.__name__ for f in functions]}"
|
|
177
|
+
)
|
|
178
|
+
|
|
179
|
+
try:
|
|
180
|
+
self._validate_function_sequence(functions, from_scratch=False)
|
|
181
|
+
except Exception as e:
|
|
182
|
+
logger.error(f"Validation failed for {feature_name}: {e}")
|
|
183
|
+
raise
|
|
150
184
|
|
|
151
185
|
feature_data = np.array(feature["column_data"])
|
|
186
|
+
logger.debug(
|
|
187
|
+
f"Original data shape: {feature_data.shape}, dtype: {feature_data.dtype}"
|
|
188
|
+
)
|
|
152
189
|
original_shape = feature_data.shape
|
|
153
190
|
|
|
154
191
|
for function in functions:
|
|
@@ -159,9 +196,15 @@ class TabularFunctionApplier:
|
|
|
159
196
|
continue
|
|
160
197
|
|
|
161
198
|
try:
|
|
199
|
+
logger.debug(
|
|
200
|
+
f"Applying {function.__class__.__name__} to {feature_name}"
|
|
201
|
+
)
|
|
162
202
|
feature_data, indexes, success = function.apply(
|
|
163
203
|
n_rows=self.n_rows, data=feature_data
|
|
164
204
|
)
|
|
205
|
+
logger.debug(
|
|
206
|
+
f"Function result: success={success}, indexes_sum={indexes.sum()}"
|
|
207
|
+
)
|
|
165
208
|
if not success:
|
|
166
209
|
logger.warning(
|
|
167
210
|
f"Function {function.__class__.__name__} failed to apply successfully"
|
|
@@ -182,6 +225,7 @@ class TabularFunctionApplier:
|
|
|
182
225
|
modified_features.add(feature_name)
|
|
183
226
|
else:
|
|
184
227
|
# Preserve unmapped features
|
|
228
|
+
logger.debug(f"No function mapping for feature: {feature_name}")
|
|
185
229
|
data_array.append(np.array(feature["column_data"]))
|
|
186
230
|
unmapped_features.append(feature_name)
|
|
187
231
|
|
|
@@ -7,7 +7,7 @@ from sdg_core_lib.post_process.functions.filter.IntervalThreshold import (
|
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
class InnerThreshold(IntervalThreshold):
|
|
10
|
-
description = "
|
|
10
|
+
description = "Keeps data in a given interval"
|
|
11
11
|
|
|
12
12
|
def __init__(self, parameters: list[Parameter]):
|
|
13
13
|
super().__init__(parameters)
|
|
@@ -26,5 +26,6 @@ class InnerThreshold(IntervalThreshold):
|
|
|
26
26
|
lower_indexes = np.less(data, self.upper_bound)
|
|
27
27
|
|
|
28
28
|
final_indexes = lower_indexes & upper_indexes
|
|
29
|
-
|
|
30
|
-
|
|
29
|
+
removed_indexes = ~final_indexes
|
|
30
|
+
data[removed_indexes] = np.nan
|
|
31
|
+
return data, removed_indexes, True
|
|
@@ -7,7 +7,7 @@ from sdg_core_lib.post_process.functions.filter.IntervalThreshold import (
|
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
class OuterThreshold(IntervalThreshold):
|
|
10
|
-
description = "
|
|
10
|
+
description = "Keeps data outside a given interval"
|
|
11
11
|
|
|
12
12
|
def __init__(self, parameters: list[Parameter]):
|
|
13
13
|
super().__init__(parameters)
|
|
@@ -25,6 +25,6 @@ class OuterThreshold(IntervalThreshold):
|
|
|
25
25
|
else:
|
|
26
26
|
lower_indexes = np.less(data, self.lower_bound)
|
|
27
27
|
final_indexes = lower_indexes | upper_indexes
|
|
28
|
-
|
|
29
|
-
data[
|
|
30
|
-
return data,
|
|
28
|
+
removed_indexes = ~final_indexes
|
|
29
|
+
data[removed_indexes] = np.nan
|
|
30
|
+
return data, removed_indexes, True
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/data_generator/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/dataset/validation_schema.py
RENAMED
|
File without changes
|
|
File without changes
|
{sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/evaluate/base_evaluator.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/evaluate/time_series.py
RENAMED
|
File without changes
|
|
File without changes
|
{sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/post_process/function_utils.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/preprocess/base_processor.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/preprocess/strategies/steps.py
RENAMED
|
File without changes
|
|
File without changes
|
{sdg_core_lib-0.1.9.dev2 → sdg_core_lib-0.1.9.dev4}/src/sdg_core_lib/preprocess/table_processor.py
RENAMED
|
File without changes
|