sdg-core-lib 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. sdg_core_lib/NumericDataset.py +150 -0
  2. sdg_core_lib/__init__.py +0 -0
  3. sdg_core_lib/browser.py +73 -0
  4. sdg_core_lib/data_generator/__init__.py +0 -0
  5. sdg_core_lib/data_generator/model_factory.py +72 -0
  6. sdg_core_lib/data_generator/models/ModelInfo.py +42 -0
  7. sdg_core_lib/data_generator/models/TrainingInfo.py +40 -0
  8. sdg_core_lib/data_generator/models/UnspecializedModel.py +106 -0
  9. sdg_core_lib/data_generator/models/__init__.py +0 -0
  10. sdg_core_lib/data_generator/models/keras/KerasBaseVAE.py +172 -0
  11. sdg_core_lib/data_generator/models/keras/VAE.py +61 -0
  12. sdg_core_lib/data_generator/models/keras/__init__.py +0 -0
  13. sdg_core_lib/data_generator/models/keras/implementation/TabularVAE.py +96 -0
  14. sdg_core_lib/data_generator/models/keras/implementation/TimeSeriesVAE.py +156 -0
  15. sdg_core_lib/data_generator/models/keras/implementation/__init__.py +0 -0
  16. sdg_core_lib/evaluate/Metrics.py +48 -0
  17. sdg_core_lib/evaluate/TabularComparison.py +276 -0
  18. sdg_core_lib/evaluate/__init__.py +0 -0
  19. sdg_core_lib/job.py +56 -0
  20. sdg_core_lib/post_process/FunctionApplier.py +14 -0
  21. sdg_core_lib/post_process/__init__.py +0 -0
  22. sdg_core_lib/post_process/function_factory.py +41 -0
  23. sdg_core_lib/post_process/functions/FunctionInfo.py +25 -0
  24. sdg_core_lib/post_process/functions/FunctionResult.py +15 -0
  25. sdg_core_lib/post_process/functions/Parameter.py +33 -0
  26. sdg_core_lib/post_process/functions/UnspecializedFunction.py +42 -0
  27. sdg_core_lib/post_process/functions/__init__.py +0 -0
  28. sdg_core_lib/post_process/functions/distribution_evaluator/__init__.py +0 -0
  29. sdg_core_lib/post_process/functions/distribution_evaluator/implementation/NormalTester.py +65 -0
  30. sdg_core_lib/post_process/functions/distribution_evaluator/implementation/__init__.py +0 -0
  31. sdg_core_lib/post_process/functions/filter/IntervalThreshold.py +32 -0
  32. sdg_core_lib/post_process/functions/filter/MonoThreshold.py +28 -0
  33. sdg_core_lib/post_process/functions/filter/__init__.py +0 -0
  34. sdg_core_lib/post_process/functions/filter/implementation/InnerThreshold.py +43 -0
  35. sdg_core_lib/post_process/functions/filter/implementation/LowerThreshold.py +32 -0
  36. sdg_core_lib/post_process/functions/filter/implementation/OuterThreshold.py +42 -0
  37. sdg_core_lib/post_process/functions/filter/implementation/UpperThreshold.py +32 -0
  38. sdg_core_lib/post_process/functions/filter/implementation/__init__.py +0 -0
  39. sdg_core_lib/preprocess/__init__.py +0 -0
  40. sdg_core_lib/preprocess/scale.py +51 -0
  41. sdg_core_lib/test/__init__.py +0 -0
  42. sdg_core_lib/test/data_generator/__init__.py +0 -0
  43. sdg_core_lib/test/data_generator/models/__init__.py +0 -0
  44. sdg_core_lib/test/data_generator/models/keras/__init__.py +0 -0
  45. sdg_core_lib/test/data_generator/models/keras/implementation/__init__.py +0 -0
  46. sdg_core_lib/test/data_generator/models/keras/implementation/test_TabularVAE.py +120 -0
  47. sdg_core_lib/test/data_generator/models/keras/implementation/test_TimeSeriesVAE.py +110 -0
  48. sdg_core_lib/test/data_generator/models/keras/test_KerasBaseVAE.py +74 -0
  49. sdg_core_lib/test/data_generator/models/test_ModelInfo.py +27 -0
  50. sdg_core_lib/test/data_generator/models/test_TrainingInfo.py +30 -0
  51. sdg_core_lib/test/data_generator/models/test_UnspecializedModel.py +32 -0
  52. sdg_core_lib/test/data_generator/test_model_factory.py +52 -0
  53. sdg_core_lib/test/evaluate/__init__.py +0 -0
  54. sdg_core_lib/test/evaluate/test_Metrics.py +62 -0
  55. sdg_core_lib/test/evaluate/test_TabularComparisonEvaluator.py +75 -0
  56. sdg_core_lib/test/infer_test.json +168 -0
  57. sdg_core_lib/test/infer_test_nodata.json +77 -0
  58. sdg_core_lib/test/infer_test_nodata_wrong.json +11 -0
  59. sdg_core_lib/test/post_process/__init__.py +0 -0
  60. sdg_core_lib/test/post_process/functions/__init__.py +0 -0
  61. sdg_core_lib/test/post_process/functions/distribution_evaluator/__init__.py +0 -0
  62. sdg_core_lib/test/post_process/functions/distribution_evaluator/implementation/__init__.py +0 -0
  63. sdg_core_lib/test/post_process/functions/distribution_evaluator/implementation/test_NormalTester.py +55 -0
  64. sdg_core_lib/test/post_process/functions/filters/__init__.py +0 -0
  65. sdg_core_lib/test/post_process/functions/filters/implementation/__init__.py +0 -0
  66. sdg_core_lib/test/post_process/functions/filters/implementation/test_InnerThreshold.py +30 -0
  67. sdg_core_lib/test/pre_process/__init__.py +0 -0
  68. sdg_core_lib/test/pre_process/test_scaling.py +55 -0
  69. sdg_core_lib/test/test_browser.py +11 -0
  70. sdg_core_lib/test/test_dataset.py +149 -0
  71. sdg_core_lib/test/test_job.py +128 -0
  72. sdg_core_lib/test/train_test.json +166 -0
  73. sdg_core_lib/test/train_test_2.json +9 -0
  74. sdg_core_lib-0.1.0.dist-info/METADATA +9 -0
  75. sdg_core_lib-0.1.0.dist-info/RECORD +77 -0
  76. sdg_core_lib-0.1.0.dist-info/WHEEL +4 -0
  77. sdg_core_lib-0.1.0.dist-info/entry_points.txt +3 -0
sdg_core_lib/job.py ADDED
@@ -0,0 +1,56 @@
1
+ import copy
2
+ import pandas as pd
3
+
4
+ from sdg_core_lib.evaluate.TabularComparison import TabularComparisonEvaluator
5
+ from sdg_core_lib.NumericDataset import NumericDataset
6
+ from sdg_core_lib.data_generator.model_factory import model_factory
7
+ from sdg_core_lib.data_generator.models.UnspecializedModel import UnspecializedModel
8
+
9
+
10
def job(
    model_info: dict, dataset: list, n_rows: int, save_filepath: str, train: bool
) -> tuple[list[dict], dict, UnspecializedModel, NumericDataset]:
    """
    Run a Synthetic Data Generation job from dataset loading to evaluation.

    A NumericDataset is built from ``dataset`` or, when ``dataset`` is empty, from the
    ``training_data_info`` entry of ``model_info`` (an empty list if that key is missing).
    The model is created through ``model_factory``, optionally trained and saved, and then
    asked for ``n_rows`` synthetic rows. When the dataset holds real rows, the synthetic data
    is compared against them.

    :param model_info: a dictionary containing the model's information
    :param dataset: the input data handed to NumericDataset; may be empty
    :param n_rows: the number of rows to generate
    :param save_filepath: the path where the model is saved (only used when ``train`` is True)
    :param train: a boolean indicating if the model should be trained (and saved)
    :return: a tuple ``(results, report, model, data)``: the generated data as returned by
        ``parse_tabular_data_json``, the evaluation report (``{"available": False}`` when there
        is no real data to compare against), the model, and the NumericDataset built from the
        input
    """
    # Fall back to the data description stored with the model only when no data was supplied.
    source = dataset if len(dataset) != 0 else model_info.get("training_data_info", [])
    data = NumericDataset(dataset=source)

    model = model_factory(model_info, data.input_shape)
    if train:
        model.train(data=data)
        model.save(save_filepath)

    df_predict = pd.DataFrame(data=model.infer(n_rows).tolist(), columns=data.columns)

    if len(data.dataframe) > 0:
        report = TabularComparisonEvaluator(
            real_data=data.dataframe,
            synthetic_data=df_predict,
            numerical_columns=data.continuous_columns,
            categorical_columns=data.categorical_columns,
        ).compute()
    else:
        report = {"available": False}

    # Reuse the dataset's own serialization by swapping the dataframe on a copy,
    # leaving the caller-visible ``data`` untouched.
    generated = copy.deepcopy(data)
    generated.dataframe = df_predict

    return generated.parse_tabular_data_json(), report, model, data
@@ -0,0 +1,14 @@
1
+ import numpy as np
2
+
3
+ from sdg_core_lib.post_process.functions.UnspecializedFunction import (
4
+ UnspecializedFunction,
5
+ )
6
+
7
+
8
class FunctionApplier:
    """
    Holds a list of post-processing functions together with the data they are meant for.

    NOTE(review): ``apply`` is currently a placeholder and performs no work.
    """

    def __init__(self, functions: list[UnspecializedFunction], data: list[np.array]):
        self.data = data
        self.functions = functions

    def apply(self):
        """Placeholder: nothing is applied yet; returns None."""
        return None
File without changes
@@ -0,0 +1,41 @@
1
+ import importlib
2
+
3
+ from sdg_core_lib.post_process.functions.UnspecializedFunction import (
4
+ UnspecializedFunction,
5
+ )
6
+
7
+
8
def dynamic_import(class_name: str):
    """
    Resolve a dotted path such as ``package.module.ClassName`` to the object it names.

    :param class_name: a string with the full dotted name of the class to import
    :return: the attribute found in the imported module (the class itself)
    """
    # Everything before the last dot is the module, the remainder is the attribute.
    module_path, attribute = class_name.rsplit(".", 1)
    return getattr(importlib.import_module(module_path), attribute)
18
+
19
+
20
def function_factory(function_dict: dict) -> UnspecializedFunction:
    """
    Build a post-processing function instance from its dictionary description.

    The dictionary provides the dotted reference of the class to instantiate and the list of
    parameters that is passed to its constructor.

    :param function_dict: a dictionary with the keys ``function_reference`` and ``parameters``
    :return: an instance of the referenced class, created with ``parameters=<parameter list>``
    """
    reference, params = parse_function_info(function_dict)
    return dynamic_import(reference)(parameters=params)
33
+
34
+
35
def parse_function_info(function_dict: dict):
    """
    Extract the class reference and the parameter list from a function description.

    :param function_dict: a dictionary with the keys ``function_reference`` and ``parameters``
    :return: a tuple ``(function_reference, parameters)``
    :raises KeyError: if one of the two keys is missing
    """
    reference = function_dict["function_reference"]
    return reference, function_dict["parameters"]
@@ -0,0 +1,25 @@
1
+ from sdg_core_lib.post_process.functions.Parameter import Parameter
2
+
3
+
4
class FunctionInfo:
    """Descriptive metadata of a post-processing function and of its parameters."""

    def __init__(
        self,
        name: str,
        description: str,
        function_reference: str,
        parameters: list[Parameter],
    ):
        self.function_reference = function_reference
        self.description = description
        self.name = name
        # Attribute name is capitalized in the public API; kept as is.
        self.Parameters = parameters

    def get_function_info(self):
        """
        Serialize the description.

        :return: a dictionary with a ``function`` entry (name, description, function_reference)
            and a ``parameters`` entry holding each parameter's ``to_json()`` output
        """
        header = {
            "name": self.name,
            "description": self.description,
            "function_reference": self.function_reference,
        }
        serialized = []
        for param in self.Parameters:
            serialized.append(param.to_json())
        return {"function": header, "parameters": serialized}
@@ -0,0 +1,15 @@
1
+ import numpy as np
2
+
3
+
4
class FunctionResult:
    """Bundles the outcome of a post-processing function applied to some data."""

    def __init__(self, result: np.array, indexes: np.array, evaluation_result: bool):
        self.result = result
        self.indexes = indexes
        self.evaluation_result = evaluation_result

    def to_dict(self):
        """
        Expose the stored values as a plain dictionary.

        :return: a dictionary with the keys ``indexes``, ``results`` and ``evaluation_results``;
            the values are the objects given to the constructor, unconverted
        """
        payload = {}
        payload["indexes"] = self.indexes
        payload["results"] = self.result
        payload["evaluation_results"] = self.evaluation_result
        return payload
@@ -0,0 +1,33 @@
1
+ import ast
2
+ import builtins
3
+
4
+
5
class Parameter:
    """
    A named, typed parameter of a post-processing function.

    ``parameter_type`` is the name of a Python builtin type (for example ``"float"`` or
    ``"bool"``). ``value`` holds the already-converted Python value.
    """

    def __init__(self, name: str, value, parameter_type: str):
        self.name = name
        self.value = value
        self.parameter_type = parameter_type

    def to_json(self):
        """
        Serialize the parameter.

        :return: a dictionary with ``name``, ``value`` (stringified) and ``parameter_type``
        """
        return {
            "name": self.name,
            "value": str(self.value),
            "parameter_type": self.parameter_type,
        }

    @staticmethod
    def _convert_type(stringed_value: str, parameter_type: str):
        """
        Parse a stringified literal and check it against the declared builtin type.

        :param stringed_value: the value as a Python literal string (e.g. ``"1.5"``, ``"True"``)
        :param parameter_type: the name of the expected builtin type
        :return: the parsed value
        :raises ValueError: if ``parameter_type`` does not name a builtin type, or if the parsed
            value is not an instance of it
        """
        # literal_eval only evaluates literals, so arbitrary expressions are rejected.
        converted_value = ast.literal_eval(stringed_value)

        # Only real types are acceptable: names such as "len" or "eval" exist in builtins
        # but cannot be used with isinstance.
        target_type = getattr(builtins, parameter_type, None)
        if not isinstance(target_type, type):
            raise ValueError(f"Unknown parameter type: {parameter_type!r}")

        # An integer literal ("1") is a legitimate way to write a float value. bool is
        # excluded on purpose even though it subclasses int.
        if (
            target_type is float
            and isinstance(converted_value, int)
            and not isinstance(converted_value, bool)
        ):
            converted_value = float(converted_value)

        if not isinstance(converted_value, target_type):
            raise ValueError(
                f"Type inference went wrong: expected type {target_type} but got {type(converted_value)}"
            )
        return converted_value

    @classmethod
    def from_json(cls, json_data):
        """
        Build a Parameter from its serialized form.

        :param json_data: a dictionary with the keys ``name``, ``value`` and ``parameter_type``
        :return: a Parameter whose ``value`` has been converted to the declared type
        :raises ValueError: if the value does not match the declared type
        """
        converted_value = cls._convert_type(
            json_data["value"], json_data["parameter_type"]
        )
        return cls(json_data["name"], converted_value, json_data["parameter_type"])
@@ -0,0 +1,42 @@
1
+ import numpy as np
2
+ from abc import ABC, abstractmethod
3
+
4
+ from sdg_core_lib.post_process.functions.FunctionResult import FunctionResult
5
+ from sdg_core_lib.post_process.functions.Parameter import Parameter
6
+
7
+
8
class UnspecializedFunction(ABC):
    """
    Abstract base of every post-processing function.

    Subclasses provide parameter checking, a data transformation (``_compute``) and an
    evaluation (``_evaluate``); ``get_results`` combines the two into a dictionary.
    """

    def __init__(self, parameters: list[dict]):
        # Each dictionary is turned into a Parameter, converting its value on the way.
        self.parameters = list(map(Parameter.from_json, parameters))

    @abstractmethod
    def _check_parameters(self):
        """Read and validate the parameters needed by the concrete function."""
        raise NotImplementedError

    @abstractmethod
    def _compute(self, data: np.array) -> tuple[np.array, np.array]:
        """
        Apply the data transformation to a set of generated data.

        :param data: a numpy array of data from a single feature
        :return: transformed data and affected indexes
        """
        raise NotImplementedError

    @abstractmethod
    def _evaluate(self, data: np.array) -> bool:
        """
        Apply the evaluation to a set of generated data.

        :param data: a numpy array of data from a single feature
        :return: a single boolean telling if the data meets the evaluation criteria
        """
        raise NotImplementedError

    @classmethod
    def self_describe(cls):
        """Describe the function; concrete subclasses are expected to override this."""
        raise NotImplementedError

    def get_results(self, data: np.array) -> dict:
        """
        Run the transformation and the evaluation on ``data``.

        :param data: a numpy array of data from a single feature
        :return: the ``FunctionResult.to_dict()`` view of the transformed data, the affected
            indexes and the evaluation outcome
        """
        transformed, affected = self._compute(data)
        verdict = self._evaluate(data)
        return FunctionResult(transformed, affected, verdict).to_dict()
File without changes
@@ -0,0 +1,65 @@
1
+ import numpy as np
2
+ from scipy.stats import normaltest, ttest_1samp, kstest, norm
3
+
4
+ from sdg_core_lib.post_process.functions.FunctionInfo import FunctionInfo
5
+ from sdg_core_lib.post_process.functions.UnspecializedFunction import (
6
+ UnspecializedFunction,
7
+ )
8
+ from sdg_core_lib.post_process.functions.Parameter import Parameter
9
+
10
+
11
class NormalTester(UnspecializedFunction):
    """
    Checks whether data is compatible with a normal distribution of given mean and
    standard deviation. The data itself is returned unchanged.
    """

    def __init__(self, parameters: list[dict]):
        super().__init__(parameters)
        self.mean = None
        self.std = None
        self._check_parameters()

    def _check_parameters(self):
        """
        Read ``mean`` and ``standard_deviation`` from the parsed parameters.

        :raises KeyError: if one of the two parameters is missing
        """
        param_mapping = {param.name: param for param in self.parameters}
        self.mean = param_mapping["mean"].value
        self.std = param_mapping["standard_deviation"].value

    def _compute(self, data: np.array) -> tuple[np.array, np.array]:
        """
        Currently returns the same data.

        :param data: a numpy array of data from a single feature
        :return: the unchanged data and the positional indexes of all its elements
        """
        return data, np.array(range(len(data)))

    def _evaluate(self, data: np.array) -> bool:
        """
        Checks if data is normally distributed.
        Consider the null hypothesis that data is normally distributed.
        If the null hypothesis is rejected (p < 0.05), it means that data is not normally distributed.
        Evaluation is based on 3 tests:
        1. D'Agostino and Pearson's test (normality)
        2. Student's one-sample t-test (sample mean against the desired mean)
        3. Kolmogorov-Smirnov (against a normal with the desired mean and standard deviation)

        The smallest of the three p-values decides the outcome.

        :param data: a numpy array of data from a single feature
        :return: False if any null hypothesis is rejected (p <= 0.05), True otherwise
        """

        def cdf_function(x):
            return norm.cdf(x, loc=self.mean, scale=self.std)

        _, p_normal = normaltest(data)
        _, p_t = ttest_1samp(data, self.mean)
        _, p_k = kstest(data, cdf_function)
        p = min(p_normal, p_t, p_k)

        # scipy returns numpy scalars; cast so that the result is a real Python bool
        # (matching the annotation and remaining JSON-serializable).
        return bool(p > 0.05)

    @classmethod
    def self_describe(cls):
        """
        :return: the function description with its default parameters
            (``mean`` = 0.0, ``standard_deviation`` = 1.0)
        """
        return FunctionInfo(
            name=f"{cls.__qualname__}",
            function_reference=f"{cls.__module__}.{cls.__qualname__}",
            parameters=[
                Parameter("mean", 0.0, "float"),
                Parameter("standard_deviation", 1.0, "float"),
            ],
            description="Checks if data is normally distributed given a desired mean and standard deviation",
        ).get_function_info()
@@ -0,0 +1,32 @@
1
+ from sdg_core_lib.post_process.functions.UnspecializedFunction import (
2
+ UnspecializedFunction,
3
+ )
4
+
5
+ import numpy as np
6
+
7
+
8
class IntervalThreshold(UnspecializedFunction):
    """
    Base class of the filters defined by two bounds (``lower_bound``, ``upper_bound``) and
    one strictness flag per bound. Computation and evaluation are left to the subclasses.
    """

    def __init__(self, parameters: list[dict]):
        super().__init__(parameters)
        self.upper_bound = self.lower_bound = None
        self.upper_strict = self.lower_strict = None
        self._check_parameters()

    def _check_parameters(self):
        """
        Copy the four expected parameters onto the instance.

        :raises KeyError: if one of the expected parameters is missing
        """
        by_name = {param.name: param for param in self.parameters}
        for attribute in ("upper_bound", "lower_bound", "upper_strict", "lower_strict"):
            setattr(self, attribute, by_name[attribute].value)

    def _compute(self, data: np.array):
        """No-op here; concrete filters override it."""
        return None

    def _evaluate(self, data: np.array):
        """No-op here; concrete filters override it."""
        return None

    @classmethod
    def self_describe(cls):
        """Not describable at this level; concrete filters override it."""
        raise NotImplementedError
@@ -0,0 +1,28 @@
1
+ from sdg_core_lib.post_process.functions.UnspecializedFunction import (
2
+ UnspecializedFunction,
3
+ )
4
+
5
+ import numpy as np
6
+
7
+
8
class MonoThreshold(UnspecializedFunction):
    """
    Base class of the filters defined by a single threshold (``value``) and a ``strict``
    flag. Computation and evaluation are left to the subclasses.
    """

    def __init__(self, parameters: list[dict]):
        super().__init__(parameters)
        self.value = self.strict = None
        self._check_parameters()

    def _check_parameters(self):
        """
        Copy the two expected parameters onto the instance.

        :raises KeyError: if ``value`` or ``strict`` is missing
        """
        by_name = {param.name: param for param in self.parameters}
        for attribute in ("value", "strict"):
            setattr(self, attribute, by_name[attribute].value)

    def _compute(self, data: np.array):
        """No-op here; concrete filters override it."""
        return None

    def _evaluate(self, data: np.array):
        """No-op here; concrete filters override it."""
        return None

    @classmethod
    def self_describe(cls):
        """Not describable at this level; concrete filters override it."""
        raise NotImplementedError
File without changes
@@ -0,0 +1,43 @@
1
+ import numpy as np
2
+
3
+ from sdg_core_lib.post_process.functions.FunctionInfo import FunctionInfo
4
+ from sdg_core_lib.post_process.functions.Parameter import Parameter
5
+ from sdg_core_lib.post_process.functions.filter.IntervalThreshold import (
6
+ IntervalThreshold,
7
+ )
8
+
9
+
10
class InnerThreshold(IntervalThreshold):
    """Filter keeping the values that lie between ``lower_bound`` and ``upper_bound``."""

    def __init__(self, parameters: list[dict]):
        super().__init__(parameters)

    def _compute(self, data: np.array):
        """
        Select the values inside the interval.

        NOTE: a truthy ``*_strict`` flag selects the inclusive comparison (``>=`` / ``<=``),
        a falsy one the exclusive comparison (``>`` / ``<``).

        :param data: a numpy array of data from a single feature
        :return: the selected values and the boolean mask that selects them
        """
        if self.lower_strict:
            above_lower = np.greater_equal(data, self.lower_bound)
        else:
            above_lower = np.greater(data, self.lower_bound)

        if self.upper_strict:
            below_upper = np.less_equal(data, self.upper_bound)
        else:
            # The data operand was missing here, which made this branch raise a TypeError.
            below_upper = np.less(data, self.upper_bound)

        final_indexes = below_upper & above_lower
        return data[final_indexes], final_indexes

    def _evaluate(self, data: np.array):
        """A filter has nothing to evaluate: always True."""
        return True

    @classmethod
    def self_describe(cls):
        """
        :return: the function description with its default parameters
            (interval [0.0, 1.0], both strictness flags True)
        """
        return FunctionInfo(
            name=f"{cls.__qualname__}",
            function_reference=f"{cls.__module__}.{cls.__qualname__}",
            parameters=[
                Parameter("lower_bound", 0.0, "float"),
                Parameter("upper_bound", 1.0, "float"),
                Parameter("lower_strict", True, "bool"),
                Parameter("upper_strict", True, "bool"),
            ],
            description="Filters data between a given interval",
        ).get_function_info()
@@ -0,0 +1,32 @@
1
+ import numpy as np
2
+
3
+ from sdg_core_lib.post_process.functions.FunctionInfo import FunctionInfo
4
+ from sdg_core_lib.post_process.functions.Parameter import Parameter
5
+ from sdg_core_lib.post_process.functions.filter.MonoThreshold import MonoThreshold
6
+
7
+
8
class LowerThreshold(MonoThreshold):
    """Filter keeping the values above the threshold ``value``."""

    def __init__(self, parameters: list[dict]):
        super().__init__(parameters)

    def _compute(self, data: np.array):
        """
        Select the values above the threshold.

        A truthy ``strict`` flag uses ``>=``, a falsy one uses ``>``.

        :param data: a numpy array of data from a single feature
        :return: the selected values and the boolean mask that selects them
        """
        compare = np.greater_equal if self.strict else np.greater
        mask = compare(data, self.value)
        return data[mask], mask

    def _evaluate(self, data: np.array):
        """A filter has nothing to evaluate: always True."""
        return True

    @classmethod
    def self_describe(cls):
        """
        :return: the function description with its default parameters
            (``value`` = 0.0, ``strict`` = True)
        """
        defaults = [
            Parameter("value", 0.0, "float"),
            Parameter("strict", True, "bool"),
        ]
        info = FunctionInfo(
            name=f"{cls.__qualname__}",
            function_reference=f"{cls.__module__}.{cls.__qualname__}",
            parameters=defaults,
            description="Mono-threshold function: pick values greater than a lower threshold",
        )
        return info.get_function_info()
@@ -0,0 +1,42 @@
1
+ import numpy as np
2
+
3
+ from sdg_core_lib.post_process.functions.FunctionInfo import FunctionInfo
4
+ from sdg_core_lib.post_process.functions.Parameter import Parameter
5
+ from sdg_core_lib.post_process.functions.filter.IntervalThreshold import (
6
+ IntervalThreshold,
7
+ )
8
+
9
+
10
class OuterThreshold(IntervalThreshold):
    """Filter keeping the values that lie outside ``lower_bound`` .. ``upper_bound``."""

    def __init__(self, parameters: list[dict]):
        super().__init__(parameters)

    def _compute(self, data: np.array):
        """
        Select the values outside the interval.

        Each bound is governed by its own flag: ``upper_strict`` for ``upper_bound`` and
        ``lower_strict`` for ``lower_bound`` (the two flags used to be swapped). A truthy
        flag selects the inclusive comparison (``>=`` / ``<=``), a falsy one the exclusive
        comparison (``>`` / ``<``).

        :param data: a numpy array of data from a single feature
        :return: the selected values and the boolean mask that selects them
        """
        if self.upper_strict:
            above_upper = np.greater_equal(data, self.upper_bound)
        else:
            above_upper = np.greater(data, self.upper_bound)

        if self.lower_strict:
            below_lower = np.less_equal(data, self.lower_bound)
        else:
            below_lower = np.less(data, self.lower_bound)

        final_indexes = below_lower | above_upper
        return data[final_indexes], final_indexes

    def _evaluate(self, data: np.array):
        """A filter has nothing to evaluate: always True."""
        return True

    @classmethod
    def self_describe(cls):
        """
        :return: the function description with its default parameters
            (interval [0.0, 1.0], both strictness flags True)
        """
        return FunctionInfo(
            name=f"{cls.__qualname__}",
            function_reference=f"{cls.__module__}.{cls.__qualname__}",
            parameters=[
                Parameter("lower_bound", 0.0, "float"),
                Parameter("upper_bound", 1.0, "float"),
                Parameter("lower_strict", True, "bool"),
                Parameter("upper_strict", True, "bool"),
            ],
            description="Filters data outside a given interval",
        ).get_function_info()
@@ -0,0 +1,32 @@
1
+ import numpy as np
2
+
3
+ from sdg_core_lib.post_process.functions.FunctionInfo import FunctionInfo
4
+ from sdg_core_lib.post_process.functions.Parameter import Parameter
5
+ from sdg_core_lib.post_process.functions.filter.MonoThreshold import MonoThreshold
6
+
7
+
8
class UpperThreshold(MonoThreshold):
    """Filter keeping the values below the threshold ``value``."""

    def __init__(self, parameters: list[dict]):
        super().__init__(parameters)

    def _compute(self, data: np.array):
        """
        Select the values below the threshold.

        A truthy ``strict`` flag uses ``<=``, a falsy one uses ``<``.

        :param data: a numpy array of data from a single feature
        :return: the selected values and the boolean mask that selects them
        """
        compare = np.less_equal if self.strict else np.less
        mask = compare(data, self.value)
        return data[mask], mask

    def _evaluate(self, data: np.array):
        """A filter has nothing to evaluate: always True."""
        return True

    @classmethod
    def self_describe(cls):
        """
        :return: the function description with its default parameters
            (``value`` = 0.0, ``strict`` = True)
        """
        defaults = [
            Parameter("value", 0.0, "float"),
            Parameter("strict", True, "bool"),
        ]
        info = FunctionInfo(
            name=f"{cls.__qualname__}",
            function_reference=f"{cls.__module__}.{cls.__qualname__}",
            parameters=defaults,
            description="Mono-threshold function: picks value less than an upper threshold",
        )
        return info.get_function_info()
File without changes
@@ -0,0 +1,51 @@
1
+ from sklearn.preprocessing import StandardScaler, MinMaxScaler
2
+ import numpy as np
3
+
4
+
5
def standardize_simple_tabular_input(
    train_data: np.array, test_data: np.array = None
) -> tuple[StandardScaler, np.array, np.array]:
    """
    Standardize tabular data with a StandardScaler fitted on the training data.

    :param train_data: A numpy array of shape (batch, features) representing the training data.
    :param test_data: An optional numpy array of shape (batch, features) representing the test data.
    :return: A tuple containing the fitted StandardScaler, the standardized training data, and
        the standardized test data (None when no test data was given).
    """
    scaler = StandardScaler()
    scaled_train = scaler.fit_transform(train_data)
    # The test split reuses the statistics learned from the training split.
    scaled_test = None if test_data is None else scaler.transform(test_data)
    return scaler, scaled_train, scaled_test
24
+
25
+
26
def standardize_simple_tabular_time_series(
    train_data: np.array, test_data: np.array = None
) -> tuple[MinMaxScaler, np.array, np.array]:
    """
    Rescale time series data feature by feature with a MinMaxScaler fitted on the training data.

    Despite the function name, the scaling is min-max (not zero mean / unit variance). Every
    time step of every sample is treated as one observation of the features.

    :param train_data: A numpy array of shape (batch, features, steps) representing the training data.
    :param test_data: An optional numpy array of shape (batch, features, steps) representing the
        test data. Its batch size and number of steps may differ from the training data; the
        number of features must match.
    :return: A tuple containing the fitted MinMaxScaler, the scaled training data, and the scaled
        test data (None when no test data was given). Both arrays keep their input shape.
    """
    scaler = MinMaxScaler()

    batch, features, steps = train_data.shape

    # (batch, features, steps) -> (batch * steps, features): the scaler works column-wise.
    x_reshaped = train_data.transpose(0, 2, 1).reshape(-1, features)
    x_scaled = scaler.fit_transform(x_reshaped)
    train_data = x_scaled.reshape(batch, steps, features).transpose(0, 2, 1)

    if test_data is not None:
        # Restore the test array with its own dimensions, not the training ones.
        test_batch, _, test_steps = test_data.shape
        t_reshaped = test_data.transpose(0, 2, 1).reshape(-1, features)
        t_scaled = scaler.transform(t_reshaped)
        test_data = t_scaled.reshape(test_batch, test_steps, features).transpose(0, 2, 1)

    return scaler, train_data, test_data
File without changes
File without changes
File without changes