sdg-core-lib 0.1.6.dev4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. sdg_core_lib-0.1.6.dev4/PKG-INFO +20 -0
  2. sdg_core_lib-0.1.6.dev4/README.md +0 -0
  3. sdg_core_lib-0.1.6.dev4/pyproject.toml +51 -0
  4. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/__init__.py +0 -0
  5. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/browser.py +73 -0
  6. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/config.py +9 -0
  7. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/data_generator/__init__.py +0 -0
  8. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/data_generator/model_factory.py +72 -0
  9. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/data_generator/models/ModelInfo.py +42 -0
  10. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/data_generator/models/TrainingInfo.py +40 -0
  11. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/data_generator/models/UnspecializedModel.py +88 -0
  12. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/data_generator/models/__init__.py +0 -0
  13. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/data_generator/models/keras/KerasBaseVAE.py +154 -0
  14. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/data_generator/models/keras/VAE.py +78 -0
  15. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/data_generator/models/keras/__init__.py +0 -0
  16. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/data_generator/models/keras/implementation/AutoTabularVAE.py +165 -0
  17. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/data_generator/models/keras/implementation/TabularVAE.py +84 -0
  18. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/data_generator/models/keras/implementation/TimeSeriesVAE.py +127 -0
  19. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/data_generator/models/keras/implementation/__init__.py +0 -0
  20. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/dataset/__init__.py +0 -0
  21. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/dataset/columns.py +108 -0
  22. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/dataset/datasets.py +328 -0
  23. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/dataset/processor.py +116 -0
  24. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/dataset/steps.py +137 -0
  25. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/evaluate/__init__.py +0 -0
  26. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/evaluate/metrics.py +54 -0
  27. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/evaluate/tables.py +334 -0
  28. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/evaluate/time_series.py +83 -0
  29. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/job.py +94 -0
  30. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/post_process/FunctionApplier.py +6 -0
  31. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/post_process/__init__.py +0 -0
  32. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/post_process/function_factory.py +41 -0
  33. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/post_process/function_utils.py +9 -0
  34. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/post_process/functions/Parameter.py +36 -0
  35. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/post_process/functions/UnspecializedFunction.py +52 -0
  36. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/post_process/functions/__init__.py +0 -0
  37. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/post_process/functions/distribution_evaluator/__init__.py +0 -0
  38. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/post_process/functions/distribution_evaluator/implementation/NormalTester.py +60 -0
  39. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/post_process/functions/distribution_evaluator/implementation/__init__.py +0 -0
  40. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/post_process/functions/filter/IntervalThreshold.py +37 -0
  41. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/post_process/functions/filter/MonoThreshold.py +31 -0
  42. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/post_process/functions/filter/__init__.py +0 -0
  43. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/post_process/functions/filter/implementation/InnerThreshold.py +27 -0
  44. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/post_process/functions/filter/implementation/LowerThreshold.py +18 -0
  45. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/post_process/functions/filter/implementation/OuterThreshold.py +28 -0
  46. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/post_process/functions/filter/implementation/UpperThreshold.py +20 -0
  47. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/post_process/functions/filter/implementation/__init__.py +0 -0
  48. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/post_process/functions/generation/__init__.py +0 -0
  49. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/post_process/functions/generation/implementation/LinearFunction.py +50 -0
  50. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/post_process/functions/generation/implementation/NormalDistributionSample.py +46 -0
  51. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/post_process/functions/generation/implementation/QuadraticFunction.py +52 -0
  52. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/post_process/functions/generation/implementation/SinusoidalFunction.py +56 -0
  53. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/post_process/functions/generation/implementation/__init__.py +0 -0
  54. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/post_process/functions/modification/__init__.py +0 -0
  55. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/post_process/functions/modification/implementation/BurstNoiseAdder.py +75 -0
  56. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/post_process/functions/modification/implementation/WhiteNoiseAdder.py +37 -0
  57. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/post_process/functions/modification/implementation/__init__.py +0 -0
  58. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/__init__.py +0 -0
  59. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/data_generator/__init__.py +0 -0
  60. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/data_generator/models/__init__.py +0 -0
  61. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/data_generator/models/keras/__init__.py +0 -0
  62. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/data_generator/models/keras/implementation/__init__.py +0 -0
  63. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/data_generator/models/keras/implementation/test_TabularVAE.py +109 -0
  64. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/data_generator/models/keras/implementation/test_TimeSeriesVAE.py +106 -0
  65. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/data_generator/models/keras/test_KerasBaseVAE.py +74 -0
  66. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/data_generator/models/test_ModelInfo.py +27 -0
  67. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/data_generator/models/test_TrainingInfo.py +30 -0
  68. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/data_generator/models/test_UnspecializedModel.py +32 -0
  69. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/data_generator/test_model_factory.py +52 -0
  70. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/dataset/__init__.py +0 -0
  71. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/dataset/correct_dataset.json +26 -0
  72. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/dataset/correct_skeleton.json +30 -0
  73. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/dataset/industrial_dataset.json +1 -0
  74. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/dataset/test_processor.py +156 -0
  75. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/dataset/test_steps.py +166 -0
  76. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/dataset/test_table.py +321 -0
  77. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/dataset/test_timeseries.py +176 -0
  78. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/evaluate/__init__.py +0 -0
  79. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/evaluate/test_Metrics.py +62 -0
  80. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/evaluate/test_evaluate_table.py +214 -0
  81. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/post_process/__init__.py +0 -0
  82. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/post_process/functions/__init__.py +0 -0
  83. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/post_process/functions/distribution_evaluator/__init__.py +0 -0
  84. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/post_process/functions/distribution_evaluator/implementation/__init__.py +0 -0
  85. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/post_process/functions/distribution_evaluator/implementation/test_NormalTester.py +38 -0
  86. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/post_process/functions/filter/__init__.py +0 -0
  87. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/post_process/functions/filter/implementation/__init__.py +0 -0
  88. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/post_process/functions/filter/implementation/test_InnerThreshold.py +30 -0
  89. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/post_process/functions/filter/implementation/test_LowerThreshold.py +81 -0
  90. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/post_process/functions/filter/implementation/test_OuterThreshold.py +103 -0
  91. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/post_process/functions/filter/implementation/test_UpperThreshold.py +81 -0
  92. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/post_process/functions/generator_functions/__init__.py +0 -0
  93. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/post_process/functions/generator_functions/implementation/__init__.py +0 -0
  94. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/post_process/functions/generator_functions/implementation/test_LinearFunction.py +68 -0
  95. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/post_process/functions/generator_functions/implementation/test_NormalDistributionSample.py +67 -0
  96. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/post_process/functions/generator_functions/implementation/test_QuadraticFunction.py +75 -0
  97. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/post_process/functions/generator_functions/implementation/test_SinusoidalFunction.py +84 -0
  98. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/post_process/functions/modification/__init__.py +0 -0
  99. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/post_process/functions/modification/implementation/__init__.py +0 -0
  100. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/post_process/functions/modification/implementation/test_BurstNoiseAdder.py +83 -0
  101. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/post_process/functions/modification/implementation/test_WhiteNoiseAdder.py +95 -0
  102. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/post_process/train_test_automodel.json +168 -0
  103. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/test_browser.py +12 -0
  104. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/test_files/infer_test.json +170 -0
  105. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/test_files/infer_test_nodata.json +103 -0
  106. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/test_files/infer_test_nodata_wrong.json +11 -0
  107. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/test_files/train_industry.json +9 -0
  108. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/test_files/train_test.json +168 -0
  109. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/test_files/train_test_2.json +51047 -0
  110. sdg_core_lib-0.1.6.dev4/src/sdg_core_lib/test/test_job.py +137 -0
@@ -0,0 +1,20 @@
1
+ Metadata-Version: 2.3
2
+ Name: sdg-core-lib
3
+ Version: 0.1.6.dev4
4
+ Summary: Add your description here
5
+ Author: emiliocimino
6
+ Author-email: emiliocimino <emilio.cimino@outlook.it>
7
+ Requires-Dist: numpy==2.0.2
8
+ Requires-Dist: pandas==2.2.3
9
+ Requires-Dist: seaborn==0.13.2
10
+ Requires-Dist: scikit-learn==1.5.2
11
+ Requires-Dist: keras==3.6.0
12
+ Requires-Dist: tensorflow==2.18.0
13
+ Requires-Dist: loguru==0.7.3
14
+ Requires-Dist: skops==0.13.0
15
+ Requires-Dist: statsmodels==0.14.5
16
+ Requires-Dist: tslearn==0.7.0
17
+ Requires-Dist: keras-tuner==1.4.8
18
+ Requires-Python: >=3.12
19
+ Description-Content-Type: text/markdown
20
+
File without changes
@@ -0,0 +1,51 @@
1
+ [project]
2
+ name = "sdg-core-lib"
3
+ version = "0.1.6.dev4"
4
+ description = "Add your description here"
5
+ readme = "README.md"
6
+ authors = [
7
+ { name = "emiliocimino", email = "emilio.cimino@outlook.it" }
8
+ ]
9
+
10
+ requires-python = ">=3.12"
11
+
12
+ dependencies = [
13
+ "numpy==2.0.2",
14
+ "pandas==2.2.3",
15
+ "seaborn==0.13.2",
16
+ "scikit-learn==1.5.2",
17
+ "keras==3.6.0",
18
+ "tensorflow==2.18.0",
19
+ "loguru==0.7.3",
20
+ "skops==0.13.0",
21
+ "statsmodels==0.14.5",
22
+ "tslearn==0.7.0",
23
+ "keras-tuner==1.4.8",
24
+ ]
25
+
26
+ [dependency-groups]
27
+ dev = [
28
+ "numpy==2.0.2",
29
+ "pandas==2.2.3",
30
+ "seaborn==0.13.2",
31
+ "scikit-learn==1.5.2",
32
+ "keras==3.6.0",
33
+ "tensorflow==2.18.0",
34
+ "loguru==0.7.3",
35
+ "skops==0.13.0",
36
+ "statsmodels==0.14.5",
37
+ "tslearn==0.7.0",
38
+ "keras-tuner==1.4.8"
39
+ ]
40
+
41
+ test = [
42
+ {include-group = "dev"},
43
+ "pytest"
44
+ ]
45
+
46
+ [project.scripts]
47
+ sdg-core-lib = "sdg_core_lib:main"
48
+
49
+ [build-system]
50
+ requires = ["uv_build>=0.8.22,<0.9.0"]
51
+ build-backend = "uv_build"
File without changes
@@ -0,0 +1,73 @@
1
+ import os
2
+ from pathlib import Path
3
+ import importlib
4
+ from typing import Generator
5
+
6
+
7
def find_implementations(
    root_path: str, implementation_folder: str = "implementation"
) -> list[str]:
    """
    Collect the dotted module paths of every implementation module below a root folder.

    Every directory named ``implementation_folder`` found (recursively) under ``root_path``
    is inspected, and each ``*.py`` file located directly inside it, except ``__init__.py``,
    is reported.

    :param root_path: root path in which to explore
    :param implementation_folder: folder name where implemented modules exist
    :return: sorted list of module paths, relative to ``root_path``, in py-like dot-notation
    """
    # Resolve first so that relative_to() below always compares absolute paths.
    root_dir = Path(root_path).resolve()
    module_paths = []

    for impl_dir in root_dir.rglob(implementation_folder):
        # rglob matches by name only; a plain file with the same name is not a package folder.
        if not impl_dir.is_dir():
            continue
        for file in impl_dir.glob("*.py"):
            if file.name == "__init__.py":
                continue
            rel_path = file.relative_to(root_dir).with_suffix("")  # drop ".py"
            module_paths.append(".".join(rel_path.parts))

    # Directory traversal order is filesystem dependent; sort for a reproducible result.
    module_paths.sort()
    return module_paths
34
+
35
+
36
def browse(path: str, package: str) -> Generator[dict | None, None, None]:
    """
    Lazily describe every implementation module found under ``path``.

    Module paths are discovered with find_implementations, prefixed with ``package`` and
    imported one at a time. Each module is expected to expose a class named after the module
    itself; the result of that class's ``self_describe()`` is yielded.

    :param path: root folder to scan for implementation modules
    :param package: dotted package prefix, concatenated verbatim in front of each discovered
        module path
    :return: generator yielding one description per module, or None for a module that fails
        to import
    """
    for relative_module in find_implementations(path):
        qualified_name = f"{package}{relative_module}"
        try:
            loaded_module = importlib.import_module(qualified_name)
        except ImportError:
            yield None
            continue
        # Convention: the main class carries the same name as its module file.
        implementation_class = getattr(loaded_module, qualified_name.split(".")[-1])
        yield implementation_class.self_describe()
58
+
59
+
60
def browse_functions():
    """
    Describe every post-processing function implementation shipped next to this module.

    :return: the generator produced by browse for the ``post_process/functions/`` folder
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    functions_root = os.path.join(module_dir, "post_process/functions/")
    return browse(functions_root, "sdg_core_lib.post_process.functions.")
66
+
67
+
68
def browse_algorithms():
    """
    Describe every data-generation model implementation shipped next to this module.

    :return: the generator produced by browse for the ``data_generator/models/`` folder
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    models_root = os.path.join(module_dir, "data_generator/models/")
    return browse(models_root, "sdg_core_lib.data_generator.models.")
@@ -0,0 +1,9 @@
1
+ import os
2
+
3
+
4
def get_hyperparameters() -> dict:
    """
    Read the training hyperparameters from the process environment.

    :return: dict with the keys "epochs", "learning_rate" and "batch_size"; each value is the
        raw string of the EPOCHS, LEARNING_RATE or BATCH_SIZE environment variable, or None
        when that variable is not set
    """
    env_names = (
        ("epochs", "EPOCHS"),
        ("learning_rate", "LEARNING_RATE"),
        ("batch_size", "BATCH_SIZE"),
    )
    return {key: os.environ.get(env_name) for key, env_name in env_names}
@@ -0,0 +1,72 @@
1
+ import importlib
2
+
3
+ from sdg_core_lib.data_generator.models.UnspecializedModel import UnspecializedModel
4
+
5
+
6
def dynamic_import(class_name: str):
    """
    Dynamically imports a class given its fully qualified name.

    :param class_name: a string with the full name of the class to import, in the form
        "package.module.ClassName"
    :return: the class itself
    :raises ValueError: if ``class_name`` has no module part (no dot, or a leading dot only)
    :raises ImportError: if the module part cannot be imported
    :raises AttributeError: if the module does not define the requested name
    """
    module_name, _, attribute_name = class_name.rpartition(".")
    if not module_name:
        # Without this guard an undotted name fails with an opaque unpacking/empty-name error.
        raise ValueError(
            f"Expected a fully qualified 'module.ClassName' path, got {class_name!r}"
        )
    module = importlib.import_module(module_name)
    return getattr(module, attribute_name)
16
+
17
+
18
def model_factory(model_dict: dict, input_shape: str = None) -> UnspecializedModel:
    """
    Generic model factory: builds a model instance out of a descriptive dictionary.

    The input shape may be passed explicitly (e.g. taken from the request data) or,
    alternatively, be present in the model dictionary. The dictionary value is used only when
    ``input_shape`` is None.

    :param model_dict: A dictionary containing model information, structured as follows:
    {
        "image" -> contains the possible path where to find the model image. If not none, model will be loaded from there
        "metadata" -> a dictionary itself, containing miscellaneous information
        "algorithm_name" -> includes the model class module to _load
        "model_name" -> the model name, used to identify the model itself
        "input_shape" [optional] -> contains a stringed tuple that identifies the input layer shape
    }
    :param input_shape: optional stringed input shape overriding the one in ``model_dict``
    :return: An instance of the class named by "algorithm_name"
    """
    model_file, metadata, model_type, model_name, dict_input_shape = parse_model_info(
        model_dict
    )
    model_class = dynamic_import(model_type)
    return model_class(
        metadata=metadata,
        model_name=model_name,
        input_shape=dict_input_shape if input_shape is None else input_shape,
        load_path=model_file,
    )
50
+
51
+
52
def parse_model_info(model_dict: dict):
    """
    Split a model dictionary into the individual values the factory needs.

    :param model_dict: A dictionary containing model information, structured as follows:
    {
        "image" -> contains the possible path where to find the model image. If not none, model will be loaded from there
        "metadata" -> a dictionary itself, containing miscellaneous information
        "algorithm_name" -> includes the model class module to _load
        "model_name" -> the model name, used to identify the model itself
        "input_shape" [optional] -> contains a stringed tuple that identifies the input layer shape
    }
    :return: model_file, metadata, model_type, model_name, input_shape. Missing keys fall back
        to None, except "metadata" (empty dict) and "input_shape" (empty string)
    """
    lookup = model_dict.get
    return (
        lookup("image", None),
        lookup("metadata", {}),
        lookup("algorithm_name"),
        lookup("model_name"),
        lookup("input_shape", ""),
    )
@@ -0,0 +1,42 @@
1
class AllowedData:
    """Describes one kind of data a model accepts: a dtype name and a categorical flag."""

    def __init__(self, dtype: str, is_categorical: bool):
        self.dtype, self.is_categorical = dtype, is_categorical

    def to_json(self):
        """
        Return the JSON-ready dictionary form of this entry.

        :return: dict with the keys "type" and "is_categorical"
        """
        return dict(type=self.dtype, is_categorical=self.is_categorical)
8
+
9
+
10
class ModelInfo:
    """Static description of a model: its name, default loss, description and accepted data."""

    def __init__(
        self,
        name: str,
        default_loss_function: str,
        description: str,
        allowed_data: list[AllowedData],
    ):
        self.name = name
        self.default_loss_function = default_loss_function
        self.description = description
        self.allowed_data = allowed_data

    def get_model_info(self):
        """
        Build the dictionary form of the model information.

        The "algorithm" entry holds the name, default loss function and description; the
        "datatypes" entry holds the ``to_json()`` form of every allowed data item, in order.

        :return: dict containing the model's information
        """
        algorithm = {
            "name": self.name,
            "default_loss_function": self.default_loss_function,
            "description": self.description,
        }
        datatypes = []
        for allowed in self.allowed_data:
            datatypes.append(allowed.to_json())
        return {"algorithm": algorithm, "datatypes": datatypes}
@@ -0,0 +1,40 @@
1
+ import json
2
+
3
+
4
class TrainingInfo:
    """Summary of one training run: loss function name, sample counts and final losses."""

    def __init__(
        self,
        loss_fn: str,
        train_samples: int,
        train_loss: float,
        validation_samples: int = None,
        validation_loss: float = None,
    ):
        self._loss_fn = loss_fn
        self._train_samples = train_samples
        self._train_loss = train_loss
        self._validation_samples = validation_samples
        self._validation_loss = validation_loss

    def to_dict(self) -> dict:
        """
        Convert the TrainingInfo to a dictionary

        :return: dict: A dictionary with the training info, keyed by "loss_function",
            "train_samples", "train_loss", "val_samples" and "val_loss"
        """
        keys = ("loss_function", "train_samples", "train_loss", "val_samples", "val_loss")
        values = (
            self._loss_fn,
            self._train_samples,
            self._train_loss,
            self._validation_samples,
            self._validation_loss,
        )
        return dict(zip(keys, values))

    def to_json(self) -> str:
        """
        Convert the TrainingInfo to a JSON string

        :return: str: the JSON serialization of to_dict()
        """
        payload = self.to_dict()
        return json.dumps(payload)
@@ -0,0 +1,88 @@
1
+ import numpy as np
2
+ from abc import ABC, abstractmethod
3
+
4
+
5
+ class UnspecializedModel(ABC):
6
+ """
7
+ Abstract class for all models. Implements common functionalities and defines abstract methods that must be implemented
8
+ by all subclasses.
9
+
10
+ Attributes:
11
+ _metadata (dict): A dictionary containing miscellaneous information about the model.
12
+ model_name (str): The model name, used to identify the model itself.
13
+ input_shape (tuple): A tuple containing the input shape of the model.
14
+ _load_path (str): A string containing the path where to load the model from.
15
+ _model (keras.Model): The model instance.
16
+ _scaler (Scaler): The scaler instance.
17
+ training_info (TrainingInfo): The training info instance.
18
+ """
19
+
20
+ def __init__(
21
+ self,
22
+ metadata: dict,
23
+ model_name: str,
24
+ input_shape: str = None,
25
+ load_path: str = None,
26
+ ):
27
+ self._metadata = metadata
28
+ self.model_name = model_name
29
+ self.input_shape = self._parse_stringed_input_shape(input_shape)
30
+ self._load_path = load_path
31
+ self._model = None # Placeholder for the model instance
32
+ self.training_info = None # Placeholder for training info
33
+ self._model_misc = None # Placeholder for model miscellaneous info
34
+
35
+ @abstractmethod
36
+ def _build(self, input_shape: str):
37
+ raise NotImplementedError
38
+
39
+ @abstractmethod
40
+ def _load(self, model_filepath: str):
41
+ """Load trained_models weights."""
42
+ raise NotImplementedError
43
+
44
+ @abstractmethod
45
+ def _instantiate(self):
46
+ raise NotImplementedError
47
+
48
+ @abstractmethod
49
+ def train(self, data: np.ndarray):
50
+ """Train the model."""
51
+ raise NotImplementedError
52
+
53
+ @abstractmethod
54
+ def fine_tune(self, data: np.ndarray, **kwargs):
55
+ """Fine-tune the model."""
56
+ raise NotImplementedError
57
+
58
+ @abstractmethod
59
+ def infer(self, n_rows: int, **kwargs):
60
+ """Run inference."""
61
+ raise NotImplementedError
62
+
63
+ @abstractmethod
64
+ def save(self, folder_path):
65
+ """Save Model."""
66
+ raise NotImplementedError
67
+
68
+ @abstractmethod
69
+ def set_hyperparameters(self, **kwargs):
70
+ """Set Hyperparameters"""
71
+ raise NotImplementedError
72
+
73
+ @classmethod
74
+ def self_describe(cls):
75
+ raise NotImplementedError
76
+
77
+ @staticmethod
78
+ def _parse_stringed_input_shape(stringed_shape: str) -> tuple[int, ...]:
79
+ """
80
+ Parses a stringed list of numbers into a tuple
81
+
82
+ :param stringed_shape: a stringed list of number in format "[x,y,z]"
83
+ :return: a tuple of numbers, in format (x, y, z)
84
+ """
85
+ brackets = ["(", ")", "[", "]", "{", "}"]
86
+ for b in brackets:
87
+ stringed_shape = stringed_shape.replace(b, "")
88
+ return tuple([int(n) for n in stringed_shape.split(",") if n != ""])
@@ -0,0 +1,154 @@
1
+ from abc import ABC
2
+
3
+ import numpy as np
4
+ import os
5
+ import keras
6
+ from keras import saving
7
+
8
+ from sdg_core_lib.data_generator.models.UnspecializedModel import UnspecializedModel
9
+ from sdg_core_lib.data_generator.models.TrainingInfo import TrainingInfo
10
+
11
# NOTE(review): `keras` is already imported above by the time this assignment runs, so it is
# presumably too late to influence which backend this process uses -- TODO confirm against the
# Keras backend-configuration docs and, if a forced backend is intended, set it before the import.
os.environ["KERAS_BACKEND"] = "tensorflow"
12
+
13
+
14
class KerasBaseVAE(UnspecializedModel, ABC):
    """
    Abstract base class for keras VAE models.

    This class provides a common interface for keras VAE models and handles the saving and loading of models.
    Subclasses are expected to provide _build, _load_model, fine_tune and self_describe, which all raise
    NotImplementedError here.
    """

    def __init__(
        self,
        metadata: dict,
        model_name: str,
        input_shape: str,
        load_path: str | None,
        latent_dim: int,
    ):
        super().__init__(metadata, model_name, input_shape, load_path)
        self._latent_dim = latent_dim
        # Hyperparameters stay unset until set_hyperparameters() is called or train() receives them.
        self._beta = None
        self._learning_rate = None
        self._batch_size = None
        self._epochs = None

    @staticmethod
    def _load_files(folder_path: str):
        """
        Loads the saved VAE model files from the given folder path.

        Reads "encoder.keras" and "decoder.keras" from the folder with keras' load_model.

        :param folder_path: path to the folder containing the saved model files
        :return: encoder and decoder models
        :raises: whatever keras raises when a file is missing or unreadable -- TODO confirm the exact exception type
        """
        encoder_filename = os.path.join(folder_path, "encoder.keras")
        decoder_filename = os.path.join(folder_path, "decoder.keras")
        encoder = saving.load_model(encoder_filename)
        decoder = saving.load_model(decoder_filename)
        return encoder, decoder

    def _load_model(self, encoder, decoder):
        # Hook for subclasses: called by _load with the encoder and decoder read from disk.
        raise NotImplementedError

    def _load(self, folder_path: str):
        # Read both model files, then hand them to the subclass hook.
        encoder, decoder = self._load_files(folder_path)
        self._load_model(encoder, decoder)

    def _instantiate(self):
        """
        Instantiates the model and loads the saved model if the load_path is given.

        When a load path is set, the model is loaded and nothing is built. Otherwise a model is
        built only if none exists yet and the input shape is non-empty.

        :return: None
        """
        if self._load_path is not None:
            self._load(self._load_path)
            return
        if not self._model and self.input_shape:
            self._model = self._build(self.input_shape)

    def save(self, folder_path: str):
        """
        Saves the model's encoder and decoder to the given folder path.

        Two files are written: "encoder.keras" and "decoder.keras". The folder is not created here.

        :param folder_path: path to the folder to save the model
        :raises AttributeError: if the model does not exist
        :return: None
        """
        encoder_filename = os.path.join(folder_path, "encoder.keras")
        decoder_filename = os.path.join(folder_path, "decoder.keras")
        saving.save_model(self._model.encoder, encoder_filename)
        saving.save_model(self._model.decoder, decoder_filename)

    def fine_tune(self, data: np.ndarray, **kwargs):
        # Not provided by this base class.
        raise NotImplementedError

    def _build(self, input_shape: str):
        # NOTE: _instantiate passes self.input_shape, which UnspecializedModel.__init__ stores as
        # a parsed tuple, despite the str annotation here.
        raise NotImplementedError

    def _set_hyperparams(self, learning_rate, batch_size, epochs):
        # Each value is coerced to its numeric type; a None value leaves the current setting untouched.
        if learning_rate is not None:
            self._learning_rate = float(learning_rate)
        if batch_size is not None:
            self._batch_size = int(batch_size)
        if epochs is not None:
            self._epochs = int(epochs)

    def set_hyperparameters(self, **kwargs):
        # Only "learning_rate", "batch_size" and "epochs" are read; missing keys keep the current value.
        learning_rate = kwargs.get("learning_rate", self._learning_rate)
        batch_size = kwargs.get("batch_size", self._batch_size)
        epochs = kwargs.get("epochs", self._epochs)
        self._set_hyperparams(learning_rate, batch_size, epochs)

    def train(
        self,
        data: np.ndarray,
        learning_rate: float = None,
        batch_size: int = None,
        epochs: int = None,
    ):
        """
        Trains the VAE model on the provided data.

        The model is compiled with an Adam optimizer and fitted on ``data``; the last recorded
        "loss" value and the number of rows in ``data`` are stored in ``training_info``.
        No validation pass is run: validation loss and samples are recorded as -1 and 0.
        No type or shape validation of ``data`` is performed in this method.

        :param data: the dataset to train the model on
        :param learning_rate: optional, the learning rate to use for training; falls back to the stored value
        :param batch_size: optional, the batch size to use for training; falls back to the stored value
        :param epochs: optional, the number of epochs to train the model for; falls back to the stored value
        :raises AttributeError: if the model is not instantiated
        :return: None
        """
        learning_rate = (
            learning_rate if learning_rate is not None else self._learning_rate
        )
        batch_size = batch_size if batch_size is not None else self._batch_size
        epochs = epochs if epochs is not None else self._epochs
        self._model.compile(
            optimizer=keras.optimizers.Adam(learning_rate=learning_rate)
        )
        history = self._model.fit(data, epochs=epochs, batch_size=batch_size, verbose=1)
        self.training_info = TrainingInfo(
            loss_fn="ELBO",
            # assumes the recorded loss is a tensor exposing .numpy() -- TODO confirm for the keras version in use
            train_loss=history.history["loss"][-1].numpy().item(),
            train_samples=data.shape[0],
            validation_loss=-1,
            validation_samples=0,
        )

    def infer(self, n_rows: int, **kwargs):
        """
        Performs inference using the trained VAE model.

        Draws ``n_rows`` standard-normal latent vectors of size ``latent_dim`` and decodes them.

        :param n_rows: Number of rows to generate.
        :param kwargs: Additional arguments for inference, not currently used.
        :raises AttributeError: If the model is not instantiated.
        :return: The decoder's prediction for the sampled latent vectors.
        """
        z_random = np.random.normal(size=(n_rows, self._latent_dim))
        results = self._model.decoder.predict(z_random)
        return results

    @classmethod
    def self_describe(cls):
        # Concrete models must provide their own description.
        raise NotImplementedError
@@ -0,0 +1,78 @@
1
+ import keras
2
+ from keras.api import layers, ops
3
+ import tensorflow as tf
4
+
5
+
6
class Sampling(layers.Layer):
    """Samples z from (z_mean, z_log_var) as z_mean + exp(0.5 * z_log_var) * epsilon."""

    def __init__(self, seed: int = 42, **kwargs):
        super().__init__(**kwargs)
        self.seed_generator = keras.random.SeedGenerator(seed)

    def call(self, inputs):
        z_mean, z_log_var = inputs
        mean_shape = ops.shape(z_mean)
        # One standard-normal draw per element of z_mean (first two dimensions).
        noise = keras.random.normal(
            shape=(mean_shape[0], mean_shape[1]), seed=self.seed_generator
        )
        std = ops.exp(0.5 * z_log_var)
        return z_mean + std * noise
19
+
20
+
21
class VAE(keras.Model):
    """
    Variational auto-encoder wrapping an encoder and a decoder model.

    The encoder is called as ``z_mean, z_log_var, z = encoder(data)`` and the decoder as
    ``decoder(z)``. The total loss is the reconstruction loss (sum of absolute errors over the
    last axis, averaged) plus ``beta`` times the KL term.
    """

    def __init__(self, encoder, decoder, beta=1, **kwargs):
        super().__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self._beta = beta
        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = keras.metrics.Mean(
            name="reconstruction_loss"
        )
        self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")

    @property
    def metrics(self):
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
        ]

    def _vae_losses(self, data):
        """Run encoder and decoder on ``data`` and return (total, reconstruction, kl) losses."""
        z_mean, z_log_var, z = self.encoder(data)
        reconstruction = self.decoder(z)
        reconstruction_loss = ops.mean(
            ops.sum(ops.abs(data - reconstruction), axis=-1)
        )
        kl_loss = -0.5 * (1 + z_log_var - ops.square(z_mean) - ops.exp(z_log_var))
        kl_loss = ops.mean(ops.sum(kl_loss, axis=1))
        total_loss = reconstruction_loss + self._beta * kl_loss
        return total_loss, reconstruction_loss, kl_loss

    def train_step(self, data):
        """
        Run one optimisation step and update the running-mean loss trackers.

        :param data: a batch of inputs; when a tuple is given only its first element is used,
            matching test_step
        :return: dict with the tracked "loss", "reconstruction_loss" and "kl_loss"
        """
        if isinstance(data, tuple):
            data = data[0]

        # The whole forward pass must happen under the tape so gradients can be taken.
        with tf.GradientTape() as tape:
            total_loss, reconstruction_loss, kl_loss = self._vae_losses(data)
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)

        return {
            "loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }

    def test_step(self, data):
        """
        Compute the losses for one evaluation batch without updating weights or trackers.

        :param data: a batch of inputs; when a tuple is given only its first element is used
        :return: dict with the batch "loss", "reconstruction_loss" and "kl_loss"
        """
        if isinstance(data, tuple):
            data = data[0]

        total_loss, reconstruction_loss, kl_loss = self._vae_losses(data)
        return {
            "loss": total_loss,
            "reconstruction_loss": reconstruction_loss,
            "kl_loss": kl_loss,
        }