sdg-core-lib 0.1.8.dev2__tar.gz → 0.1.8.dev4__tar.gz

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/PKG-INFO +2 -3
  2. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/pyproject.toml +4 -4
  3. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/data_generator/models/GANs/CTGANComponents.py +0 -1
  4. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/data_generator/models/GANs/implementation/CTGAN.py +1 -1
  5. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/dataset/datasets.py +21 -14
  6. sdg_core_lib-0.1.8.dev4/src/sdg_core_lib/dataset/validation_schema.py +37 -0
  7. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/README.md +0 -0
  8. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/__init__.py +0 -0
  9. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/browser.py +0 -0
  10. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/commons.py +0 -0
  11. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/data_generator/__init__.py +0 -0
  12. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/data_generator/models/GANs/__init__.py +0 -0
  13. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/data_generator/models/GANs/implementation/__init__.py +0 -0
  14. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/data_generator/models/ModelInfo.py +0 -0
  15. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/data_generator/models/TrainingInfo.py +0 -0
  16. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/data_generator/models/UnspecializedModel.py +0 -0
  17. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/data_generator/models/VAEs/KerasBaseVAE.py +0 -0
  18. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/data_generator/models/VAEs/VAE.py +0 -0
  19. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/data_generator/models/VAEs/__init__.py +0 -0
  20. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/data_generator/models/VAEs/implementation/AutoTabularVAE.py +0 -0
  21. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/data_generator/models/VAEs/implementation/TabularVAE.py +0 -0
  22. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/data_generator/models/VAEs/implementation/TimeSeriesVAE.py +0 -0
  23. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/data_generator/models/VAEs/implementation/__init__.py +0 -0
  24. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/data_generator/models/__init__.py +0 -0
  25. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/dataset/__init__.py +0 -0
  26. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/dataset/columns.py +0 -0
  27. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/evaluate/__init__.py +0 -0
  28. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/evaluate/base_evaluator.py +0 -0
  29. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/evaluate/metrics.py +0 -0
  30. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/evaluate/tables.py +0 -0
  31. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/evaluate/time_series.py +0 -0
  32. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/job.py +0 -0
  33. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/mappings.py +0 -0
  34. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/post_process/FunctionApplier.py +0 -0
  35. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/post_process/__init__.py +0 -0
  36. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/post_process/function_factory.py +0 -0
  37. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/post_process/function_utils.py +0 -0
  38. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/post_process/functions/Parameter.py +0 -0
  39. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/post_process/functions/UnspecializedFunction.py +0 -0
  40. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/post_process/functions/__init__.py +0 -0
  41. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/post_process/functions/distribution_evaluator/__init__.py +0 -0
  42. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/post_process/functions/distribution_evaluator/implementation/NormalTester.py +0 -0
  43. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/post_process/functions/distribution_evaluator/implementation/__init__.py +0 -0
  44. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/post_process/functions/filter/IntervalThreshold.py +0 -0
  45. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/post_process/functions/filter/MonoThreshold.py +0 -0
  46. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/post_process/functions/filter/__init__.py +0 -0
  47. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/post_process/functions/filter/implementation/InnerThreshold.py +0 -0
  48. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/post_process/functions/filter/implementation/LowerThreshold.py +0 -0
  49. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/post_process/functions/filter/implementation/OuterThreshold.py +0 -0
  50. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/post_process/functions/filter/implementation/UpperThreshold.py +0 -0
  51. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/post_process/functions/filter/implementation/__init__.py +0 -0
  52. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/post_process/functions/generation/__init__.py +0 -0
  53. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/post_process/functions/generation/implementation/LinearFunction.py +0 -0
  54. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/post_process/functions/generation/implementation/NormalDistributionSample.py +0 -0
  55. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/post_process/functions/generation/implementation/QuadraticFunction.py +0 -0
  56. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/post_process/functions/generation/implementation/SinusoidalFunction.py +0 -0
  57. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/post_process/functions/generation/implementation/__init__.py +0 -0
  58. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/post_process/functions/modification/__init__.py +0 -0
  59. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/post_process/functions/modification/implementation/BurstNoiseAdder.py +0 -0
  60. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/post_process/functions/modification/implementation/WhiteNoiseAdder.py +0 -0
  61. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/post_process/functions/modification/implementation/__init__.py +0 -0
  62. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/preprocess/__init__.py +0 -0
  63. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/preprocess/base_processor.py +0 -0
  64. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/preprocess/strategies/__init__.py +0 -0
  65. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/preprocess/strategies/base_strategy.py +0 -0
  66. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/preprocess/strategies/ctgan_strategy.py +0 -0
  67. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/preprocess/strategies/steps.py +0 -0
  68. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/preprocess/strategies/vae_strategy.py +0 -0
  69. {sdg_core_lib-0.1.8.dev2 → sdg_core_lib-0.1.8.dev4}/src/sdg_core_lib/preprocess/table_processor.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sdg-core-lib
3
- Version: 0.1.8.dev2
3
+ Version: 0.1.8.dev4
4
4
  Summary: Add your description here
5
5
  Author: emiliocimino
6
6
  Author-email: emiliocimino <emilio.cimino@outlook.it>
@@ -16,8 +16,7 @@ Requires-Dist: skops==0.13.0
16
16
  Requires-Dist: statsmodels==0.14.5
17
17
  Requires-Dist: tslearn==0.7.0
18
18
  Requires-Dist: keras-tuner==1.4.8
19
- Requires-Dist: scipy-stubs~=1.16.2
20
- Requires-Dist: pandas-stubs~=2.2.3
19
+ Requires-Dist: pydantic
21
20
  Requires-Python: >=3.12
22
21
  Description-Content-Type: text/markdown
23
22
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "sdg-core-lib"
3
- version = "0.1.8.dev2"
3
+ version = "0.1.8.dev4"
4
4
  description = "Add your description here"
5
5
  license = "AGPL-3.0"
6
6
  readme = "README.md"
@@ -22,8 +22,7 @@ dependencies = [
22
22
  "statsmodels==0.14.5",
23
23
  "tslearn==0.7.0",
24
24
  "keras-tuner==1.4.8",
25
- "scipy-stubs~=1.16.2",
26
- "pandas-stubs~=2.2.3",
25
+ "pydantic"
27
26
  ]
28
27
 
29
28
  [dependency-groups]
@@ -38,7 +37,8 @@ dev = [
38
37
  "skops==0.13.0",
39
38
  "statsmodels==0.14.5",
40
39
  "tslearn==0.7.0",
41
- "keras-tuner==1.4.8"
40
+ "keras-tuner==1.4.8",
41
+ "pydantic"
42
42
  ]
43
43
 
44
44
  test = [
@@ -189,7 +189,6 @@ class CTGANModel(keras.Model):
189
189
  batch_size = int(batch_size) # Convert symbolic tensor to int
190
190
  num_cats = len(self.generator.cats_disc)
191
191
  total_cond_dim = sum(self.generator.cats_disc)
192
- cats_disc = tf.convert_to_tensor(self.generator.cats_disc, dtype=tf.int32)
193
192
 
194
193
  col_indices = tf.random.uniform(
195
194
  shape=[batch_size], minval=0, maxval=num_cats, dtype=tf.int32
@@ -28,7 +28,7 @@ class CTGAN(UnspecializedModel):
28
28
  pac_size=10,
29
29
  learning_rate=1e-3,
30
30
  batch_size=100,
31
- epochs=10,
31
+ epochs=50,
32
32
  gen_steps=4,
33
33
  critic_dropout=0.2,
34
34
  ):
@@ -2,6 +2,11 @@ from abc import ABC, abstractmethod
2
2
  import numpy as np
3
3
 
4
4
  from sdg_core_lib.dataset.columns import Numeric, Categorical, Column
5
+ from sdg_core_lib.dataset.validation_schema import (
6
+ FeatureData,
7
+ DataSkeleton,
8
+ SkeletonOut,
9
+ )
5
10
  from sdg_core_lib.preprocess.base_processor import Processor
6
11
  from sdg_core_lib.preprocess.table_processor import TableProcessor
7
12
 
@@ -75,6 +80,7 @@ class Table(Dataset):
75
80
  raise ValueError("Empty dataset")
76
81
 
77
82
  for idx, col_data in enumerate(json_data):
83
+ FeatureData.model_validate(col_data)
78
84
  col_type = col_data.get("column_type", "")
79
85
  col_name = col_data.get("column_name", "")
80
86
  col_values = np.array(
@@ -110,6 +116,7 @@ class Table(Dataset):
110
116
  data_map = []
111
117
 
112
118
  for col_data in sorted(skeleton, key=lambda x: int(x["column_position"])):
119
+ DataSkeleton.model_validate(col_data)
113
120
  col_type = col_data.get("column_type", "")
114
121
  col_name = col_data.get("column_name", "")
115
122
  col_value_type = col_data.get("column_datatype", "")
@@ -157,27 +164,27 @@ class Table(Dataset):
157
164
 
158
165
  def to_json(self) -> list[dict]:
159
166
  return [
160
- {
161
- "column_data": col.values.reshape(
167
+ FeatureData(
168
+ column_data=col.values.reshape(
162
169
  -1,
163
170
  ).tolist(),
164
- "column_name": col.name,
165
- "column_type": col.column_type,
166
- "column_datatype": col.value_type,
167
- }
171
+ column_name=col.name,
172
+ column_type=col.column_type,
173
+ column_datatype=col.value_type,
174
+ ).model_dump()
168
175
  for col in self.columns
169
176
  ]
170
177
 
171
178
  def to_skeleton(self) -> list[dict]:
172
179
  return [
173
- {
174
- "feature_name": col.name,
175
- "feature_position": col.position,
176
- "is_categorical": True if isinstance(col, Categorical) else False,
177
- "type": col.value_type,
178
- "feature_type": col.column_type,
179
- "feature_size": str(col.get_internal_shape()[1]),
180
- }
180
+ SkeletonOut(
181
+ feature_name=col.name,
182
+ feature_position=col.position,
183
+ feature_type=col.column_type,
184
+ type=col.value_type,
185
+ is_categorical=True if isinstance(col, Categorical) else False,
186
+ feature_size=str(col.get_internal_shape()[1]),
187
+ ).model_dump()
181
188
  for col in self.columns
182
189
  ]
183
190
 
@@ -0,0 +1,37 @@
1
+ from pydantic import BaseModel, PositiveInt, ConfigDict
2
+ from sdg_core_lib.commons import DataType
3
+ from enum import Enum
4
+ from typing import List
5
+
6
+
7
+ class SupportedFeatureTypes(str, Enum):
8
+ continuous = "continuous"
9
+ categorical = "categorical"
10
+ primary_key = "primary_key"
11
+ group_index = "group_index"
12
+
13
+
14
+ class BaseFeature(BaseModel):
15
+ model_config = ConfigDict(use_enum_values=True)
16
+ column_name: str
17
+ column_datatype: DataType
18
+ column_type: SupportedFeatureTypes
19
+
20
+
21
+ class FeatureData(BaseFeature):
22
+ column_data: List[float | int | str] | List
23
+
24
+
25
+ class DataSkeleton(BaseFeature):
26
+ column_position: int
27
+ column_size: PositiveInt
28
+
29
+
30
+ class SkeletonOut(BaseModel):
31
+ model_config = ConfigDict(use_enum_values=True)
32
+ feature_name: str
33
+ feature_position: int
34
+ is_categorical: bool
35
+ type: DataType
36
+ feature_type: SupportedFeatureTypes
37
+ feature_size: str