mloda 0.3.3__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. mloda/__init__.py +17 -0
  2. {mloda_core → mloda/core}/abstract_plugins/components/base_artifact.py +2 -2
  3. {mloda_core → mloda/core}/abstract_plugins/components/base_validator.py +13 -0
  4. {mloda_core → mloda/core}/abstract_plugins/components/data_access_collection.py +1 -1
  5. {mloda_core → mloda/core}/abstract_plugins/components/data_types.py +39 -0
  6. {mloda_core → mloda/core}/abstract_plugins/components/feature.py +39 -33
  7. {mloda_core → mloda/core}/abstract_plugins/components/feature_chainer/feature_chain_parser.py +19 -19
  8. mloda/core/abstract_plugins/components/feature_chainer/feature_chain_parser_mixin.py +197 -0
  9. {mloda_core → mloda/core}/abstract_plugins/components/feature_collection.py +6 -6
  10. {mloda_core → mloda/core}/abstract_plugins/components/feature_group_version.py +8 -8
  11. {mloda_core → mloda/core}/abstract_plugins/components/feature_set.py +18 -24
  12. {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/cfw_transformer.py +2 -2
  13. {mloda_core → mloda/core}/abstract_plugins/components/index/add_index_feature.py +4 -4
  14. {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/api_input_data.py +3 -3
  15. {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/api_input_data_collection.py +2 -2
  16. {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/base_api_data.py +1 -1
  17. {mloda_core → mloda/core}/abstract_plugins/components/input_data/base_input_data.py +6 -6
  18. {mloda_core → mloda/core}/abstract_plugins/components/input_data/creator/data_creator.py +3 -3
  19. mloda/core/abstract_plugins/components/link.py +437 -0
  20. {mloda_core → mloda/core}/abstract_plugins/components/match_data/match_data.py +3 -3
  21. {mloda_core → mloda/core}/abstract_plugins/components/merge/base_merge_engine.py +2 -2
  22. {mloda_core → mloda/core}/abstract_plugins/components/options.py +12 -36
  23. {mloda_core → mloda/core}/abstract_plugins/components/parallelization_modes.py +1 -1
  24. {mloda_core → mloda/core}/abstract_plugins/components/plugin_option/plugin_collector.py +14 -14
  25. mloda/core/abstract_plugins/components/validators/datatype_validator.py +96 -0
  26. mloda/core/abstract_plugins/components/validators/feature_set_validator.py +38 -0
  27. mloda/core/abstract_plugins/components/validators/feature_validator.py +23 -0
  28. mloda/core/abstract_plugins/components/validators/link_validator.py +79 -0
  29. mloda/core/abstract_plugins/components/validators/options_validator.py +57 -0
  30. mloda_core/abstract_plugins/compute_frame_work.py → mloda/core/abstract_plugins/compute_framework.py +46 -37
  31. mloda_core/abstract_plugins/abstract_feature_group.py → mloda/core/abstract_plugins/feature_group.py +56 -33
  32. mloda/core/abstract_plugins/function_extender.py +78 -0
  33. mloda/core/api/plugin_docs.py +220 -0
  34. mloda/core/api/plugin_info.py +32 -0
  35. {mloda_core → mloda/core}/api/prepare/setup_compute_framework.py +11 -11
  36. {mloda_core → mloda/core}/api/request.py +42 -33
  37. {mloda_core → mloda/core}/core/cfw_manager.py +8 -8
  38. {mloda_core → mloda/core}/core/engine.py +47 -46
  39. {mloda_core → mloda/core}/core/step/abstract_step.py +7 -7
  40. {mloda_core → mloda/core}/core/step/feature_group_step.py +12 -12
  41. {mloda_core → mloda/core}/core/step/join_step.py +14 -14
  42. {mloda_core → mloda/core}/core/step/transform_frame_work_step.py +16 -16
  43. {mloda_core → mloda/core}/filter/filter_engine.py +1 -1
  44. {mloda_core → mloda/core}/filter/filter_type_enum.py +1 -1
  45. {mloda_core → mloda/core}/filter/global_filter.py +23 -23
  46. {mloda_core → mloda/core}/filter/single_filter.py +6 -6
  47. {mloda_core → mloda/core}/prepare/accessible_plugins.py +16 -18
  48. {mloda_core → mloda/core}/prepare/execution_plan.py +65 -39
  49. {mloda_core → mloda/core}/prepare/graph/build_graph.py +6 -6
  50. {mloda_core → mloda/core}/prepare/graph/graph.py +1 -1
  51. {mloda_core → mloda/core}/prepare/graph/properties.py +5 -5
  52. {mloda_core → mloda/core}/prepare/identify_feature_group.py +12 -14
  53. {mloda_core → mloda/core}/prepare/joinstep_collection.py +3 -3
  54. {mloda_core → mloda/core}/prepare/resolve_compute_frameworks.py +6 -6
  55. {mloda_core → mloda/core}/prepare/resolve_graph.py +11 -11
  56. {mloda_core → mloda/core}/prepare/resolve_links.py +11 -31
  57. mloda/core/prepare/validators/resolve_link_validator.py +32 -0
  58. mloda/core/runtime/compute_framework_executor.py +271 -0
  59. mloda/core/runtime/data_lifecycle_manager.py +160 -0
  60. mloda/core/runtime/flight/__init__.py +0 -0
  61. {mloda_core → mloda/core}/runtime/flight/runner_flight_server.py +1 -1
  62. mloda/core/runtime/run.py +317 -0
  63. mloda/core/runtime/worker/__init__.py +0 -0
  64. {mloda_core → mloda/core}/runtime/worker/multiprocessing_worker.py +15 -10
  65. {mloda_core → mloda/core}/runtime/worker/thread_worker.py +2 -2
  66. mloda/core/runtime/worker_manager.py +96 -0
  67. mloda/provider/__init__.py +101 -0
  68. mloda/steward/__init__.py +25 -0
  69. mloda/user/__init__.py +57 -0
  70. {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/METADATA +18 -22
  71. mloda-0.4.0.dist-info/RECORD +248 -0
  72. {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/top_level.txt +1 -1
  73. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_filter_engine.py +2 -2
  74. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_framework.py +15 -13
  75. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_merge_engine.py +3 -3
  76. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_pyarrow_transformer.py +1 -1
  77. mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_filter_engine.py +2 -2
  78. mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_framework.py +12 -10
  79. mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_pyarrow_transformer.py +1 -1
  80. mloda_plugins/compute_framework/base_implementations/pandas/dataframe.py +18 -16
  81. mloda_plugins/compute_framework/base_implementations/pandas/pandas_filter_engine.py +36 -13
  82. mloda_plugins/compute_framework/base_implementations/pandas/pandas_merge_engine.py +7 -7
  83. mloda_plugins/compute_framework/base_implementations/pandas/pandaspyarrowtransformer.py +1 -1
  84. mloda_plugins/compute_framework/base_implementations/polars/dataframe.py +16 -14
  85. mloda_plugins/compute_framework/base_implementations/polars/lazy_dataframe.py +13 -12
  86. mloda_plugins/compute_framework/base_implementations/polars/polars_filter_engine.py +2 -2
  87. mloda_plugins/compute_framework/base_implementations/polars/polars_lazy_pyarrow_transformer.py +1 -1
  88. mloda_plugins/compute_framework/base_implementations/polars/polars_merge_engine.py +3 -3
  89. mloda_plugins/compute_framework/base_implementations/polars/polars_pyarrow_transformer.py +1 -1
  90. mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_filter_engine.py +2 -2
  91. mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_merge_engine.py +3 -3
  92. mloda_plugins/compute_framework/base_implementations/pyarrow/table.py +12 -10
  93. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_filter_engine.py +2 -2
  94. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_framework.py +11 -9
  95. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_merge_engine.py +3 -3
  96. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_pyarrow_transformer.py +1 -1
  97. mloda_plugins/compute_framework/base_implementations/spark/spark_filter_engine.py +2 -2
  98. mloda_plugins/compute_framework/base_implementations/spark/spark_framework.py +17 -15
  99. mloda_plugins/compute_framework/base_implementations/spark/spark_merge_engine.py +3 -3
  100. mloda_plugins/compute_framework/base_implementations/spark/spark_pyarrow_transformer.py +1 -1
  101. mloda_plugins/config/feature/loader.py +2 -2
  102. mloda_plugins/feature_group/experimental/aggregated_feature_group/base.py +45 -62
  103. mloda_plugins/feature_group/experimental/aggregated_feature_group/pandas.py +2 -2
  104. mloda_plugins/feature_group/experimental/aggregated_feature_group/polars_lazy.py +2 -2
  105. mloda_plugins/feature_group/experimental/aggregated_feature_group/pyarrow.py +2 -2
  106. mloda_plugins/feature_group/experimental/clustering/base.py +69 -97
  107. mloda_plugins/feature_group/experimental/clustering/pandas.py +2 -2
  108. mloda_plugins/feature_group/experimental/data_quality/missing_value/base.py +58 -79
  109. mloda_plugins/feature_group/experimental/data_quality/missing_value/pandas.py +2 -2
  110. mloda_plugins/feature_group/experimental/data_quality/missing_value/pyarrow.py +2 -2
  111. mloda_plugins/feature_group/experimental/data_quality/missing_value/python_dict.py +2 -2
  112. mloda_plugins/feature_group/experimental/default_options_key.py +16 -19
  113. mloda_plugins/feature_group/experimental/dimensionality_reduction/base.py +80 -94
  114. mloda_plugins/feature_group/experimental/dimensionality_reduction/pandas.py +2 -2
  115. mloda_plugins/feature_group/experimental/dynamic_feature_group_factory/dynamic_feature_group_factory.py +24 -24
  116. mloda_plugins/feature_group/experimental/forecasting/base.py +106 -104
  117. mloda_plugins/feature_group/experimental/forecasting/forecasting_artifact.py +2 -2
  118. mloda_plugins/feature_group/experimental/forecasting/pandas.py +15 -15
  119. mloda_plugins/feature_group/experimental/geo_distance/base.py +50 -42
  120. mloda_plugins/feature_group/experimental/geo_distance/pandas.py +2 -2
  121. mloda_plugins/feature_group/experimental/llm/cli.py +4 -4
  122. mloda_plugins/feature_group/experimental/llm/cli_features/refactor_git_cached.py +19 -19
  123. mloda_plugins/feature_group/experimental/llm/installed_packages_feature_group.py +8 -8
  124. mloda_plugins/feature_group/experimental/llm/list_directory_feature_group.py +5 -5
  125. mloda_plugins/feature_group/experimental/llm/llm_api/claude.py +3 -3
  126. mloda_plugins/feature_group/experimental/llm/llm_api/gemini.py +3 -3
  127. mloda_plugins/feature_group/experimental/llm/llm_api/llm_base_request.py +5 -5
  128. mloda_plugins/feature_group/experimental/llm/llm_api/openai.py +3 -3
  129. mloda_plugins/feature_group/experimental/llm/llm_api/request_loop.py +6 -6
  130. mloda_plugins/feature_group/experimental/llm/llm_file_selector.py +10 -10
  131. mloda_plugins/feature_group/experimental/llm/tools/tool_collection.py +1 -1
  132. mloda_plugins/feature_group/experimental/node_centrality/base.py +46 -72
  133. mloda_plugins/feature_group/experimental/node_centrality/pandas.py +2 -2
  134. mloda_plugins/feature_group/experimental/sklearn/encoding/base.py +51 -51
  135. mloda_plugins/feature_group/experimental/sklearn/encoding/pandas.py +2 -2
  136. mloda_plugins/feature_group/experimental/sklearn/pipeline/base.py +52 -39
  137. mloda_plugins/feature_group/experimental/sklearn/pipeline/pandas.py +2 -2
  138. mloda_plugins/feature_group/experimental/sklearn/scaling/base.py +44 -58
  139. mloda_plugins/feature_group/experimental/sklearn/scaling/pandas.py +2 -2
  140. mloda_plugins/feature_group/experimental/sklearn/sklearn_artifact.py +2 -2
  141. mloda_plugins/feature_group/experimental/source_input_feature.py +15 -15
  142. mloda_plugins/feature_group/experimental/text_cleaning/base.py +38 -61
  143. mloda_plugins/feature_group/experimental/text_cleaning/pandas.py +2 -2
  144. mloda_plugins/feature_group/experimental/text_cleaning/python_dict.py +2 -2
  145. mloda_plugins/feature_group/experimental/time_window/base.py +106 -93
  146. mloda_plugins/feature_group/experimental/time_window/pandas.py +13 -13
  147. mloda_plugins/feature_group/experimental/time_window/pyarrow.py +12 -12
  148. mloda_plugins/feature_group/input_data/api_data/api_data.py +9 -11
  149. mloda_plugins/feature_group/input_data/read_context_files.py +7 -7
  150. mloda_plugins/feature_group/input_data/read_db.py +7 -9
  151. mloda_plugins/feature_group/input_data/read_db_feature.py +4 -4
  152. mloda_plugins/feature_group/input_data/read_dbs/sqlite.py +23 -13
  153. mloda_plugins/feature_group/input_data/read_file.py +8 -8
  154. mloda_plugins/feature_group/input_data/read_file_feature.py +4 -4
  155. mloda_plugins/feature_group/input_data/read_files/csv.py +6 -6
  156. mloda_plugins/feature_group/input_data/read_files/feather.py +5 -5
  157. mloda_plugins/feature_group/input_data/read_files/json.py +5 -5
  158. mloda_plugins/feature_group/input_data/read_files/orc.py +5 -5
  159. mloda_plugins/feature_group/input_data/read_files/parquet.py +5 -5
  160. mloda_plugins/feature_group/input_data/read_files/text_file_reader.py +5 -5
  161. mloda_plugins/function_extender/base_implementations/otel/otel_extender.py +4 -4
  162. mloda-0.3.3.dist-info/RECORD +0 -230
  163. mloda_core/abstract_plugins/components/link.py +0 -286
  164. mloda_core/abstract_plugins/function_extender.py +0 -34
  165. mloda_core/runtime/run.py +0 -617
  166. {mloda_core → mloda/core}/__init__.py +0 -0
  167. {mloda_core → mloda/core}/abstract_plugins/__init__.py +0 -0
  168. {mloda_core → mloda/core}/abstract_plugins/components/__init__.py +0 -0
  169. {mloda_core → mloda/core}/abstract_plugins/components/domain.py +0 -0
  170. {mloda_core → mloda/core}/abstract_plugins/components/feature_chainer/__init__.py +0 -0
  171. {mloda_core → mloda/core}/abstract_plugins/components/feature_name.py +0 -0
  172. {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/__init__.py +0 -0
  173. {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/base_transformer.py +0 -0
  174. {mloda_core → mloda/core}/abstract_plugins/components/hashable_dict.py +0 -0
  175. {mloda_core → mloda/core}/abstract_plugins/components/index/__init__.py +0 -0
  176. {mloda_core → mloda/core}/abstract_plugins/components/index/index.py +0 -0
  177. {mloda_core → mloda/core}/abstract_plugins/components/input_data/__init__.py +0 -0
  178. {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/__init__.py +0 -0
  179. {mloda_core → mloda/core}/abstract_plugins/components/input_data/creator/__init__.py +0 -0
  180. {mloda_core → mloda/core}/abstract_plugins/components/match_data/__init__.py +0 -0
  181. {mloda_core → mloda/core}/abstract_plugins/components/merge/__init__.py +0 -0
  182. {mloda_core → mloda/core}/abstract_plugins/components/plugin_option/__init__.py +0 -0
  183. {mloda_core → mloda/core}/abstract_plugins/components/utils.py +0 -0
  184. {mloda_core/abstract_plugins/plugin_loader → mloda/core/abstract_plugins/components/validators}/__init__.py +0 -0
  185. {mloda_core/api → mloda/core/abstract_plugins/plugin_loader}/__init__.py +0 -0
  186. {mloda_core → mloda/core}/abstract_plugins/plugin_loader/plugin_loader.py +0 -0
  187. {mloda_core/api/prepare → mloda/core/api}/__init__.py +0 -0
  188. {mloda_core/core → mloda/core/api/prepare}/__init__.py +0 -0
  189. {mloda_core/core/step → mloda/core/core}/__init__.py +0 -0
  190. {mloda_core/filter → mloda/core/core/step}/__init__.py +0 -0
  191. {mloda_core/prepare → mloda/core/filter}/__init__.py +0 -0
  192. {mloda_core → mloda/core}/filter/filter_parameter.py +0 -0
  193. {mloda_core/prepare/graph → mloda/core/prepare}/__init__.py +0 -0
  194. {mloda_core/runtime → mloda/core/prepare/graph}/__init__.py +0 -0
  195. {mloda_core/runtime/flight → mloda/core/prepare/validators}/__init__.py +0 -0
  196. {mloda_core/runtime/worker → mloda/core/runtime}/__init__.py +0 -0
  197. {mloda_core → mloda/core}/runtime/flight/flight_server.py +0 -0
  198. {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/WHEEL +0 -0
  199. {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/entry_points.txt +0 -0
  200. {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/licenses/LICENSE.TXT +0 -0
  201. {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/licenses/NOTICE.md +0 -0
@@ -1,7 +1,7 @@
1
1
  from enum import Enum
2
2
 
3
3
 
4
- class ParallelizationModes(Enum):
4
+ class ParallelizationMode(Enum):
5
5
  SYNC = "sync"
6
6
  THREADING = "threading"
7
7
  MULTIPROCESSING = "multiprocessing"
@@ -1,11 +1,11 @@
1
1
  from typing import Set, Type
2
2
 
3
- from mloda_core.abstract_plugins.abstract_feature_group import AbstractFeatureGroup
3
+ from mloda.core.abstract_plugins.feature_group import FeatureGroup
4
4
 
5
5
 
6
- class PlugInCollector:
6
+ class PluginCollector:
7
7
  """
8
- The PlugInCollector class is a helper class with the purpose to disable or enable feature groups.
8
+ The PluginCollector class is a helper class with the purpose to disable or enable feature groups.
9
9
 
10
10
  This class is useful for rapid prototype development, where you want to disable or enable feature groups,
11
11
  when the other, competing feature groups are found.
@@ -15,16 +15,16 @@ class PlugInCollector:
15
15
  """
16
16
 
17
17
  def __init__(self) -> None:
18
- self.disabled_feature_group_classes: Set[Type[AbstractFeatureGroup]] = set()
19
- self.enabled_feature_group_classes: Set[Type[AbstractFeatureGroup]] = set()
18
+ self.disabled_feature_group_classes: Set[Type[FeatureGroup]] = set()
19
+ self.enabled_feature_group_classes: Set[Type[FeatureGroup]] = set()
20
20
 
21
- def add_disabled_feature_group_classes(self, feature_group_cls: Set[Type[AbstractFeatureGroup]]) -> None:
21
+ def add_disabled_feature_group_classes(self, feature_group_cls: Set[Type[FeatureGroup]]) -> None:
22
22
  self.disabled_feature_group_classes.update(feature_group_cls)
23
23
 
24
- def add_enabled_feature_group_classes(self, feature_group_cls: Set[Type[AbstractFeatureGroup]]) -> None:
24
+ def add_enabled_feature_group_classes(self, feature_group_cls: Set[Type[FeatureGroup]]) -> None:
25
25
  self.enabled_feature_group_classes.update(feature_group_cls)
26
26
 
27
- def applicable_feature_group_class(self, feature_group_cls: Type[AbstractFeatureGroup]) -> bool:
27
+ def applicable_feature_group_class(self, feature_group_cls: Type[FeatureGroup]) -> bool:
28
28
  if feature_group_cls in self.disabled_feature_group_classes:
29
29
  return False
30
30
 
@@ -38,22 +38,22 @@ class PlugInCollector:
38
38
 
39
39
  @staticmethod
40
40
  def disabled_feature_groups(
41
- feature_group_cls: Set[Type[AbstractFeatureGroup]] | Type[AbstractFeatureGroup],
42
- ) -> "PlugInCollector":
41
+ feature_group_cls: Set[Type[FeatureGroup]] | Type[FeatureGroup],
42
+ ) -> "PluginCollector":
43
43
  if not isinstance(feature_group_cls, Set):
44
44
  feature_group_cls = {feature_group_cls}
45
45
 
46
- plugin_collector = PlugInCollector()
46
+ plugin_collector = PluginCollector()
47
47
  plugin_collector.add_disabled_feature_group_classes(feature_group_cls)
48
48
  return plugin_collector
49
49
 
50
50
  @staticmethod
51
51
  def enabled_feature_groups(
52
- feature_group_cls: Set[Type[AbstractFeatureGroup]] | Type[AbstractFeatureGroup],
53
- ) -> "PlugInCollector":
52
+ feature_group_cls: Set[Type[FeatureGroup]] | Type[FeatureGroup],
53
+ ) -> "PluginCollector":
54
54
  if not isinstance(feature_group_cls, Set):
55
55
  feature_group_cls = {feature_group_cls}
56
56
 
57
- plugin_collector = PlugInCollector()
57
+ plugin_collector = PluginCollector()
58
58
  plugin_collector.add_enabled_feature_group_classes(feature_group_cls)
59
59
  return plugin_collector
@@ -0,0 +1,96 @@
1
+ from typing import Any
2
+
3
+
4
+ from mloda.core.abstract_plugins.components.data_types import DataType
5
+
6
+
7
+ class DataTypeMismatchError(ValueError):
8
+ """Raised when feature data type doesn't match declared type."""
9
+
10
+ def __init__(self, feature_name: str, declared: DataType, actual: DataType) -> None:
11
+ self.feature_name = feature_name
12
+ self.declared = declared
13
+ self.actual = actual
14
+ super().__init__(
15
+ f"Feature '{feature_name}': declared {declared.name}, got {actual.name}, coercion not supported"
16
+ )
17
+
18
+
19
+ class DataTypeValidator:
20
+ """Validates feature data matches declared DataType."""
21
+
22
+ _COMPATIBLE_TYPES = {
23
+ DataType.INT64: {DataType.INT32, DataType.INT64},
24
+ DataType.DOUBLE: {DataType.FLOAT, DataType.DOUBLE, DataType.INT32, DataType.INT64},
25
+ DataType.TIMESTAMP_MICROS: {DataType.TIMESTAMP_MILLIS, DataType.TIMESTAMP_MICROS},
26
+ }
27
+
28
+ @classmethod
29
+ def _types_compatible(cls, declared: DataType, actual: DataType) -> bool:
30
+ """Check if actual type is compatible with declared (allows widening)."""
31
+ if declared == actual:
32
+ return True
33
+ return actual in cls._COMPATIBLE_TYPES.get(declared, set())
34
+
35
+ @classmethod
36
+ def _types_loosely_compatible(cls, declared: DataType, actual: DataType) -> bool:
37
+ """Check if types are loosely compatible (allows any numeric/timestamp pairing).
38
+
39
+ Lenient mode allows data type mismatches within the same category:
40
+ - All numeric types (INT32, INT64, FLOAT, DOUBLE) are interchangeable
41
+ - All timestamp types are interchangeable
42
+ - Other types must match exactly
43
+
44
+ This fixes legacy FeatureGroups that declare INT32 but return DOUBLE.
45
+ """
46
+ if declared == actual:
47
+ return True
48
+
49
+ numeric_types = {DataType.INT32, DataType.INT64, DataType.FLOAT, DataType.DOUBLE}
50
+ if declared in numeric_types and actual in numeric_types:
51
+ return True
52
+
53
+ timestamp_types = {DataType.TIMESTAMP_MILLIS, DataType.TIMESTAMP_MICROS}
54
+ if declared in timestamp_types and actual in timestamp_types:
55
+ return True
56
+
57
+ return False
58
+
59
+ @classmethod
60
+ def validate(cls, data: Any, features: Any, strict_only: bool = False) -> None:
61
+ """Validate that data columns match declared feature types.
62
+
63
+ Args:
64
+ data: PyArrow table or similar with column data
65
+ features: FeatureSet containing features to validate
66
+ strict_only: If True, only validate when strict_type_enforcement is enabled.
67
+ This maintains backward compatibility with existing code.
68
+ """
69
+ from mloda_plugins.feature_group.experimental.default_options_key import DefaultOptionKeys
70
+
71
+ for feature in features.features:
72
+ if feature.data_type is None:
73
+ continue
74
+
75
+ col_name = feature.get_name()
76
+ if col_name not in data.column_names:
77
+ continue
78
+
79
+ arrow_type = data.schema.field(col_name).type
80
+
81
+ try:
82
+ actual_type = DataType.from_arrow_type(arrow_type)
83
+ except ValueError:
84
+ continue
85
+
86
+ strict_mode = False
87
+ if feature.options:
88
+ strict_value = feature.options.get(DefaultOptionKeys.strict_type_enforcement)
89
+ strict_mode = strict_value if strict_value is not None else False
90
+
91
+ if strict_mode:
92
+ if not cls._types_compatible(feature.data_type, actual_type):
93
+ raise DataTypeMismatchError(col_name, feature.data_type, actual_type)
94
+ else:
95
+ if not cls._types_loosely_compatible(feature.data_type, actual_type):
96
+ raise DataTypeMismatchError(col_name, feature.data_type, actual_type)
@@ -0,0 +1,38 @@
1
+ from typing import Any, Optional, Set, TYPE_CHECKING
2
+
3
+ if TYPE_CHECKING:
4
+ from mloda.core.abstract_plugins.components.feature import Feature
5
+
6
+
7
+ class FeatureSetValidator:
8
+ @staticmethod
9
+ def validate_options_initialized(options: Any, context: str = "FeatureSet") -> None:
10
+ if options is None:
11
+ raise ValueError(f"Options not initialized in {context}")
12
+
13
+ @staticmethod
14
+ def validate_equal_options(features: Set["Feature"]) -> None:
15
+ if len(features) <= 1:
16
+ return
17
+
18
+ options_list = [feature.options for feature in features]
19
+ first_options = options_list[0]
20
+
21
+ for options in options_list[1:]:
22
+ if options != first_options:
23
+ raise ValueError("Features have different options")
24
+
25
+ @staticmethod
26
+ def validate_feature_added(feature_name: Optional[str], context: str = "feature") -> None:
27
+ if feature_name is None:
28
+ raise ValueError(f"Feature name is None in {context}")
29
+
30
+ @staticmethod
31
+ def validate_filters_not_set(filters: Any) -> None:
32
+ if filters is not None:
33
+ raise ValueError("Filters already set")
34
+
35
+ @staticmethod
36
+ def validate_filters_is_set_type(filters: Any) -> None:
37
+ if not isinstance(filters, set):
38
+ raise ValueError("Filters must be a Set type")
@@ -0,0 +1,23 @@
1
+ from typing import Optional, Set, Type
2
+ from mloda.core.abstract_plugins.compute_framework import ComputeFramework
3
+
4
+
5
+ class FeatureValidator:
6
+ @staticmethod
7
+ def validate_and_resolve_compute_framework(
8
+ framework_name: str, available_frameworks: Set[Type[ComputeFramework]], source: str = "parameter"
9
+ ) -> Type[ComputeFramework]:
10
+ for subclass in available_frameworks:
11
+ if framework_name == subclass.get_class_name():
12
+ return subclass
13
+ raise ValueError(f"Compute framework via {source} {framework_name} not found.")
14
+
15
+ @staticmethod
16
+ def validate_compute_frameworks_resolved(
17
+ compute_frameworks: Optional[Set[Type[ComputeFramework]]], feature_name: str
18
+ ) -> None:
19
+ if compute_frameworks is None:
20
+ raise ValueError(
21
+ f"Feature {feature_name} does not have any compute framework. "
22
+ "This function can only be called when the frameworks were resolved."
23
+ )
@@ -0,0 +1,79 @@
1
+ from typing import TYPE_CHECKING, Any, Optional, Set, Tuple, Union
2
+
3
+ if TYPE_CHECKING:
4
+ from mloda.core.abstract_plugins.components.link import Link
5
+
6
+
7
+ class LinkValidator:
8
+ @staticmethod
9
+ def validate_index_not_empty(index: Union[str, Tuple[str, ...]], context: str = "index") -> None:
10
+ if not index:
11
+ raise ValueError(f"{context} cannot be empty")
12
+
13
+ @staticmethod
14
+ def validate_join_type(jointype: Any) -> None:
15
+ from mloda.core.abstract_plugins.components.link import JoinType
16
+
17
+ if not isinstance(jointype, JoinType):
18
+ raise ValueError(f"Join type {jointype} is not supported")
19
+
20
+ @staticmethod
21
+ def validate_no_double_joins(links: Set["Link"]) -> None:
22
+ from mloda.core.abstract_plugins.components.link import JoinType
23
+
24
+ for i_link in links:
25
+ for j_link in links:
26
+ if i_link == j_link:
27
+ continue
28
+ if (
29
+ i_link.left_feature_group == j_link.right_feature_group
30
+ and i_link.right_feature_group == j_link.left_feature_group
31
+ and i_link.jointype not in [JoinType.APPEND, JoinType.UNION]
32
+ ):
33
+ raise ValueError(
34
+ f"Link {i_link} and {j_link} have at least two different defined joins. Please remove one."
35
+ )
36
+
37
+ @staticmethod
38
+ def validate_no_conflicting_join_types(links: Set["Link"]) -> None:
39
+ for i_link in links:
40
+ for j_link in links:
41
+ if i_link == j_link:
42
+ continue
43
+ if (
44
+ i_link.left_feature_group == j_link.left_feature_group
45
+ and i_link.right_feature_group == j_link.right_feature_group
46
+ and i_link.jointype != j_link.jointype
47
+ ):
48
+ raise ValueError(
49
+ f"Link {i_link} and {j_link} have different join types for the same feature groups. Please remove one."
50
+ )
51
+
52
+ @staticmethod
53
+ def validate_right_join_constraints(links: Set["Link"]) -> None:
54
+ from mloda.core.abstract_plugins.components.link import JoinType
55
+
56
+ for i_link in links:
57
+ if i_link.jointype == JoinType.RIGHT:
58
+ for j_link in links:
59
+ if i_link == j_link:
60
+ continue
61
+ if (
62
+ i_link.left_feature_group == j_link.left_feature_group
63
+ or i_link.left_feature_group == j_link.right_feature_group
64
+ ):
65
+ raise ValueError(
66
+ f"Link {i_link} and {j_link} have multiple right joins for the same feature group on the left side or switching from left to right side although using right join. Please reconsider your joinlogic and if possible, use left joins instead of rightjoins. This will currently break the planner or during execution."
67
+ )
68
+
69
+ @classmethod
70
+ def validate_links(cls, links: Optional[Set["Link"]]) -> None:
71
+ if links is None:
72
+ return
73
+
74
+ for link in links:
75
+ cls.validate_join_type(link.jointype)
76
+
77
+ cls.validate_no_double_joins(links)
78
+ cls.validate_no_conflicting_join_types(links)
79
+ cls.validate_right_join_constraints(links)
@@ -0,0 +1,57 @@
1
+ from typing import Any, Dict, Set
2
+
3
+
4
+ class OptionsValidator:
5
+ """Validates Options configuration consistency."""
6
+
7
+ @staticmethod
8
+ def validate_no_duplicate_keys(group: Dict[str, Any], context: Dict[str, Any]) -> None:
9
+ """
10
+ Ensure no key exists in both group and context.
11
+
12
+ Raises ValueError if any key exists in both, with duplicate keys in message.
13
+ """
14
+ duplicate_keys = set(group.keys()) & set(context.keys())
15
+ if duplicate_keys:
16
+ raise ValueError(f"Keys cannot exist in both group and context: {duplicate_keys}")
17
+
18
+ @staticmethod
19
+ def validate_can_add_to_group(key: str, value: Any, group: Dict[str, Any], context: Dict[str, Any]) -> None:
20
+ """
21
+ Validate that a key can be added to group.
22
+
23
+ Checks:
24
+ 1. If key exists in group with different value -> ValueError (include key in message)
25
+ 2. If key exists in context -> ValueError (include key in message)
26
+ """
27
+ if key in group:
28
+ if value != group[key]:
29
+ raise ValueError(f"Key {key} already exists in group options with a different value: {group[key]}")
30
+ if key in context:
31
+ raise ValueError(f"Key {key} already exists in context options. Cannot add to group.")
32
+
33
+ @staticmethod
34
+ def validate_can_add_to_context(key: str, value: Any, group: Dict[str, Any], context: Dict[str, Any]) -> None:
35
+ """
36
+ Validate that a key can be added to context.
37
+
38
+ Checks:
39
+ 1. If key exists in context with different value -> ValueError (include key in message)
40
+ 2. If key exists in group -> ValueError (include key in message)
41
+ """
42
+ if key in context:
43
+ if value != context[key]:
44
+ raise ValueError(f"Key {key} already exists in context options with a different value: {context[key]}")
45
+ if key in group:
46
+ raise ValueError(f"Key {key} already exists in group options. Cannot add to context.")
47
+
48
+ @staticmethod
49
+ def validate_no_group_context_conflicts(other_group_keys: Set[str], self_context_keys: Set[str]) -> None:
50
+ """
51
+ Validate no conflicts between other's group keys and self's context keys.
52
+
53
+ Raises ValueError if any key exists in both, with conflicting keys in message.
54
+ """
55
+ conflicting_keys = other_group_keys & self_context_keys
56
+ if conflicting_keys:
57
+ raise ValueError(f"Cannot update group: keys already exist in context: {conflicting_keys}")
@@ -1,23 +1,27 @@
1
1
  from abc import ABC
2
2
  from typing import Any, List, Optional, Set, Type, Union, final
3
3
  from uuid import UUID, uuid4
4
- from mloda_core.abstract_plugins.components.data_access_collection import DataAccessCollection
5
- from mloda_core.abstract_plugins.components.framework_transformer.cfw_transformer import (
4
+ from mloda.core.abstract_plugins.components.data_access_collection import DataAccessCollection
5
+ from mloda.core.abstract_plugins.components.framework_transformer.cfw_transformer import (
6
6
  ComputeFrameworkTransformer,
7
7
  )
8
- from mloda_core.abstract_plugins.components.merge.base_merge_engine import BaseMergeEngine
8
+ from mloda.core.abstract_plugins.components.merge.base_merge_engine import BaseMergeEngine
9
9
  import pyarrow as pa
10
10
 
11
- from mloda_core.abstract_plugins.function_extender import WrapperFunctionExtender, WrapperFunctionEnum
12
- from mloda_core.abstract_plugins.components.feature_name import FeatureName
13
- from mloda_core.abstract_plugins.components.parallelization_modes import ParallelizationModes
14
- from mloda_core.filter.filter_engine import BaseFilterEngine
15
- from mloda_core.runtime.flight.flight_server import FlightServer
11
+ from mloda.core.abstract_plugins.function_extender import (
12
+ Extender,
13
+ ExtenderHook,
14
+ _CompositeExtender,
15
+ )
16
+ from mloda.core.abstract_plugins.components.feature_name import FeatureName
17
+ from mloda.core.abstract_plugins.components.parallelization_modes import ParallelizationMode
18
+ from mloda.core.filter.filter_engine import BaseFilterEngine
19
+ from mloda.core.runtime.flight.flight_server import FlightServer
16
20
 
17
21
 
18
- class ComputeFrameWork(ABC):
22
+ class ComputeFramework(ABC):
19
23
  """
20
- Documentation ComputeFrameWork:
24
+ Documentation ComputeFramework:
21
25
 
22
26
  This class is used to define the compute framework.
23
27
 
@@ -41,10 +45,10 @@ class ComputeFrameWork(ABC):
41
45
 
42
46
  def __init__(
43
47
  self,
44
- mode: ParallelizationModes,
48
+ mode: ParallelizationMode,
45
49
  children_if_root: frozenset[UUID],
46
50
  uuid: UUID = uuid4(),
47
- function_extender: Optional[Set[WrapperFunctionExtender]] = None,
51
+ function_extender: Optional[Set[Extender]] = None,
48
52
  ) -> None:
49
53
  """This class is initialized step execution."""
50
54
  self.mode = mode
@@ -64,18 +68,19 @@ class ComputeFrameWork(ABC):
64
68
  # connection object for frameworks that need persistent connections (e.g., DuckDB, Spark)
65
69
  self.framework_connection_object: Optional[Any] = None
66
70
 
67
@classmethod
def expected_data_framework(cls) -> Any:
    """
    Return the data framework (data type) this compute framework expects.

    Setting it is only necessary when the data type should be checked for
    correctness; this implementation returns None.
    """
    return None
74
78
 
75
@classmethod
def filter_engine(cls) -> Type[BaseFilterEngine]:
    """
    Return the filter engine class used by this compute framework.

    This implementation always raises; the appropriate ComputeFramework
    should overwrite it with a BaseFilterEngine subclass if needed.

    Raises:
        NotImplementedError: Always, in this implementation.
    """
    raise NotImplementedError
81
86
 
@@ -122,16 +127,15 @@ class ComputeFrameWork(ABC):
122
127
  """
123
128
  return data
124
129
 
125
@classmethod
def merge_engine(cls) -> Type[BaseMergeEngine]:
    """
    Return the BaseMergeEngine subclass used to merge data of this compute framework.

    With a merge engine, data from the same compute framework can be merged.
    Providing one is optional; this implementation always raises.

    Raises:
        NotImplementedError: Always, in this implementation. The message names the class.
    """
    message = f"Merge functionality is for this compute framework not implemented {cls.__name__}."
    raise NotImplementedError(message)
135
139
 
136
140
  def set_framework_connection_object(self, framework_connection_object: Optional[Any] = None) -> None:
137
141
  """
@@ -241,7 +245,7 @@ class ComputeFrameWork(ABC):
241
245
  if self.data is None:
242
246
  return
243
247
 
244
- extender = self.get_function_extender(WrapperFunctionEnum.VALIDATE_INPUT_FEATURE)
248
+ extender = self.get_function_extender(ExtenderHook.VALIDATE_INPUT_FEATURE)
245
249
  if extender is None:
246
250
  result = feature_group.validate_input_features(self.data, features)
247
251
  else:
@@ -255,7 +259,12 @@ class ComputeFrameWork(ABC):
255
259
  def run_validate_output_features(self, feature_group: Any, features: Any) -> Any:
256
260
  if self.data is None:
257
261
  return
258
- extender = self.get_function_extender(WrapperFunctionEnum.VALIDATE_OUTPUT_FEATURE)
262
+
263
+ from mloda.core.abstract_plugins.components.validators.datatype_validator import DataTypeValidator
264
+
265
+ DataTypeValidator.validate(self.data, features, strict_only=True)
266
+
267
+ extender = self.get_function_extender(ExtenderHook.VALIDATE_OUTPUT_FEATURE)
259
268
  if extender is None:
260
269
  result = feature_group.validate_output_features(self.data, features)
261
270
  else:
@@ -276,7 +285,7 @@ class ComputeFrameWork(ABC):
276
285
 
277
286
@final
def __eq__(self, other: object) -> bool:
    """
    Compare two compute frameworks.

    Two instances are equal when ``other`` is a ComputeFramework, both report
    the same class name and both have the same ``children_if_root``.
    Any other object compares unequal.
    """
    if isinstance(other, ComputeFramework):
        same_class_name = self.get_class_name() == other.get_class_name()
        return same_class_name and self.children_if_root == other.children_if_root
    return False
282
291
 
@@ -323,23 +332,23 @@ class ComputeFrameWork(ABC):
323
332
  return False
324
333
 
325
334
@final
def get_function_extender(self, wrapper_function_enum: ExtenderHook) -> Optional[Extender]:
    """
    Return the extender responsible for the given hook, if any.

    Args:
        wrapper_function_enum: Hook to look up. An extender matches when the
            hook is contained in the result of its ``wraps()``.

    Returns:
        None when no extender matches, the extender itself when exactly one
        matches, otherwise a ``_CompositeExtender`` built from all matches
        ordered by ascending ``priority``.
    """
    matching_extenders = [
        extender for extender in self.function_extender if wrapper_function_enum in extender.wraps()
    ]

    if not matching_extenders:
        return None
    if len(matching_extenders) == 1:
        return matching_extenders[0]

    # sorted() is stable, so extenders sharing a priority would keep the iteration
    # order of self.function_extender. That is presumably a set (the constructor
    # parameter is Optional[Set[Extender]]), whose order is unspecified. The class
    # name is a tie-breaker so the composite order is reproducible.
    sorted_extenders = sorted(matching_extenders, key=lambda e: (e.priority, type(e).__name__))
    return _CompositeExtender(sorted_extenders, wrapper_function_enum)
339
348
 
340
349
  @final
341
350
  def run_calculate_feature(self, feature_group: Any, features: Any) -> Any:
342
- extender = self.get_function_extender(WrapperFunctionEnum.FEATURE_GROUP_CALCULATE_FEATURE)
351
+ extender = self.get_function_extender(ExtenderHook.FEATURE_GROUP_CALCULATE_FEATURE)
343
352
 
344
353
  try:
345
354
  if extender is None:
@@ -365,8 +374,8 @@ When a feature depends on multiple input features, you must provide explicit Lin
365
374
  how to merge them. Without Links, the framework cannot determine how to combine the data.
366
375
 
367
376
  Example:
368
- from mloda_core.abstract_plugins.components.link import Link
369
- from mloda_core.abstract_plugins.components.index.index import Index
377
+ from mloda.core.abstract_plugins.components.link import Link
378
+ from mloda.core.abstract_plugins.components.index.index import Index
370
379
 
371
380
  links = {{
372
381
  Link.inner(