mloda 0.3.3__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. mloda/__init__.py +17 -0
  2. {mloda_core → mloda/core}/abstract_plugins/components/base_artifact.py +2 -2
  3. {mloda_core → mloda/core}/abstract_plugins/components/base_validator.py +13 -0
  4. {mloda_core → mloda/core}/abstract_plugins/components/data_access_collection.py +1 -1
  5. {mloda_core → mloda/core}/abstract_plugins/components/data_types.py +39 -0
  6. {mloda_core → mloda/core}/abstract_plugins/components/feature.py +39 -33
  7. {mloda_core → mloda/core}/abstract_plugins/components/feature_chainer/feature_chain_parser.py +19 -19
  8. mloda/core/abstract_plugins/components/feature_chainer/feature_chain_parser_mixin.py +197 -0
  9. {mloda_core → mloda/core}/abstract_plugins/components/feature_collection.py +6 -6
  10. {mloda_core → mloda/core}/abstract_plugins/components/feature_group_version.py +8 -8
  11. {mloda_core → mloda/core}/abstract_plugins/components/feature_set.py +18 -24
  12. {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/cfw_transformer.py +2 -2
  13. {mloda_core → mloda/core}/abstract_plugins/components/index/add_index_feature.py +4 -4
  14. {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/api_input_data.py +3 -3
  15. {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/api_input_data_collection.py +2 -2
  16. {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/base_api_data.py +1 -1
  17. {mloda_core → mloda/core}/abstract_plugins/components/input_data/base_input_data.py +6 -6
  18. {mloda_core → mloda/core}/abstract_plugins/components/input_data/creator/data_creator.py +3 -3
  19. mloda/core/abstract_plugins/components/link.py +437 -0
  20. {mloda_core → mloda/core}/abstract_plugins/components/match_data/match_data.py +3 -3
  21. {mloda_core → mloda/core}/abstract_plugins/components/merge/base_merge_engine.py +2 -2
  22. {mloda_core → mloda/core}/abstract_plugins/components/options.py +12 -36
  23. {mloda_core → mloda/core}/abstract_plugins/components/parallelization_modes.py +1 -1
  24. {mloda_core → mloda/core}/abstract_plugins/components/plugin_option/plugin_collector.py +14 -14
  25. mloda/core/abstract_plugins/components/validators/datatype_validator.py +96 -0
  26. mloda/core/abstract_plugins/components/validators/feature_set_validator.py +38 -0
  27. mloda/core/abstract_plugins/components/validators/feature_validator.py +23 -0
  28. mloda/core/abstract_plugins/components/validators/link_validator.py +79 -0
  29. mloda/core/abstract_plugins/components/validators/options_validator.py +57 -0
  30. mloda_core/abstract_plugins/compute_frame_work.py → mloda/core/abstract_plugins/compute_framework.py +46 -37
  31. mloda_core/abstract_plugins/abstract_feature_group.py → mloda/core/abstract_plugins/feature_group.py +56 -33
  32. mloda/core/abstract_plugins/function_extender.py +78 -0
  33. mloda/core/api/plugin_docs.py +220 -0
  34. mloda/core/api/plugin_info.py +32 -0
  35. {mloda_core → mloda/core}/api/prepare/setup_compute_framework.py +11 -11
  36. {mloda_core → mloda/core}/api/request.py +42 -33
  37. {mloda_core → mloda/core}/core/cfw_manager.py +8 -8
  38. {mloda_core → mloda/core}/core/engine.py +47 -46
  39. {mloda_core → mloda/core}/core/step/abstract_step.py +7 -7
  40. {mloda_core → mloda/core}/core/step/feature_group_step.py +12 -12
  41. {mloda_core → mloda/core}/core/step/join_step.py +14 -14
  42. {mloda_core → mloda/core}/core/step/transform_frame_work_step.py +16 -16
  43. {mloda_core → mloda/core}/filter/filter_engine.py +1 -1
  44. {mloda_core → mloda/core}/filter/filter_type_enum.py +1 -1
  45. {mloda_core → mloda/core}/filter/global_filter.py +23 -23
  46. {mloda_core → mloda/core}/filter/single_filter.py +6 -6
  47. {mloda_core → mloda/core}/prepare/accessible_plugins.py +16 -18
  48. {mloda_core → mloda/core}/prepare/execution_plan.py +65 -39
  49. {mloda_core → mloda/core}/prepare/graph/build_graph.py +6 -6
  50. {mloda_core → mloda/core}/prepare/graph/graph.py +1 -1
  51. {mloda_core → mloda/core}/prepare/graph/properties.py +5 -5
  52. {mloda_core → mloda/core}/prepare/identify_feature_group.py +12 -14
  53. {mloda_core → mloda/core}/prepare/joinstep_collection.py +3 -3
  54. {mloda_core → mloda/core}/prepare/resolve_compute_frameworks.py +6 -6
  55. {mloda_core → mloda/core}/prepare/resolve_graph.py +11 -11
  56. {mloda_core → mloda/core}/prepare/resolve_links.py +11 -31
  57. mloda/core/prepare/validators/resolve_link_validator.py +32 -0
  58. mloda/core/runtime/compute_framework_executor.py +271 -0
  59. mloda/core/runtime/data_lifecycle_manager.py +160 -0
  60. mloda/core/runtime/flight/__init__.py +0 -0
  61. {mloda_core → mloda/core}/runtime/flight/runner_flight_server.py +1 -1
  62. mloda/core/runtime/run.py +317 -0
  63. mloda/core/runtime/worker/__init__.py +0 -0
  64. {mloda_core → mloda/core}/runtime/worker/multiprocessing_worker.py +15 -10
  65. {mloda_core → mloda/core}/runtime/worker/thread_worker.py +2 -2
  66. mloda/core/runtime/worker_manager.py +96 -0
  67. mloda/provider/__init__.py +101 -0
  68. mloda/steward/__init__.py +25 -0
  69. mloda/user/__init__.py +57 -0
  70. {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/METADATA +18 -22
  71. mloda-0.4.0.dist-info/RECORD +248 -0
  72. {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/top_level.txt +1 -1
  73. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_filter_engine.py +2 -2
  74. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_framework.py +15 -13
  75. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_merge_engine.py +3 -3
  76. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_pyarrow_transformer.py +1 -1
  77. mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_filter_engine.py +2 -2
  78. mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_framework.py +12 -10
  79. mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_pyarrow_transformer.py +1 -1
  80. mloda_plugins/compute_framework/base_implementations/pandas/dataframe.py +18 -16
  81. mloda_plugins/compute_framework/base_implementations/pandas/pandas_filter_engine.py +36 -13
  82. mloda_plugins/compute_framework/base_implementations/pandas/pandas_merge_engine.py +7 -7
  83. mloda_plugins/compute_framework/base_implementations/pandas/pandaspyarrowtransformer.py +1 -1
  84. mloda_plugins/compute_framework/base_implementations/polars/dataframe.py +16 -14
  85. mloda_plugins/compute_framework/base_implementations/polars/lazy_dataframe.py +13 -12
  86. mloda_plugins/compute_framework/base_implementations/polars/polars_filter_engine.py +2 -2
  87. mloda_plugins/compute_framework/base_implementations/polars/polars_lazy_pyarrow_transformer.py +1 -1
  88. mloda_plugins/compute_framework/base_implementations/polars/polars_merge_engine.py +3 -3
  89. mloda_plugins/compute_framework/base_implementations/polars/polars_pyarrow_transformer.py +1 -1
  90. mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_filter_engine.py +2 -2
  91. mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_merge_engine.py +3 -3
  92. mloda_plugins/compute_framework/base_implementations/pyarrow/table.py +12 -10
  93. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_filter_engine.py +2 -2
  94. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_framework.py +11 -9
  95. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_merge_engine.py +3 -3
  96. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_pyarrow_transformer.py +1 -1
  97. mloda_plugins/compute_framework/base_implementations/spark/spark_filter_engine.py +2 -2
  98. mloda_plugins/compute_framework/base_implementations/spark/spark_framework.py +17 -15
  99. mloda_plugins/compute_framework/base_implementations/spark/spark_merge_engine.py +3 -3
  100. mloda_plugins/compute_framework/base_implementations/spark/spark_pyarrow_transformer.py +1 -1
  101. mloda_plugins/config/feature/loader.py +2 -2
  102. mloda_plugins/feature_group/experimental/aggregated_feature_group/base.py +45 -62
  103. mloda_plugins/feature_group/experimental/aggregated_feature_group/pandas.py +2 -2
  104. mloda_plugins/feature_group/experimental/aggregated_feature_group/polars_lazy.py +2 -2
  105. mloda_plugins/feature_group/experimental/aggregated_feature_group/pyarrow.py +2 -2
  106. mloda_plugins/feature_group/experimental/clustering/base.py +69 -97
  107. mloda_plugins/feature_group/experimental/clustering/pandas.py +2 -2
  108. mloda_plugins/feature_group/experimental/data_quality/missing_value/base.py +58 -79
  109. mloda_plugins/feature_group/experimental/data_quality/missing_value/pandas.py +2 -2
  110. mloda_plugins/feature_group/experimental/data_quality/missing_value/pyarrow.py +2 -2
  111. mloda_plugins/feature_group/experimental/data_quality/missing_value/python_dict.py +2 -2
  112. mloda_plugins/feature_group/experimental/default_options_key.py +16 -19
  113. mloda_plugins/feature_group/experimental/dimensionality_reduction/base.py +80 -94
  114. mloda_plugins/feature_group/experimental/dimensionality_reduction/pandas.py +2 -2
  115. mloda_plugins/feature_group/experimental/dynamic_feature_group_factory/dynamic_feature_group_factory.py +24 -24
  116. mloda_plugins/feature_group/experimental/forecasting/base.py +106 -104
  117. mloda_plugins/feature_group/experimental/forecasting/forecasting_artifact.py +2 -2
  118. mloda_plugins/feature_group/experimental/forecasting/pandas.py +15 -15
  119. mloda_plugins/feature_group/experimental/geo_distance/base.py +50 -42
  120. mloda_plugins/feature_group/experimental/geo_distance/pandas.py +2 -2
  121. mloda_plugins/feature_group/experimental/llm/cli.py +4 -4
  122. mloda_plugins/feature_group/experimental/llm/cli_features/refactor_git_cached.py +19 -19
  123. mloda_plugins/feature_group/experimental/llm/installed_packages_feature_group.py +8 -8
  124. mloda_plugins/feature_group/experimental/llm/list_directory_feature_group.py +5 -5
  125. mloda_plugins/feature_group/experimental/llm/llm_api/claude.py +3 -3
  126. mloda_plugins/feature_group/experimental/llm/llm_api/gemini.py +3 -3
  127. mloda_plugins/feature_group/experimental/llm/llm_api/llm_base_request.py +5 -5
  128. mloda_plugins/feature_group/experimental/llm/llm_api/openai.py +3 -3
  129. mloda_plugins/feature_group/experimental/llm/llm_api/request_loop.py +6 -6
  130. mloda_plugins/feature_group/experimental/llm/llm_file_selector.py +10 -10
  131. mloda_plugins/feature_group/experimental/llm/tools/tool_collection.py +1 -1
  132. mloda_plugins/feature_group/experimental/node_centrality/base.py +46 -72
  133. mloda_plugins/feature_group/experimental/node_centrality/pandas.py +2 -2
  134. mloda_plugins/feature_group/experimental/sklearn/encoding/base.py +51 -51
  135. mloda_plugins/feature_group/experimental/sklearn/encoding/pandas.py +2 -2
  136. mloda_plugins/feature_group/experimental/sklearn/pipeline/base.py +52 -39
  137. mloda_plugins/feature_group/experimental/sklearn/pipeline/pandas.py +2 -2
  138. mloda_plugins/feature_group/experimental/sklearn/scaling/base.py +44 -58
  139. mloda_plugins/feature_group/experimental/sklearn/scaling/pandas.py +2 -2
  140. mloda_plugins/feature_group/experimental/sklearn/sklearn_artifact.py +2 -2
  141. mloda_plugins/feature_group/experimental/source_input_feature.py +15 -15
  142. mloda_plugins/feature_group/experimental/text_cleaning/base.py +38 -61
  143. mloda_plugins/feature_group/experimental/text_cleaning/pandas.py +2 -2
  144. mloda_plugins/feature_group/experimental/text_cleaning/python_dict.py +2 -2
  145. mloda_plugins/feature_group/experimental/time_window/base.py +106 -93
  146. mloda_plugins/feature_group/experimental/time_window/pandas.py +13 -13
  147. mloda_plugins/feature_group/experimental/time_window/pyarrow.py +12 -12
  148. mloda_plugins/feature_group/input_data/api_data/api_data.py +9 -11
  149. mloda_plugins/feature_group/input_data/read_context_files.py +7 -7
  150. mloda_plugins/feature_group/input_data/read_db.py +7 -9
  151. mloda_plugins/feature_group/input_data/read_db_feature.py +4 -4
  152. mloda_plugins/feature_group/input_data/read_dbs/sqlite.py +23 -13
  153. mloda_plugins/feature_group/input_data/read_file.py +8 -8
  154. mloda_plugins/feature_group/input_data/read_file_feature.py +4 -4
  155. mloda_plugins/feature_group/input_data/read_files/csv.py +6 -6
  156. mloda_plugins/feature_group/input_data/read_files/feather.py +5 -5
  157. mloda_plugins/feature_group/input_data/read_files/json.py +5 -5
  158. mloda_plugins/feature_group/input_data/read_files/orc.py +5 -5
  159. mloda_plugins/feature_group/input_data/read_files/parquet.py +5 -5
  160. mloda_plugins/feature_group/input_data/read_files/text_file_reader.py +5 -5
  161. mloda_plugins/function_extender/base_implementations/otel/otel_extender.py +4 -4
  162. mloda-0.3.3.dist-info/RECORD +0 -230
  163. mloda_core/abstract_plugins/components/link.py +0 -286
  164. mloda_core/abstract_plugins/function_extender.py +0 -34
  165. mloda_core/runtime/run.py +0 -617
  166. {mloda_core → mloda/core}/__init__.py +0 -0
  167. {mloda_core → mloda/core}/abstract_plugins/__init__.py +0 -0
  168. {mloda_core → mloda/core}/abstract_plugins/components/__init__.py +0 -0
  169. {mloda_core → mloda/core}/abstract_plugins/components/domain.py +0 -0
  170. {mloda_core → mloda/core}/abstract_plugins/components/feature_chainer/__init__.py +0 -0
  171. {mloda_core → mloda/core}/abstract_plugins/components/feature_name.py +0 -0
  172. {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/__init__.py +0 -0
  173. {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/base_transformer.py +0 -0
  174. {mloda_core → mloda/core}/abstract_plugins/components/hashable_dict.py +0 -0
  175. {mloda_core → mloda/core}/abstract_plugins/components/index/__init__.py +0 -0
  176. {mloda_core → mloda/core}/abstract_plugins/components/index/index.py +0 -0
  177. {mloda_core → mloda/core}/abstract_plugins/components/input_data/__init__.py +0 -0
  178. {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/__init__.py +0 -0
  179. {mloda_core → mloda/core}/abstract_plugins/components/input_data/creator/__init__.py +0 -0
  180. {mloda_core → mloda/core}/abstract_plugins/components/match_data/__init__.py +0 -0
  181. {mloda_core → mloda/core}/abstract_plugins/components/merge/__init__.py +0 -0
  182. {mloda_core → mloda/core}/abstract_plugins/components/plugin_option/__init__.py +0 -0
  183. {mloda_core → mloda/core}/abstract_plugins/components/utils.py +0 -0
  184. {mloda_core/abstract_plugins/plugin_loader → mloda/core/abstract_plugins/components/validators}/__init__.py +0 -0
  185. {mloda_core/api → mloda/core/abstract_plugins/plugin_loader}/__init__.py +0 -0
  186. {mloda_core → mloda/core}/abstract_plugins/plugin_loader/plugin_loader.py +0 -0
  187. {mloda_core/api/prepare → mloda/core/api}/__init__.py +0 -0
  188. {mloda_core/core → mloda/core/api/prepare}/__init__.py +0 -0
  189. {mloda_core/core/step → mloda/core/core}/__init__.py +0 -0
  190. {mloda_core/filter → mloda/core/core/step}/__init__.py +0 -0
  191. {mloda_core/prepare → mloda/core/filter}/__init__.py +0 -0
  192. {mloda_core → mloda/core}/filter/filter_parameter.py +0 -0
  193. {mloda_core/prepare/graph → mloda/core/prepare}/__init__.py +0 -0
  194. {mloda_core/runtime → mloda/core/prepare/graph}/__init__.py +0 -0
  195. {mloda_core/runtime/flight → mloda/core/prepare/validators}/__init__.py +0 -0
  196. {mloda_core/runtime/worker → mloda/core/runtime}/__init__.py +0 -0
  197. {mloda_core → mloda/core}/runtime/flight/flight_server.py +0 -0
  198. {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/WHEEL +0 -0
  199. {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/entry_points.txt +0 -0
  200. {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/licenses/LICENSE.TXT +0 -0
  201. {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/licenses/NOTICE.md +0 -0
@@ -1,7 +1,7 @@
1
1
  from typing import Any, Set, Tuple
2
- from mloda_core.abstract_plugins.components.merge.base_merge_engine import BaseMergeEngine
3
- from mloda_core.abstract_plugins.components.index.index import Index
4
- from mloda_core.abstract_plugins.components.link import JoinType
2
+ from mloda.provider import BaseMergeEngine
3
+ from mloda.user import Index
4
+ from mloda.user import JoinType
5
5
 
6
6
 
7
7
  class PythonDictMergeEngine(BaseMergeEngine):
@@ -1,6 +1,6 @@
1
1
  from typing import Any, Optional
2
2
 
3
- from mloda_core.abstract_plugins.components.framework_transformer.base_transformer import BaseTransformer
3
+ from mloda.provider import BaseTransformer
4
4
 
5
5
  try:
6
6
  import pyarrow as pa
@@ -1,6 +1,6 @@
1
1
  from typing import Any
2
- from mloda_core.filter.filter_engine import BaseFilterEngine
3
- from mloda_core.filter.single_filter import SingleFilter
2
+ from mloda.provider import BaseFilterEngine
3
+ from mloda.user import SingleFilter
4
4
 
5
5
  try:
6
6
  from pyspark.sql import DataFrame
@@ -1,10 +1,10 @@
1
1
  import logging
2
2
  from typing import Any, Set, Type, Optional
3
- from mloda_core.abstract_plugins.components.merge.base_merge_engine import BaseMergeEngine
3
+ from mloda.provider import BaseMergeEngine
4
4
  from mloda_plugins.compute_framework.base_implementations.spark.spark_merge_engine import SparkMergeEngine
5
- from mloda_core.abstract_plugins.components.feature_name import FeatureName
6
- from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
7
- from mloda_core.filter.filter_engine import BaseFilterEngine
5
+ from mloda.user import FeatureName
6
+ from mloda import ComputeFramework
7
+ from mloda.provider import BaseFilterEngine
8
8
  from mloda_plugins.compute_framework.base_implementations.spark.spark_filter_engine import SparkFilterEngine
9
9
 
10
10
  try:
@@ -25,8 +25,8 @@ except ImportError:
25
25
  logger = logging.getLogger(__name__)
26
26
 
27
27
 
28
- class SparkFramework(ComputeFrameWork):
29
- """Spark framework implementation for ComputeFrameWork.
28
+ class SparkFramework(ComputeFramework):
29
+ """Spark framework implementation for ComputeFramework.
30
30
 
31
31
  This framework leverages Apache Spark for distributed data processing.
32
32
  It requires a SparkSession to be provided through the framework connection object.
@@ -62,11 +62,12 @@ class SparkFramework(ComputeFrameWork):
62
62
  except ImportError:
63
63
  return False
64
64
 
65
- @staticmethod
66
- def expected_data_framework() -> Any:
67
- return SparkFramework.spark_dataframe()
65
+ @classmethod
66
+ def expected_data_framework(cls) -> Any:
67
+ return cls.spark_dataframe()
68
68
 
69
- def merge_engine(self) -> Type[BaseMergeEngine]:
69
+ @classmethod
70
+ def merge_engine(cls) -> Type[BaseMergeEngine]:
70
71
  return SparkMergeEngine
71
72
 
72
73
  def select_data_by_column_names(self, data: Any, selected_feature_names: Set[FeatureName]) -> Any:
@@ -78,14 +79,14 @@ class SparkFramework(ComputeFrameWork):
78
79
  if self.data is not None:
79
80
  self.column_names = set(self.data.columns)
80
81
 
81
- @staticmethod
82
- def spark_dataframe() -> Any:
82
+ @classmethod
83
+ def spark_dataframe(cls) -> Any:
83
84
  if DataFrame is None:
84
85
  raise ImportError("PySpark is not installed. To be able to use this framework, please install pyspark.")
85
86
  return DataFrame
86
87
 
87
- @staticmethod
88
- def spark_session() -> Any:
88
+ @classmethod
89
+ def spark_session(cls) -> Any:
89
90
  if SparkSession is None:
90
91
  raise ImportError("PySpark is not installed. To be able to use this framework, please install pyspark.")
91
92
  return SparkSession
@@ -194,5 +195,6 @@ class SparkFramework(ComputeFrameWork):
194
195
 
195
196
  raise ValueError(f"Data {type(data)} is not supported by {self.__class__.__name__}")
196
197
 
197
- def filter_engine(self) -> Type[BaseFilterEngine]:
198
+ @classmethod
199
+ def filter_engine(cls) -> Type[BaseFilterEngine]:
198
200
  return SparkFilterEngine
@@ -1,8 +1,8 @@
1
1
  from typing import Any, Tuple
2
2
 
3
- from mloda_core.abstract_plugins.components.index.index import Index
4
- from mloda_core.abstract_plugins.components.link import JoinType
5
- from mloda_core.abstract_plugins.components.merge.base_merge_engine import BaseMergeEngine
3
+ from mloda.user import Index
4
+ from mloda.user import JoinType
5
+ from mloda.provider import BaseMergeEngine
6
6
 
7
7
  try:
8
8
  from pyspark.sql import DataFrame
@@ -1,6 +1,6 @@
1
1
  from typing import Any, Optional
2
2
 
3
- from mloda_core.abstract_plugins.components.framework_transformer.base_transformer import BaseTransformer
3
+ from mloda.provider import BaseTransformer
4
4
 
5
5
  try:
6
6
  from pyspark.sql import DataFrame, SparkSession
@@ -6,8 +6,8 @@ to mloda Feature instances.
6
6
  """
7
7
 
8
8
  from typing import List, Union, Dict, Any
9
- from mloda_core.abstract_plugins.components.feature import Feature
10
- from mloda_core.abstract_plugins.components.options import Options
9
+ from mloda import Feature
10
+ from mloda import Options
11
11
  from mloda_plugins.config.feature.parser import parse_json
12
12
  from mloda_plugins.config.feature.models import FeatureConfig
13
13
  from mloda_plugins.feature_group.experimental.default_options_key import DefaultOptionKeys
@@ -7,16 +7,19 @@ from __future__ import annotations
7
7
  from abc import abstractmethod
8
8
  from typing import Any, List, Optional, Set, Union
9
9
 
10
- from mloda_core.abstract_plugins.abstract_feature_group import AbstractFeatureGroup
11
- from mloda_core.abstract_plugins.components.feature import Feature
12
- from mloda_core.abstract_plugins.components.feature_name import FeatureName
13
- from mloda_core.abstract_plugins.components.feature_set import FeatureSet
14
- from mloda_core.abstract_plugins.components.options import Options
15
- from mloda_core.abstract_plugins.components.feature_chainer.feature_chain_parser import FeatureChainParser
10
+ from mloda import FeatureGroup
11
+ from mloda import Feature
12
+ from mloda.user import FeatureName
13
+ from mloda.provider import FeatureSet
14
+ from mloda import Options
15
+ from mloda.provider import FeatureChainParser
16
+ from mloda.provider import (
17
+ FeatureChainParserMixin,
18
+ )
16
19
  from mloda_plugins.feature_group.experimental.default_options_key import DefaultOptionKeys
17
20
 
18
21
 
19
- class AggregatedFeatureGroup(AbstractFeatureGroup):
22
+ class AggregatedFeatureGroup(FeatureChainParserMixin, FeatureGroup):
20
23
  """
21
24
  Base class for all aggregated feature groups.
22
25
 
@@ -99,38 +102,24 @@ class AggregatedFeatureGroup(AbstractFeatureGroup):
99
102
 
100
103
  PREFIX_PATTERN = r".*__([\w]+)_aggr$"
101
104
 
105
+ # In-feature configuration for FeatureChainParserMixin
106
+ MIN_IN_FEATURES = 1
107
+ MAX_IN_FEATURES = 1
108
+
102
109
  # Property mapping for configuration-based feature creation
103
110
  PROPERTY_MAPPING = {
104
111
  AGGREGATION_TYPE: {
105
112
  **AGGREGATION_TYPES, # All supported aggregation types as valid values
106
- DefaultOptionKeys.mloda_context: True, # Mark as context parameter
107
- DefaultOptionKeys.mloda_strict_validation: True, # Enable strict validation
113
+ DefaultOptionKeys.context: True, # Mark as context parameter
114
+ DefaultOptionKeys.strict_validation: True, # Enable strict validation
108
115
  },
109
116
  DefaultOptionKeys.in_features: {
110
117
  "explanation": "Source feature to aggregate",
111
- DefaultOptionKeys.mloda_context: True, # Mark as context parameter
112
- DefaultOptionKeys.mloda_strict_validation: False, # Flexible validation
118
+ DefaultOptionKeys.context: True, # Mark as context parameter
119
+ DefaultOptionKeys.strict_validation: False, # Flexible validation
113
120
  },
114
121
  }
115
122
 
116
- def input_features(self, options: Options, feature_name: FeatureName) -> Optional[Set[Feature]]:
117
- """Extract source feature from either configuration-based options or string parsing."""
118
-
119
- source_feature: str | None = None
120
-
121
- # string based
122
- _, source_feature = FeatureChainParser.parse_feature_name(feature_name, [self.PREFIX_PATTERN])
123
- if source_feature is not None:
124
- return {Feature(source_feature)}
125
-
126
- # configuration based
127
- source_features = options.get_in_features()
128
- if len(source_features) != 1:
129
- raise ValueError(
130
- f"Expected exactly one source feature, but found {len(source_features)}: {source_features}"
131
- )
132
- return set(source_features)
133
-
134
123
  @classmethod
135
124
  def get_aggregation_type(cls, feature_name: str) -> str:
136
125
  """Extract the aggregation type from the feature name."""
@@ -140,21 +129,26 @@ class AggregatedFeatureGroup(AbstractFeatureGroup):
140
129
  return prefix_part
141
130
 
142
131
  @classmethod
143
- def match_feature_group_criteria(
144
- cls,
145
- feature_name: Union[FeatureName, str],
146
- options: Options,
147
- data_access_collection: Optional[Any] = None,
148
- ) -> bool:
149
- """Check if feature name matches the expected pattern and aggregation type."""
150
-
151
- # Use the unified parser with property mapping for full configuration support
152
- return FeatureChainParser.match_configuration_feature_chain_parser(
153
- feature_name,
154
- options,
155
- property_mapping=cls.PROPERTY_MAPPING,
156
- prefix_patterns=[cls.PREFIX_PATTERN],
157
- )
132
+ def _extract_aggregation_type(cls, feature: Feature) -> Optional[str]:
133
+ """
134
+ Extract aggregation type from a feature.
135
+
136
+ Tries string-based parsing first, falls back to configuration.
137
+
138
+ Args:
139
+ feature: The feature to extract aggregation type from
140
+
141
+ Returns:
142
+ The aggregation type, or None if not found
143
+ """
144
+ # Try string-based parsing first
145
+ aggregation_type, _ = FeatureChainParser.parse_feature_name(feature.name, [cls.PREFIX_PATTERN])
146
+ if aggregation_type is not None:
147
+ return aggregation_type
148
+
149
+ # Fall back to configuration
150
+ aggregation_type = feature.options.get(cls.AGGREGATION_TYPE)
151
+ return str(aggregation_type) if aggregation_type is not None else None
158
152
 
159
153
  @classmethod
160
154
  def _extract_aggr_and_source_feature(cls, feature: Feature) -> tuple[str, str]:
@@ -172,27 +166,16 @@ class AggregatedFeatureGroup(AbstractFeatureGroup):
172
166
  Raises:
173
167
  ValueError: If parameters cannot be extracted
174
168
  """
175
- aggregation_type = None
176
- source_feature_name: str | None = None
169
+ # Use the mixin method to extract source features
170
+ source_features = cls._extract_source_features(feature)
177
171
 
178
- # string based
179
- aggregation_type, source_feature_name = FeatureChainParser.parse_feature_name(
180
- feature.name, [cls.PREFIX_PATTERN]
181
- )
182
- if aggregation_type is not None and source_feature_name is not None:
183
- return aggregation_type, source_feature_name
184
-
185
- # configuration based
186
- source_features = feature.options.get_in_features()
187
- source_feature = next(iter(source_features))
188
- source_feature_name = source_feature.get_name()
189
-
190
- aggregation_type = feature.options.get(cls.AGGREGATION_TYPE)
172
+ # Extract aggregation type
173
+ aggregation_type = cls._extract_aggregation_type(feature)
191
174
 
192
- if aggregation_type is None or source_feature_name is None:
193
- raise ValueError(f"Could not extract aggregation type and source feature from: {feature.name}")
175
+ if aggregation_type is None:
176
+ raise ValueError(f"Could not extract aggregation type from: {feature.name}")
194
177
 
195
- return aggregation_type, source_feature_name
178
+ return aggregation_type, source_features[0]
196
179
 
197
180
  @classmethod
198
181
  def _supports_aggregation_type(cls, aggregation_type: str) -> bool:
@@ -6,7 +6,7 @@ from __future__ import annotations
6
6
 
7
7
  from typing import Any, List, Set, Type, Union
8
8
 
9
- from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
9
+ from mloda import ComputeFramework
10
10
 
11
11
  from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
12
12
  from mloda_plugins.feature_group.experimental.aggregated_feature_group.base import AggregatedFeatureGroup
@@ -14,7 +14,7 @@ from mloda_plugins.feature_group.experimental.aggregated_feature_group.base impo
14
14
 
15
15
  class PandasAggregatedFeatureGroup(AggregatedFeatureGroup):
16
16
  @classmethod
17
- def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFrameWork]]]:
17
+ def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFramework]]]:
18
18
  """Specify that this feature group works with Pandas."""
19
19
  return {PandasDataFrame}
20
20
 
@@ -6,7 +6,7 @@ from __future__ import annotations
6
6
 
7
7
  from typing import Any, List, Set, Type, Union
8
8
 
9
- from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
9
+ from mloda import ComputeFramework
10
10
 
11
11
  from mloda_plugins.compute_framework.base_implementations.polars.lazy_dataframe import PolarsLazyDataFrame
12
12
  from mloda_plugins.feature_group.experimental.aggregated_feature_group.base import AggregatedFeatureGroup
@@ -26,7 +26,7 @@ class PolarsLazyAggregatedFeatureGroup(AggregatedFeatureGroup):
26
26
  """
27
27
 
28
28
  @classmethod
29
- def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFrameWork]]]:
29
+ def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFramework]]]:
30
30
  """Specify that this feature group works with Polars Lazy DataFrames."""
31
31
  return {PolarsLazyDataFrame}
32
32
 
@@ -9,7 +9,7 @@ from typing import Any, List, Set, Type, Union
9
9
  import pyarrow as pa
10
10
  import pyarrow.compute as pc
11
11
 
12
- from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
12
+ from mloda import ComputeFramework
13
13
 
14
14
  from mloda_plugins.compute_framework.base_implementations.pyarrow.table import PyArrowTable
15
15
  from mloda_plugins.feature_group.experimental.aggregated_feature_group.base import AggregatedFeatureGroup
@@ -23,7 +23,7 @@ class PyArrowAggregatedFeatureGroup(AggregatedFeatureGroup):
23
23
  """
24
24
 
25
25
  @classmethod
26
- def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFrameWork]]]:
26
+ def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFramework]]]:
27
27
  """Specify that this feature group works with PyArrow."""
28
28
  return {PyArrowTable}
29
29
 
@@ -7,16 +7,19 @@ from __future__ import annotations
7
7
  from abc import abstractmethod
8
8
  from typing import Any, List, Optional, Set, Union
9
9
 
10
- from mloda_core.abstract_plugins.abstract_feature_group import AbstractFeatureGroup
11
- from mloda_core.abstract_plugins.components.feature import Feature
12
- from mloda_core.abstract_plugins.components.feature_chainer.feature_chain_parser import FeatureChainParser
13
- from mloda_core.abstract_plugins.components.feature_name import FeatureName
14
- from mloda_core.abstract_plugins.components.feature_set import FeatureSet
15
- from mloda_core.abstract_plugins.components.options import Options
10
+ from mloda import FeatureGroup
11
+ from mloda import Feature
12
+ from mloda.provider import FeatureChainParser
13
+ from mloda.provider import (
14
+ FeatureChainParserMixin,
15
+ )
16
+ from mloda.user import FeatureName
17
+ from mloda.provider import FeatureSet
18
+ from mloda import Options
16
19
  from mloda_plugins.feature_group.experimental.default_options_key import DefaultOptionKeys
17
20
 
18
21
 
19
- class ClusteringFeatureGroup(AbstractFeatureGroup):
22
+ class ClusteringFeatureGroup(FeatureChainParserMixin, FeatureGroup):
20
23
  # Option keys for clustering configuration
21
24
  """
22
25
  Base class for all clustering feature groups.
@@ -105,53 +108,49 @@ class ClusteringFeatureGroup(AbstractFeatureGroup):
105
108
  # Define the prefix pattern for this feature group
106
109
  PREFIX_PATTERN = r".*__cluster_([\w]+)_([\w]+)$"
107
110
 
111
+ # In-feature configuration for FeatureChainParserMixin
112
+ MIN_IN_FEATURES = 1
113
+ MAX_IN_FEATURES = None # Unlimited in_features allowed
114
+
108
115
  # Property mapping for configuration-based feature creation
109
116
  PROPERTY_MAPPING = {
110
117
  ALGORITHM: {
111
118
  **CLUSTERING_ALGORITHMS, # All supported algorithms as valid values
112
- DefaultOptionKeys.mloda_context: True, # Mark as context parameter
113
- DefaultOptionKeys.mloda_strict_validation: True, # Enable strict validation
119
+ DefaultOptionKeys.context: True, # Mark as context parameter
120
+ DefaultOptionKeys.strict_validation: True, # Enable strict validation
114
121
  },
115
122
  K_VALUE: {
116
123
  "explanation": "Number of clusters or 'auto' for automatic determination",
117
- DefaultOptionKeys.mloda_context: True, # Mark as context parameter
118
- DefaultOptionKeys.mloda_strict_validation: True, # Enable strict validation
119
- DefaultOptionKeys.mloda_validation_function: lambda value: value == "auto"
124
+ DefaultOptionKeys.context: True, # Mark as context parameter
125
+ DefaultOptionKeys.strict_validation: True, # Enable strict validation
126
+ DefaultOptionKeys.validation_function: lambda value: value == "auto"
120
127
  or (isinstance(value, (int, str)) and str(value).isdigit() and int(value) > 0),
121
128
  },
122
129
  DefaultOptionKeys.in_features: {
123
130
  "explanation": "Source features to use for clustering",
124
- DefaultOptionKeys.mloda_context: True, # Mark as context parameter
125
- DefaultOptionKeys.mloda_strict_validation: False, # Flexible validation
131
+ DefaultOptionKeys.context: True, # Mark as context parameter
132
+ DefaultOptionKeys.strict_validation: False, # Flexible validation
126
133
  },
127
134
  OUTPUT_PROBABILITIES: {
128
135
  "explanation": "Whether to output cluster probabilities/distances as separate columns using ~N suffix pattern",
129
- DefaultOptionKeys.mloda_context: True, # Mark as context parameter
130
- DefaultOptionKeys.mloda_strict_validation: False, # Flexible validation
131
- DefaultOptionKeys.mloda_default: False, # Default is False (don't output probabilities)
132
- DefaultOptionKeys.mloda_validation_function: lambda value: isinstance(value, bool),
136
+ DefaultOptionKeys.context: True, # Mark as context parameter
137
+ DefaultOptionKeys.strict_validation: False, # Flexible validation
138
+ DefaultOptionKeys.default: False, # Default is False (don't output probabilities)
139
+ DefaultOptionKeys.validation_function: lambda value: isinstance(value, bool),
133
140
  },
134
141
  }
135
142
 
136
- def input_features(self, options: Options, feature_name: FeatureName) -> Optional[Set[Feature]]:
137
- """Extract source features from either string parsing or configuration-based options."""
138
-
139
- # string based
140
- source_features_str: str | None = None
141
- _, source_features_str = FeatureChainParser.parse_feature_name(feature_name, [self.PREFIX_PATTERN])
142
-
143
- if source_features_str is not None:
144
- # Handle multiple source features (ampersand-separated)
145
- source_features = set()
146
- for feature in source_features_str.split("&"):
147
- source_features.add(Feature(feature.strip()))
148
- return source_features
149
-
150
- # configuration based
151
- source_features_frozen = options.get_in_features()
152
- if len(source_features_frozen) < 1:
153
- raise ValueError(f"Feature '{feature_name}' requires at least one source feature, but none were provided.")
154
- return set(source_features_frozen)
143
+ @classmethod
144
+ def _validate_string_match(cls, feature_name: str, operation_config: str, source_feature: str) -> bool:
145
+ """Validate clustering-specific string patterns using parse_clustering_prefix()."""
146
+ if FeatureChainParser.is_chained_feature(feature_name):
147
+ try:
148
+ # Use existing validation logic that validates algorithm and k_value
149
+ cls.parse_clustering_prefix(feature_name)
150
+ except ValueError:
151
+ # If validation fails, this feature doesn't match
152
+ return False
153
+ return True
155
154
 
156
155
  @classmethod
157
156
  def parse_clustering_prefix(cls, feature_name: str) -> tuple[str, str]:
@@ -213,41 +212,12 @@ class ClusteringFeatureGroup(AbstractFeatureGroup):
213
212
  k_value = cls.parse_clustering_prefix(feature_name)[1]
214
213
  return k_value if k_value == "auto" else int(k_value)
215
214
 
216
- @classmethod
217
- def match_feature_group_criteria(
218
- cls,
219
- feature_name: Union[FeatureName, str],
220
- options: Options,
221
- data_access_collection: Optional[Any] = None,
222
- ) -> bool:
223
- """Check if feature name matches the expected pattern for clustering features."""
224
-
225
- # Use the unified parser with property mapping for full configuration support
226
- result = FeatureChainParser.match_configuration_feature_chain_parser(
227
- feature_name,
228
- options,
229
- property_mapping=cls.PROPERTY_MAPPING,
230
- prefix_patterns=[cls.PREFIX_PATTERN],
231
- )
232
-
233
- # If it matches and it's a string-based feature, validate with our custom logic
234
- if result:
235
- feature_name_str = feature_name.name if isinstance(feature_name, FeatureName) else feature_name
236
-
237
- # Check if this is a string-based feature (contains the pattern)
238
- if FeatureChainParser.is_chained_feature(feature_name_str):
239
- try:
240
- # Use existing validation logic that validates algorithm and k_value
241
- cls.parse_clustering_prefix(feature_name_str)
242
- except ValueError:
243
- # If validation fails, this feature doesn't match
244
- return False
245
- return result
215
+ # Custom validation done via _validate_string_match() hook
246
216
 
247
217
  @classmethod
248
- def _extract_algorithm_k_value_and_source_features(cls, feature: Feature) -> tuple[str, Union[int, str], list[str]]:
218
+ def _extract_clustering_params(cls, feature: Feature) -> tuple[Optional[str], Optional[Union[int, str]]]:
249
219
  """
250
- Extract algorithm, k_value, and source features from a feature.
220
+ Extract algorithm and k_value from a feature.
251
221
 
252
222
  Tries string-based approach first, falls back to configuration-based.
253
223
 
@@ -255,47 +225,49 @@ class ClusteringFeatureGroup(AbstractFeatureGroup):
255
225
  feature: The feature to extract parameters from
256
226
 
257
227
  Returns:
258
- Tuple of (algorithm, k_value, source_features_list)
228
+ Tuple of (algorithm, k_value) or (None, None) if extraction fails
259
229
 
260
230
  Raises:
261
- ValueError: If parameters cannot be extracted
231
+ ValueError: If string-based parsing fails due to invalid format
262
232
  """
263
- algorithm = None
264
- k_value: str | int | None = None
265
- source_features = None
266
-
267
- # string based
233
+ # Try string-based parsing first
268
234
  algorithm_str, source_features_str = FeatureChainParser.parse_feature_name(feature.name, [cls.PREFIX_PATTERN])
269
235
  if algorithm_str is not None and source_features_str is not None:
270
- # Parse the algorithm and k_value from the prefix
271
236
  algorithm, k_value_str = cls.parse_clustering_prefix(feature.get_name())
237
+ k_value: Union[int, str] = "auto" if k_value_str == "auto" else int(k_value_str)
238
+ return algorithm, k_value
272
239
 
273
- # Convert k_value to appropriate type
274
- if k_value_str == "auto":
275
- k_value = "auto"
276
- else:
277
- k_value = int(k_value_str)
240
+ # Fall back to configuration-based
241
+ algorithm = feature.options.get(cls.ALGORITHM)
242
+ k_value_raw = feature.options.get(cls.K_VALUE)
278
243
 
279
- # Parse source features (ampersand-separated)
280
- source_features = [feature.strip() for feature in source_features_str.split("&")]
244
+ if k_value_raw is None:
245
+ return algorithm, None
281
246
 
282
- return algorithm, k_value, source_features
247
+ k_value = "auto" if k_value_raw == "auto" else int(k_value_raw)
248
+ return algorithm, k_value
283
249
 
284
- # configuration based
285
- source_features_frozen = feature.options.get_in_features()
286
- source_features = [source_feature.get_name() for source_feature in source_features_frozen]
250
+ @classmethod
251
+ def _extract_algorithm_k_value_and_source_features(cls, feature: Feature) -> tuple[str, Union[int, str], list[str]]:
252
+ """
253
+ Extract algorithm, k_value, and source features from a feature.
287
254
 
288
- algorithm = feature.options.get(cls.ALGORITHM)
289
- k_value_raw = feature.options.get(cls.K_VALUE)
255
+ Tries string-based approach first, falls back to configuration-based.
290
256
 
291
- # Convert k_value to appropriate type
292
- if k_value_raw == "auto":
293
- k_value = "auto"
294
- else:
295
- k_value = int(k_value_raw)
257
+ Args:
258
+ feature: The feature to extract parameters from
259
+
260
+ Returns:
261
+ Tuple of (algorithm, k_value, source_features_list)
262
+
263
+ Raises:
264
+ ValueError: If parameters cannot be extracted
265
+ """
266
+ source_features = cls._extract_source_features(feature)
267
+ algorithm, k_value = cls._extract_clustering_params(feature)
296
268
 
297
- if algorithm is None or k_value is None or not source_features:
298
- raise ValueError(f"Could not extract algorithm, k_value, and source features from: {feature.name}")
269
+ if algorithm is None or k_value is None:
270
+ raise ValueError(f"Could not extract algorithm and k_value from: {feature.name}")
299
271
 
300
272
  return algorithm, k_value, source_features
301
273
 
@@ -27,14 +27,14 @@ except ImportError:
27
27
  np = None # type: ignore[assignment]
28
28
 
29
29
 
30
- from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
30
+ from mloda import ComputeFramework
31
31
  from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
32
32
  from mloda_plugins.feature_group.experimental.clustering.base import ClusteringFeatureGroup
33
33
 
34
34
 
35
35
  class PandasClusteringFeatureGroup(ClusteringFeatureGroup):
36
36
  @classmethod
37
- def compute_framework_rule(cls) -> set[type[ComputeFrameWork]]:
37
+ def compute_framework_rule(cls) -> set[type[ComputeFramework]]:
38
38
  """Define the compute framework for this feature group."""
39
39
  return {PandasDataFrame}
40
40