mloda 0.3.3__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. mloda/__init__.py +17 -0
  2. {mloda_core → mloda/core}/abstract_plugins/components/base_artifact.py +2 -2
  3. {mloda_core → mloda/core}/abstract_plugins/components/base_validator.py +13 -0
  4. {mloda_core → mloda/core}/abstract_plugins/components/data_access_collection.py +1 -1
  5. {mloda_core → mloda/core}/abstract_plugins/components/data_types.py +39 -0
  6. {mloda_core → mloda/core}/abstract_plugins/components/feature.py +39 -33
  7. {mloda_core → mloda/core}/abstract_plugins/components/feature_chainer/feature_chain_parser.py +19 -19
  8. mloda/core/abstract_plugins/components/feature_chainer/feature_chain_parser_mixin.py +197 -0
  9. {mloda_core → mloda/core}/abstract_plugins/components/feature_collection.py +6 -6
  10. {mloda_core → mloda/core}/abstract_plugins/components/feature_group_version.py +8 -8
  11. {mloda_core → mloda/core}/abstract_plugins/components/feature_set.py +18 -24
  12. {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/cfw_transformer.py +2 -2
  13. {mloda_core → mloda/core}/abstract_plugins/components/index/add_index_feature.py +4 -4
  14. {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/api_input_data.py +3 -3
  15. {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/api_input_data_collection.py +2 -2
  16. {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/base_api_data.py +1 -1
  17. {mloda_core → mloda/core}/abstract_plugins/components/input_data/base_input_data.py +6 -6
  18. {mloda_core → mloda/core}/abstract_plugins/components/input_data/creator/data_creator.py +3 -3
  19. mloda/core/abstract_plugins/components/link.py +437 -0
  20. {mloda_core → mloda/core}/abstract_plugins/components/match_data/match_data.py +3 -3
  21. {mloda_core → mloda/core}/abstract_plugins/components/merge/base_merge_engine.py +2 -2
  22. {mloda_core → mloda/core}/abstract_plugins/components/options.py +12 -36
  23. {mloda_core → mloda/core}/abstract_plugins/components/parallelization_modes.py +1 -1
  24. {mloda_core → mloda/core}/abstract_plugins/components/plugin_option/plugin_collector.py +14 -14
  25. mloda/core/abstract_plugins/components/validators/datatype_validator.py +96 -0
  26. mloda/core/abstract_plugins/components/validators/feature_set_validator.py +38 -0
  27. mloda/core/abstract_plugins/components/validators/feature_validator.py +23 -0
  28. mloda/core/abstract_plugins/components/validators/link_validator.py +79 -0
  29. mloda/core/abstract_plugins/components/validators/options_validator.py +57 -0
  30. mloda_core/abstract_plugins/compute_frame_work.py → mloda/core/abstract_plugins/compute_framework.py +45 -37
  31. mloda_core/abstract_plugins/abstract_feature_group.py → mloda/core/abstract_plugins/feature_group.py +56 -33
  32. mloda/core/abstract_plugins/function_extender.py +78 -0
  33. mloda/core/api/plugin_docs.py +220 -0
  34. mloda/core/api/plugin_info.py +32 -0
  35. {mloda_core → mloda/core}/api/prepare/setup_compute_framework.py +11 -11
  36. {mloda_core → mloda/core}/api/request.py +42 -33
  37. {mloda_core → mloda/core}/core/cfw_manager.py +8 -8
  38. {mloda_core → mloda/core}/core/engine.py +47 -47
  39. {mloda_core → mloda/core}/core/step/abstract_step.py +7 -7
  40. {mloda_core → mloda/core}/core/step/feature_group_step.py +12 -12
  41. {mloda_core → mloda/core}/core/step/join_step.py +14 -14
  42. {mloda_core → mloda/core}/core/step/transform_frame_work_step.py +16 -16
  43. {mloda_core → mloda/core}/filter/filter_engine.py +1 -1
  44. {mloda_core → mloda/core}/filter/filter_type_enum.py +1 -1
  45. {mloda_core → mloda/core}/filter/global_filter.py +23 -23
  46. {mloda_core → mloda/core}/filter/single_filter.py +6 -6
  47. {mloda_core → mloda/core}/prepare/accessible_plugins.py +15 -18
  48. {mloda_core → mloda/core}/prepare/execution_plan.py +65 -39
  49. {mloda_core → mloda/core}/prepare/graph/build_graph.py +6 -6
  50. {mloda_core → mloda/core}/prepare/graph/graph.py +1 -1
  51. {mloda_core → mloda/core}/prepare/graph/properties.py +5 -5
  52. {mloda_core → mloda/core}/prepare/identify_feature_group.py +12 -14
  53. {mloda_core → mloda/core}/prepare/joinstep_collection.py +3 -3
  54. {mloda_core → mloda/core}/prepare/resolve_compute_frameworks.py +6 -6
  55. {mloda_core → mloda/core}/prepare/resolve_graph.py +11 -11
  56. {mloda_core → mloda/core}/prepare/resolve_links.py +11 -31
  57. mloda/core/prepare/validators/resolve_link_validator.py +32 -0
  58. mloda/core/runtime/compute_framework_executor.py +271 -0
  59. mloda/core/runtime/data_lifecycle_manager.py +160 -0
  60. mloda/core/runtime/flight/__init__.py +0 -0
  61. {mloda_core → mloda/core}/runtime/flight/runner_flight_server.py +1 -1
  62. mloda/core/runtime/run.py +317 -0
  63. mloda/core/runtime/worker/__init__.py +0 -0
  64. {mloda_core → mloda/core}/runtime/worker/multiprocessing_worker.py +15 -10
  65. {mloda_core → mloda/core}/runtime/worker/thread_worker.py +2 -2
  66. mloda/core/runtime/worker_manager.py +96 -0
  67. mloda/provider/__init__.py +101 -0
  68. mloda/steward/__init__.py +25 -0
  69. mloda/user/__init__.py +57 -0
  70. {mloda-0.3.3.dist-info → mloda-0.4.1.dist-info}/METADATA +24 -31
  71. mloda-0.4.1.dist-info/RECORD +248 -0
  72. {mloda-0.3.3.dist-info → mloda-0.4.1.dist-info}/top_level.txt +1 -1
  73. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_filter_engine.py +2 -2
  74. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_framework.py +15 -13
  75. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_merge_engine.py +3 -3
  76. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_pyarrow_transformer.py +1 -1
  77. mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_filter_engine.py +2 -2
  78. mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_framework.py +12 -10
  79. mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_pyarrow_transformer.py +1 -1
  80. mloda_plugins/compute_framework/base_implementations/pandas/dataframe.py +18 -16
  81. mloda_plugins/compute_framework/base_implementations/pandas/pandas_filter_engine.py +36 -13
  82. mloda_plugins/compute_framework/base_implementations/pandas/pandas_merge_engine.py +7 -7
  83. mloda_plugins/compute_framework/base_implementations/pandas/pandaspyarrowtransformer.py +1 -1
  84. mloda_plugins/compute_framework/base_implementations/polars/dataframe.py +16 -14
  85. mloda_plugins/compute_framework/base_implementations/polars/lazy_dataframe.py +13 -12
  86. mloda_plugins/compute_framework/base_implementations/polars/polars_filter_engine.py +2 -2
  87. mloda_plugins/compute_framework/base_implementations/polars/polars_lazy_pyarrow_transformer.py +1 -1
  88. mloda_plugins/compute_framework/base_implementations/polars/polars_merge_engine.py +3 -3
  89. mloda_plugins/compute_framework/base_implementations/polars/polars_pyarrow_transformer.py +1 -1
  90. mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_filter_engine.py +2 -2
  91. mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_merge_engine.py +3 -3
  92. mloda_plugins/compute_framework/base_implementations/pyarrow/table.py +12 -11
  93. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_filter_engine.py +2 -2
  94. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_framework.py +11 -9
  95. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_merge_engine.py +3 -3
  96. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_pyarrow_transformer.py +1 -1
  97. mloda_plugins/compute_framework/base_implementations/spark/spark_filter_engine.py +2 -2
  98. mloda_plugins/compute_framework/base_implementations/spark/spark_framework.py +17 -15
  99. mloda_plugins/compute_framework/base_implementations/spark/spark_merge_engine.py +2 -3
  100. mloda_plugins/compute_framework/base_implementations/spark/spark_pyarrow_transformer.py +1 -1
  101. mloda_plugins/config/feature/loader.py +2 -2
  102. mloda_plugins/feature_group/experimental/aggregated_feature_group/base.py +45 -64
  103. mloda_plugins/feature_group/experimental/aggregated_feature_group/pandas.py +2 -2
  104. mloda_plugins/feature_group/experimental/aggregated_feature_group/polars_lazy.py +2 -2
  105. mloda_plugins/feature_group/experimental/aggregated_feature_group/pyarrow.py +2 -2
  106. mloda_plugins/feature_group/experimental/clustering/base.py +67 -97
  107. mloda_plugins/feature_group/experimental/clustering/pandas.py +2 -2
  108. mloda_plugins/feature_group/experimental/data_quality/missing_value/base.py +58 -82
  109. mloda_plugins/feature_group/experimental/data_quality/missing_value/pandas.py +2 -2
  110. mloda_plugins/feature_group/experimental/data_quality/missing_value/pyarrow.py +2 -2
  111. mloda_plugins/feature_group/experimental/data_quality/missing_value/python_dict.py +2 -2
  112. mloda_plugins/feature_group/experimental/default_options_key.py +16 -19
  113. mloda_plugins/feature_group/experimental/dimensionality_reduction/base.py +81 -96
  114. mloda_plugins/feature_group/experimental/dimensionality_reduction/pandas.py +2 -2
  115. mloda_plugins/feature_group/experimental/dynamic_feature_group_factory/dynamic_feature_group_factory.py +24 -24
  116. mloda_plugins/feature_group/experimental/forecasting/base.py +108 -106
  117. mloda_plugins/feature_group/experimental/forecasting/forecasting_artifact.py +2 -2
  118. mloda_plugins/feature_group/experimental/forecasting/pandas.py +15 -15
  119. mloda_plugins/feature_group/experimental/geo_distance/base.py +52 -44
  120. mloda_plugins/feature_group/experimental/geo_distance/pandas.py +2 -3
  121. mloda_plugins/feature_group/experimental/llm/cli.py +4 -4
  122. mloda_plugins/feature_group/experimental/llm/cli_features/refactor_git_cached.py +19 -19
  123. mloda_plugins/feature_group/experimental/llm/installed_packages_feature_group.py +8 -8
  124. mloda_plugins/feature_group/experimental/llm/list_directory_feature_group.py +5 -5
  125. mloda_plugins/feature_group/experimental/llm/llm_api/claude.py +3 -3
  126. mloda_plugins/feature_group/experimental/llm/llm_api/gemini.py +3 -3
  127. mloda_plugins/feature_group/experimental/llm/llm_api/llm_base_request.py +5 -5
  128. mloda_plugins/feature_group/experimental/llm/llm_api/openai.py +3 -3
  129. mloda_plugins/feature_group/experimental/llm/llm_api/request_loop.py +6 -6
  130. mloda_plugins/feature_group/experimental/llm/llm_file_selector.py +10 -10
  131. mloda_plugins/feature_group/experimental/llm/tools/tool_collection.py +1 -1
  132. mloda_plugins/feature_group/experimental/node_centrality/base.py +46 -74
  133. mloda_plugins/feature_group/experimental/node_centrality/pandas.py +2 -2
  134. mloda_plugins/feature_group/experimental/sklearn/encoding/base.py +53 -53
  135. mloda_plugins/feature_group/experimental/sklearn/encoding/pandas.py +2 -2
  136. mloda_plugins/feature_group/experimental/sklearn/pipeline/base.py +52 -39
  137. mloda_plugins/feature_group/experimental/sklearn/pipeline/pandas.py +3 -4
  138. mloda_plugins/feature_group/experimental/sklearn/scaling/base.py +44 -60
  139. mloda_plugins/feature_group/experimental/sklearn/scaling/pandas.py +2 -2
  140. mloda_plugins/feature_group/experimental/sklearn/sklearn_artifact.py +2 -3
  141. mloda_plugins/feature_group/experimental/source_input_feature.py +15 -15
  142. mloda_plugins/feature_group/experimental/text_cleaning/base.py +38 -63
  143. mloda_plugins/feature_group/experimental/text_cleaning/pandas.py +2 -2
  144. mloda_plugins/feature_group/experimental/text_cleaning/python_dict.py +2 -2
  145. mloda_plugins/feature_group/experimental/time_window/base.py +108 -95
  146. mloda_plugins/feature_group/experimental/time_window/pandas.py +13 -13
  147. mloda_plugins/feature_group/experimental/time_window/pyarrow.py +12 -12
  148. mloda_plugins/feature_group/input_data/api_data/api_data.py +9 -11
  149. mloda_plugins/feature_group/input_data/read_context_files.py +7 -7
  150. mloda_plugins/feature_group/input_data/read_db.py +7 -9
  151. mloda_plugins/feature_group/input_data/read_db_feature.py +4 -4
  152. mloda_plugins/feature_group/input_data/read_dbs/sqlite.py +23 -13
  153. mloda_plugins/feature_group/input_data/read_file.py +8 -8
  154. mloda_plugins/feature_group/input_data/read_file_feature.py +4 -4
  155. mloda_plugins/feature_group/input_data/read_files/csv.py +6 -6
  156. mloda_plugins/feature_group/input_data/read_files/feather.py +5 -5
  157. mloda_plugins/feature_group/input_data/read_files/json.py +5 -5
  158. mloda_plugins/feature_group/input_data/read_files/orc.py +5 -5
  159. mloda_plugins/feature_group/input_data/read_files/parquet.py +5 -5
  160. mloda_plugins/feature_group/input_data/read_files/text_file_reader.py +5 -5
  161. mloda_plugins/function_extender/base_implementations/otel/otel_extender.py +4 -4
  162. mloda-0.3.3.dist-info/RECORD +0 -230
  163. mloda_core/abstract_plugins/components/link.py +0 -286
  164. mloda_core/abstract_plugins/function_extender.py +0 -34
  165. mloda_core/runtime/run.py +0 -617
  166. {mloda_core → mloda/core}/__init__.py +0 -0
  167. {mloda_core → mloda/core}/abstract_plugins/__init__.py +0 -0
  168. {mloda_core → mloda/core}/abstract_plugins/components/__init__.py +0 -0
  169. {mloda_core → mloda/core}/abstract_plugins/components/domain.py +0 -0
  170. {mloda_core → mloda/core}/abstract_plugins/components/feature_chainer/__init__.py +0 -0
  171. {mloda_core → mloda/core}/abstract_plugins/components/feature_name.py +0 -0
  172. {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/__init__.py +0 -0
  173. {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/base_transformer.py +0 -0
  174. {mloda_core → mloda/core}/abstract_plugins/components/hashable_dict.py +0 -0
  175. {mloda_core → mloda/core}/abstract_plugins/components/index/__init__.py +0 -0
  176. {mloda_core → mloda/core}/abstract_plugins/components/index/index.py +0 -0
  177. {mloda_core → mloda/core}/abstract_plugins/components/input_data/__init__.py +0 -0
  178. {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/__init__.py +0 -0
  179. {mloda_core → mloda/core}/abstract_plugins/components/input_data/creator/__init__.py +0 -0
  180. {mloda_core → mloda/core}/abstract_plugins/components/match_data/__init__.py +0 -0
  181. {mloda_core → mloda/core}/abstract_plugins/components/merge/__init__.py +0 -0
  182. {mloda_core → mloda/core}/abstract_plugins/components/plugin_option/__init__.py +0 -0
  183. {mloda_core → mloda/core}/abstract_plugins/components/utils.py +0 -0
  184. {mloda_core/abstract_plugins/plugin_loader → mloda/core/abstract_plugins/components/validators}/__init__.py +0 -0
  185. {mloda_core/api → mloda/core/abstract_plugins/plugin_loader}/__init__.py +0 -0
  186. {mloda_core → mloda/core}/abstract_plugins/plugin_loader/plugin_loader.py +0 -0
  187. {mloda_core/api/prepare → mloda/core/api}/__init__.py +0 -0
  188. {mloda_core/core → mloda/core/api/prepare}/__init__.py +0 -0
  189. {mloda_core/core/step → mloda/core/core}/__init__.py +0 -0
  190. {mloda_core/filter → mloda/core/core/step}/__init__.py +0 -0
  191. {mloda_core/prepare → mloda/core/filter}/__init__.py +0 -0
  192. {mloda_core → mloda/core}/filter/filter_parameter.py +0 -0
  193. {mloda_core/prepare/graph → mloda/core/prepare}/__init__.py +0 -0
  194. {mloda_core/runtime → mloda/core/prepare/graph}/__init__.py +0 -0
  195. {mloda_core/runtime/flight → mloda/core/prepare/validators}/__init__.py +0 -0
  196. {mloda_core/runtime/worker → mloda/core/runtime}/__init__.py +0 -0
  197. {mloda_core → mloda/core}/runtime/flight/flight_server.py +0 -0
  198. {mloda-0.3.3.dist-info → mloda-0.4.1.dist-info}/WHEEL +0 -0
  199. {mloda-0.3.3.dist-info → mloda-0.4.1.dist-info}/entry_points.txt +0 -0
  200. {mloda-0.3.3.dist-info → mloda-0.4.1.dist-info}/licenses/LICENSE.TXT +0 -0
  201. {mloda-0.3.3.dist-info → mloda-0.4.1.dist-info}/licenses/NOTICE.md +0 -0
@@ -4,18 +4,19 @@ Base implementation for node centrality feature groups.
4
4
 
5
5
  from __future__ import annotations
6
6
 
7
- from typing import Any, Optional, Set, Type, Union
8
-
9
- from mloda_core.abstract_plugins.abstract_feature_group import AbstractFeatureGroup
10
- from mloda_core.abstract_plugins.components.feature import Feature
11
- from mloda_core.abstract_plugins.components.feature_chainer.feature_chain_parser import FeatureChainParser
12
- from mloda_core.abstract_plugins.components.feature_name import FeatureName
13
- from mloda_core.abstract_plugins.components.feature_set import FeatureSet
14
- from mloda_core.abstract_plugins.components.options import Options
7
+ from typing import Any, Optional
8
+
9
+ from mloda import FeatureGroup
10
+ from mloda import Feature
11
+ from mloda.provider import FeatureChainParser
12
+ from mloda.provider import (
13
+ FeatureChainParserMixin,
14
+ )
15
+ from mloda.provider import FeatureSet
15
16
  from mloda_plugins.feature_group.experimental.default_options_key import DefaultOptionKeys
16
17
 
17
18
 
18
- class NodeCentralityFeatureGroup(AbstractFeatureGroup):
19
+ class NodeCentralityFeatureGroup(FeatureChainParserMixin, FeatureGroup):
19
20
  # Option keys for centrality configuration
20
21
  CENTRALITY_TYPE = "centrality_type"
21
22
  GRAPH_TYPE = "graph_type"
@@ -131,49 +132,35 @@ class NodeCentralityFeatureGroup(AbstractFeatureGroup):
131
132
  # Define the suffix pattern for this feature group (L→R format: source__operation)
132
133
  PREFIX_PATTERN = r".*__([\w]+)_centrality$"
133
134
 
135
+ # In-feature configuration for FeatureChainParserMixin
136
+ MIN_IN_FEATURES = 1
137
+ MAX_IN_FEATURES = 1
138
+
134
139
  # Property mapping for configuration-based feature creation
135
140
  PROPERTY_MAPPING = {
136
141
  # Context parameters (don't affect Feature Group resolution)
137
142
  CENTRALITY_TYPE: {
138
143
  **CENTRALITY_TYPES, # All supported centrality types as valid options
139
- DefaultOptionKeys.mloda_context: True,
140
- DefaultOptionKeys.mloda_strict_validation: True,
144
+ DefaultOptionKeys.context: True,
145
+ DefaultOptionKeys.strict_validation: True,
141
146
  },
142
147
  GRAPH_TYPE: {
143
148
  **GRAPH_TYPES, # All supported graph types as valid options
144
- DefaultOptionKeys.mloda_context: True,
145
- DefaultOptionKeys.mloda_strict_validation: True,
146
- DefaultOptionKeys.mloda_default: "undirected",
149
+ DefaultOptionKeys.context: True,
150
+ DefaultOptionKeys.strict_validation: True,
151
+ DefaultOptionKeys.default: "undirected",
147
152
  },
148
153
  WEIGHT_COLUMN: {
149
154
  "explanation": "Column name for edge weights (optional)",
150
- DefaultOptionKeys.mloda_context: True,
151
- DefaultOptionKeys.mloda_default: None,
155
+ DefaultOptionKeys.context: True,
156
+ DefaultOptionKeys.default: None,
152
157
  },
153
158
  DefaultOptionKeys.in_features: {
154
159
  "explanation": "Source feature representing the nodes for centrality calculation",
155
- DefaultOptionKeys.mloda_context: True,
160
+ DefaultOptionKeys.context: True,
156
161
  },
157
162
  }
158
163
 
159
- def input_features(self, options: Options, feature_name: FeatureName) -> Optional[Set[Feature]]:
160
- """Extract source feature from either configuration-based options or string parsing."""
161
-
162
- source_feature: str | None = None
163
-
164
- # string based
165
- _, source_feature = FeatureChainParser.parse_feature_name(feature_name, [self.PREFIX_PATTERN])
166
- if source_feature is not None:
167
- return {Feature(source_feature)}
168
-
169
- # configuration based
170
- source_features = options.get_in_features()
171
- if len(source_features) != 1:
172
- raise ValueError(
173
- f"Expected exactly one source feature, but found {len(source_features)}: {source_features}"
174
- )
175
- return set(source_features)
176
-
177
164
  @classmethod
178
165
  def parse_centrality_prefix(cls, feature_name: str) -> str:
179
166
  """
@@ -221,24 +208,6 @@ class NodeCentralityFeatureGroup(AbstractFeatureGroup):
221
208
  """Extract the centrality type from the feature name."""
222
209
  return cls.parse_centrality_prefix(feature_name)
223
210
 
224
- @classmethod
225
- def match_feature_group_criteria(
226
- cls,
227
- feature_name: Union[FeatureName, str],
228
- options: Options,
229
- data_access_collection: Optional[Any] = None,
230
- ) -> bool:
231
- """Check if feature name matches the expected pattern for centrality features."""
232
- if isinstance(feature_name, FeatureName):
233
- feature_name = feature_name.name
234
-
235
- return FeatureChainParser.match_configuration_feature_chain_parser(
236
- feature_name,
237
- options,
238
- property_mapping=cls.PROPERTY_MAPPING,
239
- prefix_patterns=[cls.PREFIX_PATTERN],
240
- )
241
-
242
211
  @classmethod
243
212
  def calculate_feature(cls, data: Any, features: FeatureSet) -> Any:
244
213
  """
@@ -290,35 +259,38 @@ class NodeCentralityFeatureGroup(AbstractFeatureGroup):
290
259
  Raises:
291
260
  ValueError: If parameters cannot be extracted
292
261
  """
293
- centrality_type = None
294
- source_feature_name: str | None = None
262
+ source_features = cls._extract_source_features(feature)
263
+ centrality_type = cls._extract_centrality_type(feature)
264
+ if centrality_type is None:
265
+ raise ValueError(f"Could not extract centrality type from: {feature.name}")
266
+ return centrality_type, source_features[0]
295
267
 
268
+ @classmethod
269
+ def _extract_centrality_type(cls, feature: Feature) -> Optional[str]:
270
+ """
271
+ Extract centrality type from a feature.
272
+
273
+ Tries string-based parsing first, falls back to configuration-based approach.
274
+
275
+ Args:
276
+ feature: The feature to extract centrality type from
277
+
278
+ Returns:
279
+ The centrality type, or None if not found
280
+
281
+ Raises:
282
+ ValueError: If centrality type cannot be extracted
283
+ """
296
284
  # Try string-based parsing first
297
- # Note: parse_feature_name returns (operation_config, source_feature) for L→R format
298
- # The operation_config is already extracted by the regex group (e.g., "degree" from "degree_centrality")
299
285
  suffix_part, source_feature_name = FeatureChainParser.parse_feature_name(feature.name, [cls.PREFIX_PATTERN])
300
286
  if source_feature_name is not None and suffix_part is not None:
301
287
  # The suffix_part is already the centrality type (extracted by regex group)
302
- centrality_type = suffix_part
303
- if centrality_type in cls.CENTRALITY_TYPES:
304
- return centrality_type, source_feature_name
288
+ if suffix_part in cls.CENTRALITY_TYPES:
289
+ return suffix_part
305
290
 
306
291
  # Fall back to configuration-based approach
307
- source_features = feature.options.get_in_features()
308
- if len(source_features) != 1:
309
- raise ValueError(
310
- f"Expected exactly one source feature, but found {len(source_features)}: {source_features}"
311
- )
312
-
313
- source_feature = next(iter(source_features))
314
- source_feature_name = source_feature.get_name()
315
-
316
292
  centrality_type = feature.options.get(cls.CENTRALITY_TYPE)
317
-
318
- if centrality_type is None or source_feature_name is None:
319
- raise ValueError(f"Could not extract centrality type and source feature from: {feature.name}")
320
-
321
- return centrality_type, source_feature_name
293
+ return str(centrality_type) if centrality_type is not None else None
322
294
 
323
295
  @classmethod
324
296
  def _check_source_feature_exists(cls, data: Any, feature_name: str) -> None:
@@ -14,14 +14,14 @@ except ImportError:
14
14
  pd = None
15
15
  np = None # type: ignore
16
16
 
17
- from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
17
+ from mloda import ComputeFramework
18
18
  from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
19
19
  from mloda_plugins.feature_group.experimental.node_centrality.base import NodeCentralityFeatureGroup
20
20
 
21
21
 
22
22
  class PandasNodeCentralityFeatureGroup(NodeCentralityFeatureGroup):
23
23
  @classmethod
24
- def compute_framework_rule(cls) -> set[type[ComputeFrameWork]]:
24
+ def compute_framework_rule(cls) -> set[type[ComputeFramework]]:
25
25
  """Define the compute framework for this feature group."""
26
26
  return {PandasDataFrame}
27
27
 
@@ -5,20 +5,23 @@ Base implementation for scikit-learn encoding feature groups.
5
5
  from __future__ import annotations
6
6
 
7
7
  import datetime
8
- from typing import Any, Dict, Optional, Set, Type, Union
9
-
10
- from mloda_core.abstract_plugins.abstract_feature_group import AbstractFeatureGroup
11
- from mloda_core.abstract_plugins.components.feature import Feature
12
- from mloda_core.abstract_plugins.components.feature_name import FeatureName
13
- from mloda_core.abstract_plugins.components.feature_set import FeatureSet
14
- from mloda_core.abstract_plugins.components.options import Options
15
- from mloda_core.abstract_plugins.components.feature_chainer.feature_chain_parser import FeatureChainParser
16
- from mloda_core.abstract_plugins.components.base_artifact import BaseArtifact
8
+ from typing import Any, Dict, Optional, Set, Type
9
+
10
+ from mloda import FeatureGroup
11
+ from mloda import Feature
12
+ from mloda.user import FeatureName
13
+ from mloda.provider import FeatureSet
14
+ from mloda import Options
15
+ from mloda.provider import FeatureChainParser
16
+ from mloda.provider import (
17
+ FeatureChainParserMixin,
18
+ )
19
+ from mloda.provider import BaseArtifact
17
20
  from mloda_plugins.feature_group.experimental.default_options_key import DefaultOptionKeys
18
21
  from mloda_plugins.feature_group.experimental.sklearn.sklearn_artifact import SklearnArtifact
19
22
 
20
23
 
21
- class EncodingFeatureGroup(AbstractFeatureGroup):
24
+ class EncodingFeatureGroup(FeatureChainParserMixin, FeatureGroup):
22
25
  """
23
26
  Base class for scikit-learn encoding feature groups.
24
27
 
@@ -55,8 +58,8 @@ class EncodingFeatureGroup(AbstractFeatureGroup):
55
58
  Uses Options with proper group/context parameter separation:
56
59
 
57
60
  ```python
58
- from mloda_core.abstract_plugins.components.feature import Feature
59
- from mloda_core.abstract_plugins.components.options import Options
61
+ from mloda import Feature
62
+ from mloda import Options
60
63
  from mloda_plugins.feature_group.experimental.default_options_key import DefaultOptionKeys
61
64
 
62
65
  feature = Feature(
@@ -75,7 +78,7 @@ class EncodingFeatureGroup(AbstractFeatureGroup):
75
78
  ### String-Based Creation
76
79
 
77
80
  ```python
78
- from mloda_core.abstract_plugins.components.feature import Feature
81
+ from mloda import Feature
79
82
 
80
83
  # OneHot encoding - creates multiple binary columns
81
84
  feature = Feature(name="product_category__onehot_encoded")
@@ -92,8 +95,8 @@ class EncodingFeatureGroup(AbstractFeatureGroup):
92
95
  ### Configuration-Based Creation
93
96
 
94
97
  ```python
95
- from mloda_core.abstract_plugins.components.feature import Feature
96
- from mloda_core.abstract_plugins.components.options import Options
98
+ from mloda import Feature
99
+ from mloda import Options
97
100
 
98
101
  # OneHot encoding using configuration
99
102
  feature = Feature(
@@ -168,19 +171,23 @@ class EncodingFeatureGroup(AbstractFeatureGroup):
168
171
 
169
172
  # Define patterns for parsing
170
173
  PATTERN = "__"
171
- SUFFIX_PATTERN = r".*__(onehot|label|ordinal)_encoded(~\d+)?$"
174
+ PREFIX_PATTERN = r".*__(onehot|label|ordinal)_encoded(~\d+)?$"
175
+
176
+ # In-feature configuration for FeatureChainParserMixin
177
+ MIN_IN_FEATURES = 1
178
+ MAX_IN_FEATURES = 1
172
179
 
173
180
  # Property mapping for new configuration-based approach
174
181
  PROPERTY_MAPPING = {
175
182
  ENCODER_TYPE: {
176
183
  **SUPPORTED_ENCODERS, # All supported encoder types as valid options
177
- DefaultOptionKeys.mloda_context: True, # Context parameter
178
- DefaultOptionKeys.mloda_strict_validation: True, # Enable strict validation
184
+ DefaultOptionKeys.context: True, # Context parameter
185
+ DefaultOptionKeys.strict_validation: True, # Enable strict validation
179
186
  },
180
187
  DefaultOptionKeys.in_features: {
181
188
  "explanation": "Source feature to encode",
182
- DefaultOptionKeys.mloda_context: True, # Context parameter
183
- DefaultOptionKeys.mloda_strict_validation: False, # Flexible validation
189
+ DefaultOptionKeys.context: True, # Context parameter
190
+ DefaultOptionKeys.strict_validation: False, # Flexible validation
184
191
  },
185
192
  }
186
193
 
@@ -193,7 +200,7 @@ class EncodingFeatureGroup(AbstractFeatureGroup):
193
200
  """Extract source feature from either configuration-based options or string parsing."""
194
201
 
195
202
  # Try string-based parsing first
196
- _, source_feature = FeatureChainParser.parse_feature_name(feature_name, [self.SUFFIX_PATTERN])
203
+ _, source_feature = FeatureChainParser.parse_feature_name(feature_name, [self.PREFIX_PATTERN])
197
204
  if source_feature is not None:
198
205
  # Remove ~suffix if present (for OneHot column patterns like category~1)
199
206
  base_feature = self.get_column_base_feature(source_feature)
@@ -210,7 +217,7 @@ class EncodingFeatureGroup(AbstractFeatureGroup):
210
217
  @classmethod
211
218
  def get_encoder_type(cls, feature_name: str) -> str:
212
219
  """Extract the encoder type from the feature name."""
213
- encoder_type, _ = FeatureChainParser.parse_feature_name(feature_name, [cls.SUFFIX_PATTERN])
220
+ encoder_type, _ = FeatureChainParser.parse_feature_name(feature_name, [cls.PREFIX_PATTERN])
214
221
  if encoder_type is None:
215
222
  raise ValueError(f"Invalid encoding feature name format: {feature_name}")
216
223
 
@@ -223,22 +230,6 @@ class EncodingFeatureGroup(AbstractFeatureGroup):
223
230
 
224
231
  return encoder_type
225
232
 
226
- @classmethod
227
- def match_feature_group_criteria(
228
- cls,
229
- feature_name: Union[FeatureName, str],
230
- options: Options,
231
- data_access_collection: Optional[Any] = None,
232
- ) -> bool:
233
- """Check if feature name matches the expected pattern using unified parser."""
234
- # Use the unified parser with property mapping for full configuration support
235
- return FeatureChainParser.match_configuration_feature_chain_parser(
236
- feature_name,
237
- options,
238
- property_mapping=cls.PROPERTY_MAPPING,
239
- prefix_patterns=[cls.SUFFIX_PATTERN],
240
- )
241
-
242
233
  @classmethod
243
234
  def calculate_feature(cls, data: Any, features: FeatureSet) -> Any:
244
235
  """
@@ -307,33 +298,42 @@ class EncodingFeatureGroup(AbstractFeatureGroup):
307
298
  Raises:
308
299
  ValueError: If parameters cannot be extracted
309
300
  """
310
- encoder_type = None
311
- source_feature_name: str | None = None
301
+ source_features = cls._extract_source_features(feature)
302
+ encoder_type = cls._extract_encoder_type(feature)
303
+ if encoder_type is None:
304
+ raise ValueError(f"Could not extract encoder type from: {feature.name}")
305
+ return encoder_type, source_features[0]
312
306
 
313
- # Try string-based parsing first
307
+ @classmethod
308
+ def _extract_encoder_type(cls, feature: Feature) -> Optional[str]:
309
+ """
310
+ Extract encoder type from a feature.
311
+
312
+ Tries string-based parsing first, falls back to configuration-based approach.
313
+
314
+ Args:
315
+ feature: The feature to extract encoder type from
316
+
317
+ Returns:
318
+ Encoder type string, or None if not found
319
+
320
+ Raises:
321
+ ValueError: If encoder type is unsupported
322
+ """
314
323
  feature_name_str = feature.name.name if hasattr(feature.name, "name") else str(feature.name)
315
324
 
316
325
  if FeatureChainParser.is_chained_feature(feature_name_str):
317
326
  encoder_type = cls.get_encoder_type(feature_name_str)
318
- source_feature_name = FeatureChainParser.extract_source_feature(feature_name_str, cls.SUFFIX_PATTERN)
319
- return encoder_type, source_feature_name
320
-
321
- # Fall back to configuration-based approach
322
- source_features = feature.options.get_in_features()
323
- source_feature = next(iter(source_features))
324
- source_feature_name = source_feature.get_name()
327
+ return encoder_type
325
328
 
326
329
  encoder_type = feature.options.get(cls.ENCODER_TYPE)
327
330
 
328
- if encoder_type is None or source_feature_name is None:
329
- raise ValueError(f"Could not extract encoder type and source feature from: {feature.name}")
330
-
331
- if encoder_type not in cls.SUPPORTED_ENCODERS:
331
+ if encoder_type is not None and encoder_type not in cls.SUPPORTED_ENCODERS:
332
332
  raise ValueError(
333
333
  f"Unsupported encoder type: {encoder_type}. Supported types: {', '.join(cls.SUPPORTED_ENCODERS.keys())}"
334
334
  )
335
335
 
336
- return encoder_type, source_feature_name
336
+ return str(encoder_type) if encoder_type is not None else None
337
337
 
338
338
  @classmethod
339
339
  def _import_sklearn_components(cls) -> Dict[str, Any]:
@@ -6,7 +6,7 @@ from __future__ import annotations
6
6
 
7
7
  from typing import Any, Set, Type, Union
8
8
 
9
- from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
9
+ from mloda import ComputeFramework
10
10
 
11
11
  from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
12
12
  from mloda_plugins.feature_group.experimental.sklearn.encoding.base import EncodingFeatureGroup
@@ -21,7 +21,7 @@ class PandasEncodingFeatureGroup(EncodingFeatureGroup):
21
21
  """
22
22
 
23
23
  @classmethod
24
- def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFrameWork]]]:
24
+ def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFramework]]]:
25
25
  """Specify that this feature group works with Pandas."""
26
26
  return {PandasDataFrame}
27
27
 
@@ -7,18 +7,21 @@ from __future__ import annotations
7
7
  import datetime
8
8
  from typing import Any, Dict, FrozenSet, List, Optional, Set, Type, Union
9
9
 
10
- from mloda_core.abstract_plugins.abstract_feature_group import AbstractFeatureGroup
11
- from mloda_core.abstract_plugins.components.feature import Feature
12
- from mloda_core.abstract_plugins.components.feature_name import FeatureName
13
- from mloda_core.abstract_plugins.components.feature_set import FeatureSet
14
- from mloda_core.abstract_plugins.components.options import Options
15
- from mloda_core.abstract_plugins.components.feature_chainer.feature_chain_parser import FeatureChainParser
16
- from mloda_core.abstract_plugins.components.base_artifact import BaseArtifact
10
+ from mloda import FeatureGroup
11
+ from mloda import Feature
12
+ from mloda.user import FeatureName
13
+ from mloda.provider import FeatureSet
14
+ from mloda import Options
15
+ from mloda.provider import FeatureChainParser
16
+ from mloda.provider import (
17
+ FeatureChainParserMixin,
18
+ )
19
+ from mloda.provider import BaseArtifact
17
20
  from mloda_plugins.feature_group.experimental.default_options_key import DefaultOptionKeys
18
21
  from mloda_plugins.feature_group.experimental.sklearn.sklearn_artifact import SklearnArtifact
19
22
 
20
23
 
21
- class SklearnPipelineFeatureGroup(AbstractFeatureGroup):
24
+ class SklearnPipelineFeatureGroup(FeatureChainParserMixin, FeatureGroup):
22
25
  """
23
26
  Base class for scikit-learn pipeline feature groups.
24
27
 
@@ -88,22 +91,22 @@ class SklearnPipelineFeatureGroup(AbstractFeatureGroup):
88
91
  PROPERTY_MAPPING = {
89
92
  PIPELINE_NAME: {
90
93
  **PIPELINE_TYPES, # All supported pipeline types as valid options
91
- DefaultOptionKeys.mloda_context: True,
92
- DefaultOptionKeys.mloda_default: None, # Default is None as steps + params also work
94
+ DefaultOptionKeys.context: True,
95
+ DefaultOptionKeys.default: None, # Default is None as steps + params also work
93
96
  },
94
97
  PIPELINE_STEPS: {
95
98
  "explanation": "List of pipeline steps as (name, transformer) tuples",
96
- DefaultOptionKeys.mloda_context: True,
97
- DefaultOptionKeys.mloda_default: None, # Default is None as pipeline_types also work
99
+ DefaultOptionKeys.context: True,
100
+ DefaultOptionKeys.default: None, # Default is None as pipeline_types also work
98
101
  },
99
102
  PIPELINE_PARAMS: {
100
103
  "explanation": "Pipeline parameters dictionary",
101
- DefaultOptionKeys.mloda_context: True,
102
- DefaultOptionKeys.mloda_default: None, # Default is None as pipeline_types also work
104
+ DefaultOptionKeys.context: True,
105
+ DefaultOptionKeys.default: None, # Default is None as pipeline_types also work
103
106
  },
104
107
  DefaultOptionKeys.in_features: {
105
108
  "explanation": "Source features for sklearn pipeline (comma-separated)",
106
- DefaultOptionKeys.mloda_context: True,
109
+ DefaultOptionKeys.context: True,
107
110
  },
108
111
  }
109
112
 
@@ -111,6 +114,12 @@ class SklearnPipelineFeatureGroup(AbstractFeatureGroup):
111
114
  PATTERN = "__"
112
115
  PREFIX_PATTERN = r".*__sklearn_pipeline_([\w]+)$"
113
116
 
117
+ # In-feature configuration for FeatureChainParserMixin
118
+ # Pipelines support variable number of in_features
119
+ IN_FEATURE_SEPARATOR = "," # Use comma for multiple source features
120
+ MIN_IN_FEATURES = 1
121
+ MAX_IN_FEATURES: Optional[int] = None # Unlimited
122
+
114
123
  @staticmethod
115
124
  def artifact() -> Type[BaseArtifact] | None:
116
125
  """Return the artifact class for sklearn pipeline persistence."""
@@ -143,6 +152,8 @@ class SklearnPipelineFeatureGroup(AbstractFeatureGroup):
143
152
  # The regex already extracts just the pipeline name (e.g., "scaling" from "income__sklearn_pipeline_scaling")
144
153
  return prefix_part
145
154
 
155
+ # Note: Custom match_feature_group_criteria() required instead of inheriting from mixin
156
+ # because this feature group has unique pre-check logic (PIPELINE_NAME vs PIPELINE_STEPS mutual exclusivity)
146
157
  @classmethod
147
158
  def match_feature_group_criteria(
148
159
  cls,
@@ -248,43 +259,45 @@ class SklearnPipelineFeatureGroup(AbstractFeatureGroup):
248
259
  Raises:
249
260
  ValueError: If parameters cannot be extracted
250
261
  """
262
+ source_features = cls._extract_source_features(feature)
263
+ pipeline_name = cls._extract_pipeline_name(feature)
264
+ if pipeline_name is None:
265
+ raise ValueError(f"Could not extract pipeline name from: {feature.name}")
266
+ return pipeline_name, source_features
267
+
268
+ @classmethod
269
+ def _extract_pipeline_name(cls, feature: Feature) -> Optional[str]:
270
+ """
271
+ Extract pipeline name from a feature.
272
+
273
+ Tries string-based parsing first, falls back to configuration-based approach.
274
+
275
+ Args:
276
+ feature: The feature to extract pipeline name from
277
+
278
+ Returns:
279
+ Pipeline name or None if extraction fails
280
+ """
251
281
  # Try string-based parsing first
252
282
  feature_name_str = feature.name.name if hasattr(feature.name, "name") else str(feature.name)
253
283
 
254
284
  if FeatureChainParser.is_chained_feature(feature_name_str):
255
- pipeline_name = cls.get_pipeline_name(feature_name_str)
256
- _, source_features_str = FeatureChainParser.parse_feature_name(feature_name_str, [cls.PREFIX_PATTERN])
257
-
258
- if source_features_str is not None:
259
- # Handle multiple source features
260
- if "," in source_features_str:
261
- source_features = [f.strip() for f in source_features_str.split(",")]
262
- else:
263
- source_features = [source_features_str]
264
-
265
- return pipeline_name, source_features
285
+ prefix_part, _ = FeatureChainParser.parse_feature_name(feature_name_str, [cls.PREFIX_PATTERN])
286
+ if prefix_part is not None:
287
+ return prefix_part
266
288
 
267
289
  # Fall back to configuration-based approach
268
290
  pipeline_name = feature.options.get(cls.PIPELINE_NAME)
269
291
  pipeline_steps = feature.options.get(cls.PIPELINE_STEPS)
270
- source_features_set = feature.options.get_in_features()
271
- source_features = [sf.get_name() for sf in source_features_set]
272
292
 
273
293
  # Handle mutual exclusivity: either PIPELINE_NAME or PIPELINE_STEPS
274
294
  if pipeline_name is not None:
275
- # Using predefined pipeline
276
- if not source_features:
277
- raise ValueError(f"Could not extract source features from: {feature.name}")
278
- return pipeline_name, source_features
295
+ return str(pipeline_name)
279
296
  elif pipeline_steps is not None:
280
297
  # Using custom pipeline steps - use "custom" as pipeline name
281
- if not source_features:
282
- raise ValueError(f"Could not extract source features from: {feature.name}")
283
- return "custom", source_features
284
- else:
285
- raise ValueError(
286
- f"Could not extract pipeline configuration from: {feature.name}. Must provide either PIPELINE_NAME or PIPELINE_STEPS."
287
- )
298
+ return "custom"
299
+
300
+ return None
288
301
 
289
302
  @classmethod
290
303
  def _get_pipeline_config_from_feature(cls, feature: Feature, pipeline_name: str) -> Dict[str, Any]:
@@ -4,10 +4,9 @@ Pandas implementation for scikit-learn pipeline feature groups.
4
4
 
5
5
  from __future__ import annotations
6
6
 
7
- import numpy as np
8
- from typing import Any, List, Set, Type, Union
7
+ from typing import Any, Set, Type, Union
9
8
 
10
- from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
9
+ from mloda import ComputeFramework
11
10
 
12
11
  from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
13
12
  from mloda_plugins.feature_group.experimental.sklearn.pipeline.base import SklearnPipelineFeatureGroup
@@ -22,7 +21,7 @@ class PandasSklearnPipelineFeatureGroup(SklearnPipelineFeatureGroup):
22
21
  """
23
22
 
24
23
  @classmethod
25
- def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFrameWork]]]:
24
+ def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFramework]]]:
26
25
  """Specify that this feature group works with Pandas."""
27
26
  return {PandasDataFrame}
28
27