mloda 0.3.2__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. mloda/__init__.py +17 -0
  2. {mloda_core → mloda/core}/abstract_plugins/components/base_artifact.py +2 -2
  3. {mloda_core → mloda/core}/abstract_plugins/components/base_validator.py +13 -0
  4. {mloda_core → mloda/core}/abstract_plugins/components/data_access_collection.py +1 -1
  5. {mloda_core → mloda/core}/abstract_plugins/components/data_types.py +39 -0
  6. {mloda_core → mloda/core}/abstract_plugins/components/feature.py +39 -33
  7. {mloda_core → mloda/core}/abstract_plugins/components/feature_chainer/feature_chain_parser.py +19 -19
  8. mloda/core/abstract_plugins/components/feature_chainer/feature_chain_parser_mixin.py +197 -0
  9. {mloda_core → mloda/core}/abstract_plugins/components/feature_collection.py +6 -6
  10. {mloda_core → mloda/core}/abstract_plugins/components/feature_group_version.py +8 -8
  11. {mloda_core → mloda/core}/abstract_plugins/components/feature_set.py +18 -24
  12. {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/cfw_transformer.py +2 -2
  13. {mloda_core → mloda/core}/abstract_plugins/components/index/add_index_feature.py +4 -4
  14. {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/api_input_data.py +3 -3
  15. {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/api_input_data_collection.py +2 -2
  16. {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/base_api_data.py +1 -1
  17. {mloda_core → mloda/core}/abstract_plugins/components/input_data/base_input_data.py +6 -6
  18. {mloda_core → mloda/core}/abstract_plugins/components/input_data/creator/data_creator.py +3 -3
  19. mloda/core/abstract_plugins/components/link.py +437 -0
  20. {mloda_core → mloda/core}/abstract_plugins/components/match_data/match_data.py +3 -3
  21. {mloda_core → mloda/core}/abstract_plugins/components/merge/base_merge_engine.py +2 -2
  22. {mloda_core → mloda/core}/abstract_plugins/components/options.py +12 -36
  23. {mloda_core → mloda/core}/abstract_plugins/components/parallelization_modes.py +1 -1
  24. {mloda_core → mloda/core}/abstract_plugins/components/plugin_option/plugin_collector.py +14 -14
  25. mloda/core/abstract_plugins/components/validators/datatype_validator.py +96 -0
  26. mloda/core/abstract_plugins/components/validators/feature_set_validator.py +38 -0
  27. mloda/core/abstract_plugins/components/validators/feature_validator.py +23 -0
  28. mloda/core/abstract_plugins/components/validators/link_validator.py +79 -0
  29. mloda/core/abstract_plugins/components/validators/options_validator.py +57 -0
  30. mloda_core/abstract_plugins/compute_frame_work.py → mloda/core/abstract_plugins/compute_framework.py +46 -37
  31. mloda_core/abstract_plugins/abstract_feature_group.py → mloda/core/abstract_plugins/feature_group.py +56 -33
  32. mloda/core/abstract_plugins/function_extender.py +78 -0
  33. mloda/core/api/plugin_docs.py +220 -0
  34. mloda/core/api/plugin_info.py +32 -0
  35. {mloda_core → mloda/core}/api/prepare/setup_compute_framework.py +11 -11
  36. {mloda_core → mloda/core}/api/request.py +42 -33
  37. {mloda_core → mloda/core}/core/cfw_manager.py +8 -8
  38. {mloda_core → mloda/core}/core/engine.py +47 -46
  39. {mloda_core → mloda/core}/core/step/abstract_step.py +7 -7
  40. {mloda_core → mloda/core}/core/step/feature_group_step.py +12 -12
  41. {mloda_core → mloda/core}/core/step/join_step.py +14 -14
  42. {mloda_core → mloda/core}/core/step/transform_frame_work_step.py +16 -16
  43. {mloda_core → mloda/core}/filter/filter_engine.py +1 -1
  44. {mloda_core → mloda/core}/filter/filter_type_enum.py +1 -1
  45. {mloda_core → mloda/core}/filter/global_filter.py +23 -23
  46. {mloda_core → mloda/core}/filter/single_filter.py +6 -6
  47. {mloda_core → mloda/core}/prepare/accessible_plugins.py +16 -18
  48. {mloda_core → mloda/core}/prepare/execution_plan.py +65 -39
  49. {mloda_core → mloda/core}/prepare/graph/build_graph.py +6 -6
  50. {mloda_core → mloda/core}/prepare/graph/graph.py +1 -1
  51. {mloda_core → mloda/core}/prepare/graph/properties.py +5 -5
  52. {mloda_core → mloda/core}/prepare/identify_feature_group.py +12 -14
  53. {mloda_core → mloda/core}/prepare/joinstep_collection.py +3 -3
  54. {mloda_core → mloda/core}/prepare/resolve_compute_frameworks.py +6 -6
  55. {mloda_core → mloda/core}/prepare/resolve_graph.py +11 -11
  56. {mloda_core → mloda/core}/prepare/resolve_links.py +31 -40
  57. mloda/core/prepare/validators/resolve_link_validator.py +32 -0
  58. mloda/core/runtime/compute_framework_executor.py +271 -0
  59. mloda/core/runtime/data_lifecycle_manager.py +160 -0
  60. mloda/core/runtime/flight/__init__.py +0 -0
  61. {mloda_core → mloda/core}/runtime/flight/runner_flight_server.py +1 -1
  62. mloda/core/runtime/run.py +317 -0
  63. mloda/core/runtime/worker/__init__.py +0 -0
  64. {mloda_core → mloda/core}/runtime/worker/multiprocessing_worker.py +15 -10
  65. {mloda_core → mloda/core}/runtime/worker/thread_worker.py +2 -2
  66. mloda/core/runtime/worker_manager.py +96 -0
  67. mloda/provider/__init__.py +101 -0
  68. mloda/steward/__init__.py +25 -0
  69. mloda/user/__init__.py +57 -0
  70. {mloda-0.3.2.dist-info → mloda-0.4.0.dist-info}/METADATA +18 -22
  71. mloda-0.4.0.dist-info/RECORD +248 -0
  72. {mloda-0.3.2.dist-info → mloda-0.4.0.dist-info}/top_level.txt +1 -1
  73. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_filter_engine.py +2 -2
  74. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_framework.py +15 -13
  75. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_merge_engine.py +3 -3
  76. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_pyarrow_transformer.py +1 -1
  77. mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_filter_engine.py +2 -2
  78. mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_framework.py +12 -10
  79. mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_pyarrow_transformer.py +1 -1
  80. mloda_plugins/compute_framework/base_implementations/pandas/dataframe.py +18 -16
  81. mloda_plugins/compute_framework/base_implementations/pandas/pandas_filter_engine.py +36 -13
  82. mloda_plugins/compute_framework/base_implementations/pandas/pandas_merge_engine.py +7 -7
  83. mloda_plugins/compute_framework/base_implementations/pandas/pandaspyarrowtransformer.py +1 -1
  84. mloda_plugins/compute_framework/base_implementations/polars/dataframe.py +16 -14
  85. mloda_plugins/compute_framework/base_implementations/polars/lazy_dataframe.py +13 -12
  86. mloda_plugins/compute_framework/base_implementations/polars/polars_filter_engine.py +2 -2
  87. mloda_plugins/compute_framework/base_implementations/polars/polars_lazy_pyarrow_transformer.py +1 -1
  88. mloda_plugins/compute_framework/base_implementations/polars/polars_merge_engine.py +3 -3
  89. mloda_plugins/compute_framework/base_implementations/polars/polars_pyarrow_transformer.py +1 -1
  90. mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_filter_engine.py +2 -2
  91. mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_merge_engine.py +3 -3
  92. mloda_plugins/compute_framework/base_implementations/pyarrow/table.py +12 -10
  93. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_filter_engine.py +2 -2
  94. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_framework.py +11 -9
  95. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_merge_engine.py +3 -3
  96. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_pyarrow_transformer.py +1 -1
  97. mloda_plugins/compute_framework/base_implementations/spark/spark_filter_engine.py +2 -2
  98. mloda_plugins/compute_framework/base_implementations/spark/spark_framework.py +17 -15
  99. mloda_plugins/compute_framework/base_implementations/spark/spark_merge_engine.py +3 -3
  100. mloda_plugins/compute_framework/base_implementations/spark/spark_pyarrow_transformer.py +1 -1
  101. mloda_plugins/config/feature/loader.py +2 -2
  102. mloda_plugins/feature_group/experimental/aggregated_feature_group/base.py +45 -62
  103. mloda_plugins/feature_group/experimental/aggregated_feature_group/pandas.py +2 -2
  104. mloda_plugins/feature_group/experimental/aggregated_feature_group/polars_lazy.py +2 -2
  105. mloda_plugins/feature_group/experimental/aggregated_feature_group/pyarrow.py +2 -2
  106. mloda_plugins/feature_group/experimental/clustering/base.py +69 -97
  107. mloda_plugins/feature_group/experimental/clustering/pandas.py +2 -2
  108. mloda_plugins/feature_group/experimental/data_quality/missing_value/base.py +58 -79
  109. mloda_plugins/feature_group/experimental/data_quality/missing_value/pandas.py +2 -2
  110. mloda_plugins/feature_group/experimental/data_quality/missing_value/pyarrow.py +2 -2
  111. mloda_plugins/feature_group/experimental/data_quality/missing_value/python_dict.py +2 -2
  112. mloda_plugins/feature_group/experimental/default_options_key.py +16 -19
  113. mloda_plugins/feature_group/experimental/dimensionality_reduction/base.py +80 -94
  114. mloda_plugins/feature_group/experimental/dimensionality_reduction/pandas.py +2 -2
  115. mloda_plugins/feature_group/experimental/dynamic_feature_group_factory/dynamic_feature_group_factory.py +24 -24
  116. mloda_plugins/feature_group/experimental/forecasting/base.py +106 -104
  117. mloda_plugins/feature_group/experimental/forecasting/forecasting_artifact.py +2 -2
  118. mloda_plugins/feature_group/experimental/forecasting/pandas.py +15 -15
  119. mloda_plugins/feature_group/experimental/geo_distance/base.py +50 -42
  120. mloda_plugins/feature_group/experimental/geo_distance/pandas.py +2 -2
  121. mloda_plugins/feature_group/experimental/llm/cli.py +4 -4
  122. mloda_plugins/feature_group/experimental/llm/cli_features/refactor_git_cached.py +19 -19
  123. mloda_plugins/feature_group/experimental/llm/installed_packages_feature_group.py +8 -8
  124. mloda_plugins/feature_group/experimental/llm/list_directory_feature_group.py +5 -5
  125. mloda_plugins/feature_group/experimental/llm/llm_api/claude.py +3 -3
  126. mloda_plugins/feature_group/experimental/llm/llm_api/gemini.py +3 -3
  127. mloda_plugins/feature_group/experimental/llm/llm_api/llm_base_request.py +5 -5
  128. mloda_plugins/feature_group/experimental/llm/llm_api/openai.py +3 -3
  129. mloda_plugins/feature_group/experimental/llm/llm_api/request_loop.py +6 -6
  130. mloda_plugins/feature_group/experimental/llm/llm_file_selector.py +10 -10
  131. mloda_plugins/feature_group/experimental/llm/tools/tool_collection.py +1 -1
  132. mloda_plugins/feature_group/experimental/node_centrality/base.py +46 -72
  133. mloda_plugins/feature_group/experimental/node_centrality/pandas.py +2 -2
  134. mloda_plugins/feature_group/experimental/sklearn/encoding/base.py +51 -51
  135. mloda_plugins/feature_group/experimental/sklearn/encoding/pandas.py +2 -2
  136. mloda_plugins/feature_group/experimental/sklearn/pipeline/base.py +52 -39
  137. mloda_plugins/feature_group/experimental/sklearn/pipeline/pandas.py +2 -2
  138. mloda_plugins/feature_group/experimental/sklearn/scaling/base.py +44 -58
  139. mloda_plugins/feature_group/experimental/sklearn/scaling/pandas.py +2 -2
  140. mloda_plugins/feature_group/experimental/sklearn/sklearn_artifact.py +2 -2
  141. mloda_plugins/feature_group/experimental/source_input_feature.py +15 -15
  142. mloda_plugins/feature_group/experimental/text_cleaning/base.py +38 -61
  143. mloda_plugins/feature_group/experimental/text_cleaning/pandas.py +2 -2
  144. mloda_plugins/feature_group/experimental/text_cleaning/python_dict.py +2 -2
  145. mloda_plugins/feature_group/experimental/time_window/base.py +106 -93
  146. mloda_plugins/feature_group/experimental/time_window/pandas.py +13 -13
  147. mloda_plugins/feature_group/experimental/time_window/pyarrow.py +12 -12
  148. mloda_plugins/feature_group/input_data/api_data/api_data.py +9 -11
  149. mloda_plugins/feature_group/input_data/read_context_files.py +7 -7
  150. mloda_plugins/feature_group/input_data/read_db.py +7 -9
  151. mloda_plugins/feature_group/input_data/read_db_feature.py +4 -4
  152. mloda_plugins/feature_group/input_data/read_dbs/sqlite.py +23 -13
  153. mloda_plugins/feature_group/input_data/read_file.py +8 -8
  154. mloda_plugins/feature_group/input_data/read_file_feature.py +4 -4
  155. mloda_plugins/feature_group/input_data/read_files/csv.py +6 -6
  156. mloda_plugins/feature_group/input_data/read_files/feather.py +5 -5
  157. mloda_plugins/feature_group/input_data/read_files/json.py +5 -5
  158. mloda_plugins/feature_group/input_data/read_files/orc.py +5 -5
  159. mloda_plugins/feature_group/input_data/read_files/parquet.py +5 -5
  160. mloda_plugins/feature_group/input_data/read_files/text_file_reader.py +5 -5
  161. mloda_plugins/function_extender/base_implementations/otel/otel_extender.py +4 -4
  162. mloda-0.3.2.dist-info/RECORD +0 -230
  163. mloda_core/abstract_plugins/components/link.py +0 -286
  164. mloda_core/abstract_plugins/function_extender.py +0 -34
  165. mloda_core/runtime/run.py +0 -617
  166. {mloda_core → mloda/core}/__init__.py +0 -0
  167. {mloda_core → mloda/core}/abstract_plugins/__init__.py +0 -0
  168. {mloda_core → mloda/core}/abstract_plugins/components/__init__.py +0 -0
  169. {mloda_core → mloda/core}/abstract_plugins/components/domain.py +0 -0
  170. {mloda_core → mloda/core}/abstract_plugins/components/feature_chainer/__init__.py +0 -0
  171. {mloda_core → mloda/core}/abstract_plugins/components/feature_name.py +0 -0
  172. {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/__init__.py +0 -0
  173. {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/base_transformer.py +0 -0
  174. {mloda_core → mloda/core}/abstract_plugins/components/hashable_dict.py +0 -0
  175. {mloda_core → mloda/core}/abstract_plugins/components/index/__init__.py +0 -0
  176. {mloda_core → mloda/core}/abstract_plugins/components/index/index.py +0 -0
  177. {mloda_core → mloda/core}/abstract_plugins/components/input_data/__init__.py +0 -0
  178. {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/__init__.py +0 -0
  179. {mloda_core → mloda/core}/abstract_plugins/components/input_data/creator/__init__.py +0 -0
  180. {mloda_core → mloda/core}/abstract_plugins/components/match_data/__init__.py +0 -0
  181. {mloda_core → mloda/core}/abstract_plugins/components/merge/__init__.py +0 -0
  182. {mloda_core → mloda/core}/abstract_plugins/components/plugin_option/__init__.py +0 -0
  183. {mloda_core → mloda/core}/abstract_plugins/components/utils.py +0 -0
  184. {mloda_core/abstract_plugins/plugin_loader → mloda/core/abstract_plugins/components/validators}/__init__.py +0 -0
  185. {mloda_core/api → mloda/core/abstract_plugins/plugin_loader}/__init__.py +0 -0
  186. {mloda_core → mloda/core}/abstract_plugins/plugin_loader/plugin_loader.py +0 -0
  187. {mloda_core/api/prepare → mloda/core/api}/__init__.py +0 -0
  188. {mloda_core/core → mloda/core/api/prepare}/__init__.py +0 -0
  189. {mloda_core/core/step → mloda/core/core}/__init__.py +0 -0
  190. {mloda_core/filter → mloda/core/core/step}/__init__.py +0 -0
  191. {mloda_core/prepare → mloda/core/filter}/__init__.py +0 -0
  192. {mloda_core → mloda/core}/filter/filter_parameter.py +0 -0
  193. {mloda_core/prepare/graph → mloda/core/prepare}/__init__.py +0 -0
  194. {mloda_core/runtime → mloda/core/prepare/graph}/__init__.py +0 -0
  195. {mloda_core/runtime/flight → mloda/core/prepare/validators}/__init__.py +0 -0
  196. {mloda_core/runtime/worker → mloda/core/runtime}/__init__.py +0 -0
  197. {mloda_core → mloda/core}/runtime/flight/flight_server.py +0 -0
  198. {mloda-0.3.2.dist-info → mloda-0.4.0.dist-info}/WHEEL +0 -0
  199. {mloda-0.3.2.dist-info → mloda-0.4.0.dist-info}/entry_points.txt +0 -0
  200. {mloda-0.3.2.dist-info → mloda-0.4.0.dist-info}/licenses/LICENSE.TXT +0 -0
  201. {mloda-0.3.2.dist-info → mloda-0.4.0.dist-info}/licenses/NOTICE.md +0 -0
@@ -7,18 +7,21 @@ from __future__ import annotations
7
7
  import datetime
8
8
  from typing import Any, Dict, Optional, Set, Type, Union
9
9
 
10
- from mloda_core.abstract_plugins.abstract_feature_group import AbstractFeatureGroup
11
- from mloda_core.abstract_plugins.components.feature import Feature
12
- from mloda_core.abstract_plugins.components.feature_name import FeatureName
13
- from mloda_core.abstract_plugins.components.feature_set import FeatureSet
14
- from mloda_core.abstract_plugins.components.options import Options
15
- from mloda_core.abstract_plugins.components.feature_chainer.feature_chain_parser import FeatureChainParser
16
- from mloda_core.abstract_plugins.components.base_artifact import BaseArtifact
10
+ from mloda import FeatureGroup
11
+ from mloda import Feature
12
+ from mloda.user import FeatureName
13
+ from mloda.provider import FeatureSet
14
+ from mloda import Options
15
+ from mloda.provider import FeatureChainParser
16
+ from mloda.provider import (
17
+ FeatureChainParserMixin,
18
+ )
19
+ from mloda.provider import BaseArtifact
17
20
  from mloda_plugins.feature_group.experimental.default_options_key import DefaultOptionKeys
18
21
  from mloda_plugins.feature_group.experimental.sklearn.sklearn_artifact import SklearnArtifact
19
22
 
20
23
 
21
- class ScalingFeatureGroup(AbstractFeatureGroup):
24
+ class ScalingFeatureGroup(FeatureChainParserMixin, FeatureGroup):
22
25
  """
23
26
  Base class for scikit-learn scaling feature groups.
24
27
 
@@ -82,17 +85,21 @@ class ScalingFeatureGroup(AbstractFeatureGroup):
82
85
  PATTERN = "__"
83
86
  PREFIX_PATTERN = r".*__(standard|minmax|robust|normalizer)_scaled$"
84
87
 
88
+ # In-feature configuration for FeatureChainParserMixin
89
+ MIN_IN_FEATURES = 1
90
+ MAX_IN_FEATURES = 1
91
+
85
92
  # Property mapping for new configuration-based approach
86
93
  PROPERTY_MAPPING = {
87
94
  SCALER_TYPE: {
88
95
  **SUPPORTED_SCALERS, # All supported scaler types as valid options
89
- DefaultOptionKeys.mloda_context: True, # Context parameter
90
- DefaultOptionKeys.mloda_strict_validation: True, # Enable strict validation
96
+ DefaultOptionKeys.context: True, # Context parameter
97
+ DefaultOptionKeys.strict_validation: True, # Enable strict validation
91
98
  },
92
99
  DefaultOptionKeys.in_features: {
93
100
  "explanation": "Source feature to scale",
94
- DefaultOptionKeys.mloda_context: True, # Context parameter
95
- DefaultOptionKeys.mloda_strict_validation: False, # Flexible validation
101
+ DefaultOptionKeys.context: True, # Context parameter
102
+ DefaultOptionKeys.strict_validation: False, # Flexible validation
96
103
  },
97
104
  }
98
105
 
@@ -101,22 +108,6 @@ class ScalingFeatureGroup(AbstractFeatureGroup):
101
108
  """Return the artifact class for sklearn scaler persistence."""
102
109
  return SklearnArtifact
103
110
 
104
- def input_features(self, options: Options, feature_name: FeatureName) -> Optional[Set[Feature]]:
105
- """Extract source feature from either configuration-based options or string parsing."""
106
-
107
- # Try string-based parsing first
108
- _, source_feature = FeatureChainParser.parse_feature_name(feature_name, [self.PREFIX_PATTERN])
109
- if source_feature is not None:
110
- return {Feature(source_feature)}
111
-
112
- # Fall back to configuration-based approach
113
- source_features = options.get_in_features()
114
- if len(source_features) != 1:
115
- raise ValueError(
116
- f"Expected exactly one source feature, but found {len(source_features)}: {source_features}"
117
- )
118
- return set(source_features)
119
-
120
111
  @classmethod
121
112
  def get_scaler_type(cls, feature_name: str) -> str:
122
113
  """Extract the scaler type from the feature name."""
@@ -133,22 +124,6 @@ class ScalingFeatureGroup(AbstractFeatureGroup):
133
124
 
134
125
  return scaler_type
135
126
 
136
- @classmethod
137
- def match_feature_group_criteria(
138
- cls,
139
- feature_name: Union[FeatureName, str],
140
- options: Options,
141
- data_access_collection: Optional[Any] = None,
142
- ) -> bool:
143
- """Check if feature name matches the expected pattern using unified parser."""
144
- # Use the unified parser with property mapping for full configuration support
145
- return FeatureChainParser.match_configuration_feature_chain_parser(
146
- feature_name,
147
- options,
148
- property_mapping=cls.PROPERTY_MAPPING,
149
- prefix_patterns=[cls.PREFIX_PATTERN],
150
- )
151
-
152
127
  @classmethod
153
128
  def calculate_feature(cls, data: Any, features: FeatureSet) -> Any:
154
129
  """
@@ -213,33 +188,44 @@ class ScalingFeatureGroup(AbstractFeatureGroup):
213
188
  Raises:
214
189
  ValueError: If parameters cannot be extracted
215
190
  """
216
- scaler_type = None
217
- source_feature_name: str | None = None
191
+ source_features = cls._extract_source_features(feature)
192
+ scaler_type = cls._extract_scaler_type(feature)
193
+ if scaler_type is None:
194
+ raise ValueError(f"Could not extract scaler type from: {feature.name}")
195
+ return scaler_type, source_features[0]
218
196
 
219
- # Try string-based parsing first
197
+ @classmethod
198
+ def _extract_scaler_type(cls, feature: Feature) -> Optional[str]:
199
+ """
200
+ Extract scaler type from a feature.
201
+
202
+ Tries string-based parsing first, falls back to configuration-based approach.
203
+
204
+ Args:
205
+ feature: The feature to extract scaler type from
206
+
207
+ Returns:
208
+ The scaler type string
209
+
210
+ Raises:
211
+ ValueError: If scaler type is unsupported
212
+ """
220
213
  feature_name_str = feature.name.name if hasattr(feature.name, "name") else str(feature.name)
221
214
 
215
+ # Try string-based parsing first
222
216
  if FeatureChainParser.is_chained_feature(feature_name_str):
223
217
  scaler_type = cls.get_scaler_type(feature_name_str)
224
- source_feature_name = FeatureChainParser.extract_source_feature(feature_name_str, cls.PREFIX_PATTERN)
225
- return scaler_type, source_feature_name
218
+ return scaler_type
226
219
 
227
220
  # Fall back to configuration-based approach
228
- source_features = feature.options.get_in_features()
229
- source_feature = next(iter(source_features))
230
- source_feature_name = source_feature.get_name()
231
-
232
221
  scaler_type = feature.options.get(cls.SCALER_TYPE)
233
222
 
234
- if scaler_type is None or source_feature_name is None:
235
- raise ValueError(f"Could not extract scaler type and source feature from: {feature.name}")
236
-
237
- if scaler_type not in cls.SUPPORTED_SCALERS:
223
+ if scaler_type is not None and scaler_type not in cls.SUPPORTED_SCALERS:
238
224
  raise ValueError(
239
225
  f"Unsupported scaler type: {scaler_type}. Supported types: {', '.join(cls.SUPPORTED_SCALERS.keys())}"
240
226
  )
241
227
 
242
- return scaler_type, source_feature_name
228
+ return str(scaler_type) if scaler_type is not None else None
243
229
 
244
230
  @classmethod
245
231
  def _import_sklearn_components(cls) -> Dict[str, Any]:
@@ -6,7 +6,7 @@ from __future__ import annotations
6
6
 
7
7
  from typing import Any, Set, Type, Union
8
8
 
9
- from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
9
+ from mloda import ComputeFramework
10
10
 
11
11
  from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
12
12
  from mloda_plugins.feature_group.experimental.sklearn.scaling.base import ScalingFeatureGroup
@@ -21,7 +21,7 @@ class PandasScalingFeatureGroup(ScalingFeatureGroup):
21
21
  """
22
22
 
23
23
  @classmethod
24
- def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFrameWork]]]:
24
+ def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFramework]]]:
25
25
  """Specify that this feature group works with Pandas."""
26
26
  return {PandasDataFrame}
27
27
 
@@ -10,8 +10,8 @@ import tempfile
10
10
  from pathlib import Path
11
11
  from typing import Any, Dict, Optional
12
12
 
13
- from mloda_core.abstract_plugins.components.base_artifact import BaseArtifact
14
- from mloda_core.abstract_plugins.components.feature_set import FeatureSet
13
+ from mloda.provider import BaseArtifact
14
+ from mloda.provider import FeatureSet
15
15
 
16
16
 
17
17
  class SklearnArtifact(BaseArtifact):
@@ -39,16 +39,16 @@ Further, it allows defining:
39
39
  """
40
40
 
41
41
  from typing import Any, Dict, NamedTuple, Optional, Set, Tuple, Type, Union
42
- from mloda_core.abstract_plugins.abstract_feature_group import AbstractFeatureGroup
43
- from mloda_core.abstract_plugins.components.feature import Feature
44
- from mloda_core.abstract_plugins.components.feature_name import FeatureName
45
- from mloda_core.abstract_plugins.components.index.index import Index
46
- from mloda_core.abstract_plugins.components.link import JoinType, Link, JoinSpec
47
- from mloda_core.abstract_plugins.components.options import Options
42
+ from mloda import FeatureGroup
43
+ from mloda import Feature
44
+ from mloda.user import FeatureName
45
+ from mloda.user import Index
46
+ from mloda.user import JoinType, Link, JoinSpec
47
+ from mloda import Options
48
48
  from mloda_plugins.feature_group.experimental.default_options_key import DefaultOptionKeys
49
49
 
50
50
 
51
- class SourceInputFeature(AbstractFeatureGroup):
51
+ class SourceInputFeature(FeatureGroup):
52
52
  """
53
53
  This feature group focuses on defining input features, especially when they originate
54
54
  from other sources or require joins/merges.
@@ -100,19 +100,19 @@ class SourceTuple(NamedTuple):
100
100
 
101
101
  Attributes:
102
102
  feature_name: The name of the feature.
103
- source_class: (Optional) The source class of the feature, can be an `AbstractFeatureGroup` class or a `str` representing a scope.
103
+ source_class: (Optional) The source class of the feature, can be an `FeatureGroup` class or a `str` representing a scope.
104
104
  source_value: (Optional) The value associated with the source class, if applicable.
105
- left_link: (Optional) A tuple containing the left-side `AbstractFeatureGroup` class and index for join operations.
106
- right_link: (Optional) A tuple containing the right-side `AbstractFeatureGroup` class and index for join operations.
105
+ left_link: (Optional) A tuple containing the left-side `FeatureGroup` class and index for join operations.
106
+ right_link: (Optional) A tuple containing the right-side `FeatureGroup` class and index for join operations.
107
107
  join_type: (Optional) The type of join operation (`JoinType`).
108
108
  merge_index: (Optional) The index to use for merge operations.
109
109
  """
110
110
 
111
111
  feature_name: str
112
- source_class: Optional[Type[Union[AbstractFeatureGroup, str]]] = None
112
+ source_class: Optional[Type[Union[FeatureGroup, str]]] = None
113
113
  source_value: Optional[str] = None
114
- left_link: Optional[Tuple[Type[AbstractFeatureGroup], Union[str, Index]]] = None
115
- right_link: Optional[Tuple[Type[AbstractFeatureGroup], Union[str, Index]]] = None
114
+ left_link: Optional[Tuple[Type[FeatureGroup], Union[str, Index]]] = None
115
+ right_link: Optional[Tuple[Type[FeatureGroup], Union[str, Index]]] = None
116
116
  join_type: Optional[JoinType] = None
117
117
  merge_index: Optional[Union[str, Index]] = None
118
118
 
@@ -207,8 +207,8 @@ class SourceInputFeatureComposite:
207
207
  @classmethod
208
208
  def _handle_link(
209
209
  cls,
210
- left_link: Tuple[Type[AbstractFeatureGroup], Union[str, Index]],
211
- right_link: Tuple[Type[AbstractFeatureGroup], Union[str, Index]],
210
+ left_link: Tuple[Type[FeatureGroup], Union[str, Index]],
211
+ right_link: Tuple[Type[FeatureGroup], Union[str, Index]],
212
212
  join_type: Any,
213
213
  ) -> Link:
214
214
  """
@@ -6,16 +6,19 @@ from __future__ import annotations
6
6
 
7
7
  from typing import Any, Optional, Set, Union
8
8
 
9
- from mloda_core.abstract_plugins.abstract_feature_group import AbstractFeatureGroup
10
- from mloda_core.abstract_plugins.components.feature import Feature
11
- from mloda_core.abstract_plugins.components.feature_chainer.feature_chain_parser import FeatureChainParser
12
- from mloda_core.abstract_plugins.components.feature_name import FeatureName
13
- from mloda_core.abstract_plugins.components.feature_set import FeatureSet
14
- from mloda_core.abstract_plugins.components.options import Options
9
+ from mloda import FeatureGroup
10
+ from mloda import Feature
11
+ from mloda.provider import FeatureChainParser
12
+ from mloda.provider import (
13
+ FeatureChainParserMixin,
14
+ )
15
+ from mloda.user import FeatureName
16
+ from mloda.provider import FeatureSet
17
+ from mloda import Options
15
18
  from mloda_plugins.feature_group.experimental.default_options_key import DefaultOptionKeys
16
19
 
17
20
 
18
- class TextCleaningFeatureGroup(AbstractFeatureGroup):
21
+ class TextCleaningFeatureGroup(FeatureChainParserMixin, FeatureGroup):
19
22
  # Option key for the list of operations
20
23
  CLEANING_OPERATIONS = "cleaning_operations"
21
24
 
@@ -33,13 +36,17 @@ class TextCleaningFeatureGroup(AbstractFeatureGroup):
33
36
  PATTERN = "__"
34
37
  PREFIX_PATTERN = r".*__cleaned_text$"
35
38
 
39
+ # In-feature configuration for FeatureChainParserMixin
40
+ MIN_IN_FEATURES = 1
41
+ MAX_IN_FEATURES = 1
42
+
36
43
  # Property mapping for configuration-based features
37
44
  PROPERTY_MAPPING = {
38
45
  CLEANING_OPERATIONS: {
39
46
  **SUPPORTED_OPERATIONS, # All supported operations as valid options
40
- DefaultOptionKeys.mloda_context: True, # Mark as context parameter
41
- DefaultOptionKeys.mloda_strict_validation: True, # Enable strict validation
42
- DefaultOptionKeys.mloda_validation_function: lambda operations: (
47
+ DefaultOptionKeys.context: True, # Mark as context parameter
48
+ DefaultOptionKeys.strict_validation: True, # Enable strict validation
49
+ DefaultOptionKeys.validation_function: lambda operations: (
43
50
  # Handle both actual tuples/lists and string representations
44
51
  (
45
52
  isinstance(operations, (tuple, list))
@@ -59,7 +66,7 @@ class TextCleaningFeatureGroup(AbstractFeatureGroup):
59
66
  },
60
67
  DefaultOptionKeys.in_features: {
61
68
  "explanation": "Source feature to apply text cleaning operations to",
62
- DefaultOptionKeys.mloda_context: True,
69
+ DefaultOptionKeys.context: True,
63
70
  },
64
71
  }
65
72
 
@@ -115,41 +122,6 @@ class TextCleaningFeatureGroup(AbstractFeatureGroup):
115
122
  - The source feature must contain text data
116
123
  """
117
124
 
118
- def input_features(self, options: Options, feature_name: FeatureName) -> Optional[Set[Feature]]:
119
- """Extract source feature from either configuration-based options or string parsing."""
120
-
121
- source_feature: str | None = None
122
-
123
- # Try string-based parsing first
124
- _, source_feature = FeatureChainParser.parse_feature_name(feature_name, [self.PREFIX_PATTERN])
125
- if source_feature is not None:
126
- return {Feature(source_feature)}
127
-
128
- # Fall back to configuration-based approach
129
- source_features = options.get_in_features()
130
- if len(source_features) != 1:
131
- raise ValueError(
132
- f"Expected exactly one source feature, but found {len(source_features)}: {source_features}"
133
- )
134
- return set(source_features)
135
-
136
- @classmethod
137
- def match_feature_group_criteria(
138
- cls,
139
- feature_name: Union[FeatureName, str],
140
- options: Options,
141
- data_access_collection: Optional[Any] = None,
142
- ) -> bool:
143
- """Check if feature name matches the expected pattern for text cleaning features."""
144
-
145
- # Use the unified parser with property mapping for full configuration support
146
- return FeatureChainParser.match_configuration_feature_chain_parser(
147
- feature_name,
148
- options,
149
- property_mapping=cls.PROPERTY_MAPPING,
150
- prefix_patterns=[cls.PREFIX_PATTERN],
151
- )
152
-
153
125
  @classmethod
154
126
  def _extract_operations_and_source_feature(cls, feature: Feature) -> tuple[tuple[Any, Any], str]:
155
127
  """
@@ -166,31 +138,36 @@ class TextCleaningFeatureGroup(AbstractFeatureGroup):
166
138
  Raises:
167
139
  ValueError: If parameters cannot be extracted
168
140
  """
169
- operations = None
170
- source_feature_name: str | None = None
141
+ source_features = cls._extract_source_features(feature)
142
+ operations = cls._extract_cleaning_operations(feature)
143
+ if operations is None:
144
+ raise ValueError(f"Could not extract operations from: {feature.name}")
145
+ return operations, source_features[0]
171
146
 
147
+ @classmethod
148
+ def _extract_cleaning_operations(cls, feature: Feature) -> Optional[tuple[Any, Any]]:
149
+ """
150
+ Extract cleaning operations from a feature.
151
+
152
+ Tries string-based parsing first, falls back to configuration-based approach.
153
+
154
+ Args:
155
+ feature: The feature to extract operations from
156
+
157
+ Returns:
158
+ Tuple of cleaning operations, or None if not found
159
+ """
172
160
  # Try string-based parsing first
173
161
  feature_name_str = feature.name.name if hasattr(feature.name, "name") else str(feature.name)
174
162
 
175
163
  if FeatureChainParser.is_chained_feature(feature_name_str):
176
- _, source_feature_name = FeatureChainParser.parse_feature_name(feature_name_str, [cls.PREFIX_PATTERN])
177
164
  # For string-based features, get operations from options
178
165
  operations = feature.options.get(cls.CLEANING_OPERATIONS) or ()
179
- if source_feature_name is None:
180
- raise ValueError(f"Could not extract source feature from string-based feature: {feature.name}")
181
- return operations, source_feature_name # type: ignore
166
+ return operations # type: ignore
182
167
 
183
168
  # Fall back to configuration-based approach
184
- source_features = feature.options.get_in_features()
185
- source_feature = next(iter(source_features))
186
- source_feature_name = source_feature.get_name()
187
-
188
169
  operations = feature.options.get(cls.CLEANING_OPERATIONS)
189
-
190
- if operations is None or source_feature_name is None:
191
- raise ValueError(f"Could not extract cleaning operations and source feature from: {feature.name}")
192
-
193
- return operations, source_feature_name
170
+ return operations if operations is not None else None
194
171
 
195
172
  @classmethod
196
173
  def calculate_feature(cls, data: Any, features: FeatureSet) -> Any:
@@ -25,7 +25,7 @@ except ImportError:
25
25
  pd = None
26
26
 
27
27
 
28
- from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
28
+ from mloda import ComputeFramework
29
29
  from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
30
30
  from mloda_plugins.feature_group.experimental.text_cleaning.base import TextCleaningFeatureGroup
31
31
 
@@ -40,7 +40,7 @@ class PandasTextCleaningFeatureGroup(TextCleaningFeatureGroup):
40
40
  """
41
41
 
42
42
  @classmethod
43
- def compute_framework_rule(cls) -> set[type[ComputeFrameWork]]:
43
+ def compute_framework_rule(cls) -> set[type[ComputeFramework]]:
44
44
  """Define the compute framework for this feature group."""
45
45
  return {PandasDataFrame}
46
46
 
@@ -9,7 +9,7 @@ import string
9
9
  import unicodedata
10
10
  from typing import Any, Dict, List, Set, Type, Union
11
11
 
12
- from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
12
+ from mloda import ComputeFramework
13
13
 
14
14
  from mloda_plugins.compute_framework.base_implementations.python_dict.python_dict_framework import PythonDictFramework
15
15
  from mloda_plugins.feature_group.experimental.text_cleaning.base import TextCleaningFeatureGroup
@@ -35,7 +35,7 @@ class PythonDictTextCleaningFeatureGroup(TextCleaningFeatureGroup):
35
35
  """
36
36
 
37
37
  @classmethod
38
- def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFrameWork]]]:
38
+ def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFramework]]]:
39
39
  return {PythonDictFramework}
40
40
 
41
41
  @classmethod