mloda 0.3.3__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. mloda/__init__.py +17 -0
  2. {mloda_core → mloda/core}/abstract_plugins/components/base_artifact.py +2 -2
  3. {mloda_core → mloda/core}/abstract_plugins/components/base_validator.py +13 -0
  4. {mloda_core → mloda/core}/abstract_plugins/components/data_access_collection.py +1 -1
  5. {mloda_core → mloda/core}/abstract_plugins/components/data_types.py +39 -0
  6. {mloda_core → mloda/core}/abstract_plugins/components/feature.py +39 -33
  7. {mloda_core → mloda/core}/abstract_plugins/components/feature_chainer/feature_chain_parser.py +19 -19
  8. mloda/core/abstract_plugins/components/feature_chainer/feature_chain_parser_mixin.py +197 -0
  9. {mloda_core → mloda/core}/abstract_plugins/components/feature_collection.py +6 -6
  10. {mloda_core → mloda/core}/abstract_plugins/components/feature_group_version.py +8 -8
  11. {mloda_core → mloda/core}/abstract_plugins/components/feature_set.py +18 -24
  12. {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/cfw_transformer.py +2 -2
  13. {mloda_core → mloda/core}/abstract_plugins/components/index/add_index_feature.py +4 -4
  14. {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/api_input_data.py +3 -3
  15. {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/api_input_data_collection.py +2 -2
  16. {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/base_api_data.py +1 -1
  17. {mloda_core → mloda/core}/abstract_plugins/components/input_data/base_input_data.py +6 -6
  18. {mloda_core → mloda/core}/abstract_plugins/components/input_data/creator/data_creator.py +3 -3
  19. mloda/core/abstract_plugins/components/link.py +437 -0
  20. {mloda_core → mloda/core}/abstract_plugins/components/match_data/match_data.py +3 -3
  21. {mloda_core → mloda/core}/abstract_plugins/components/merge/base_merge_engine.py +2 -2
  22. {mloda_core → mloda/core}/abstract_plugins/components/options.py +12 -36
  23. {mloda_core → mloda/core}/abstract_plugins/components/parallelization_modes.py +1 -1
  24. {mloda_core → mloda/core}/abstract_plugins/components/plugin_option/plugin_collector.py +14 -14
  25. mloda/core/abstract_plugins/components/validators/datatype_validator.py +96 -0
  26. mloda/core/abstract_plugins/components/validators/feature_set_validator.py +38 -0
  27. mloda/core/abstract_plugins/components/validators/feature_validator.py +23 -0
  28. mloda/core/abstract_plugins/components/validators/link_validator.py +79 -0
  29. mloda/core/abstract_plugins/components/validators/options_validator.py +57 -0
  30. mloda_core/abstract_plugins/compute_frame_work.py → mloda/core/abstract_plugins/compute_framework.py +45 -37
  31. mloda_core/abstract_plugins/abstract_feature_group.py → mloda/core/abstract_plugins/feature_group.py +56 -33
  32. mloda/core/abstract_plugins/function_extender.py +78 -0
  33. mloda/core/api/plugin_docs.py +220 -0
  34. mloda/core/api/plugin_info.py +32 -0
  35. {mloda_core → mloda/core}/api/prepare/setup_compute_framework.py +11 -11
  36. {mloda_core → mloda/core}/api/request.py +42 -33
  37. {mloda_core → mloda/core}/core/cfw_manager.py +8 -8
  38. {mloda_core → mloda/core}/core/engine.py +47 -47
  39. {mloda_core → mloda/core}/core/step/abstract_step.py +7 -7
  40. {mloda_core → mloda/core}/core/step/feature_group_step.py +12 -12
  41. {mloda_core → mloda/core}/core/step/join_step.py +14 -14
  42. {mloda_core → mloda/core}/core/step/transform_frame_work_step.py +16 -16
  43. {mloda_core → mloda/core}/filter/filter_engine.py +1 -1
  44. {mloda_core → mloda/core}/filter/filter_type_enum.py +1 -1
  45. {mloda_core → mloda/core}/filter/global_filter.py +23 -23
  46. {mloda_core → mloda/core}/filter/single_filter.py +6 -6
  47. {mloda_core → mloda/core}/prepare/accessible_plugins.py +15 -18
  48. {mloda_core → mloda/core}/prepare/execution_plan.py +65 -39
  49. {mloda_core → mloda/core}/prepare/graph/build_graph.py +6 -6
  50. {mloda_core → mloda/core}/prepare/graph/graph.py +1 -1
  51. {mloda_core → mloda/core}/prepare/graph/properties.py +5 -5
  52. {mloda_core → mloda/core}/prepare/identify_feature_group.py +12 -14
  53. {mloda_core → mloda/core}/prepare/joinstep_collection.py +3 -3
  54. {mloda_core → mloda/core}/prepare/resolve_compute_frameworks.py +6 -6
  55. {mloda_core → mloda/core}/prepare/resolve_graph.py +11 -11
  56. {mloda_core → mloda/core}/prepare/resolve_links.py +11 -31
  57. mloda/core/prepare/validators/resolve_link_validator.py +32 -0
  58. mloda/core/runtime/compute_framework_executor.py +271 -0
  59. mloda/core/runtime/data_lifecycle_manager.py +160 -0
  60. mloda/core/runtime/flight/__init__.py +0 -0
  61. {mloda_core → mloda/core}/runtime/flight/runner_flight_server.py +1 -1
  62. mloda/core/runtime/run.py +317 -0
  63. mloda/core/runtime/worker/__init__.py +0 -0
  64. {mloda_core → mloda/core}/runtime/worker/multiprocessing_worker.py +15 -10
  65. {mloda_core → mloda/core}/runtime/worker/thread_worker.py +2 -2
  66. mloda/core/runtime/worker_manager.py +96 -0
  67. mloda/provider/__init__.py +101 -0
  68. mloda/steward/__init__.py +25 -0
  69. mloda/user/__init__.py +57 -0
  70. {mloda-0.3.3.dist-info → mloda-0.4.1.dist-info}/METADATA +24 -31
  71. mloda-0.4.1.dist-info/RECORD +248 -0
  72. {mloda-0.3.3.dist-info → mloda-0.4.1.dist-info}/top_level.txt +1 -1
  73. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_filter_engine.py +2 -2
  74. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_framework.py +15 -13
  75. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_merge_engine.py +3 -3
  76. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_pyarrow_transformer.py +1 -1
  77. mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_filter_engine.py +2 -2
  78. mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_framework.py +12 -10
  79. mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_pyarrow_transformer.py +1 -1
  80. mloda_plugins/compute_framework/base_implementations/pandas/dataframe.py +18 -16
  81. mloda_plugins/compute_framework/base_implementations/pandas/pandas_filter_engine.py +36 -13
  82. mloda_plugins/compute_framework/base_implementations/pandas/pandas_merge_engine.py +7 -7
  83. mloda_plugins/compute_framework/base_implementations/pandas/pandaspyarrowtransformer.py +1 -1
  84. mloda_plugins/compute_framework/base_implementations/polars/dataframe.py +16 -14
  85. mloda_plugins/compute_framework/base_implementations/polars/lazy_dataframe.py +13 -12
  86. mloda_plugins/compute_framework/base_implementations/polars/polars_filter_engine.py +2 -2
  87. mloda_plugins/compute_framework/base_implementations/polars/polars_lazy_pyarrow_transformer.py +1 -1
  88. mloda_plugins/compute_framework/base_implementations/polars/polars_merge_engine.py +3 -3
  89. mloda_plugins/compute_framework/base_implementations/polars/polars_pyarrow_transformer.py +1 -1
  90. mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_filter_engine.py +2 -2
  91. mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_merge_engine.py +3 -3
  92. mloda_plugins/compute_framework/base_implementations/pyarrow/table.py +12 -11
  93. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_filter_engine.py +2 -2
  94. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_framework.py +11 -9
  95. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_merge_engine.py +3 -3
  96. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_pyarrow_transformer.py +1 -1
  97. mloda_plugins/compute_framework/base_implementations/spark/spark_filter_engine.py +2 -2
  98. mloda_plugins/compute_framework/base_implementations/spark/spark_framework.py +17 -15
  99. mloda_plugins/compute_framework/base_implementations/spark/spark_merge_engine.py +2 -3
  100. mloda_plugins/compute_framework/base_implementations/spark/spark_pyarrow_transformer.py +1 -1
  101. mloda_plugins/config/feature/loader.py +2 -2
  102. mloda_plugins/feature_group/experimental/aggregated_feature_group/base.py +45 -64
  103. mloda_plugins/feature_group/experimental/aggregated_feature_group/pandas.py +2 -2
  104. mloda_plugins/feature_group/experimental/aggregated_feature_group/polars_lazy.py +2 -2
  105. mloda_plugins/feature_group/experimental/aggregated_feature_group/pyarrow.py +2 -2
  106. mloda_plugins/feature_group/experimental/clustering/base.py +67 -97
  107. mloda_plugins/feature_group/experimental/clustering/pandas.py +2 -2
  108. mloda_plugins/feature_group/experimental/data_quality/missing_value/base.py +58 -82
  109. mloda_plugins/feature_group/experimental/data_quality/missing_value/pandas.py +2 -2
  110. mloda_plugins/feature_group/experimental/data_quality/missing_value/pyarrow.py +2 -2
  111. mloda_plugins/feature_group/experimental/data_quality/missing_value/python_dict.py +2 -2
  112. mloda_plugins/feature_group/experimental/default_options_key.py +16 -19
  113. mloda_plugins/feature_group/experimental/dimensionality_reduction/base.py +81 -96
  114. mloda_plugins/feature_group/experimental/dimensionality_reduction/pandas.py +2 -2
  115. mloda_plugins/feature_group/experimental/dynamic_feature_group_factory/dynamic_feature_group_factory.py +24 -24
  116. mloda_plugins/feature_group/experimental/forecasting/base.py +108 -106
  117. mloda_plugins/feature_group/experimental/forecasting/forecasting_artifact.py +2 -2
  118. mloda_plugins/feature_group/experimental/forecasting/pandas.py +15 -15
  119. mloda_plugins/feature_group/experimental/geo_distance/base.py +52 -44
  120. mloda_plugins/feature_group/experimental/geo_distance/pandas.py +2 -3
  121. mloda_plugins/feature_group/experimental/llm/cli.py +4 -4
  122. mloda_plugins/feature_group/experimental/llm/cli_features/refactor_git_cached.py +19 -19
  123. mloda_plugins/feature_group/experimental/llm/installed_packages_feature_group.py +8 -8
  124. mloda_plugins/feature_group/experimental/llm/list_directory_feature_group.py +5 -5
  125. mloda_plugins/feature_group/experimental/llm/llm_api/claude.py +3 -3
  126. mloda_plugins/feature_group/experimental/llm/llm_api/gemini.py +3 -3
  127. mloda_plugins/feature_group/experimental/llm/llm_api/llm_base_request.py +5 -5
  128. mloda_plugins/feature_group/experimental/llm/llm_api/openai.py +3 -3
  129. mloda_plugins/feature_group/experimental/llm/llm_api/request_loop.py +6 -6
  130. mloda_plugins/feature_group/experimental/llm/llm_file_selector.py +10 -10
  131. mloda_plugins/feature_group/experimental/llm/tools/tool_collection.py +1 -1
  132. mloda_plugins/feature_group/experimental/node_centrality/base.py +46 -74
  133. mloda_plugins/feature_group/experimental/node_centrality/pandas.py +2 -2
  134. mloda_plugins/feature_group/experimental/sklearn/encoding/base.py +53 -53
  135. mloda_plugins/feature_group/experimental/sklearn/encoding/pandas.py +2 -2
  136. mloda_plugins/feature_group/experimental/sklearn/pipeline/base.py +52 -39
  137. mloda_plugins/feature_group/experimental/sklearn/pipeline/pandas.py +3 -4
  138. mloda_plugins/feature_group/experimental/sklearn/scaling/base.py +44 -60
  139. mloda_plugins/feature_group/experimental/sklearn/scaling/pandas.py +2 -2
  140. mloda_plugins/feature_group/experimental/sklearn/sklearn_artifact.py +2 -3
  141. mloda_plugins/feature_group/experimental/source_input_feature.py +15 -15
  142. mloda_plugins/feature_group/experimental/text_cleaning/base.py +38 -63
  143. mloda_plugins/feature_group/experimental/text_cleaning/pandas.py +2 -2
  144. mloda_plugins/feature_group/experimental/text_cleaning/python_dict.py +2 -2
  145. mloda_plugins/feature_group/experimental/time_window/base.py +108 -95
  146. mloda_plugins/feature_group/experimental/time_window/pandas.py +13 -13
  147. mloda_plugins/feature_group/experimental/time_window/pyarrow.py +12 -12
  148. mloda_plugins/feature_group/input_data/api_data/api_data.py +9 -11
  149. mloda_plugins/feature_group/input_data/read_context_files.py +7 -7
  150. mloda_plugins/feature_group/input_data/read_db.py +7 -9
  151. mloda_plugins/feature_group/input_data/read_db_feature.py +4 -4
  152. mloda_plugins/feature_group/input_data/read_dbs/sqlite.py +23 -13
  153. mloda_plugins/feature_group/input_data/read_file.py +8 -8
  154. mloda_plugins/feature_group/input_data/read_file_feature.py +4 -4
  155. mloda_plugins/feature_group/input_data/read_files/csv.py +6 -6
  156. mloda_plugins/feature_group/input_data/read_files/feather.py +5 -5
  157. mloda_plugins/feature_group/input_data/read_files/json.py +5 -5
  158. mloda_plugins/feature_group/input_data/read_files/orc.py +5 -5
  159. mloda_plugins/feature_group/input_data/read_files/parquet.py +5 -5
  160. mloda_plugins/feature_group/input_data/read_files/text_file_reader.py +5 -5
  161. mloda_plugins/function_extender/base_implementations/otel/otel_extender.py +4 -4
  162. mloda-0.3.3.dist-info/RECORD +0 -230
  163. mloda_core/abstract_plugins/components/link.py +0 -286
  164. mloda_core/abstract_plugins/function_extender.py +0 -34
  165. mloda_core/runtime/run.py +0 -617
  166. {mloda_core → mloda/core}/__init__.py +0 -0
  167. {mloda_core → mloda/core}/abstract_plugins/__init__.py +0 -0
  168. {mloda_core → mloda/core}/abstract_plugins/components/__init__.py +0 -0
  169. {mloda_core → mloda/core}/abstract_plugins/components/domain.py +0 -0
  170. {mloda_core → mloda/core}/abstract_plugins/components/feature_chainer/__init__.py +0 -0
  171. {mloda_core → mloda/core}/abstract_plugins/components/feature_name.py +0 -0
  172. {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/__init__.py +0 -0
  173. {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/base_transformer.py +0 -0
  174. {mloda_core → mloda/core}/abstract_plugins/components/hashable_dict.py +0 -0
  175. {mloda_core → mloda/core}/abstract_plugins/components/index/__init__.py +0 -0
  176. {mloda_core → mloda/core}/abstract_plugins/components/index/index.py +0 -0
  177. {mloda_core → mloda/core}/abstract_plugins/components/input_data/__init__.py +0 -0
  178. {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/__init__.py +0 -0
  179. {mloda_core → mloda/core}/abstract_plugins/components/input_data/creator/__init__.py +0 -0
  180. {mloda_core → mloda/core}/abstract_plugins/components/match_data/__init__.py +0 -0
  181. {mloda_core → mloda/core}/abstract_plugins/components/merge/__init__.py +0 -0
  182. {mloda_core → mloda/core}/abstract_plugins/components/plugin_option/__init__.py +0 -0
  183. {mloda_core → mloda/core}/abstract_plugins/components/utils.py +0 -0
  184. {mloda_core/abstract_plugins/plugin_loader → mloda/core/abstract_plugins/components/validators}/__init__.py +0 -0
  185. {mloda_core/api → mloda/core/abstract_plugins/plugin_loader}/__init__.py +0 -0
  186. {mloda_core → mloda/core}/abstract_plugins/plugin_loader/plugin_loader.py +0 -0
  187. {mloda_core/api/prepare → mloda/core/api}/__init__.py +0 -0
  188. {mloda_core/core → mloda/core/api/prepare}/__init__.py +0 -0
  189. {mloda_core/core/step → mloda/core/core}/__init__.py +0 -0
  190. {mloda_core/filter → mloda/core/core/step}/__init__.py +0 -0
  191. {mloda_core/prepare → mloda/core/filter}/__init__.py +0 -0
  192. {mloda_core → mloda/core}/filter/filter_parameter.py +0 -0
  193. {mloda_core/prepare/graph → mloda/core/prepare}/__init__.py +0 -0
  194. {mloda_core/runtime → mloda/core/prepare/graph}/__init__.py +0 -0
  195. {mloda_core/runtime/flight → mloda/core/prepare/validators}/__init__.py +0 -0
  196. {mloda_core/runtime/worker → mloda/core/runtime}/__init__.py +0 -0
  197. {mloda_core → mloda/core}/runtime/flight/flight_server.py +0 -0
  198. {mloda-0.3.3.dist-info → mloda-0.4.1.dist-info}/WHEEL +0 -0
  199. {mloda-0.3.3.dist-info → mloda-0.4.1.dist-info}/entry_points.txt +0 -0
  200. {mloda-0.3.3.dist-info → mloda-0.4.1.dist-info}/licenses/LICENSE.TXT +0 -0
  201. {mloda-0.3.3.dist-info → mloda-0.4.1.dist-info}/licenses/NOTICE.md +0 -0
@@ -5,20 +5,21 @@ Base implementation for scikit-learn scaling feature groups.
5
5
  from __future__ import annotations
6
6
 
7
7
  import datetime
8
- from typing import Any, Dict, Optional, Set, Type, Union
9
-
10
- from mloda_core.abstract_plugins.abstract_feature_group import AbstractFeatureGroup
11
- from mloda_core.abstract_plugins.components.feature import Feature
12
- from mloda_core.abstract_plugins.components.feature_name import FeatureName
13
- from mloda_core.abstract_plugins.components.feature_set import FeatureSet
14
- from mloda_core.abstract_plugins.components.options import Options
15
- from mloda_core.abstract_plugins.components.feature_chainer.feature_chain_parser import FeatureChainParser
16
- from mloda_core.abstract_plugins.components.base_artifact import BaseArtifact
8
+ from typing import Any, Dict, Optional, Type
9
+
10
+ from mloda import FeatureGroup
11
+ from mloda import Feature
12
+ from mloda.provider import FeatureSet
13
+ from mloda.provider import FeatureChainParser
14
+ from mloda.provider import (
15
+ FeatureChainParserMixin,
16
+ )
17
+ from mloda.provider import BaseArtifact
17
18
  from mloda_plugins.feature_group.experimental.default_options_key import DefaultOptionKeys
18
19
  from mloda_plugins.feature_group.experimental.sklearn.sklearn_artifact import SklearnArtifact
19
20
 
20
21
 
21
- class ScalingFeatureGroup(AbstractFeatureGroup):
22
+ class ScalingFeatureGroup(FeatureChainParserMixin, FeatureGroup):
22
23
  """
23
24
  Base class for scikit-learn scaling feature groups.
24
25
 
@@ -82,17 +83,21 @@ class ScalingFeatureGroup(AbstractFeatureGroup):
82
83
  PATTERN = "__"
83
84
  PREFIX_PATTERN = r".*__(standard|minmax|robust|normalizer)_scaled$"
84
85
 
86
+ # In-feature configuration for FeatureChainParserMixin
87
+ MIN_IN_FEATURES = 1
88
+ MAX_IN_FEATURES = 1
89
+
85
90
  # Property mapping for new configuration-based approach
86
91
  PROPERTY_MAPPING = {
87
92
  SCALER_TYPE: {
88
93
  **SUPPORTED_SCALERS, # All supported scaler types as valid options
89
- DefaultOptionKeys.mloda_context: True, # Context parameter
90
- DefaultOptionKeys.mloda_strict_validation: True, # Enable strict validation
94
+ DefaultOptionKeys.context: True, # Context parameter
95
+ DefaultOptionKeys.strict_validation: True, # Enable strict validation
91
96
  },
92
97
  DefaultOptionKeys.in_features: {
93
98
  "explanation": "Source feature to scale",
94
- DefaultOptionKeys.mloda_context: True, # Context parameter
95
- DefaultOptionKeys.mloda_strict_validation: False, # Flexible validation
99
+ DefaultOptionKeys.context: True, # Context parameter
100
+ DefaultOptionKeys.strict_validation: False, # Flexible validation
96
101
  },
97
102
  }
98
103
 
@@ -101,22 +106,6 @@ class ScalingFeatureGroup(AbstractFeatureGroup):
101
106
  """Return the artifact class for sklearn scaler persistence."""
102
107
  return SklearnArtifact
103
108
 
104
- def input_features(self, options: Options, feature_name: FeatureName) -> Optional[Set[Feature]]:
105
- """Extract source feature from either configuration-based options or string parsing."""
106
-
107
- # Try string-based parsing first
108
- _, source_feature = FeatureChainParser.parse_feature_name(feature_name, [self.PREFIX_PATTERN])
109
- if source_feature is not None:
110
- return {Feature(source_feature)}
111
-
112
- # Fall back to configuration-based approach
113
- source_features = options.get_in_features()
114
- if len(source_features) != 1:
115
- raise ValueError(
116
- f"Expected exactly one source feature, but found {len(source_features)}: {source_features}"
117
- )
118
- return set(source_features)
119
-
120
109
  @classmethod
121
110
  def get_scaler_type(cls, feature_name: str) -> str:
122
111
  """Extract the scaler type from the feature name."""
@@ -133,22 +122,6 @@ class ScalingFeatureGroup(AbstractFeatureGroup):
133
122
 
134
123
  return scaler_type
135
124
 
136
- @classmethod
137
- def match_feature_group_criteria(
138
- cls,
139
- feature_name: Union[FeatureName, str],
140
- options: Options,
141
- data_access_collection: Optional[Any] = None,
142
- ) -> bool:
143
- """Check if feature name matches the expected pattern using unified parser."""
144
- # Use the unified parser with property mapping for full configuration support
145
- return FeatureChainParser.match_configuration_feature_chain_parser(
146
- feature_name,
147
- options,
148
- property_mapping=cls.PROPERTY_MAPPING,
149
- prefix_patterns=[cls.PREFIX_PATTERN],
150
- )
151
-
152
125
  @classmethod
153
126
  def calculate_feature(cls, data: Any, features: FeatureSet) -> Any:
154
127
  """
@@ -213,33 +186,44 @@ class ScalingFeatureGroup(AbstractFeatureGroup):
213
186
  Raises:
214
187
  ValueError: If parameters cannot be extracted
215
188
  """
216
- scaler_type = None
217
- source_feature_name: str | None = None
189
+ source_features = cls._extract_source_features(feature)
190
+ scaler_type = cls._extract_scaler_type(feature)
191
+ if scaler_type is None:
192
+ raise ValueError(f"Could not extract scaler type from: {feature.name}")
193
+ return scaler_type, source_features[0]
218
194
 
219
- # Try string-based parsing first
195
+ @classmethod
196
+ def _extract_scaler_type(cls, feature: Feature) -> Optional[str]:
197
+ """
198
+ Extract scaler type from a feature.
199
+
200
+ Tries string-based parsing first, falls back to configuration-based approach.
201
+
202
+ Args:
203
+ feature: The feature to extract scaler type from
204
+
205
+ Returns:
206
+ The scaler type string
207
+
208
+ Raises:
209
+ ValueError: If scaler type is unsupported
210
+ """
220
211
  feature_name_str = feature.name.name if hasattr(feature.name, "name") else str(feature.name)
221
212
 
213
+ # Try string-based parsing first
222
214
  if FeatureChainParser.is_chained_feature(feature_name_str):
223
215
  scaler_type = cls.get_scaler_type(feature_name_str)
224
- source_feature_name = FeatureChainParser.extract_source_feature(feature_name_str, cls.PREFIX_PATTERN)
225
- return scaler_type, source_feature_name
216
+ return scaler_type
226
217
 
227
218
  # Fall back to configuration-based approach
228
- source_features = feature.options.get_in_features()
229
- source_feature = next(iter(source_features))
230
- source_feature_name = source_feature.get_name()
231
-
232
219
  scaler_type = feature.options.get(cls.SCALER_TYPE)
233
220
 
234
- if scaler_type is None or source_feature_name is None:
235
- raise ValueError(f"Could not extract scaler type and source feature from: {feature.name}")
236
-
237
- if scaler_type not in cls.SUPPORTED_SCALERS:
221
+ if scaler_type is not None and scaler_type not in cls.SUPPORTED_SCALERS:
238
222
  raise ValueError(
239
223
  f"Unsupported scaler type: {scaler_type}. Supported types: {', '.join(cls.SUPPORTED_SCALERS.keys())}"
240
224
  )
241
225
 
242
- return scaler_type, source_feature_name
226
+ return str(scaler_type) if scaler_type is not None else None
243
227
 
244
228
  @classmethod
245
229
  def _import_sklearn_components(cls) -> Dict[str, Any]:
@@ -6,7 +6,7 @@ from __future__ import annotations
6
6
 
7
7
  from typing import Any, Set, Type, Union
8
8
 
9
- from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
9
+ from mloda import ComputeFramework
10
10
 
11
11
  from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
12
12
  from mloda_plugins.feature_group.experimental.sklearn.scaling.base import ScalingFeatureGroup
@@ -21,7 +21,7 @@ class PandasScalingFeatureGroup(ScalingFeatureGroup):
21
21
  """
22
22
 
23
23
  @classmethod
24
- def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFrameWork]]]:
24
+ def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFramework]]]:
25
25
  """Specify that this feature group works with Pandas."""
26
26
  return {PandasDataFrame}
27
27
 
@@ -4,14 +4,13 @@ Artifact for storing fitted scikit-learn transformers and estimators.
4
4
 
5
5
  import json
6
6
  import base64
7
- import os
8
7
  import hashlib
9
8
  import tempfile
10
9
  from pathlib import Path
11
10
  from typing import Any, Dict, Optional
12
11
 
13
- from mloda_core.abstract_plugins.components.base_artifact import BaseArtifact
14
- from mloda_core.abstract_plugins.components.feature_set import FeatureSet
12
+ from mloda.provider import BaseArtifact
13
+ from mloda.provider import FeatureSet
15
14
 
16
15
 
17
16
  class SklearnArtifact(BaseArtifact):
@@ -39,16 +39,16 @@ Further, it allows defining:
39
39
  """
40
40
 
41
41
  from typing import Any, Dict, NamedTuple, Optional, Set, Tuple, Type, Union
42
- from mloda_core.abstract_plugins.abstract_feature_group import AbstractFeatureGroup
43
- from mloda_core.abstract_plugins.components.feature import Feature
44
- from mloda_core.abstract_plugins.components.feature_name import FeatureName
45
- from mloda_core.abstract_plugins.components.index.index import Index
46
- from mloda_core.abstract_plugins.components.link import JoinType, Link, JoinSpec
47
- from mloda_core.abstract_plugins.components.options import Options
42
+ from mloda import FeatureGroup
43
+ from mloda import Feature
44
+ from mloda.user import FeatureName
45
+ from mloda.user import Index
46
+ from mloda.user import JoinType, Link, JoinSpec
47
+ from mloda import Options
48
48
  from mloda_plugins.feature_group.experimental.default_options_key import DefaultOptionKeys
49
49
 
50
50
 
51
- class SourceInputFeature(AbstractFeatureGroup):
51
+ class SourceInputFeature(FeatureGroup):
52
52
  """
53
53
  This feature group focuses on defining input features, especially when they originate
54
54
  from other sources or require joins/merges.
@@ -100,19 +100,19 @@ class SourceTuple(NamedTuple):
100
100
 
101
101
  Attributes:
102
102
  feature_name: The name of the feature.
103
- source_class: (Optional) The source class of the feature, can be an `AbstractFeatureGroup` class or a `str` representing a scope.
103
+ source_class: (Optional) The source class of the feature, can be an `FeatureGroup` class or a `str` representing a scope.
104
104
  source_value: (Optional) The value associated with the source class, if applicable.
105
- left_link: (Optional) A tuple containing the left-side `AbstractFeatureGroup` class and index for join operations.
106
- right_link: (Optional) A tuple containing the right-side `AbstractFeatureGroup` class and index for join operations.
105
+ left_link: (Optional) A tuple containing the left-side `FeatureGroup` class and index for join operations.
106
+ right_link: (Optional) A tuple containing the right-side `FeatureGroup` class and index for join operations.
107
107
  join_type: (Optional) The type of join operation (`JoinType`).
108
108
  merge_index: (Optional) The index to use for merge operations.
109
109
  """
110
110
 
111
111
  feature_name: str
112
- source_class: Optional[Type[Union[AbstractFeatureGroup, str]]] = None
112
+ source_class: Optional[Type[Union[FeatureGroup, str]]] = None
113
113
  source_value: Optional[str] = None
114
- left_link: Optional[Tuple[Type[AbstractFeatureGroup], Union[str, Index]]] = None
115
- right_link: Optional[Tuple[Type[AbstractFeatureGroup], Union[str, Index]]] = None
114
+ left_link: Optional[Tuple[Type[FeatureGroup], Union[str, Index]]] = None
115
+ right_link: Optional[Tuple[Type[FeatureGroup], Union[str, Index]]] = None
116
116
  join_type: Optional[JoinType] = None
117
117
  merge_index: Optional[Union[str, Index]] = None
118
118
 
@@ -207,8 +207,8 @@ class SourceInputFeatureComposite:
207
207
  @classmethod
208
208
  def _handle_link(
209
209
  cls,
210
- left_link: Tuple[Type[AbstractFeatureGroup], Union[str, Index]],
211
- right_link: Tuple[Type[AbstractFeatureGroup], Union[str, Index]],
210
+ left_link: Tuple[Type[FeatureGroup], Union[str, Index]],
211
+ right_link: Tuple[Type[FeatureGroup], Union[str, Index]],
212
212
  join_type: Any,
213
213
  ) -> Link:
214
214
  """
@@ -4,18 +4,19 @@ Base implementation for text cleaning feature groups.
4
4
 
5
5
  from __future__ import annotations
6
6
 
7
- from typing import Any, Optional, Set, Union
8
-
9
- from mloda_core.abstract_plugins.abstract_feature_group import AbstractFeatureGroup
10
- from mloda_core.abstract_plugins.components.feature import Feature
11
- from mloda_core.abstract_plugins.components.feature_chainer.feature_chain_parser import FeatureChainParser
12
- from mloda_core.abstract_plugins.components.feature_name import FeatureName
13
- from mloda_core.abstract_plugins.components.feature_set import FeatureSet
14
- from mloda_core.abstract_plugins.components.options import Options
7
+ from typing import Any, Optional
8
+
9
+ from mloda import FeatureGroup
10
+ from mloda import Feature
11
+ from mloda.provider import FeatureChainParser
12
+ from mloda.provider import (
13
+ FeatureChainParserMixin,
14
+ )
15
+ from mloda.provider import FeatureSet
15
16
  from mloda_plugins.feature_group.experimental.default_options_key import DefaultOptionKeys
16
17
 
17
18
 
18
- class TextCleaningFeatureGroup(AbstractFeatureGroup):
19
+ class TextCleaningFeatureGroup(FeatureChainParserMixin, FeatureGroup):
19
20
  # Option key for the list of operations
20
21
  CLEANING_OPERATIONS = "cleaning_operations"
21
22
 
@@ -33,13 +34,17 @@ class TextCleaningFeatureGroup(AbstractFeatureGroup):
33
34
  PATTERN = "__"
34
35
  PREFIX_PATTERN = r".*__cleaned_text$"
35
36
 
37
+ # In-feature configuration for FeatureChainParserMixin
38
+ MIN_IN_FEATURES = 1
39
+ MAX_IN_FEATURES = 1
40
+
36
41
  # Property mapping for configuration-based features
37
42
  PROPERTY_MAPPING = {
38
43
  CLEANING_OPERATIONS: {
39
44
  **SUPPORTED_OPERATIONS, # All supported operations as valid options
40
- DefaultOptionKeys.mloda_context: True, # Mark as context parameter
41
- DefaultOptionKeys.mloda_strict_validation: True, # Enable strict validation
42
- DefaultOptionKeys.mloda_validation_function: lambda operations: (
45
+ DefaultOptionKeys.context: True, # Mark as context parameter
46
+ DefaultOptionKeys.strict_validation: True, # Enable strict validation
47
+ DefaultOptionKeys.validation_function: lambda operations: (
43
48
  # Handle both actual tuples/lists and string representations
44
49
  (
45
50
  isinstance(operations, (tuple, list))
@@ -59,7 +64,7 @@ class TextCleaningFeatureGroup(AbstractFeatureGroup):
59
64
  },
60
65
  DefaultOptionKeys.in_features: {
61
66
  "explanation": "Source feature to apply text cleaning operations to",
62
- DefaultOptionKeys.mloda_context: True,
67
+ DefaultOptionKeys.context: True,
63
68
  },
64
69
  }
65
70
 
@@ -115,41 +120,6 @@ class TextCleaningFeatureGroup(AbstractFeatureGroup):
115
120
  - The source feature must contain text data
116
121
  """
117
122
 
118
- def input_features(self, options: Options, feature_name: FeatureName) -> Optional[Set[Feature]]:
119
- """Extract source feature from either configuration-based options or string parsing."""
120
-
121
- source_feature: str | None = None
122
-
123
- # Try string-based parsing first
124
- _, source_feature = FeatureChainParser.parse_feature_name(feature_name, [self.PREFIX_PATTERN])
125
- if source_feature is not None:
126
- return {Feature(source_feature)}
127
-
128
- # Fall back to configuration-based approach
129
- source_features = options.get_in_features()
130
- if len(source_features) != 1:
131
- raise ValueError(
132
- f"Expected exactly one source feature, but found {len(source_features)}: {source_features}"
133
- )
134
- return set(source_features)
135
-
136
- @classmethod
137
- def match_feature_group_criteria(
138
- cls,
139
- feature_name: Union[FeatureName, str],
140
- options: Options,
141
- data_access_collection: Optional[Any] = None,
142
- ) -> bool:
143
- """Check if feature name matches the expected pattern for text cleaning features."""
144
-
145
- # Use the unified parser with property mapping for full configuration support
146
- return FeatureChainParser.match_configuration_feature_chain_parser(
147
- feature_name,
148
- options,
149
- property_mapping=cls.PROPERTY_MAPPING,
150
- prefix_patterns=[cls.PREFIX_PATTERN],
151
- )
152
-
153
123
  @classmethod
154
124
  def _extract_operations_and_source_feature(cls, feature: Feature) -> tuple[tuple[Any, Any], str]:
155
125
  """
@@ -166,31 +136,36 @@ class TextCleaningFeatureGroup(AbstractFeatureGroup):
166
136
  Raises:
167
137
  ValueError: If parameters cannot be extracted
168
138
  """
169
- operations = None
170
- source_feature_name: str | None = None
139
+ source_features = cls._extract_source_features(feature)
140
+ operations = cls._extract_cleaning_operations(feature)
141
+ if operations is None:
142
+ raise ValueError(f"Could not extract operations from: {feature.name}")
143
+ return operations, source_features[0]
171
144
 
145
+ @classmethod
146
+ def _extract_cleaning_operations(cls, feature: Feature) -> Optional[tuple[Any, Any]]:
147
+ """
148
+ Extract cleaning operations from a feature.
149
+
150
+ Tries string-based parsing first, falls back to configuration-based approach.
151
+
152
+ Args:
153
+ feature: The feature to extract operations from
154
+
155
+ Returns:
156
+ Tuple of cleaning operations, or None if not found
157
+ """
172
158
  # Try string-based parsing first
173
159
  feature_name_str = feature.name.name if hasattr(feature.name, "name") else str(feature.name)
174
160
 
175
161
  if FeatureChainParser.is_chained_feature(feature_name_str):
176
- _, source_feature_name = FeatureChainParser.parse_feature_name(feature_name_str, [cls.PREFIX_PATTERN])
177
162
  # For string-based features, get operations from options
178
163
  operations = feature.options.get(cls.CLEANING_OPERATIONS) or ()
179
- if source_feature_name is None:
180
- raise ValueError(f"Could not extract source feature from string-based feature: {feature.name}")
181
- return operations, source_feature_name # type: ignore
164
+ return operations # type: ignore
182
165
 
183
166
  # Fall back to configuration-based approach
184
- source_features = feature.options.get_in_features()
185
- source_feature = next(iter(source_features))
186
- source_feature_name = source_feature.get_name()
187
-
188
167
  operations = feature.options.get(cls.CLEANING_OPERATIONS)
189
-
190
- if operations is None or source_feature_name is None:
191
- raise ValueError(f"Could not extract cleaning operations and source feature from: {feature.name}")
192
-
193
- return operations, source_feature_name
168
+ return operations if operations is not None else None
194
169
 
195
170
  @classmethod
196
171
  def calculate_feature(cls, data: Any, features: FeatureSet) -> Any:
@@ -25,7 +25,7 @@ except ImportError:
25
25
  pd = None
26
26
 
27
27
 
28
- from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
28
+ from mloda import ComputeFramework
29
29
  from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
30
30
  from mloda_plugins.feature_group.experimental.text_cleaning.base import TextCleaningFeatureGroup
31
31
 
@@ -40,7 +40,7 @@ class PandasTextCleaningFeatureGroup(TextCleaningFeatureGroup):
40
40
  """
41
41
 
42
42
  @classmethod
43
- def compute_framework_rule(cls) -> set[type[ComputeFrameWork]]:
43
+ def compute_framework_rule(cls) -> set[type[ComputeFramework]]:
44
44
  """Define the compute framework for this feature group."""
45
45
  return {PandasDataFrame}
46
46
 
@@ -9,7 +9,7 @@ import string
9
9
  import unicodedata
10
10
  from typing import Any, Dict, List, Set, Type, Union
11
11
 
12
- from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
12
+ from mloda import ComputeFramework
13
13
 
14
14
  from mloda_plugins.compute_framework.base_implementations.python_dict.python_dict_framework import PythonDictFramework
15
15
  from mloda_plugins.feature_group.experimental.text_cleaning.base import TextCleaningFeatureGroup
@@ -35,7 +35,7 @@ class PythonDictTextCleaningFeatureGroup(TextCleaningFeatureGroup):
35
35
  """
36
36
 
37
37
  @classmethod
38
- def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFrameWork]]]:
38
+ def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFramework]]]:
39
39
  return {PythonDictFramework}
40
40
 
41
41
  @classmethod