mloda 0.3.2__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201)
  1. mloda/__init__.py +17 -0
  2. {mloda_core → mloda/core}/abstract_plugins/components/base_artifact.py +2 -2
  3. {mloda_core → mloda/core}/abstract_plugins/components/base_validator.py +13 -0
  4. {mloda_core → mloda/core}/abstract_plugins/components/data_access_collection.py +1 -1
  5. {mloda_core → mloda/core}/abstract_plugins/components/data_types.py +39 -0
  6. {mloda_core → mloda/core}/abstract_plugins/components/feature.py +39 -33
  7. {mloda_core → mloda/core}/abstract_plugins/components/feature_chainer/feature_chain_parser.py +19 -19
  8. mloda/core/abstract_plugins/components/feature_chainer/feature_chain_parser_mixin.py +197 -0
  9. {mloda_core → mloda/core}/abstract_plugins/components/feature_collection.py +6 -6
  10. {mloda_core → mloda/core}/abstract_plugins/components/feature_group_version.py +8 -8
  11. {mloda_core → mloda/core}/abstract_plugins/components/feature_set.py +18 -24
  12. {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/cfw_transformer.py +2 -2
  13. {mloda_core → mloda/core}/abstract_plugins/components/index/add_index_feature.py +4 -4
  14. {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/api_input_data.py +3 -3
  15. {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/api_input_data_collection.py +2 -2
  16. {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/base_api_data.py +1 -1
  17. {mloda_core → mloda/core}/abstract_plugins/components/input_data/base_input_data.py +6 -6
  18. {mloda_core → mloda/core}/abstract_plugins/components/input_data/creator/data_creator.py +3 -3
  19. mloda/core/abstract_plugins/components/link.py +437 -0
  20. {mloda_core → mloda/core}/abstract_plugins/components/match_data/match_data.py +3 -3
  21. {mloda_core → mloda/core}/abstract_plugins/components/merge/base_merge_engine.py +2 -2
  22. {mloda_core → mloda/core}/abstract_plugins/components/options.py +12 -36
  23. {mloda_core → mloda/core}/abstract_plugins/components/parallelization_modes.py +1 -1
  24. {mloda_core → mloda/core}/abstract_plugins/components/plugin_option/plugin_collector.py +14 -14
  25. mloda/core/abstract_plugins/components/validators/datatype_validator.py +96 -0
  26. mloda/core/abstract_plugins/components/validators/feature_set_validator.py +38 -0
  27. mloda/core/abstract_plugins/components/validators/feature_validator.py +23 -0
  28. mloda/core/abstract_plugins/components/validators/link_validator.py +79 -0
  29. mloda/core/abstract_plugins/components/validators/options_validator.py +57 -0
  30. mloda_core/abstract_plugins/compute_frame_work.py → mloda/core/abstract_plugins/compute_framework.py +46 -37
  31. mloda_core/abstract_plugins/abstract_feature_group.py → mloda/core/abstract_plugins/feature_group.py +56 -33
  32. mloda/core/abstract_plugins/function_extender.py +78 -0
  33. mloda/core/api/plugin_docs.py +220 -0
  34. mloda/core/api/plugin_info.py +32 -0
  35. {mloda_core → mloda/core}/api/prepare/setup_compute_framework.py +11 -11
  36. {mloda_core → mloda/core}/api/request.py +42 -33
  37. {mloda_core → mloda/core}/core/cfw_manager.py +8 -8
  38. {mloda_core → mloda/core}/core/engine.py +47 -46
  39. {mloda_core → mloda/core}/core/step/abstract_step.py +7 -7
  40. {mloda_core → mloda/core}/core/step/feature_group_step.py +12 -12
  41. {mloda_core → mloda/core}/core/step/join_step.py +14 -14
  42. {mloda_core → mloda/core}/core/step/transform_frame_work_step.py +16 -16
  43. {mloda_core → mloda/core}/filter/filter_engine.py +1 -1
  44. {mloda_core → mloda/core}/filter/filter_type_enum.py +1 -1
  45. {mloda_core → mloda/core}/filter/global_filter.py +23 -23
  46. {mloda_core → mloda/core}/filter/single_filter.py +6 -6
  47. {mloda_core → mloda/core}/prepare/accessible_plugins.py +16 -18
  48. {mloda_core → mloda/core}/prepare/execution_plan.py +65 -39
  49. {mloda_core → mloda/core}/prepare/graph/build_graph.py +6 -6
  50. {mloda_core → mloda/core}/prepare/graph/graph.py +1 -1
  51. {mloda_core → mloda/core}/prepare/graph/properties.py +5 -5
  52. {mloda_core → mloda/core}/prepare/identify_feature_group.py +12 -14
  53. {mloda_core → mloda/core}/prepare/joinstep_collection.py +3 -3
  54. {mloda_core → mloda/core}/prepare/resolve_compute_frameworks.py +6 -6
  55. {mloda_core → mloda/core}/prepare/resolve_graph.py +11 -11
  56. {mloda_core → mloda/core}/prepare/resolve_links.py +31 -40
  57. mloda/core/prepare/validators/resolve_link_validator.py +32 -0
  58. mloda/core/runtime/compute_framework_executor.py +271 -0
  59. mloda/core/runtime/data_lifecycle_manager.py +160 -0
  60. mloda/core/runtime/flight/__init__.py +0 -0
  61. {mloda_core → mloda/core}/runtime/flight/runner_flight_server.py +1 -1
  62. mloda/core/runtime/run.py +317 -0
  63. mloda/core/runtime/worker/__init__.py +0 -0
  64. {mloda_core → mloda/core}/runtime/worker/multiprocessing_worker.py +15 -10
  65. {mloda_core → mloda/core}/runtime/worker/thread_worker.py +2 -2
  66. mloda/core/runtime/worker_manager.py +96 -0
  67. mloda/provider/__init__.py +101 -0
  68. mloda/steward/__init__.py +25 -0
  69. mloda/user/__init__.py +57 -0
  70. {mloda-0.3.2.dist-info → mloda-0.4.0.dist-info}/METADATA +18 -22
  71. mloda-0.4.0.dist-info/RECORD +248 -0
  72. {mloda-0.3.2.dist-info → mloda-0.4.0.dist-info}/top_level.txt +1 -1
  73. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_filter_engine.py +2 -2
  74. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_framework.py +15 -13
  75. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_merge_engine.py +3 -3
  76. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_pyarrow_transformer.py +1 -1
  77. mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_filter_engine.py +2 -2
  78. mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_framework.py +12 -10
  79. mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_pyarrow_transformer.py +1 -1
  80. mloda_plugins/compute_framework/base_implementations/pandas/dataframe.py +18 -16
  81. mloda_plugins/compute_framework/base_implementations/pandas/pandas_filter_engine.py +36 -13
  82. mloda_plugins/compute_framework/base_implementations/pandas/pandas_merge_engine.py +7 -7
  83. mloda_plugins/compute_framework/base_implementations/pandas/pandaspyarrowtransformer.py +1 -1
  84. mloda_plugins/compute_framework/base_implementations/polars/dataframe.py +16 -14
  85. mloda_plugins/compute_framework/base_implementations/polars/lazy_dataframe.py +13 -12
  86. mloda_plugins/compute_framework/base_implementations/polars/polars_filter_engine.py +2 -2
  87. mloda_plugins/compute_framework/base_implementations/polars/polars_lazy_pyarrow_transformer.py +1 -1
  88. mloda_plugins/compute_framework/base_implementations/polars/polars_merge_engine.py +3 -3
  89. mloda_plugins/compute_framework/base_implementations/polars/polars_pyarrow_transformer.py +1 -1
  90. mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_filter_engine.py +2 -2
  91. mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_merge_engine.py +3 -3
  92. mloda_plugins/compute_framework/base_implementations/pyarrow/table.py +12 -10
  93. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_filter_engine.py +2 -2
  94. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_framework.py +11 -9
  95. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_merge_engine.py +3 -3
  96. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_pyarrow_transformer.py +1 -1
  97. mloda_plugins/compute_framework/base_implementations/spark/spark_filter_engine.py +2 -2
  98. mloda_plugins/compute_framework/base_implementations/spark/spark_framework.py +17 -15
  99. mloda_plugins/compute_framework/base_implementations/spark/spark_merge_engine.py +3 -3
  100. mloda_plugins/compute_framework/base_implementations/spark/spark_pyarrow_transformer.py +1 -1
  101. mloda_plugins/config/feature/loader.py +2 -2
  102. mloda_plugins/feature_group/experimental/aggregated_feature_group/base.py +45 -62
  103. mloda_plugins/feature_group/experimental/aggregated_feature_group/pandas.py +2 -2
  104. mloda_plugins/feature_group/experimental/aggregated_feature_group/polars_lazy.py +2 -2
  105. mloda_plugins/feature_group/experimental/aggregated_feature_group/pyarrow.py +2 -2
  106. mloda_plugins/feature_group/experimental/clustering/base.py +69 -97
  107. mloda_plugins/feature_group/experimental/clustering/pandas.py +2 -2
  108. mloda_plugins/feature_group/experimental/data_quality/missing_value/base.py +58 -79
  109. mloda_plugins/feature_group/experimental/data_quality/missing_value/pandas.py +2 -2
  110. mloda_plugins/feature_group/experimental/data_quality/missing_value/pyarrow.py +2 -2
  111. mloda_plugins/feature_group/experimental/data_quality/missing_value/python_dict.py +2 -2
  112. mloda_plugins/feature_group/experimental/default_options_key.py +16 -19
  113. mloda_plugins/feature_group/experimental/dimensionality_reduction/base.py +80 -94
  114. mloda_plugins/feature_group/experimental/dimensionality_reduction/pandas.py +2 -2
  115. mloda_plugins/feature_group/experimental/dynamic_feature_group_factory/dynamic_feature_group_factory.py +24 -24
  116. mloda_plugins/feature_group/experimental/forecasting/base.py +106 -104
  117. mloda_plugins/feature_group/experimental/forecasting/forecasting_artifact.py +2 -2
  118. mloda_plugins/feature_group/experimental/forecasting/pandas.py +15 -15
  119. mloda_plugins/feature_group/experimental/geo_distance/base.py +50 -42
  120. mloda_plugins/feature_group/experimental/geo_distance/pandas.py +2 -2
  121. mloda_plugins/feature_group/experimental/llm/cli.py +4 -4
  122. mloda_plugins/feature_group/experimental/llm/cli_features/refactor_git_cached.py +19 -19
  123. mloda_plugins/feature_group/experimental/llm/installed_packages_feature_group.py +8 -8
  124. mloda_plugins/feature_group/experimental/llm/list_directory_feature_group.py +5 -5
  125. mloda_plugins/feature_group/experimental/llm/llm_api/claude.py +3 -3
  126. mloda_plugins/feature_group/experimental/llm/llm_api/gemini.py +3 -3
  127. mloda_plugins/feature_group/experimental/llm/llm_api/llm_base_request.py +5 -5
  128. mloda_plugins/feature_group/experimental/llm/llm_api/openai.py +3 -3
  129. mloda_plugins/feature_group/experimental/llm/llm_api/request_loop.py +6 -6
  130. mloda_plugins/feature_group/experimental/llm/llm_file_selector.py +10 -10
  131. mloda_plugins/feature_group/experimental/llm/tools/tool_collection.py +1 -1
  132. mloda_plugins/feature_group/experimental/node_centrality/base.py +46 -72
  133. mloda_plugins/feature_group/experimental/node_centrality/pandas.py +2 -2
  134. mloda_plugins/feature_group/experimental/sklearn/encoding/base.py +51 -51
  135. mloda_plugins/feature_group/experimental/sklearn/encoding/pandas.py +2 -2
  136. mloda_plugins/feature_group/experimental/sklearn/pipeline/base.py +52 -39
  137. mloda_plugins/feature_group/experimental/sklearn/pipeline/pandas.py +2 -2
  138. mloda_plugins/feature_group/experimental/sklearn/scaling/base.py +44 -58
  139. mloda_plugins/feature_group/experimental/sklearn/scaling/pandas.py +2 -2
  140. mloda_plugins/feature_group/experimental/sklearn/sklearn_artifact.py +2 -2
  141. mloda_plugins/feature_group/experimental/source_input_feature.py +15 -15
  142. mloda_plugins/feature_group/experimental/text_cleaning/base.py +38 -61
  143. mloda_plugins/feature_group/experimental/text_cleaning/pandas.py +2 -2
  144. mloda_plugins/feature_group/experimental/text_cleaning/python_dict.py +2 -2
  145. mloda_plugins/feature_group/experimental/time_window/base.py +106 -93
  146. mloda_plugins/feature_group/experimental/time_window/pandas.py +13 -13
  147. mloda_plugins/feature_group/experimental/time_window/pyarrow.py +12 -12
  148. mloda_plugins/feature_group/input_data/api_data/api_data.py +9 -11
  149. mloda_plugins/feature_group/input_data/read_context_files.py +7 -7
  150. mloda_plugins/feature_group/input_data/read_db.py +7 -9
  151. mloda_plugins/feature_group/input_data/read_db_feature.py +4 -4
  152. mloda_plugins/feature_group/input_data/read_dbs/sqlite.py +23 -13
  153. mloda_plugins/feature_group/input_data/read_file.py +8 -8
  154. mloda_plugins/feature_group/input_data/read_file_feature.py +4 -4
  155. mloda_plugins/feature_group/input_data/read_files/csv.py +6 -6
  156. mloda_plugins/feature_group/input_data/read_files/feather.py +5 -5
  157. mloda_plugins/feature_group/input_data/read_files/json.py +5 -5
  158. mloda_plugins/feature_group/input_data/read_files/orc.py +5 -5
  159. mloda_plugins/feature_group/input_data/read_files/parquet.py +5 -5
  160. mloda_plugins/feature_group/input_data/read_files/text_file_reader.py +5 -5
  161. mloda_plugins/function_extender/base_implementations/otel/otel_extender.py +4 -4
  162. mloda-0.3.2.dist-info/RECORD +0 -230
  163. mloda_core/abstract_plugins/components/link.py +0 -286
  164. mloda_core/abstract_plugins/function_extender.py +0 -34
  165. mloda_core/runtime/run.py +0 -617
  166. {mloda_core → mloda/core}/__init__.py +0 -0
  167. {mloda_core → mloda/core}/abstract_plugins/__init__.py +0 -0
  168. {mloda_core → mloda/core}/abstract_plugins/components/__init__.py +0 -0
  169. {mloda_core → mloda/core}/abstract_plugins/components/domain.py +0 -0
  170. {mloda_core → mloda/core}/abstract_plugins/components/feature_chainer/__init__.py +0 -0
  171. {mloda_core → mloda/core}/abstract_plugins/components/feature_name.py +0 -0
  172. {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/__init__.py +0 -0
  173. {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/base_transformer.py +0 -0
  174. {mloda_core → mloda/core}/abstract_plugins/components/hashable_dict.py +0 -0
  175. {mloda_core → mloda/core}/abstract_plugins/components/index/__init__.py +0 -0
  176. {mloda_core → mloda/core}/abstract_plugins/components/index/index.py +0 -0
  177. {mloda_core → mloda/core}/abstract_plugins/components/input_data/__init__.py +0 -0
  178. {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/__init__.py +0 -0
  179. {mloda_core → mloda/core}/abstract_plugins/components/input_data/creator/__init__.py +0 -0
  180. {mloda_core → mloda/core}/abstract_plugins/components/match_data/__init__.py +0 -0
  181. {mloda_core → mloda/core}/abstract_plugins/components/merge/__init__.py +0 -0
  182. {mloda_core → mloda/core}/abstract_plugins/components/plugin_option/__init__.py +0 -0
  183. {mloda_core → mloda/core}/abstract_plugins/components/utils.py +0 -0
  184. {mloda_core/abstract_plugins/plugin_loader → mloda/core/abstract_plugins/components/validators}/__init__.py +0 -0
  185. {mloda_core/api → mloda/core/abstract_plugins/plugin_loader}/__init__.py +0 -0
  186. {mloda_core → mloda/core}/abstract_plugins/plugin_loader/plugin_loader.py +0 -0
  187. {mloda_core/api/prepare → mloda/core/api}/__init__.py +0 -0
  188. {mloda_core/core → mloda/core/api/prepare}/__init__.py +0 -0
  189. {mloda_core/core/step → mloda/core/core}/__init__.py +0 -0
  190. {mloda_core/filter → mloda/core/core/step}/__init__.py +0 -0
  191. {mloda_core/prepare → mloda/core/filter}/__init__.py +0 -0
  192. {mloda_core → mloda/core}/filter/filter_parameter.py +0 -0
  193. {mloda_core/prepare/graph → mloda/core/prepare}/__init__.py +0 -0
  194. {mloda_core/runtime → mloda/core/prepare/graph}/__init__.py +0 -0
  195. {mloda_core/runtime/flight → mloda/core/prepare/validators}/__init__.py +0 -0
  196. {mloda_core/runtime/worker → mloda/core/runtime}/__init__.py +0 -0
  197. {mloda_core → mloda/core}/runtime/flight/flight_server.py +0 -0
  198. {mloda-0.3.2.dist-info → mloda-0.4.0.dist-info}/WHEEL +0 -0
  199. {mloda-0.3.2.dist-info → mloda-0.4.0.dist-info}/entry_points.txt +0 -0
  200. {mloda-0.3.2.dist-info → mloda-0.4.0.dist-info}/licenses/LICENSE.TXT +0 -0
  201. {mloda-0.3.2.dist-info → mloda-0.4.0.dist-info}/licenses/NOTICE.md +0 -0
@@ -8,16 +8,19 @@ import copy
8
8
  from abc import abstractmethod
9
9
  from typing import Any, List, Optional, Set, Union
10
10
 
11
- from mloda_core.abstract_plugins.abstract_feature_group import AbstractFeatureGroup
12
- from mloda_core.abstract_plugins.components.feature import Feature
13
- from mloda_core.abstract_plugins.components.feature_chainer.feature_chain_parser import FeatureChainParser
14
- from mloda_core.abstract_plugins.components.feature_name import FeatureName
15
- from mloda_core.abstract_plugins.components.feature_set import FeatureSet
16
- from mloda_core.abstract_plugins.components.options import Options
11
+ from mloda import FeatureGroup
12
+ from mloda import Feature
13
+ from mloda.provider import FeatureChainParser
14
+ from mloda.provider import (
15
+ FeatureChainParserMixin,
16
+ )
17
+ from mloda.user import FeatureName
18
+ from mloda.provider import FeatureSet
19
+ from mloda import Options
17
20
  from mloda_plugins.feature_group.experimental.default_options_key import DefaultOptionKeys
18
21
 
19
22
 
20
- class MissingValueFeatureGroup(AbstractFeatureGroup):
23
+ class MissingValueFeatureGroup(FeatureChainParserMixin, FeatureGroup):
21
24
  """
22
25
  Base class for all missing value imputation feature groups.
23
26
 
@@ -83,7 +86,7 @@ class MissingValueFeatureGroup(AbstractFeatureGroup):
83
86
  ### String-Based Creation
84
87
 
85
88
  ```python
86
- from mloda_core.abstract_plugins.components.feature import Feature
89
+ from mloda import Feature
87
90
 
88
91
  # Impute missing income values with mean
89
92
  feature = Feature(name="income__mean_imputed")
@@ -101,8 +104,8 @@ class MissingValueFeatureGroup(AbstractFeatureGroup):
101
104
  ### Configuration-Based Creation
102
105
 
103
106
  ```python
104
- from mloda_core.abstract_plugins.components.feature import Feature
105
- from mloda_core.abstract_plugins.components.options import Options
107
+ from mloda import Feature
108
+ from mloda import Options
106
109
  from mloda_plugins.feature_group.experimental.default_options_key import DefaultOptionKeys
107
110
 
108
111
  # Mean imputation using configuration
@@ -160,46 +163,31 @@ class MissingValueFeatureGroup(AbstractFeatureGroup):
160
163
 
161
164
  PREFIX_PATTERN = r".*__([\w]+)_imputed$"
162
165
 
166
+ # In-feature configuration for FeatureChainParserMixin
167
+ MIN_IN_FEATURES = 1
168
+ MAX_IN_FEATURES = 1
169
+
163
170
  PROPERTY_MAPPING = {
164
171
  IMPUTATION_METHOD: {
165
172
  **IMPUTATION_METHODS,
166
- DefaultOptionKeys.mloda_context: True,
173
+ DefaultOptionKeys.context: True,
167
174
  },
168
175
  DefaultOptionKeys.in_features: {
169
176
  "explanation": "Source feature to impute missing values",
170
- DefaultOptionKeys.mloda_context: True,
177
+ DefaultOptionKeys.context: True,
171
178
  },
172
179
  "constant_value": {
173
180
  "explanation": "Constant value to use for constant imputation method",
174
- DefaultOptionKeys.mloda_context: True,
175
- DefaultOptionKeys.mloda_default: None, # Default is None, required only for constant method
181
+ DefaultOptionKeys.context: True,
182
+ DefaultOptionKeys.default: None, # Default is None, required only for constant method
176
183
  },
177
184
  "group_by_features": {
178
185
  "explanation": "Optional list of features to group by before imputation",
179
- DefaultOptionKeys.mloda_context: True,
180
- DefaultOptionKeys.mloda_default: None, # Default is None (no grouping)
186
+ DefaultOptionKeys.context: True,
187
+ DefaultOptionKeys.default: None, # Default is None (no grouping)
181
188
  },
182
189
  }
183
190
 
184
- def input_features(self, options: Options, feature_name: FeatureName) -> Optional[Set[Feature]]:
185
- """Extract source feature from either configuration-based options or string parsing."""
186
-
187
- source_feature: str | None = None
188
-
189
- # Try string-based parsing first
190
- # parse_feature_name returns (operation_config, source_feature)
191
- operation_config, source_feature = FeatureChainParser.parse_feature_name(feature_name, [self.PREFIX_PATTERN])
192
- if source_feature is not None:
193
- return {Feature(source_feature)}
194
-
195
- # Fall back to configuration-based approach
196
- source_features = options.get_in_features()
197
- if len(source_features) != 1:
198
- raise ValueError(
199
- f"Expected exactly one source feature, but found {len(source_features)}: {source_features}"
200
- )
201
- return set(source_features)
202
-
203
191
  @classmethod
204
192
  def get_imputation_method(cls, feature_name: str) -> str:
205
193
  """Extract the imputation method from the feature name."""
@@ -223,21 +211,36 @@ class MissingValueFeatureGroup(AbstractFeatureGroup):
223
211
  return imputation_method
224
212
 
225
213
  @classmethod
226
- def match_feature_group_criteria(
227
- cls,
228
- feature_name: Union[FeatureName, str],
229
- options: Options,
230
- data_access_collection: Optional[Any] = None,
231
- ) -> bool:
232
- """Check if feature name matches the expected pattern for missing value features."""
233
-
234
- # Use the unified parser with property mapping for full configuration support
235
- return FeatureChainParser.match_configuration_feature_chain_parser(
236
- feature_name,
237
- options,
238
- property_mapping=cls.PROPERTY_MAPPING,
239
- prefix_patterns=[cls.PREFIX_PATTERN],
240
- )
214
+ def _extract_imputation_method(cls, feature: Feature) -> Optional[str]:
215
+ """
216
+ Extract imputation method from a feature.
217
+
218
+ Tries string-based parsing first, falls back to configuration-based.
219
+
220
+ Args:
221
+ feature: The feature to extract imputation method from
222
+
223
+ Returns:
224
+ Imputation method name or None if not found
225
+ """
226
+ feature_name = feature.get_name()
227
+
228
+ # Try string-based parsing first
229
+ if FeatureChainParser.is_chained_feature(feature_name):
230
+ # Use get_imputation_method which handles parse_feature_name correctly
231
+ return cls.get_imputation_method(feature_name)
232
+
233
+ # Fall back to configuration-based approach
234
+ imputation_method = feature.options.get(cls.IMPUTATION_METHOD)
235
+
236
+ # Validate imputation method if found
237
+ if imputation_method is not None and imputation_method not in cls.IMPUTATION_METHODS:
238
+ raise ValueError(
239
+ f"Unsupported imputation method: {imputation_method}. "
240
+ f"Supported methods: {', '.join(cls.IMPUTATION_METHODS.keys())}"
241
+ )
242
+
243
+ return str(imputation_method) if imputation_method is not None else None
241
244
 
242
245
  @classmethod
243
246
  def _extract_imputation_method_and_source_feature(cls, feature: Feature) -> tuple[str, str]:
@@ -255,37 +258,13 @@ class MissingValueFeatureGroup(AbstractFeatureGroup):
255
258
  Raises:
256
259
  ValueError: If parameters cannot be extracted
257
260
  """
258
- imputation_method = None
259
- source_feature_name: str | None = None
260
-
261
- # Try string-based parsing first
262
- feature_name_str = feature.name.name if hasattr(feature.name, "name") else str(feature.name)
263
-
264
- if FeatureChainParser.is_chained_feature(feature_name_str):
265
- # Use get_imputation_method which already handles parse_feature_name correctly
266
- imputation_method = cls.get_imputation_method(feature_name_str)
267
- # Use extract_source_feature which returns everything before the last __
268
- source_feature_name = FeatureChainParser.extract_source_feature(feature_name_str, cls.PREFIX_PATTERN)
269
- return imputation_method, source_feature_name
261
+ source_features = cls._extract_source_features(feature)
262
+ imputation_method = cls._extract_imputation_method(feature)
270
263
 
271
- # Fall back to configuration-based approach
272
- source_features = feature.options.get_in_features()
273
- source_feature = next(iter(source_features))
274
- source_feature_name = source_feature.get_name()
275
-
276
- imputation_method = feature.options.get(cls.IMPUTATION_METHOD)
277
-
278
- if imputation_method is None or source_feature_name is None:
279
- raise ValueError(f"Could not extract imputation method and source feature from: {feature.name}")
280
-
281
- # Validate imputation method (no need to strip "imputed" from config-based method)
282
- if imputation_method not in cls.IMPUTATION_METHODS:
283
- raise ValueError(
284
- f"Unsupported imputation method: {imputation_method}. "
285
- f"Supported methods: {', '.join(cls.IMPUTATION_METHODS.keys())}"
286
- )
264
+ if imputation_method is None:
265
+ raise ValueError(f"Could not extract imputation method from: {feature.name}")
287
266
 
288
- return imputation_method, source_feature_name
267
+ return imputation_method, source_features[0]
289
268
 
290
269
  @classmethod
291
270
  def calculate_feature(cls, data: Any, features: FeatureSet) -> Any:
@@ -7,7 +7,7 @@ from __future__ import annotations
7
7
  from typing import Any, List, Optional, Set, Type, Union
8
8
 
9
9
 
10
- from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
10
+ from mloda import ComputeFramework
11
11
 
12
12
  from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
13
13
  from mloda_plugins.feature_group.experimental.data_quality.missing_value.base import MissingValueFeatureGroup
@@ -20,7 +20,7 @@ except ImportError:
20
20
 
21
21
  class PandasMissingValueFeatureGroup(MissingValueFeatureGroup):
22
22
  @classmethod
23
- def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFrameWork]]]:
23
+ def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFramework]]]:
24
24
  return {PandasDataFrame}
25
25
 
26
26
  @classmethod
@@ -9,7 +9,7 @@ from typing import Any, List, Optional, Set, Type, Union
9
9
  import pyarrow as pa
10
10
  import pyarrow.compute as pc
11
11
 
12
- from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
12
+ from mloda import ComputeFramework
13
13
 
14
14
  from mloda_plugins.compute_framework.base_implementations.pyarrow.table import PyArrowTable
15
15
  from mloda_plugins.feature_group.experimental.data_quality.missing_value.base import MissingValueFeatureGroup
@@ -17,7 +17,7 @@ from mloda_plugins.feature_group.experimental.data_quality.missing_value.base im
17
17
 
18
18
  class PyArrowMissingValueFeatureGroup(MissingValueFeatureGroup):
19
19
  @classmethod
20
- def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFrameWork]]]:
20
+ def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFramework]]]:
21
21
  return {PyArrowTable}
22
22
 
23
23
  @classmethod
@@ -8,7 +8,7 @@ import statistics
8
8
  from collections import Counter
9
9
  from typing import Any, Dict, List, Optional, Set, Type, Union
10
10
 
11
- from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
11
+ from mloda import ComputeFramework
12
12
 
13
13
  from mloda_plugins.compute_framework.base_implementations.python_dict.python_dict_framework import PythonDictFramework
14
14
  from mloda_plugins.feature_group.experimental.data_quality.missing_value.base import MissingValueFeatureGroup
@@ -23,7 +23,7 @@ class PythonDictMissingValueFeatureGroup(MissingValueFeatureGroup):
23
23
  """
24
24
 
25
25
  @classmethod
26
- def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFrameWork]]]:
26
+ def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFramework]]]:
27
27
  return {PythonDictFramework}
28
28
 
29
29
  @classmethod
@@ -3,28 +3,25 @@ from enum import Enum
3
3
 
4
4
  class DefaultOptionKeys(str, Enum):
5
5
  """
6
+ Default option keys used to configure mloda feature groups.
6
7
 
7
- This class contains the default option keys for mloda.
8
+ These keys are used to look up configuration values in Options objects.
9
+ The enum value serves as both the option key and the default column name.
8
10
 
9
- These keys are used to set options for the features as conventions.
10
-
11
- For faster development and prototyping, it was decided to use the Option object to move configurations around.
12
- When the framework matured and we learned more about the requirements, we can refactor this to a more sophisticated solution.
13
-
14
- However we use the DefaultOptions object to store needed keywords.
11
+ Time-Related Keys:
12
+ - `reference_time`: Key for the event timestamp column. Value: "reference_time"
13
+ - `time_travel`: Key for the validity timestamp column. Value: "time_travel_filter"
15
14
 
15
+ These values are used as default column names when not customized via Options.
16
16
  """
17
17
 
18
18
  in_features = "in_features"
19
- mloda_source_feature_group = "mloda_source_feature_group"
20
- mloda_feature_chainer_parser_key = "mloda_feature_chainer_parser_key"
21
- reference_time = "time_filter"
22
- mloda_default = "default"
23
- mloda_context = "context"
24
- mloda_group = "group"
25
- mloda_strict_validation = "strict_validation"
26
- mloda_validation_function = "validation_function"
27
-
28
- @classmethod
29
- def list(cls) -> list[str]:
30
- return [member.value for member in cls]
19
+ feature_chainer_parser_key = "feature_chainer_parser_key"
20
+ reference_time = "reference_time"
21
+ time_travel = "time_travel_filter"
22
+ default = "default"
23
+ context = "context"
24
+ group = "group"
25
+ strict_validation = "strict_validation"
26
+ validation_function = "validation_function"
27
+ strict_type_enforcement = "strict_type_enforcement"
@@ -7,16 +7,19 @@ from __future__ import annotations
7
7
  from abc import abstractmethod
8
8
  from typing import Any, Optional, Set, Union
9
9
 
10
- from mloda_core.abstract_plugins.abstract_feature_group import AbstractFeatureGroup
11
- from mloda_core.abstract_plugins.components.feature import Feature
12
- from mloda_core.abstract_plugins.components.feature_chainer.feature_chain_parser import FeatureChainParser
13
- from mloda_core.abstract_plugins.components.feature_name import FeatureName
14
- from mloda_core.abstract_plugins.components.feature_set import FeatureSet
15
- from mloda_core.abstract_plugins.components.options import Options
10
+ from mloda import FeatureGroup
11
+ from mloda import Feature
12
+ from mloda.provider import FeatureChainParser
13
+ from mloda.provider import (
14
+ FeatureChainParserMixin,
15
+ )
16
+ from mloda.user import FeatureName
17
+ from mloda.provider import FeatureSet
18
+ from mloda import Options
16
19
  from mloda_plugins.feature_group.experimental.default_options_key import DefaultOptionKeys
17
20
 
18
21
 
19
- class DimensionalityReductionFeatureGroup(AbstractFeatureGroup):
22
+ class DimensionalityReductionFeatureGroup(FeatureChainParserMixin, FeatureGroup):
20
23
  """
21
24
  Base class for all dimensionality reduction feature groups.
22
25
 
@@ -113,41 +116,46 @@ class DimensionalityReductionFeatureGroup(AbstractFeatureGroup):
113
116
  # Define the prefix pattern for this feature group
114
117
  PREFIX_PATTERN = r".*__([\w]+)_(\d+)d$"
115
118
 
119
+ # In-feature configuration for FeatureChainParserMixin
120
+ IN_FEATURE_SEPARATOR = ","
121
+ MIN_IN_FEATURES = 1
122
+ MAX_IN_FEATURES = None
123
+
116
124
  PROPERTY_MAPPING = {
117
125
  ALGORITHM: {
118
126
  **REDUCTION_ALGORITHMS,
119
- DefaultOptionKeys.mloda_context: True,
120
- DefaultOptionKeys.mloda_strict_validation: True,
127
+ DefaultOptionKeys.context: True,
128
+ DefaultOptionKeys.strict_validation: True,
121
129
  },
122
130
  DIMENSION: {
123
131
  "explanation": "Target dimension for the reduction (positive integer)",
124
- DefaultOptionKeys.mloda_context: True,
125
- DefaultOptionKeys.mloda_strict_validation: True,
126
- DefaultOptionKeys.mloda_validation_function: lambda value: isinstance(value, (int, str))
132
+ DefaultOptionKeys.context: True,
133
+ DefaultOptionKeys.strict_validation: True,
134
+ DefaultOptionKeys.validation_function: lambda value: isinstance(value, (int, str))
127
135
  and str(value).isdigit()
128
136
  and int(value) > 0,
129
137
  },
130
138
  DefaultOptionKeys.in_features: {
131
139
  "explanation": "Source features to use for dimensionality reduction",
132
- DefaultOptionKeys.mloda_context: True,
133
- DefaultOptionKeys.mloda_strict_validation: False,
140
+ DefaultOptionKeys.context: True,
141
+ DefaultOptionKeys.strict_validation: False,
134
142
  },
135
143
  # t-SNE specific parameters
136
144
  TSNE_MAX_ITER: {
137
145
  "explanation": "Maximum number of iterations for t-SNE optimization",
138
- DefaultOptionKeys.mloda_context: True,
139
- DefaultOptionKeys.mloda_strict_validation: False,
146
+ DefaultOptionKeys.context: True,
147
+ DefaultOptionKeys.strict_validation: False,
140
148
  "default": 250,
141
- DefaultOptionKeys.mloda_validation_function: lambda value: isinstance(value, (int, str))
149
+ DefaultOptionKeys.validation_function: lambda value: isinstance(value, (int, str))
142
150
  and str(value).isdigit()
143
151
  and int(value) > 0,
144
152
  },
145
153
  TSNE_N_ITER_WITHOUT_PROGRESS: {
146
154
  "explanation": "Maximum iterations without progress before early stopping (t-SNE)",
147
- DefaultOptionKeys.mloda_context: True,
148
- DefaultOptionKeys.mloda_strict_validation: False,
155
+ DefaultOptionKeys.context: True,
156
+ DefaultOptionKeys.strict_validation: False,
149
157
  "default": 50,
150
- DefaultOptionKeys.mloda_validation_function: lambda value: isinstance(value, (int, str))
158
+ DefaultOptionKeys.validation_function: lambda value: isinstance(value, (int, str))
151
159
  and str(value).isdigit()
152
160
  and int(value) > 0,
153
161
  },
@@ -155,8 +163,8 @@ class DimensionalityReductionFeatureGroup(AbstractFeatureGroup):
155
163
  "barnes_hut": "Barnes-Hut approximation (faster, O(n log n))",
156
164
  "exact": "Exact method (slower, O(n^2))",
157
165
  "explanation": "t-SNE computation method",
158
- DefaultOptionKeys.mloda_context: True,
159
- DefaultOptionKeys.mloda_strict_validation: False,
166
+ DefaultOptionKeys.context: True,
167
+ DefaultOptionKeys.strict_validation: False,
160
168
  "default": "barnes_hut",
161
169
  },
162
170
  # PCA specific parameters
@@ -166,54 +174,32 @@ class DimensionalityReductionFeatureGroup(AbstractFeatureGroup):
166
174
  "arpack": "Truncated SVD using ARPACK",
167
175
  "randomized": "Randomized SVD",
168
176
  "explanation": "SVD solver algorithm for PCA",
169
- DefaultOptionKeys.mloda_context: True,
170
- DefaultOptionKeys.mloda_strict_validation: False,
177
+ DefaultOptionKeys.context: True,
178
+ DefaultOptionKeys.strict_validation: False,
171
179
  "default": "auto",
172
180
  },
173
181
  # ICA specific parameters
174
182
  ICA_MAX_ITER: {
175
183
  "explanation": "Maximum number of iterations for ICA",
176
- DefaultOptionKeys.mloda_context: True,
177
- DefaultOptionKeys.mloda_strict_validation: False,
184
+ DefaultOptionKeys.context: True,
185
+ DefaultOptionKeys.strict_validation: False,
178
186
  "default": 200,
179
- DefaultOptionKeys.mloda_validation_function: lambda value: isinstance(value, (int, str))
187
+ DefaultOptionKeys.validation_function: lambda value: isinstance(value, (int, str))
180
188
  and str(value).isdigit()
181
189
  and int(value) > 0,
182
190
  },
183
191
  # Isomap specific parameters
184
192
  ISOMAP_N_NEIGHBORS: {
185
193
  "explanation": "Number of neighbors for Isomap",
186
- DefaultOptionKeys.mloda_context: True,
187
- DefaultOptionKeys.mloda_strict_validation: False,
194
+ DefaultOptionKeys.context: True,
195
+ DefaultOptionKeys.strict_validation: False,
188
196
  "default": 5,
189
- DefaultOptionKeys.mloda_validation_function: lambda value: isinstance(value, (int, str))
197
+ DefaultOptionKeys.validation_function: lambda value: isinstance(value, (int, str))
190
198
  and str(value).isdigit()
191
199
  and int(value) > 0,
192
200
  },
193
201
  }
194
202
 
195
- def input_features(self, options: Options, feature_name: FeatureName) -> Optional[Set[Feature]]:
196
- """Extract source feature from either configuration-based options or string parsing."""
197
-
198
- source_feature: str | None = None
199
-
200
- # Try string-based parsing first
201
- _, source_feature = FeatureChainParser.parse_feature_name(feature_name, [self.PREFIX_PATTERN])
202
- if source_feature is not None:
203
- # Handle multiple source features (comma-separated)
204
- source_features = set()
205
- for feature in source_feature.split(","):
206
- source_features.add(Feature(feature.strip()))
207
- return source_features
208
-
209
- # Fall back to configuration-based approach
210
- source_featurez = options.get_in_features()
211
- if len(source_featurez) != 1:
212
- raise ValueError(
213
- f"Expected exactly one source feature, but found {len(source_featurez)}: {source_featurez}"
214
- )
215
- return set(source_featurez)
216
-
217
203
  @classmethod
218
204
  def parse_reduction_suffix(cls, feature_name: str) -> tuple[str, int]:
219
205
  """
@@ -265,35 +251,28 @@ class DimensionalityReductionFeatureGroup(AbstractFeatureGroup):
265
251
  raise ValueError(f"Invalid dimension: {dimension_str}. Must be a positive integer.")
266
252
 
267
253
  @classmethod
268
- def match_feature_group_criteria(
269
- cls,
270
- feature_name: Union[FeatureName, str],
271
- options: Options,
272
- data_access_collection: Optional[Any] = None,
273
- ) -> bool:
274
- """Check if feature name matches the expected pattern for dimensionality reduction features."""
275
-
276
- # Use the unified parser with property mapping for full configuration support
277
- result = FeatureChainParser.match_configuration_feature_chain_parser(
278
- feature_name,
279
- options,
280
- property_mapping=cls.PROPERTY_MAPPING,
281
- prefix_patterns=[cls.PREFIX_PATTERN],
282
- )
254
+ def _validate_string_match(cls, feature_name: str, _operation_config: str, _source_feature: str) -> bool:
255
+ """
256
+ Validate that a string-based feature name has valid dimensionality reduction components.
283
257
 
284
- # If it matches and it's a string-based feature, validate with our custom logic
285
- if result:
286
- feature_name_str = feature_name.name if isinstance(feature_name, FeatureName) else feature_name
258
+ Validates algorithm and dimension using parse_reduction_suffix().
287
259
 
288
- # Check if this is a string-based feature (contains the pattern)
289
- if FeatureChainParser.is_chained_feature(feature_name_str):
290
- try:
291
- # Use existing validation logic that validates algorithm and dimension
292
- cls.parse_reduction_suffix(feature_name_str)
293
- except ValueError:
294
- # If validation fails, this feature doesn't match
295
- return False
296
- return result
260
+ Args:
261
+ feature_name: The full feature name to validate
262
+ _operation_config: The operation config extracted by the regex (unused)
263
+ _source_feature: The source feature extracted by the regex (unused)
264
+
265
+ Returns:
266
+ True if valid, False otherwise
267
+ """
268
+ if FeatureChainParser.is_chained_feature(feature_name):
269
+ try:
270
+ # Use existing validation logic that validates algorithm and dimension
271
+ cls.parse_reduction_suffix(feature_name)
272
+ except ValueError:
273
+ # If validation fails, this feature doesn't match
274
+ return False
275
+ return True
297
276
 
298
277
  @classmethod
299
278
  def _extract_algorithm_dimension_and_source_features(cls, feature: Feature) -> tuple[str, int, list[str], Options]:
@@ -311,30 +290,38 @@ class DimensionalityReductionFeatureGroup(AbstractFeatureGroup):
311
290
  Raises:
312
291
  ValueError: If parameters cannot be extracted
313
292
  """
314
- algorithm = None
315
- dimension = None
316
- source_features = None
293
+ source_features = cls._extract_source_features(feature)
294
+ algorithm, dimension, algo_options = cls._extract_dim_reduction_params(feature)
295
+ if algorithm is None or dimension is None:
296
+ raise ValueError(f"Could not extract algorithm and dimension from: {feature.name}")
297
+ return algorithm, dimension, source_features, algo_options
317
298
 
318
- # Try string-based parsing first
299
+ @classmethod
300
+ def _extract_dim_reduction_params(cls, feature: Feature) -> tuple[Optional[str], Optional[int], Options]:
301
+ """
302
+ Extract dimensionality reduction algorithm, dimension, and options from a feature.
303
+
304
+ Tries string-based parsing first, falls back to configuration-based approach.
305
+
306
+ Args:
307
+ feature: The feature to extract parameters from
308
+
309
+ Returns:
310
+ Tuple of (algorithm, dimension, algorithm_options)
311
+ """
319
312
  feature_name_str = feature.name.name if hasattr(feature.name, "name") else str(feature.name)
320
313
 
314
+ # Try string-based parsing first
321
315
  if FeatureChainParser.is_chained_feature(feature_name_str):
322
316
  algorithm, dimension = cls.parse_reduction_suffix(feature_name_str)
323
- source_features_str = FeatureChainParser.extract_source_feature(feature_name_str, cls.PREFIX_PATTERN)
324
- source_features = [feature.strip() for feature in source_features_str.split(",")]
325
- # For string-based features, still extract algorithm-specific options from feature.options
326
- return algorithm, dimension, source_features, feature.options
317
+ return algorithm, dimension, feature.options
327
318
 
328
319
  # Fall back to configuration-based approach
329
- source_features_set = feature.options.get_in_features()
330
- source_feature = next(iter(source_features_set))
331
- source_features = [source_feature.get_name()]
332
-
333
320
  algorithm = feature.options.get(cls.ALGORITHM)
334
321
  dimension = feature.options.get(cls.DIMENSION)
335
322
 
336
323
  if algorithm is None or dimension is None:
337
- raise ValueError(f"Could not extract algorithm and dimension from: {feature.name}")
324
+ return None, None, feature.options
338
325
 
339
326
  # Validate algorithm
340
327
  if algorithm not in cls.REDUCTION_ALGORITHMS:
@@ -344,12 +331,11 @@ class DimensionalityReductionFeatureGroup(AbstractFeatureGroup):
344
331
  )
345
332
 
346
333
  # Validate and convert dimension
347
-
348
334
  dimension = int(dimension)
349
335
  if dimension <= 0:
350
336
  raise ValueError(f"Invalid dimension: {dimension}. Must be a positive integer.")
351
337
 
352
- return algorithm, dimension, source_features, feature.options
338
+ return algorithm, dimension, feature.options
353
339
 
354
340
  @classmethod
355
341
  def calculate_feature(cls, data: Any, features: FeatureSet) -> Any:
@@ -27,14 +27,14 @@ except ImportError:
27
27
  SKLEARN_AVAILABLE = False
28
28
 
29
29
 
30
- from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
30
+ from mloda import ComputeFramework
31
31
  from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
32
32
  from mloda_plugins.feature_group.experimental.dimensionality_reduction.base import DimensionalityReductionFeatureGroup
33
33
 
34
34
 
35
35
  class PandasDimensionalityReductionFeatureGroup(DimensionalityReductionFeatureGroup):
36
36
  @classmethod
37
- def compute_framework_rule(cls) -> set[type[ComputeFrameWork]]:
37
+ def compute_framework_rule(cls) -> set[type[ComputeFramework]]:
38
38
  """Define the compute framework for this feature group."""
39
39
  return {PandasDataFrame}
40
40