mloda 0.3.3__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mloda/__init__.py +17 -0
- {mloda_core → mloda/core}/abstract_plugins/components/base_artifact.py +2 -2
- {mloda_core → mloda/core}/abstract_plugins/components/base_validator.py +13 -0
- {mloda_core → mloda/core}/abstract_plugins/components/data_access_collection.py +1 -1
- {mloda_core → mloda/core}/abstract_plugins/components/data_types.py +39 -0
- {mloda_core → mloda/core}/abstract_plugins/components/feature.py +39 -33
- {mloda_core → mloda/core}/abstract_plugins/components/feature_chainer/feature_chain_parser.py +19 -19
- mloda/core/abstract_plugins/components/feature_chainer/feature_chain_parser_mixin.py +197 -0
- {mloda_core → mloda/core}/abstract_plugins/components/feature_collection.py +6 -6
- {mloda_core → mloda/core}/abstract_plugins/components/feature_group_version.py +8 -8
- {mloda_core → mloda/core}/abstract_plugins/components/feature_set.py +18 -24
- {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/cfw_transformer.py +2 -2
- {mloda_core → mloda/core}/abstract_plugins/components/index/add_index_feature.py +4 -4
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/api_input_data.py +3 -3
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/api_input_data_collection.py +2 -2
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/base_api_data.py +1 -1
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/base_input_data.py +6 -6
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/creator/data_creator.py +3 -3
- mloda/core/abstract_plugins/components/link.py +437 -0
- {mloda_core → mloda/core}/abstract_plugins/components/match_data/match_data.py +3 -3
- {mloda_core → mloda/core}/abstract_plugins/components/merge/base_merge_engine.py +2 -2
- {mloda_core → mloda/core}/abstract_plugins/components/options.py +12 -36
- {mloda_core → mloda/core}/abstract_plugins/components/parallelization_modes.py +1 -1
- {mloda_core → mloda/core}/abstract_plugins/components/plugin_option/plugin_collector.py +14 -14
- mloda/core/abstract_plugins/components/validators/datatype_validator.py +96 -0
- mloda/core/abstract_plugins/components/validators/feature_set_validator.py +38 -0
- mloda/core/abstract_plugins/components/validators/feature_validator.py +23 -0
- mloda/core/abstract_plugins/components/validators/link_validator.py +79 -0
- mloda/core/abstract_plugins/components/validators/options_validator.py +57 -0
- mloda_core/abstract_plugins/compute_frame_work.py → mloda/core/abstract_plugins/compute_framework.py +46 -37
- mloda_core/abstract_plugins/abstract_feature_group.py → mloda/core/abstract_plugins/feature_group.py +56 -33
- mloda/core/abstract_plugins/function_extender.py +78 -0
- mloda/core/api/plugin_docs.py +220 -0
- mloda/core/api/plugin_info.py +32 -0
- {mloda_core → mloda/core}/api/prepare/setup_compute_framework.py +11 -11
- {mloda_core → mloda/core}/api/request.py +42 -33
- {mloda_core → mloda/core}/core/cfw_manager.py +8 -8
- {mloda_core → mloda/core}/core/engine.py +47 -46
- {mloda_core → mloda/core}/core/step/abstract_step.py +7 -7
- {mloda_core → mloda/core}/core/step/feature_group_step.py +12 -12
- {mloda_core → mloda/core}/core/step/join_step.py +14 -14
- {mloda_core → mloda/core}/core/step/transform_frame_work_step.py +16 -16
- {mloda_core → mloda/core}/filter/filter_engine.py +1 -1
- {mloda_core → mloda/core}/filter/filter_type_enum.py +1 -1
- {mloda_core → mloda/core}/filter/global_filter.py +23 -23
- {mloda_core → mloda/core}/filter/single_filter.py +6 -6
- {mloda_core → mloda/core}/prepare/accessible_plugins.py +16 -18
- {mloda_core → mloda/core}/prepare/execution_plan.py +65 -39
- {mloda_core → mloda/core}/prepare/graph/build_graph.py +6 -6
- {mloda_core → mloda/core}/prepare/graph/graph.py +1 -1
- {mloda_core → mloda/core}/prepare/graph/properties.py +5 -5
- {mloda_core → mloda/core}/prepare/identify_feature_group.py +12 -14
- {mloda_core → mloda/core}/prepare/joinstep_collection.py +3 -3
- {mloda_core → mloda/core}/prepare/resolve_compute_frameworks.py +6 -6
- {mloda_core → mloda/core}/prepare/resolve_graph.py +11 -11
- {mloda_core → mloda/core}/prepare/resolve_links.py +11 -31
- mloda/core/prepare/validators/resolve_link_validator.py +32 -0
- mloda/core/runtime/compute_framework_executor.py +271 -0
- mloda/core/runtime/data_lifecycle_manager.py +160 -0
- mloda/core/runtime/flight/__init__.py +0 -0
- {mloda_core → mloda/core}/runtime/flight/runner_flight_server.py +1 -1
- mloda/core/runtime/run.py +317 -0
- mloda/core/runtime/worker/__init__.py +0 -0
- {mloda_core → mloda/core}/runtime/worker/multiprocessing_worker.py +15 -10
- {mloda_core → mloda/core}/runtime/worker/thread_worker.py +2 -2
- mloda/core/runtime/worker_manager.py +96 -0
- mloda/provider/__init__.py +101 -0
- mloda/steward/__init__.py +25 -0
- mloda/user/__init__.py +57 -0
- {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/METADATA +18 -22
- mloda-0.4.0.dist-info/RECORD +248 -0
- {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/top_level.txt +1 -1
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_framework.py +15 -13
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_merge_engine.py +3 -3
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_framework.py +12 -10
- mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/pandas/dataframe.py +18 -16
- mloda_plugins/compute_framework/base_implementations/pandas/pandas_filter_engine.py +36 -13
- mloda_plugins/compute_framework/base_implementations/pandas/pandas_merge_engine.py +7 -7
- mloda_plugins/compute_framework/base_implementations/pandas/pandaspyarrowtransformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/polars/dataframe.py +16 -14
- mloda_plugins/compute_framework/base_implementations/polars/lazy_dataframe.py +13 -12
- mloda_plugins/compute_framework/base_implementations/polars/polars_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/polars/polars_lazy_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/polars/polars_merge_engine.py +3 -3
- mloda_plugins/compute_framework/base_implementations/polars/polars_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_merge_engine.py +3 -3
- mloda_plugins/compute_framework/base_implementations/pyarrow/table.py +12 -10
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_framework.py +11 -9
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_merge_engine.py +3 -3
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/spark/spark_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/spark/spark_framework.py +17 -15
- mloda_plugins/compute_framework/base_implementations/spark/spark_merge_engine.py +3 -3
- mloda_plugins/compute_framework/base_implementations/spark/spark_pyarrow_transformer.py +1 -1
- mloda_plugins/config/feature/loader.py +2 -2
- mloda_plugins/feature_group/experimental/aggregated_feature_group/base.py +45 -62
- mloda_plugins/feature_group/experimental/aggregated_feature_group/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/aggregated_feature_group/polars_lazy.py +2 -2
- mloda_plugins/feature_group/experimental/aggregated_feature_group/pyarrow.py +2 -2
- mloda_plugins/feature_group/experimental/clustering/base.py +69 -97
- mloda_plugins/feature_group/experimental/clustering/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/data_quality/missing_value/base.py +58 -79
- mloda_plugins/feature_group/experimental/data_quality/missing_value/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/data_quality/missing_value/pyarrow.py +2 -2
- mloda_plugins/feature_group/experimental/data_quality/missing_value/python_dict.py +2 -2
- mloda_plugins/feature_group/experimental/default_options_key.py +16 -19
- mloda_plugins/feature_group/experimental/dimensionality_reduction/base.py +80 -94
- mloda_plugins/feature_group/experimental/dimensionality_reduction/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/dynamic_feature_group_factory/dynamic_feature_group_factory.py +24 -24
- mloda_plugins/feature_group/experimental/forecasting/base.py +106 -104
- mloda_plugins/feature_group/experimental/forecasting/forecasting_artifact.py +2 -2
- mloda_plugins/feature_group/experimental/forecasting/pandas.py +15 -15
- mloda_plugins/feature_group/experimental/geo_distance/base.py +50 -42
- mloda_plugins/feature_group/experimental/geo_distance/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/llm/cli.py +4 -4
- mloda_plugins/feature_group/experimental/llm/cli_features/refactor_git_cached.py +19 -19
- mloda_plugins/feature_group/experimental/llm/installed_packages_feature_group.py +8 -8
- mloda_plugins/feature_group/experimental/llm/list_directory_feature_group.py +5 -5
- mloda_plugins/feature_group/experimental/llm/llm_api/claude.py +3 -3
- mloda_plugins/feature_group/experimental/llm/llm_api/gemini.py +3 -3
- mloda_plugins/feature_group/experimental/llm/llm_api/llm_base_request.py +5 -5
- mloda_plugins/feature_group/experimental/llm/llm_api/openai.py +3 -3
- mloda_plugins/feature_group/experimental/llm/llm_api/request_loop.py +6 -6
- mloda_plugins/feature_group/experimental/llm/llm_file_selector.py +10 -10
- mloda_plugins/feature_group/experimental/llm/tools/tool_collection.py +1 -1
- mloda_plugins/feature_group/experimental/node_centrality/base.py +46 -72
- mloda_plugins/feature_group/experimental/node_centrality/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/sklearn/encoding/base.py +51 -51
- mloda_plugins/feature_group/experimental/sklearn/encoding/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/sklearn/pipeline/base.py +52 -39
- mloda_plugins/feature_group/experimental/sklearn/pipeline/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/sklearn/scaling/base.py +44 -58
- mloda_plugins/feature_group/experimental/sklearn/scaling/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/sklearn/sklearn_artifact.py +2 -2
- mloda_plugins/feature_group/experimental/source_input_feature.py +15 -15
- mloda_plugins/feature_group/experimental/text_cleaning/base.py +38 -61
- mloda_plugins/feature_group/experimental/text_cleaning/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/text_cleaning/python_dict.py +2 -2
- mloda_plugins/feature_group/experimental/time_window/base.py +106 -93
- mloda_plugins/feature_group/experimental/time_window/pandas.py +13 -13
- mloda_plugins/feature_group/experimental/time_window/pyarrow.py +12 -12
- mloda_plugins/feature_group/input_data/api_data/api_data.py +9 -11
- mloda_plugins/feature_group/input_data/read_context_files.py +7 -7
- mloda_plugins/feature_group/input_data/read_db.py +7 -9
- mloda_plugins/feature_group/input_data/read_db_feature.py +4 -4
- mloda_plugins/feature_group/input_data/read_dbs/sqlite.py +23 -13
- mloda_plugins/feature_group/input_data/read_file.py +8 -8
- mloda_plugins/feature_group/input_data/read_file_feature.py +4 -4
- mloda_plugins/feature_group/input_data/read_files/csv.py +6 -6
- mloda_plugins/feature_group/input_data/read_files/feather.py +5 -5
- mloda_plugins/feature_group/input_data/read_files/json.py +5 -5
- mloda_plugins/feature_group/input_data/read_files/orc.py +5 -5
- mloda_plugins/feature_group/input_data/read_files/parquet.py +5 -5
- mloda_plugins/feature_group/input_data/read_files/text_file_reader.py +5 -5
- mloda_plugins/function_extender/base_implementations/otel/otel_extender.py +4 -4
- mloda-0.3.3.dist-info/RECORD +0 -230
- mloda_core/abstract_plugins/components/link.py +0 -286
- mloda_core/abstract_plugins/function_extender.py +0 -34
- mloda_core/runtime/run.py +0 -617
- {mloda_core → mloda/core}/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/domain.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/feature_chainer/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/feature_name.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/base_transformer.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/hashable_dict.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/index/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/index/index.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/creator/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/match_data/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/merge/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/plugin_option/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/utils.py +0 -0
- {mloda_core/abstract_plugins/plugin_loader → mloda/core/abstract_plugins/components/validators}/__init__.py +0 -0
- {mloda_core/api → mloda/core/abstract_plugins/plugin_loader}/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/plugin_loader/plugin_loader.py +0 -0
- {mloda_core/api/prepare → mloda/core/api}/__init__.py +0 -0
- {mloda_core/core → mloda/core/api/prepare}/__init__.py +0 -0
- {mloda_core/core/step → mloda/core/core}/__init__.py +0 -0
- {mloda_core/filter → mloda/core/core/step}/__init__.py +0 -0
- {mloda_core/prepare → mloda/core/filter}/__init__.py +0 -0
- {mloda_core → mloda/core}/filter/filter_parameter.py +0 -0
- {mloda_core/prepare/graph → mloda/core/prepare}/__init__.py +0 -0
- {mloda_core/runtime → mloda/core/prepare/graph}/__init__.py +0 -0
- {mloda_core/runtime/flight → mloda/core/prepare/validators}/__init__.py +0 -0
- {mloda_core/runtime/worker → mloda/core/runtime}/__init__.py +0 -0
- {mloda_core → mloda/core}/runtime/flight/flight_server.py +0 -0
- {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/WHEEL +0 -0
- {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/entry_points.txt +0 -0
- {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/licenses/LICENSE.TXT +0 -0
- {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -8,16 +8,19 @@ import copy
|
|
|
8
8
|
from abc import abstractmethod
|
|
9
9
|
from typing import Any, List, Optional, Set, Union
|
|
10
10
|
|
|
11
|
-
from
|
|
12
|
-
from
|
|
13
|
-
from
|
|
14
|
-
from
|
|
15
|
-
|
|
16
|
-
|
|
11
|
+
from mloda import FeatureGroup
|
|
12
|
+
from mloda import Feature
|
|
13
|
+
from mloda.provider import FeatureChainParser
|
|
14
|
+
from mloda.provider import (
|
|
15
|
+
FeatureChainParserMixin,
|
|
16
|
+
)
|
|
17
|
+
from mloda.user import FeatureName
|
|
18
|
+
from mloda.provider import FeatureSet
|
|
19
|
+
from mloda import Options
|
|
17
20
|
from mloda_plugins.feature_group.experimental.default_options_key import DefaultOptionKeys
|
|
18
21
|
|
|
19
22
|
|
|
20
|
-
class MissingValueFeatureGroup(
|
|
23
|
+
class MissingValueFeatureGroup(FeatureChainParserMixin, FeatureGroup):
|
|
21
24
|
"""
|
|
22
25
|
Base class for all missing value imputation feature groups.
|
|
23
26
|
|
|
@@ -83,7 +86,7 @@ class MissingValueFeatureGroup(AbstractFeatureGroup):
|
|
|
83
86
|
### String-Based Creation
|
|
84
87
|
|
|
85
88
|
```python
|
|
86
|
-
from
|
|
89
|
+
from mloda import Feature
|
|
87
90
|
|
|
88
91
|
# Impute missing income values with mean
|
|
89
92
|
feature = Feature(name="income__mean_imputed")
|
|
@@ -101,8 +104,8 @@ class MissingValueFeatureGroup(AbstractFeatureGroup):
|
|
|
101
104
|
### Configuration-Based Creation
|
|
102
105
|
|
|
103
106
|
```python
|
|
104
|
-
from
|
|
105
|
-
from
|
|
107
|
+
from mloda import Feature
|
|
108
|
+
from mloda import Options
|
|
106
109
|
from mloda_plugins.feature_group.experimental.default_options_key import DefaultOptionKeys
|
|
107
110
|
|
|
108
111
|
# Mean imputation using configuration
|
|
@@ -160,46 +163,31 @@ class MissingValueFeatureGroup(AbstractFeatureGroup):
|
|
|
160
163
|
|
|
161
164
|
PREFIX_PATTERN = r".*__([\w]+)_imputed$"
|
|
162
165
|
|
|
166
|
+
# In-feature configuration for FeatureChainParserMixin
|
|
167
|
+
MIN_IN_FEATURES = 1
|
|
168
|
+
MAX_IN_FEATURES = 1
|
|
169
|
+
|
|
163
170
|
PROPERTY_MAPPING = {
|
|
164
171
|
IMPUTATION_METHOD: {
|
|
165
172
|
**IMPUTATION_METHODS,
|
|
166
|
-
DefaultOptionKeys.
|
|
173
|
+
DefaultOptionKeys.context: True,
|
|
167
174
|
},
|
|
168
175
|
DefaultOptionKeys.in_features: {
|
|
169
176
|
"explanation": "Source feature to impute missing values",
|
|
170
|
-
DefaultOptionKeys.
|
|
177
|
+
DefaultOptionKeys.context: True,
|
|
171
178
|
},
|
|
172
179
|
"constant_value": {
|
|
173
180
|
"explanation": "Constant value to use for constant imputation method",
|
|
174
|
-
DefaultOptionKeys.
|
|
175
|
-
DefaultOptionKeys.
|
|
181
|
+
DefaultOptionKeys.context: True,
|
|
182
|
+
DefaultOptionKeys.default: None, # Default is None, required only for constant method
|
|
176
183
|
},
|
|
177
184
|
"group_by_features": {
|
|
178
185
|
"explanation": "Optional list of features to group by before imputation",
|
|
179
|
-
DefaultOptionKeys.
|
|
180
|
-
DefaultOptionKeys.
|
|
186
|
+
DefaultOptionKeys.context: True,
|
|
187
|
+
DefaultOptionKeys.default: None, # Default is None (no grouping)
|
|
181
188
|
},
|
|
182
189
|
}
|
|
183
190
|
|
|
184
|
-
def input_features(self, options: Options, feature_name: FeatureName) -> Optional[Set[Feature]]:
|
|
185
|
-
"""Extract source feature from either configuration-based options or string parsing."""
|
|
186
|
-
|
|
187
|
-
source_feature: str | None = None
|
|
188
|
-
|
|
189
|
-
# Try string-based parsing first
|
|
190
|
-
# parse_feature_name returns (operation_config, source_feature)
|
|
191
|
-
operation_config, source_feature = FeatureChainParser.parse_feature_name(feature_name, [self.PREFIX_PATTERN])
|
|
192
|
-
if source_feature is not None:
|
|
193
|
-
return {Feature(source_feature)}
|
|
194
|
-
|
|
195
|
-
# Fall back to configuration-based approach
|
|
196
|
-
source_features = options.get_in_features()
|
|
197
|
-
if len(source_features) != 1:
|
|
198
|
-
raise ValueError(
|
|
199
|
-
f"Expected exactly one source feature, but found {len(source_features)}: {source_features}"
|
|
200
|
-
)
|
|
201
|
-
return set(source_features)
|
|
202
|
-
|
|
203
191
|
@classmethod
|
|
204
192
|
def get_imputation_method(cls, feature_name: str) -> str:
|
|
205
193
|
"""Extract the imputation method from the feature name."""
|
|
@@ -223,21 +211,36 @@ class MissingValueFeatureGroup(AbstractFeatureGroup):
|
|
|
223
211
|
return imputation_method
|
|
224
212
|
|
|
225
213
|
@classmethod
|
|
226
|
-
def
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
214
|
+
def _extract_imputation_method(cls, feature: Feature) -> Optional[str]:
|
|
215
|
+
"""
|
|
216
|
+
Extract imputation method from a feature.
|
|
217
|
+
|
|
218
|
+
Tries string-based parsing first, falls back to configuration-based.
|
|
219
|
+
|
|
220
|
+
Args:
|
|
221
|
+
feature: The feature to extract imputation method from
|
|
222
|
+
|
|
223
|
+
Returns:
|
|
224
|
+
Imputation method name or None if not found
|
|
225
|
+
"""
|
|
226
|
+
feature_name = feature.get_name()
|
|
227
|
+
|
|
228
|
+
# Try string-based parsing first
|
|
229
|
+
if FeatureChainParser.is_chained_feature(feature_name):
|
|
230
|
+
# Use get_imputation_method which handles parse_feature_name correctly
|
|
231
|
+
return cls.get_imputation_method(feature_name)
|
|
232
|
+
|
|
233
|
+
# Fall back to configuration-based approach
|
|
234
|
+
imputation_method = feature.options.get(cls.IMPUTATION_METHOD)
|
|
235
|
+
|
|
236
|
+
# Validate imputation method if found
|
|
237
|
+
if imputation_method is not None and imputation_method not in cls.IMPUTATION_METHODS:
|
|
238
|
+
raise ValueError(
|
|
239
|
+
f"Unsupported imputation method: {imputation_method}. "
|
|
240
|
+
f"Supported methods: {', '.join(cls.IMPUTATION_METHODS.keys())}"
|
|
241
|
+
)
|
|
242
|
+
|
|
243
|
+
return str(imputation_method) if imputation_method is not None else None
|
|
241
244
|
|
|
242
245
|
@classmethod
|
|
243
246
|
def _extract_imputation_method_and_source_feature(cls, feature: Feature) -> tuple[str, str]:
|
|
@@ -255,37 +258,13 @@ class MissingValueFeatureGroup(AbstractFeatureGroup):
|
|
|
255
258
|
Raises:
|
|
256
259
|
ValueError: If parameters cannot be extracted
|
|
257
260
|
"""
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
# Try string-based parsing first
|
|
262
|
-
feature_name_str = feature.name.name if hasattr(feature.name, "name") else str(feature.name)
|
|
263
|
-
|
|
264
|
-
if FeatureChainParser.is_chained_feature(feature_name_str):
|
|
265
|
-
# Use get_imputation_method which already handles parse_feature_name correctly
|
|
266
|
-
imputation_method = cls.get_imputation_method(feature_name_str)
|
|
267
|
-
# Use extract_source_feature which returns everything before the last __
|
|
268
|
-
source_feature_name = FeatureChainParser.extract_source_feature(feature_name_str, cls.PREFIX_PATTERN)
|
|
269
|
-
return imputation_method, source_feature_name
|
|
261
|
+
source_features = cls._extract_source_features(feature)
|
|
262
|
+
imputation_method = cls._extract_imputation_method(feature)
|
|
270
263
|
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
source_feature = next(iter(source_features))
|
|
274
|
-
source_feature_name = source_feature.get_name()
|
|
275
|
-
|
|
276
|
-
imputation_method = feature.options.get(cls.IMPUTATION_METHOD)
|
|
277
|
-
|
|
278
|
-
if imputation_method is None or source_feature_name is None:
|
|
279
|
-
raise ValueError(f"Could not extract imputation method and source feature from: {feature.name}")
|
|
280
|
-
|
|
281
|
-
# Validate imputation method (no need to strip "imputed" from config-based method)
|
|
282
|
-
if imputation_method not in cls.IMPUTATION_METHODS:
|
|
283
|
-
raise ValueError(
|
|
284
|
-
f"Unsupported imputation method: {imputation_method}. "
|
|
285
|
-
f"Supported methods: {', '.join(cls.IMPUTATION_METHODS.keys())}"
|
|
286
|
-
)
|
|
264
|
+
if imputation_method is None:
|
|
265
|
+
raise ValueError(f"Could not extract imputation method from: {feature.name}")
|
|
287
266
|
|
|
288
|
-
return imputation_method,
|
|
267
|
+
return imputation_method, source_features[0]
|
|
289
268
|
|
|
290
269
|
@classmethod
|
|
291
270
|
def calculate_feature(cls, data: Any, features: FeatureSet) -> Any:
|
|
@@ -7,7 +7,7 @@ from __future__ import annotations
|
|
|
7
7
|
from typing import Any, List, Optional, Set, Type, Union
|
|
8
8
|
|
|
9
9
|
|
|
10
|
-
from
|
|
10
|
+
from mloda import ComputeFramework
|
|
11
11
|
|
|
12
12
|
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
|
|
13
13
|
from mloda_plugins.feature_group.experimental.data_quality.missing_value.base import MissingValueFeatureGroup
|
|
@@ -20,7 +20,7 @@ except ImportError:
|
|
|
20
20
|
|
|
21
21
|
class PandasMissingValueFeatureGroup(MissingValueFeatureGroup):
|
|
22
22
|
@classmethod
|
|
23
|
-
def compute_framework_rule(cls) -> Union[bool, Set[Type[
|
|
23
|
+
def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFramework]]]:
|
|
24
24
|
return {PandasDataFrame}
|
|
25
25
|
|
|
26
26
|
@classmethod
|
|
@@ -9,7 +9,7 @@ from typing import Any, List, Optional, Set, Type, Union
|
|
|
9
9
|
import pyarrow as pa
|
|
10
10
|
import pyarrow.compute as pc
|
|
11
11
|
|
|
12
|
-
from
|
|
12
|
+
from mloda import ComputeFramework
|
|
13
13
|
|
|
14
14
|
from mloda_plugins.compute_framework.base_implementations.pyarrow.table import PyArrowTable
|
|
15
15
|
from mloda_plugins.feature_group.experimental.data_quality.missing_value.base import MissingValueFeatureGroup
|
|
@@ -17,7 +17,7 @@ from mloda_plugins.feature_group.experimental.data_quality.missing_value.base im
|
|
|
17
17
|
|
|
18
18
|
class PyArrowMissingValueFeatureGroup(MissingValueFeatureGroup):
|
|
19
19
|
@classmethod
|
|
20
|
-
def compute_framework_rule(cls) -> Union[bool, Set[Type[
|
|
20
|
+
def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFramework]]]:
|
|
21
21
|
return {PyArrowTable}
|
|
22
22
|
|
|
23
23
|
@classmethod
|
|
@@ -8,7 +8,7 @@ import statistics
|
|
|
8
8
|
from collections import Counter
|
|
9
9
|
from typing import Any, Dict, List, Optional, Set, Type, Union
|
|
10
10
|
|
|
11
|
-
from
|
|
11
|
+
from mloda import ComputeFramework
|
|
12
12
|
|
|
13
13
|
from mloda_plugins.compute_framework.base_implementations.python_dict.python_dict_framework import PythonDictFramework
|
|
14
14
|
from mloda_plugins.feature_group.experimental.data_quality.missing_value.base import MissingValueFeatureGroup
|
|
@@ -23,7 +23,7 @@ class PythonDictMissingValueFeatureGroup(MissingValueFeatureGroup):
|
|
|
23
23
|
"""
|
|
24
24
|
|
|
25
25
|
@classmethod
|
|
26
|
-
def compute_framework_rule(cls) -> Union[bool, Set[Type[
|
|
26
|
+
def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFramework]]]:
|
|
27
27
|
return {PythonDictFramework}
|
|
28
28
|
|
|
29
29
|
@classmethod
|
|
@@ -3,28 +3,25 @@ from enum import Enum
|
|
|
3
3
|
|
|
4
4
|
class DefaultOptionKeys(str, Enum):
|
|
5
5
|
"""
|
|
6
|
+
Default option keys used to configure mloda feature groups.
|
|
6
7
|
|
|
7
|
-
|
|
8
|
+
These keys are used to look up configuration values in Options objects.
|
|
9
|
+
The enum value serves as both the option key and the default column name.
|
|
8
10
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
When the framework matured and we learned more about the requirements, we can refactor this to a more sophisticated solution.
|
|
13
|
-
|
|
14
|
-
However we use the DefaultOptions object to store needed keywords.
|
|
11
|
+
Time-Related Keys:
|
|
12
|
+
- `reference_time`: Key for the event timestamp column. Value: "reference_time"
|
|
13
|
+
- `time_travel`: Key for the validity timestamp column. Value: "time_travel_filter"
|
|
15
14
|
|
|
15
|
+
These values are used as default column names when not customized via Options.
|
|
16
16
|
"""
|
|
17
17
|
|
|
18
18
|
in_features = "in_features"
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
@classmethod
|
|
29
|
-
def list(cls) -> list[str]:
|
|
30
|
-
return [member.value for member in cls]
|
|
19
|
+
feature_chainer_parser_key = "feature_chainer_parser_key"
|
|
20
|
+
reference_time = "reference_time"
|
|
21
|
+
time_travel = "time_travel_filter"
|
|
22
|
+
default = "default"
|
|
23
|
+
context = "context"
|
|
24
|
+
group = "group"
|
|
25
|
+
strict_validation = "strict_validation"
|
|
26
|
+
validation_function = "validation_function"
|
|
27
|
+
strict_type_enforcement = "strict_type_enforcement"
|
|
@@ -7,16 +7,19 @@ from __future__ import annotations
|
|
|
7
7
|
from abc import abstractmethod
|
|
8
8
|
from typing import Any, Optional, Set, Union
|
|
9
9
|
|
|
10
|
-
from
|
|
11
|
-
from
|
|
12
|
-
from
|
|
13
|
-
from
|
|
14
|
-
|
|
15
|
-
|
|
10
|
+
from mloda import FeatureGroup
|
|
11
|
+
from mloda import Feature
|
|
12
|
+
from mloda.provider import FeatureChainParser
|
|
13
|
+
from mloda.provider import (
|
|
14
|
+
FeatureChainParserMixin,
|
|
15
|
+
)
|
|
16
|
+
from mloda.user import FeatureName
|
|
17
|
+
from mloda.provider import FeatureSet
|
|
18
|
+
from mloda import Options
|
|
16
19
|
from mloda_plugins.feature_group.experimental.default_options_key import DefaultOptionKeys
|
|
17
20
|
|
|
18
21
|
|
|
19
|
-
class DimensionalityReductionFeatureGroup(
|
|
22
|
+
class DimensionalityReductionFeatureGroup(FeatureChainParserMixin, FeatureGroup):
|
|
20
23
|
"""
|
|
21
24
|
Base class for all dimensionality reduction feature groups.
|
|
22
25
|
|
|
@@ -113,41 +116,46 @@ class DimensionalityReductionFeatureGroup(AbstractFeatureGroup):
|
|
|
113
116
|
# Define the prefix pattern for this feature group
|
|
114
117
|
PREFIX_PATTERN = r".*__([\w]+)_(\d+)d$"
|
|
115
118
|
|
|
119
|
+
# In-feature configuration for FeatureChainParserMixin
|
|
120
|
+
IN_FEATURE_SEPARATOR = ","
|
|
121
|
+
MIN_IN_FEATURES = 1
|
|
122
|
+
MAX_IN_FEATURES = None
|
|
123
|
+
|
|
116
124
|
PROPERTY_MAPPING = {
|
|
117
125
|
ALGORITHM: {
|
|
118
126
|
**REDUCTION_ALGORITHMS,
|
|
119
|
-
DefaultOptionKeys.
|
|
120
|
-
DefaultOptionKeys.
|
|
127
|
+
DefaultOptionKeys.context: True,
|
|
128
|
+
DefaultOptionKeys.strict_validation: True,
|
|
121
129
|
},
|
|
122
130
|
DIMENSION: {
|
|
123
131
|
"explanation": "Target dimension for the reduction (positive integer)",
|
|
124
|
-
DefaultOptionKeys.
|
|
125
|
-
DefaultOptionKeys.
|
|
126
|
-
DefaultOptionKeys.
|
|
132
|
+
DefaultOptionKeys.context: True,
|
|
133
|
+
DefaultOptionKeys.strict_validation: True,
|
|
134
|
+
DefaultOptionKeys.validation_function: lambda value: isinstance(value, (int, str))
|
|
127
135
|
and str(value).isdigit()
|
|
128
136
|
and int(value) > 0,
|
|
129
137
|
},
|
|
130
138
|
DefaultOptionKeys.in_features: {
|
|
131
139
|
"explanation": "Source features to use for dimensionality reduction",
|
|
132
|
-
DefaultOptionKeys.
|
|
133
|
-
DefaultOptionKeys.
|
|
140
|
+
DefaultOptionKeys.context: True,
|
|
141
|
+
DefaultOptionKeys.strict_validation: False,
|
|
134
142
|
},
|
|
135
143
|
# t-SNE specific parameters
|
|
136
144
|
TSNE_MAX_ITER: {
|
|
137
145
|
"explanation": "Maximum number of iterations for t-SNE optimization",
|
|
138
|
-
DefaultOptionKeys.
|
|
139
|
-
DefaultOptionKeys.
|
|
146
|
+
DefaultOptionKeys.context: True,
|
|
147
|
+
DefaultOptionKeys.strict_validation: False,
|
|
140
148
|
"default": 250,
|
|
141
|
-
DefaultOptionKeys.
|
|
149
|
+
DefaultOptionKeys.validation_function: lambda value: isinstance(value, (int, str))
|
|
142
150
|
and str(value).isdigit()
|
|
143
151
|
and int(value) > 0,
|
|
144
152
|
},
|
|
145
153
|
TSNE_N_ITER_WITHOUT_PROGRESS: {
|
|
146
154
|
"explanation": "Maximum iterations without progress before early stopping (t-SNE)",
|
|
147
|
-
DefaultOptionKeys.
|
|
148
|
-
DefaultOptionKeys.
|
|
155
|
+
DefaultOptionKeys.context: True,
|
|
156
|
+
DefaultOptionKeys.strict_validation: False,
|
|
149
157
|
"default": 50,
|
|
150
|
-
DefaultOptionKeys.
|
|
158
|
+
DefaultOptionKeys.validation_function: lambda value: isinstance(value, (int, str))
|
|
151
159
|
and str(value).isdigit()
|
|
152
160
|
and int(value) > 0,
|
|
153
161
|
},
|
|
@@ -155,8 +163,8 @@ class DimensionalityReductionFeatureGroup(AbstractFeatureGroup):
|
|
|
155
163
|
"barnes_hut": "Barnes-Hut approximation (faster, O(n log n))",
|
|
156
164
|
"exact": "Exact method (slower, O(n^2))",
|
|
157
165
|
"explanation": "t-SNE computation method",
|
|
158
|
-
DefaultOptionKeys.
|
|
159
|
-
DefaultOptionKeys.
|
|
166
|
+
DefaultOptionKeys.context: True,
|
|
167
|
+
DefaultOptionKeys.strict_validation: False,
|
|
160
168
|
"default": "barnes_hut",
|
|
161
169
|
},
|
|
162
170
|
# PCA specific parameters
|
|
@@ -166,54 +174,32 @@ class DimensionalityReductionFeatureGroup(AbstractFeatureGroup):
|
|
|
166
174
|
"arpack": "Truncated SVD using ARPACK",
|
|
167
175
|
"randomized": "Randomized SVD",
|
|
168
176
|
"explanation": "SVD solver algorithm for PCA",
|
|
169
|
-
DefaultOptionKeys.
|
|
170
|
-
DefaultOptionKeys.
|
|
177
|
+
DefaultOptionKeys.context: True,
|
|
178
|
+
DefaultOptionKeys.strict_validation: False,
|
|
171
179
|
"default": "auto",
|
|
172
180
|
},
|
|
173
181
|
# ICA specific parameters
|
|
174
182
|
ICA_MAX_ITER: {
|
|
175
183
|
"explanation": "Maximum number of iterations for ICA",
|
|
176
|
-
DefaultOptionKeys.
|
|
177
|
-
DefaultOptionKeys.
|
|
184
|
+
DefaultOptionKeys.context: True,
|
|
185
|
+
DefaultOptionKeys.strict_validation: False,
|
|
178
186
|
"default": 200,
|
|
179
|
-
DefaultOptionKeys.
|
|
187
|
+
DefaultOptionKeys.validation_function: lambda value: isinstance(value, (int, str))
|
|
180
188
|
and str(value).isdigit()
|
|
181
189
|
and int(value) > 0,
|
|
182
190
|
},
|
|
183
191
|
# Isomap specific parameters
|
|
184
192
|
ISOMAP_N_NEIGHBORS: {
|
|
185
193
|
"explanation": "Number of neighbors for Isomap",
|
|
186
|
-
DefaultOptionKeys.
|
|
187
|
-
DefaultOptionKeys.
|
|
194
|
+
DefaultOptionKeys.context: True,
|
|
195
|
+
DefaultOptionKeys.strict_validation: False,
|
|
188
196
|
"default": 5,
|
|
189
|
-
DefaultOptionKeys.
|
|
197
|
+
DefaultOptionKeys.validation_function: lambda value: isinstance(value, (int, str))
|
|
190
198
|
and str(value).isdigit()
|
|
191
199
|
and int(value) > 0,
|
|
192
200
|
},
|
|
193
201
|
}
|
|
194
202
|
|
|
195
|
-
def input_features(self, options: Options, feature_name: FeatureName) -> Optional[Set[Feature]]:
|
|
196
|
-
"""Extract source feature from either configuration-based options or string parsing."""
|
|
197
|
-
|
|
198
|
-
source_feature: str | None = None
|
|
199
|
-
|
|
200
|
-
# Try string-based parsing first
|
|
201
|
-
_, source_feature = FeatureChainParser.parse_feature_name(feature_name, [self.PREFIX_PATTERN])
|
|
202
|
-
if source_feature is not None:
|
|
203
|
-
# Handle multiple source features (comma-separated)
|
|
204
|
-
source_features = set()
|
|
205
|
-
for feature in source_feature.split(","):
|
|
206
|
-
source_features.add(Feature(feature.strip()))
|
|
207
|
-
return source_features
|
|
208
|
-
|
|
209
|
-
# Fall back to configuration-based approach
|
|
210
|
-
source_featurez = options.get_in_features()
|
|
211
|
-
if len(source_featurez) != 1:
|
|
212
|
-
raise ValueError(
|
|
213
|
-
f"Expected exactly one source feature, but found {len(source_featurez)}: {source_featurez}"
|
|
214
|
-
)
|
|
215
|
-
return set(source_featurez)
|
|
216
|
-
|
|
217
203
|
@classmethod
|
|
218
204
|
def parse_reduction_suffix(cls, feature_name: str) -> tuple[str, int]:
|
|
219
205
|
"""
|
|
@@ -265,35 +251,28 @@ class DimensionalityReductionFeatureGroup(AbstractFeatureGroup):
|
|
|
265
251
|
raise ValueError(f"Invalid dimension: {dimension_str}. Must be a positive integer.")
|
|
266
252
|
|
|
267
253
|
@classmethod
|
|
268
|
-
def
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
options: Options,
|
|
272
|
-
data_access_collection: Optional[Any] = None,
|
|
273
|
-
) -> bool:
|
|
274
|
-
"""Check if feature name matches the expected pattern for dimensionality reduction features."""
|
|
275
|
-
|
|
276
|
-
# Use the unified parser with property mapping for full configuration support
|
|
277
|
-
result = FeatureChainParser.match_configuration_feature_chain_parser(
|
|
278
|
-
feature_name,
|
|
279
|
-
options,
|
|
280
|
-
property_mapping=cls.PROPERTY_MAPPING,
|
|
281
|
-
prefix_patterns=[cls.PREFIX_PATTERN],
|
|
282
|
-
)
|
|
254
|
+
def _validate_string_match(cls, feature_name: str, _operation_config: str, _source_feature: str) -> bool:
|
|
255
|
+
"""
|
|
256
|
+
Validate that a string-based feature name has valid dimensionality reduction components.
|
|
283
257
|
|
|
284
|
-
|
|
285
|
-
if result:
|
|
286
|
-
feature_name_str = feature_name.name if isinstance(feature_name, FeatureName) else feature_name
|
|
258
|
+
Validates algorithm and dimension using parse_reduction_suffix().
|
|
287
259
|
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
260
|
+
Args:
|
|
261
|
+
feature_name: The full feature name to validate
|
|
262
|
+
_operation_config: The operation config extracted by the regex (unused)
|
|
263
|
+
_source_feature: The source feature extracted by the regex (unused)
|
|
264
|
+
|
|
265
|
+
Returns:
|
|
266
|
+
True if valid, False otherwise
|
|
267
|
+
"""
|
|
268
|
+
if FeatureChainParser.is_chained_feature(feature_name):
|
|
269
|
+
try:
|
|
270
|
+
# Use existing validation logic that validates algorithm and dimension
|
|
271
|
+
cls.parse_reduction_suffix(feature_name)
|
|
272
|
+
except ValueError:
|
|
273
|
+
# If validation fails, this feature doesn't match
|
|
274
|
+
return False
|
|
275
|
+
return True
|
|
297
276
|
|
|
298
277
|
@classmethod
|
|
299
278
|
def _extract_algorithm_dimension_and_source_features(cls, feature: Feature) -> tuple[str, int, list[str], Options]:
|
|
@@ -311,30 +290,38 @@ class DimensionalityReductionFeatureGroup(AbstractFeatureGroup):
|
|
|
311
290
|
Raises:
|
|
312
291
|
ValueError: If parameters cannot be extracted
|
|
313
292
|
"""
|
|
314
|
-
|
|
315
|
-
dimension =
|
|
316
|
-
|
|
293
|
+
source_features = cls._extract_source_features(feature)
|
|
294
|
+
algorithm, dimension, algo_options = cls._extract_dim_reduction_params(feature)
|
|
295
|
+
if algorithm is None or dimension is None:
|
|
296
|
+
raise ValueError(f"Could not extract algorithm and dimension from: {feature.name}")
|
|
297
|
+
return algorithm, dimension, source_features, algo_options
|
|
317
298
|
|
|
318
|
-
|
|
299
|
+
@classmethod
|
|
300
|
+
def _extract_dim_reduction_params(cls, feature: Feature) -> tuple[Optional[str], Optional[int], Options]:
|
|
301
|
+
"""
|
|
302
|
+
Extract dimensionality reduction algorithm, dimension, and options from a feature.
|
|
303
|
+
|
|
304
|
+
Tries string-based parsing first, falls back to configuration-based approach.
|
|
305
|
+
|
|
306
|
+
Args:
|
|
307
|
+
feature: The feature to extract parameters from
|
|
308
|
+
|
|
309
|
+
Returns:
|
|
310
|
+
Tuple of (algorithm, dimension, algorithm_options)
|
|
311
|
+
"""
|
|
319
312
|
feature_name_str = feature.name.name if hasattr(feature.name, "name") else str(feature.name)
|
|
320
313
|
|
|
314
|
+
# Try string-based parsing first
|
|
321
315
|
if FeatureChainParser.is_chained_feature(feature_name_str):
|
|
322
316
|
algorithm, dimension = cls.parse_reduction_suffix(feature_name_str)
|
|
323
|
-
|
|
324
|
-
source_features = [feature.strip() for feature in source_features_str.split(",")]
|
|
325
|
-
# For string-based features, still extract algorithm-specific options from feature.options
|
|
326
|
-
return algorithm, dimension, source_features, feature.options
|
|
317
|
+
return algorithm, dimension, feature.options
|
|
327
318
|
|
|
328
319
|
# Fall back to configuration-based approach
|
|
329
|
-
source_features_set = feature.options.get_in_features()
|
|
330
|
-
source_feature = next(iter(source_features_set))
|
|
331
|
-
source_features = [source_feature.get_name()]
|
|
332
|
-
|
|
333
320
|
algorithm = feature.options.get(cls.ALGORITHM)
|
|
334
321
|
dimension = feature.options.get(cls.DIMENSION)
|
|
335
322
|
|
|
336
323
|
if algorithm is None or dimension is None:
|
|
337
|
-
|
|
324
|
+
return None, None, feature.options
|
|
338
325
|
|
|
339
326
|
# Validate algorithm
|
|
340
327
|
if algorithm not in cls.REDUCTION_ALGORITHMS:
|
|
@@ -344,12 +331,11 @@ class DimensionalityReductionFeatureGroup(AbstractFeatureGroup):
|
|
|
344
331
|
)
|
|
345
332
|
|
|
346
333
|
# Validate and convert dimension
|
|
347
|
-
|
|
348
334
|
dimension = int(dimension)
|
|
349
335
|
if dimension <= 0:
|
|
350
336
|
raise ValueError(f"Invalid dimension: {dimension}. Must be a positive integer.")
|
|
351
337
|
|
|
352
|
-
return algorithm, dimension,
|
|
338
|
+
return algorithm, dimension, feature.options
|
|
353
339
|
|
|
354
340
|
@classmethod
|
|
355
341
|
def calculate_feature(cls, data: Any, features: FeatureSet) -> Any:
|
|
@@ -27,14 +27,14 @@ except ImportError:
|
|
|
27
27
|
SKLEARN_AVAILABLE = False
|
|
28
28
|
|
|
29
29
|
|
|
30
|
-
from
|
|
30
|
+
from mloda import ComputeFramework
|
|
31
31
|
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
|
|
32
32
|
from mloda_plugins.feature_group.experimental.dimensionality_reduction.base import DimensionalityReductionFeatureGroup
|
|
33
33
|
|
|
34
34
|
|
|
35
35
|
class PandasDimensionalityReductionFeatureGroup(DimensionalityReductionFeatureGroup):
|
|
36
36
|
@classmethod
|
|
37
|
-
def compute_framework_rule(cls) -> set[type[
|
|
37
|
+
def compute_framework_rule(cls) -> set[type[ComputeFramework]]:
|
|
38
38
|
"""Define the compute framework for this feature group."""
|
|
39
39
|
return {PandasDataFrame}
|
|
40
40
|
|