mloda 0.3.2__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mloda/__init__.py +17 -0
- {mloda_core → mloda/core}/abstract_plugins/components/base_artifact.py +2 -2
- {mloda_core → mloda/core}/abstract_plugins/components/base_validator.py +13 -0
- {mloda_core → mloda/core}/abstract_plugins/components/data_access_collection.py +1 -1
- {mloda_core → mloda/core}/abstract_plugins/components/data_types.py +39 -0
- {mloda_core → mloda/core}/abstract_plugins/components/feature.py +39 -33
- {mloda_core → mloda/core}/abstract_plugins/components/feature_chainer/feature_chain_parser.py +19 -19
- mloda/core/abstract_plugins/components/feature_chainer/feature_chain_parser_mixin.py +197 -0
- {mloda_core → mloda/core}/abstract_plugins/components/feature_collection.py +6 -6
- {mloda_core → mloda/core}/abstract_plugins/components/feature_group_version.py +8 -8
- {mloda_core → mloda/core}/abstract_plugins/components/feature_set.py +18 -24
- {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/cfw_transformer.py +2 -2
- {mloda_core → mloda/core}/abstract_plugins/components/index/add_index_feature.py +4 -4
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/api_input_data.py +3 -3
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/api_input_data_collection.py +2 -2
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/base_api_data.py +1 -1
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/base_input_data.py +6 -6
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/creator/data_creator.py +3 -3
- mloda/core/abstract_plugins/components/link.py +437 -0
- {mloda_core → mloda/core}/abstract_plugins/components/match_data/match_data.py +3 -3
- {mloda_core → mloda/core}/abstract_plugins/components/merge/base_merge_engine.py +2 -2
- {mloda_core → mloda/core}/abstract_plugins/components/options.py +12 -36
- {mloda_core → mloda/core}/abstract_plugins/components/parallelization_modes.py +1 -1
- {mloda_core → mloda/core}/abstract_plugins/components/plugin_option/plugin_collector.py +14 -14
- mloda/core/abstract_plugins/components/validators/datatype_validator.py +96 -0
- mloda/core/abstract_plugins/components/validators/feature_set_validator.py +38 -0
- mloda/core/abstract_plugins/components/validators/feature_validator.py +23 -0
- mloda/core/abstract_plugins/components/validators/link_validator.py +79 -0
- mloda/core/abstract_plugins/components/validators/options_validator.py +57 -0
- mloda_core/abstract_plugins/compute_frame_work.py → mloda/core/abstract_plugins/compute_framework.py +46 -37
- mloda_core/abstract_plugins/abstract_feature_group.py → mloda/core/abstract_plugins/feature_group.py +56 -33
- mloda/core/abstract_plugins/function_extender.py +78 -0
- mloda/core/api/plugin_docs.py +220 -0
- mloda/core/api/plugin_info.py +32 -0
- {mloda_core → mloda/core}/api/prepare/setup_compute_framework.py +11 -11
- {mloda_core → mloda/core}/api/request.py +42 -33
- {mloda_core → mloda/core}/core/cfw_manager.py +8 -8
- {mloda_core → mloda/core}/core/engine.py +47 -46
- {mloda_core → mloda/core}/core/step/abstract_step.py +7 -7
- {mloda_core → mloda/core}/core/step/feature_group_step.py +12 -12
- {mloda_core → mloda/core}/core/step/join_step.py +14 -14
- {mloda_core → mloda/core}/core/step/transform_frame_work_step.py +16 -16
- {mloda_core → mloda/core}/filter/filter_engine.py +1 -1
- {mloda_core → mloda/core}/filter/filter_type_enum.py +1 -1
- {mloda_core → mloda/core}/filter/global_filter.py +23 -23
- {mloda_core → mloda/core}/filter/single_filter.py +6 -6
- {mloda_core → mloda/core}/prepare/accessible_plugins.py +16 -18
- {mloda_core → mloda/core}/prepare/execution_plan.py +65 -39
- {mloda_core → mloda/core}/prepare/graph/build_graph.py +6 -6
- {mloda_core → mloda/core}/prepare/graph/graph.py +1 -1
- {mloda_core → mloda/core}/prepare/graph/properties.py +5 -5
- {mloda_core → mloda/core}/prepare/identify_feature_group.py +12 -14
- {mloda_core → mloda/core}/prepare/joinstep_collection.py +3 -3
- {mloda_core → mloda/core}/prepare/resolve_compute_frameworks.py +6 -6
- {mloda_core → mloda/core}/prepare/resolve_graph.py +11 -11
- {mloda_core → mloda/core}/prepare/resolve_links.py +31 -40
- mloda/core/prepare/validators/resolve_link_validator.py +32 -0
- mloda/core/runtime/compute_framework_executor.py +271 -0
- mloda/core/runtime/data_lifecycle_manager.py +160 -0
- mloda/core/runtime/flight/__init__.py +0 -0
- {mloda_core → mloda/core}/runtime/flight/runner_flight_server.py +1 -1
- mloda/core/runtime/run.py +317 -0
- mloda/core/runtime/worker/__init__.py +0 -0
- {mloda_core → mloda/core}/runtime/worker/multiprocessing_worker.py +15 -10
- {mloda_core → mloda/core}/runtime/worker/thread_worker.py +2 -2
- mloda/core/runtime/worker_manager.py +96 -0
- mloda/provider/__init__.py +101 -0
- mloda/steward/__init__.py +25 -0
- mloda/user/__init__.py +57 -0
- {mloda-0.3.2.dist-info → mloda-0.4.0.dist-info}/METADATA +18 -22
- mloda-0.4.0.dist-info/RECORD +248 -0
- {mloda-0.3.2.dist-info → mloda-0.4.0.dist-info}/top_level.txt +1 -1
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_framework.py +15 -13
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_merge_engine.py +3 -3
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_framework.py +12 -10
- mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/pandas/dataframe.py +18 -16
- mloda_plugins/compute_framework/base_implementations/pandas/pandas_filter_engine.py +36 -13
- mloda_plugins/compute_framework/base_implementations/pandas/pandas_merge_engine.py +7 -7
- mloda_plugins/compute_framework/base_implementations/pandas/pandaspyarrowtransformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/polars/dataframe.py +16 -14
- mloda_plugins/compute_framework/base_implementations/polars/lazy_dataframe.py +13 -12
- mloda_plugins/compute_framework/base_implementations/polars/polars_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/polars/polars_lazy_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/polars/polars_merge_engine.py +3 -3
- mloda_plugins/compute_framework/base_implementations/polars/polars_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_merge_engine.py +3 -3
- mloda_plugins/compute_framework/base_implementations/pyarrow/table.py +12 -10
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_framework.py +11 -9
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_merge_engine.py +3 -3
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/spark/spark_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/spark/spark_framework.py +17 -15
- mloda_plugins/compute_framework/base_implementations/spark/spark_merge_engine.py +3 -3
- mloda_plugins/compute_framework/base_implementations/spark/spark_pyarrow_transformer.py +1 -1
- mloda_plugins/config/feature/loader.py +2 -2
- mloda_plugins/feature_group/experimental/aggregated_feature_group/base.py +45 -62
- mloda_plugins/feature_group/experimental/aggregated_feature_group/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/aggregated_feature_group/polars_lazy.py +2 -2
- mloda_plugins/feature_group/experimental/aggregated_feature_group/pyarrow.py +2 -2
- mloda_plugins/feature_group/experimental/clustering/base.py +69 -97
- mloda_plugins/feature_group/experimental/clustering/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/data_quality/missing_value/base.py +58 -79
- mloda_plugins/feature_group/experimental/data_quality/missing_value/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/data_quality/missing_value/pyarrow.py +2 -2
- mloda_plugins/feature_group/experimental/data_quality/missing_value/python_dict.py +2 -2
- mloda_plugins/feature_group/experimental/default_options_key.py +16 -19
- mloda_plugins/feature_group/experimental/dimensionality_reduction/base.py +80 -94
- mloda_plugins/feature_group/experimental/dimensionality_reduction/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/dynamic_feature_group_factory/dynamic_feature_group_factory.py +24 -24
- mloda_plugins/feature_group/experimental/forecasting/base.py +106 -104
- mloda_plugins/feature_group/experimental/forecasting/forecasting_artifact.py +2 -2
- mloda_plugins/feature_group/experimental/forecasting/pandas.py +15 -15
- mloda_plugins/feature_group/experimental/geo_distance/base.py +50 -42
- mloda_plugins/feature_group/experimental/geo_distance/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/llm/cli.py +4 -4
- mloda_plugins/feature_group/experimental/llm/cli_features/refactor_git_cached.py +19 -19
- mloda_plugins/feature_group/experimental/llm/installed_packages_feature_group.py +8 -8
- mloda_plugins/feature_group/experimental/llm/list_directory_feature_group.py +5 -5
- mloda_plugins/feature_group/experimental/llm/llm_api/claude.py +3 -3
- mloda_plugins/feature_group/experimental/llm/llm_api/gemini.py +3 -3
- mloda_plugins/feature_group/experimental/llm/llm_api/llm_base_request.py +5 -5
- mloda_plugins/feature_group/experimental/llm/llm_api/openai.py +3 -3
- mloda_plugins/feature_group/experimental/llm/llm_api/request_loop.py +6 -6
- mloda_plugins/feature_group/experimental/llm/llm_file_selector.py +10 -10
- mloda_plugins/feature_group/experimental/llm/tools/tool_collection.py +1 -1
- mloda_plugins/feature_group/experimental/node_centrality/base.py +46 -72
- mloda_plugins/feature_group/experimental/node_centrality/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/sklearn/encoding/base.py +51 -51
- mloda_plugins/feature_group/experimental/sklearn/encoding/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/sklearn/pipeline/base.py +52 -39
- mloda_plugins/feature_group/experimental/sklearn/pipeline/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/sklearn/scaling/base.py +44 -58
- mloda_plugins/feature_group/experimental/sklearn/scaling/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/sklearn/sklearn_artifact.py +2 -2
- mloda_plugins/feature_group/experimental/source_input_feature.py +15 -15
- mloda_plugins/feature_group/experimental/text_cleaning/base.py +38 -61
- mloda_plugins/feature_group/experimental/text_cleaning/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/text_cleaning/python_dict.py +2 -2
- mloda_plugins/feature_group/experimental/time_window/base.py +106 -93
- mloda_plugins/feature_group/experimental/time_window/pandas.py +13 -13
- mloda_plugins/feature_group/experimental/time_window/pyarrow.py +12 -12
- mloda_plugins/feature_group/input_data/api_data/api_data.py +9 -11
- mloda_plugins/feature_group/input_data/read_context_files.py +7 -7
- mloda_plugins/feature_group/input_data/read_db.py +7 -9
- mloda_plugins/feature_group/input_data/read_db_feature.py +4 -4
- mloda_plugins/feature_group/input_data/read_dbs/sqlite.py +23 -13
- mloda_plugins/feature_group/input_data/read_file.py +8 -8
- mloda_plugins/feature_group/input_data/read_file_feature.py +4 -4
- mloda_plugins/feature_group/input_data/read_files/csv.py +6 -6
- mloda_plugins/feature_group/input_data/read_files/feather.py +5 -5
- mloda_plugins/feature_group/input_data/read_files/json.py +5 -5
- mloda_plugins/feature_group/input_data/read_files/orc.py +5 -5
- mloda_plugins/feature_group/input_data/read_files/parquet.py +5 -5
- mloda_plugins/feature_group/input_data/read_files/text_file_reader.py +5 -5
- mloda_plugins/function_extender/base_implementations/otel/otel_extender.py +4 -4
- mloda-0.3.2.dist-info/RECORD +0 -230
- mloda_core/abstract_plugins/components/link.py +0 -286
- mloda_core/abstract_plugins/function_extender.py +0 -34
- mloda_core/runtime/run.py +0 -617
- {mloda_core → mloda/core}/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/domain.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/feature_chainer/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/feature_name.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/base_transformer.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/hashable_dict.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/index/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/index/index.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/creator/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/match_data/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/merge/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/plugin_option/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/utils.py +0 -0
- {mloda_core/abstract_plugins/plugin_loader → mloda/core/abstract_plugins/components/validators}/__init__.py +0 -0
- {mloda_core/api → mloda/core/abstract_plugins/plugin_loader}/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/plugin_loader/plugin_loader.py +0 -0
- {mloda_core/api/prepare → mloda/core/api}/__init__.py +0 -0
- {mloda_core/core → mloda/core/api/prepare}/__init__.py +0 -0
- {mloda_core/core/step → mloda/core/core}/__init__.py +0 -0
- {mloda_core/filter → mloda/core/core/step}/__init__.py +0 -0
- {mloda_core/prepare → mloda/core/filter}/__init__.py +0 -0
- {mloda_core → mloda/core}/filter/filter_parameter.py +0 -0
- {mloda_core/prepare/graph → mloda/core/prepare}/__init__.py +0 -0
- {mloda_core/runtime → mloda/core/prepare/graph}/__init__.py +0 -0
- {mloda_core/runtime/flight → mloda/core/prepare/validators}/__init__.py +0 -0
- {mloda_core/runtime/worker → mloda/core/runtime}/__init__.py +0 -0
- {mloda_core → mloda/core}/runtime/flight/flight_server.py +0 -0
- {mloda-0.3.2.dist-info → mloda-0.4.0.dist-info}/WHEEL +0 -0
- {mloda-0.3.2.dist-info → mloda-0.4.0.dist-info}/entry_points.txt +0 -0
- {mloda-0.3.2.dist-info → mloda-0.4.0.dist-info}/licenses/LICENSE.TXT +0 -0
- {mloda-0.3.2.dist-info → mloda-0.4.0.dist-info}/licenses/NOTICE.md +0 -0
mloda/__init__.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from mloda.core.api.request import mlodaAPI as API
|
|
2
|
+
from mloda.core.abstract_plugins.components.feature import Feature
|
|
3
|
+
from mloda.core.abstract_plugins.components.options import Options
|
|
4
|
+
from mloda.core.abstract_plugins.feature_group import FeatureGroup as FeatureGroup
|
|
5
|
+
from mloda.core.abstract_plugins.compute_framework import ComputeFramework as ComputeFramework
|
|
6
|
+
|
|
7
|
+
# Module-level API alias and function for `import mloda; mloda.API(...)` pattern
|
|
8
|
+
run_all = API.run_all
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
"API",
|
|
12
|
+
"run_all",
|
|
13
|
+
"Feature",
|
|
14
|
+
"Options",
|
|
15
|
+
"FeatureGroup",
|
|
16
|
+
"ComputeFramework",
|
|
17
|
+
]
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
from abc import ABC
|
|
2
2
|
from typing import Any, Optional, final
|
|
3
3
|
|
|
4
|
-
from
|
|
5
|
-
from
|
|
4
|
+
from mloda.core.abstract_plugins.components.feature_set import FeatureSet
|
|
5
|
+
from mloda.core.abstract_plugins.components.options import Options
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
class BaseArtifact(ABC):
|
|
@@ -38,6 +38,19 @@ class BaseValidator(ABC):
|
|
|
38
38
|
|
|
39
39
|
@abstractmethod
|
|
40
40
|
def validate(self, data: Any) -> Optional[bool]:
|
|
41
|
+
"""
|
|
42
|
+
Validate the given data against the validation rules.
|
|
43
|
+
|
|
44
|
+
Subclasses must implement this method with their specific validation logic.
|
|
45
|
+
|
|
46
|
+
Args:
|
|
47
|
+
data: The data to validate.
|
|
48
|
+
|
|
49
|
+
Returns:
|
|
50
|
+
None: No validation needed/not applicable (neutral - passes by default)
|
|
51
|
+
True: Validation explicitly passed
|
|
52
|
+
False: Validation failed
|
|
53
|
+
"""
|
|
41
54
|
pass
|
|
42
55
|
|
|
43
56
|
def handle_log_level(self, _error: str, _exception: Exception) -> None:
|
|
@@ -95,6 +95,45 @@ class DataType(Enum):
|
|
|
95
95
|
else:
|
|
96
96
|
raise ValueError(f"Unsupported DataType: {data_type}")
|
|
97
97
|
|
|
98
|
+
@classmethod
|
|
99
|
+
def from_arrow_type(cls, arrow_type: pa.DataType) -> "DataType":
|
|
100
|
+
"""
|
|
101
|
+
Converts a PyArrow DataType to the custom DataType enum.
|
|
102
|
+
|
|
103
|
+
Args:
|
|
104
|
+
arrow_type (pa.DataType): The PyArrow DataType to convert.
|
|
105
|
+
|
|
106
|
+
Returns:
|
|
107
|
+
DataType: The corresponding DataType enum member.
|
|
108
|
+
|
|
109
|
+
Raises:
|
|
110
|
+
ValueError: If the arrow_type is not supported.
|
|
111
|
+
"""
|
|
112
|
+
if pa.types.is_int32(arrow_type):
|
|
113
|
+
return cls.INT32
|
|
114
|
+
elif pa.types.is_int64(arrow_type):
|
|
115
|
+
return cls.INT64
|
|
116
|
+
elif pa.types.is_float32(arrow_type):
|
|
117
|
+
return cls.FLOAT
|
|
118
|
+
elif pa.types.is_float64(arrow_type):
|
|
119
|
+
return cls.DOUBLE
|
|
120
|
+
elif pa.types.is_boolean(arrow_type):
|
|
121
|
+
return cls.BOOLEAN
|
|
122
|
+
elif pa.types.is_string(arrow_type) or pa.types.is_large_string(arrow_type):
|
|
123
|
+
return cls.STRING
|
|
124
|
+
elif pa.types.is_binary(arrow_type) or pa.types.is_large_binary(arrow_type):
|
|
125
|
+
return cls.BINARY
|
|
126
|
+
elif pa.types.is_date32(arrow_type):
|
|
127
|
+
return cls.DATE
|
|
128
|
+
elif pa.types.is_timestamp(arrow_type):
|
|
129
|
+
if arrow_type.unit == "ms":
|
|
130
|
+
return cls.TIMESTAMP_MILLIS
|
|
131
|
+
elif arrow_type.unit == "us":
|
|
132
|
+
return cls.TIMESTAMP_MICROS
|
|
133
|
+
elif pa.types.is_decimal(arrow_type):
|
|
134
|
+
return cls.DECIMAL
|
|
135
|
+
raise ValueError(f"Unsupported PyArrow type: {arrow_type}")
|
|
136
|
+
|
|
98
137
|
@classmethod
|
|
99
138
|
def infer_arrow_type(cls, value: Any) -> pa.DataType:
|
|
100
139
|
"""
|
|
@@ -3,15 +3,16 @@ from __future__ import annotations
|
|
|
3
3
|
import copy
|
|
4
4
|
from typing import Any, Dict, Optional, Set, Type, Union
|
|
5
5
|
from uuid import UUID, uuid4
|
|
6
|
-
from
|
|
7
|
-
|
|
8
|
-
from
|
|
9
|
-
from
|
|
10
|
-
from
|
|
11
|
-
from
|
|
12
|
-
from
|
|
13
|
-
from
|
|
14
|
-
from
|
|
6
|
+
from mloda.core.abstract_plugins.components.data_types import DataType
|
|
7
|
+
|
|
8
|
+
from mloda.core.abstract_plugins.components.domain import Domain
|
|
9
|
+
from mloda.core.abstract_plugins.components.feature_name import FeatureName
|
|
10
|
+
from mloda.core.abstract_plugins.components.index.index import Index
|
|
11
|
+
from mloda.core.abstract_plugins.components.link import Link
|
|
12
|
+
from mloda.core.abstract_plugins.compute_framework import ComputeFramework
|
|
13
|
+
from mloda.core.abstract_plugins.components.options import Options
|
|
14
|
+
from mloda.core.abstract_plugins.components.utils import get_all_subclasses
|
|
15
|
+
from mloda.core.abstract_plugins.components.validators.feature_validator import FeatureValidator
|
|
15
16
|
from mloda_plugins.feature_group.experimental.default_options_key import DefaultOptionKeys
|
|
16
17
|
|
|
17
18
|
|
|
@@ -22,7 +23,7 @@ class Feature:
|
|
|
22
23
|
name (FeatureName): The name of the feature.
|
|
23
24
|
options (Options): The options associated with the feature.
|
|
24
25
|
domain (Optional[Domain]): The domain of the feature.
|
|
25
|
-
compute_frameworks (Optional[Set[Type[
|
|
26
|
+
compute_frameworks (Optional[Set[Type[ComputeFramework]]]): The compute frameworks supported by the feature.
|
|
26
27
|
data_type (Optional[DataType]): The data type of the feature.
|
|
27
28
|
initial_requested_data (bool): Whether the data was initially requested.
|
|
28
29
|
link (Optional[Link]): The link associated with the feature.
|
|
@@ -163,9 +164,23 @@ class Feature:
|
|
|
163
164
|
return hash((self.name, self.options, self.domain, compute_frameworks_hashable, self.data_type, child_options))
|
|
164
165
|
|
|
165
166
|
def is_different_data_type(self, other: Feature) -> bool:
|
|
166
|
-
return self.name == other.
|
|
167
|
+
return self.name == other.name and self.data_type != other.data_type
|
|
167
168
|
|
|
168
169
|
def has_similarity_properties(self) -> int:
|
|
170
|
+
"""Hash for grouping features by compute framework, options, and data type.
|
|
171
|
+
|
|
172
|
+
When data_type is None, it's excluded from the hash so None-typed features
|
|
173
|
+
can be grouped with any typed features (handled by grouping logic).
|
|
174
|
+
"""
|
|
175
|
+
compute_frameworks_hashable = (
|
|
176
|
+
frozenset(self.compute_frameworks) if self.compute_frameworks is not None else None
|
|
177
|
+
)
|
|
178
|
+
if self.data_type is not None:
|
|
179
|
+
return hash((self.options, compute_frameworks_hashable, self.data_type))
|
|
180
|
+
return hash((self.options, compute_frameworks_hashable))
|
|
181
|
+
|
|
182
|
+
def base_similarity_properties(self) -> int:
|
|
183
|
+
"""Base hash excluding data_type - used for lenient grouping of None-typed features."""
|
|
169
184
|
compute_frameworks_hashable = (
|
|
170
185
|
frozenset(self.compute_frameworks) if self.compute_frameworks is not None else None
|
|
171
186
|
)
|
|
@@ -180,21 +195,15 @@ class Feature:
|
|
|
180
195
|
|
|
181
196
|
def _set_compute_framework(
|
|
182
197
|
self, compute_framework: Optional[str], compute_framework_options: Optional[str]
|
|
183
|
-
) -> Optional[Type[
|
|
184
|
-
if compute_framework
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
elif compute_framework_options:
|
|
194
|
-
for subclass in subclasses_compute_frameworks:
|
|
195
|
-
if compute_framework_options == subclass.get_class_name():
|
|
196
|
-
return subclass
|
|
197
|
-
raise ValueError(f"Compute framework via options {compute_framework_options} not found.")
|
|
198
|
+
) -> Optional[Type[ComputeFramework]]:
|
|
199
|
+
if compute_framework:
|
|
200
|
+
return FeatureValidator.validate_and_resolve_compute_framework(
|
|
201
|
+
compute_framework, get_all_subclasses(ComputeFramework), "parameter"
|
|
202
|
+
)
|
|
203
|
+
elif compute_framework_options:
|
|
204
|
+
return FeatureValidator.validate_and_resolve_compute_framework(
|
|
205
|
+
compute_framework_options, get_all_subclasses(ComputeFramework), "options"
|
|
206
|
+
)
|
|
198
207
|
return None
|
|
199
208
|
|
|
200
209
|
def _set_uuid(self, uuid: UUID) -> Feature:
|
|
@@ -202,17 +211,14 @@ class Feature:
|
|
|
202
211
|
self.uuid = uuid
|
|
203
212
|
return self
|
|
204
213
|
|
|
205
|
-
def _set_compute_frameworks(self, compute_frameworks: Set[Type[
|
|
214
|
+
def _set_compute_frameworks(self, compute_frameworks: Set[Type[ComputeFramework]]) -> Feature:
|
|
206
215
|
# use only for testing
|
|
207
216
|
self.compute_frameworks = compute_frameworks
|
|
208
217
|
return self
|
|
209
218
|
|
|
210
|
-
def get_compute_framework(self) -> Type[
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
f"Feature {self.name} does not have any compute framework. This function can only be called when the frameworks were resolved."
|
|
214
|
-
)
|
|
215
|
-
|
|
219
|
+
def get_compute_framework(self) -> Type[ComputeFramework]:
|
|
220
|
+
FeatureValidator.validate_compute_frameworks_resolved(self.compute_frameworks, str(self.name))
|
|
221
|
+
assert self.compute_frameworks is not None
|
|
216
222
|
return next(iter(self.compute_frameworks))
|
|
217
223
|
|
|
218
224
|
def get_name(self) -> str:
|
{mloda_core → mloda/core}/abstract_plugins/components/feature_chainer/feature_chain_parser.py
RENAMED
|
@@ -7,9 +7,9 @@ from __future__ import annotations
|
|
|
7
7
|
import re
|
|
8
8
|
from typing import Any, Dict, List, Optional, Set, Tuple
|
|
9
9
|
|
|
10
|
-
from
|
|
11
|
-
from
|
|
12
|
-
from
|
|
10
|
+
from mloda.core.abstract_plugins.components.feature import Feature
|
|
11
|
+
from mloda.core.abstract_plugins.components.feature_name import FeatureName
|
|
12
|
+
from mloda.core.abstract_plugins.components.options import Options
|
|
13
13
|
from mloda_plugins.feature_group.experimental.default_options_key import DefaultOptionKeys
|
|
14
14
|
|
|
15
15
|
# Separator constants for feature name parsing
|
|
@@ -103,23 +103,23 @@ class FeatureChainParser:
|
|
|
103
103
|
@classmethod
|
|
104
104
|
def _has_default_value(cls, property_value: Any) -> bool:
|
|
105
105
|
"""Check if property has a default value defined."""
|
|
106
|
-
return isinstance(property_value, dict) and DefaultOptionKeys.
|
|
106
|
+
return isinstance(property_value, dict) and DefaultOptionKeys.default in property_value
|
|
107
107
|
|
|
108
108
|
@classmethod
|
|
109
109
|
def _is_context_parameter(cls, property_value: Any) -> bool:
|
|
110
110
|
"""Check if property is marked as context parameter in mapping."""
|
|
111
|
-
return isinstance(property_value, dict) and property_value.get(DefaultOptionKeys.
|
|
111
|
+
return isinstance(property_value, dict) and property_value.get(DefaultOptionKeys.context, False)
|
|
112
112
|
|
|
113
113
|
@classmethod
|
|
114
114
|
def _is_strict_validation(cls, property_value: Any) -> bool:
|
|
115
115
|
"""Check if property requires strict validation (values must be in mapping)."""
|
|
116
|
-
return isinstance(property_value, dict) and property_value.get(DefaultOptionKeys.
|
|
116
|
+
return isinstance(property_value, dict) and property_value.get(DefaultOptionKeys.strict_validation, False)
|
|
117
117
|
|
|
118
118
|
@classmethod
|
|
119
119
|
def _get_validation_function(cls, property_value: Any) -> Any:
|
|
120
120
|
"""Get validation function from property mapping if present."""
|
|
121
121
|
if isinstance(property_value, dict):
|
|
122
|
-
return property_value.get(DefaultOptionKeys.
|
|
122
|
+
return property_value.get(DefaultOptionKeys.validation_function, None)
|
|
123
123
|
return None
|
|
124
124
|
|
|
125
125
|
@classmethod
|
|
@@ -174,13 +174,13 @@ class FeatureChainParser:
|
|
|
174
174
|
)
|
|
175
175
|
|
|
176
176
|
if property_name in options.group:
|
|
177
|
-
return DefaultOptionKeys.
|
|
177
|
+
return DefaultOptionKeys.group.value
|
|
178
178
|
elif property_name in options.context:
|
|
179
|
-
return DefaultOptionKeys.
|
|
179
|
+
return DefaultOptionKeys.context.value
|
|
180
180
|
elif cls._is_context_parameter(property_value):
|
|
181
|
-
return DefaultOptionKeys.
|
|
181
|
+
return DefaultOptionKeys.context.value
|
|
182
182
|
else:
|
|
183
|
-
return DefaultOptionKeys.
|
|
183
|
+
return DefaultOptionKeys.group.value
|
|
184
184
|
|
|
185
185
|
@classmethod
|
|
186
186
|
def _extract_property_values(cls, property_value: Any) -> Any:
|
|
@@ -188,11 +188,11 @@ class FeatureChainParser:
|
|
|
188
188
|
if isinstance(property_value, dict):
|
|
189
189
|
# Remove metadata keys, keep only the actual valid values
|
|
190
190
|
metadata_keys = {
|
|
191
|
-
DefaultOptionKeys.
|
|
192
|
-
DefaultOptionKeys.
|
|
193
|
-
DefaultOptionKeys.
|
|
194
|
-
DefaultOptionKeys.
|
|
195
|
-
DefaultOptionKeys.
|
|
191
|
+
DefaultOptionKeys.default,
|
|
192
|
+
DefaultOptionKeys.context,
|
|
193
|
+
DefaultOptionKeys.group,
|
|
194
|
+
DefaultOptionKeys.strict_validation,
|
|
195
|
+
DefaultOptionKeys.validation_function,
|
|
196
196
|
}
|
|
197
197
|
return {k: v for k, v in property_value.items() if k not in metadata_keys}
|
|
198
198
|
return property_value
|
|
@@ -312,16 +312,16 @@ class FeatureChainParser:
|
|
|
312
312
|
return False
|
|
313
313
|
|
|
314
314
|
@classmethod
|
|
315
|
-
def
|
|
315
|
+
def extract_in_feature(cls, feature_name: str, suffix_pattern: str) -> str:
|
|
316
316
|
"""
|
|
317
|
-
Extract the
|
|
317
|
+
Extract the in_feature from a feature name based on the suffix pattern.
|
|
318
318
|
|
|
319
319
|
Args:
|
|
320
320
|
feature_name: The feature name to parse
|
|
321
321
|
suffix_pattern: Regex pattern for the suffix (e.g., r"^.+__([w]+)$")
|
|
322
322
|
|
|
323
323
|
Returns:
|
|
324
|
-
The
|
|
324
|
+
The in_feature part of the name
|
|
325
325
|
|
|
326
326
|
Raises:
|
|
327
327
|
ValueError: If the feature name doesn't match the expected pattern
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Mixin class providing default implementations for feature chain parsing.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import Any, Dict, List, Optional, Set, cast
|
|
8
|
+
|
|
9
|
+
from mloda.core.abstract_plugins.components.feature import Feature
|
|
10
|
+
from mloda.core.abstract_plugins.components.feature_name import FeatureName
|
|
11
|
+
from mloda.core.abstract_plugins.components.options import Options
|
|
12
|
+
from mloda.core.abstract_plugins.components.feature_chainer.feature_chain_parser import (
|
|
13
|
+
FeatureChainParser,
|
|
14
|
+
CHAIN_SEPARATOR,
|
|
15
|
+
INPUT_SEPARATOR,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class FeatureChainParserMixin:
    """
    Mixin providing default implementations for feature chain parsing.

    Subclasses should define:
    - PREFIX_PATTERN or SUFFIX_PATTERN: Regex patterns for matching
    - PROPERTY_MAPPING: Property validation mapping
    - IN_FEATURE_SEPARATOR: Optional custom separator (default: INPUT_SEPARATOR)
    - MIN_IN_FEATURES: Optional minimum in_feature count (default: 1)
    - MAX_IN_FEATURES: Optional maximum in_feature count (default: None)
    """

    # Separator used to split the parsed in_feature part into several names.
    IN_FEATURE_SEPARATOR: str = INPUT_SEPARATOR
    # Inclusive lower bound enforced by _validate_in_feature_count().
    MIN_IN_FEATURES: int = 1
    # Inclusive upper bound; None disables the upper-bound check.
    MAX_IN_FEATURES: Optional[int] = None

    @classmethod
    def _validate_string_match(cls, _feature_name: str, _operation_config: str, _in_feature: str) -> bool:
        """
        Hook for subclasses to provide custom validation for string-based matches.

        The default implementation accepts every match.

        Args:
            _feature_name: The full feature name
            _operation_config: The parsed operation configuration
            _in_feature: The parsed in_feature

        Returns:
            True if the match is valid, False otherwise
        """
        return True

    def input_features(self, options: Options, feature_name: FeatureName) -> Optional[Set[Feature]]:
        """
        Parse input features from feature name or options.

        First attempts to parse in_features from the feature name string.
        Falls back to options.get_in_features() if string parsing yields no
        operation config or no (non-empty) in_feature part.

        Args:
            options: Options containing configuration
            feature_name: Feature name to parse (a plain string is tolerated as well)

        Returns:
            Set of Feature objects representing input features. This
            implementation never returns None itself; the Optional return
            type is kept so the signature stays unchanged for callers.

        Raises:
            ValueError: If in_feature constraints are violated
        """
        _feature_name = feature_name.name if isinstance(feature_name, FeatureName) else feature_name

        operation_config, in_feature = FeatureChainParser.parse_feature_name(
            _feature_name, self._get_prefix_patterns(), CHAIN_SEPARATOR
        )

        # String-based parsing succeeded (a truthy in_feature is neither None nor "").
        if operation_config is not None and in_feature:
            names = in_feature.split(self.IN_FEATURE_SEPARATOR)
            self._validate_in_feature_count(names, _feature_name)
            return {Feature(name) for name in names}

        # Configuration-based fallback using get_in_features()
        configured = options.get_in_features()
        self._validate_in_feature_count(list(configured), _feature_name)
        return set(configured)

    def _validate_in_feature_count(self, in_features: List[Any], feature_name: str) -> None:
        """
        Validate that in_feature count meets min/max constraints.

        Args:
            in_features: List of in_features (strings or Feature objects)
            feature_name: Original feature name for error messages

        Raises:
            ValueError: If the count is below MIN_IN_FEATURES, or above
                MAX_IN_FEATURES when that bound is set
        """
        count = len(in_features)

        if count < self.MIN_IN_FEATURES:
            raise ValueError(
                f"Feature '{feature_name}' requires at least {self.MIN_IN_FEATURES} in_feature(s), but found {count}"
            )

        if self.MAX_IN_FEATURES is not None and count > self.MAX_IN_FEATURES:
            raise ValueError(
                f"Feature '{feature_name}' allows at most {self.MAX_IN_FEATURES} in_feature(s), but found {count}"
            )

    @classmethod
    def match_feature_group_criteria(
        cls,
        feature_name: str | FeatureName,
        options: Options,
        _data_access_collection: Any = None,
    ) -> bool:
        """
        Match feature against criteria using pattern-based or config-based parsing.

        Delegates to FeatureChainParser.match_configuration_feature_chain_parser() and,
        when the name also parses as a string-based feature, calls the
        _validate_string_match() hook for custom validation.

        Args:
            feature_name: Feature name to match
            options: Options containing configuration
            _data_access_collection: Optional data access collection (unused)

        Returns:
            True if feature matches criteria, False otherwise
        """
        _feature_name = feature_name.name if isinstance(feature_name, FeatureName) else feature_name

        prefix_patterns = cls._get_prefix_patterns()
        property_mapping = cls._get_property_mapping()

        # Use the unified parser for basic matching
        matched = FeatureChainParser.match_configuration_feature_chain_parser(
            _feature_name,
            options,
            property_mapping=property_mapping,
            prefix_patterns=prefix_patterns,
        )
        if not matched:
            # Nothing to refine: hand back the parser's own (falsy) verdict.
            return matched

        operation_config, source_feature = FeatureChainParser.parse_feature_name(
            _feature_name, prefix_patterns, CHAIN_SEPARATOR
        )

        # Only string-based features reach the hook. The check is deliberately
        # "is not None" (not truthiness) so an empty in_feature part is still
        # passed to the hook rather than silently accepted.
        is_string_based = operation_config is not None and source_feature is not None
        if is_string_based and not cls._validate_string_match(_feature_name, operation_config, source_feature):
            return False

        return matched

    @classmethod
    def _get_prefix_patterns(cls) -> List[str]:
        """Get prefix/suffix patterns from class attributes (prefix first, then suffix)."""
        patterns: List[str] = []
        for attribute in ("PREFIX_PATTERN", "SUFFIX_PATTERN"):
            # Both attributes are optional on subclasses; skip the missing ones.
            if hasattr(cls, attribute):
                patterns.append(getattr(cls, attribute))
        return patterns

    @classmethod
    def _get_property_mapping(cls) -> Optional[Dict[str, Any]]:
        """Get property mapping from class attribute, or None if the class defines none."""
        return cast(Optional[Dict[str, Any]], getattr(cls, "PROPERTY_MAPPING", None))

    @classmethod
    def _extract_source_features(cls, feature: Feature) -> List[str]:
        """
        Extract source features from a feature.

        Tries string-based parsing first, falls back to configuration-based.
        Uses class attribute IN_FEATURE_SEPARATOR and the patterns returned
        by _get_prefix_patterns().

        Args:
            feature: The feature to extract source features from

        Returns:
            List of source feature names
        """
        feature_name = feature.get_name()

        operation_config, source_feature = FeatureChainParser.parse_feature_name(
            feature_name, cls._get_prefix_patterns(), CHAIN_SEPARATOR
        )

        # String-based parsing succeeded (a truthy source_feature is neither None nor "").
        if operation_config is not None and source_feature:
            return source_feature.split(cls.IN_FEATURE_SEPARATOR)

        # Configuration-based fallback using get_in_features()
        return [in_feature.get_name() for in_feature in feature.options.get_in_features()]
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
from typing import Generator, List, Optional, Set, Union
|
|
3
3
|
from uuid import UUID
|
|
4
|
-
from
|
|
5
|
-
from
|
|
4
|
+
from mloda.core.abstract_plugins.components.feature import Feature
|
|
5
|
+
from mloda.core.abstract_plugins.components.options import Options
|
|
6
6
|
from mloda_plugins.feature_group.experimental.default_options_key import DefaultOptionKeys
|
|
7
7
|
|
|
8
8
|
|
|
@@ -56,7 +56,7 @@ class Features:
|
|
|
56
56
|
|
|
57
57
|
Protected keys are determined dynamically by reading:
|
|
58
58
|
- in_features (always protected)
|
|
59
|
-
- Keys listed in feature_options.get(
|
|
59
|
+
- Keys listed in feature_options.get(feature_chainer_parser_key)
|
|
60
60
|
|
|
61
61
|
Args:
|
|
62
62
|
feature_options: Parent feature's options (will be updated)
|
|
@@ -67,8 +67,8 @@ class Features:
|
|
|
67
67
|
"""
|
|
68
68
|
# Get protected keys dynamically from the feature options
|
|
69
69
|
protected_keys = {DefaultOptionKeys.in_features}
|
|
70
|
-
if feature_options.get(DefaultOptionKeys.
|
|
71
|
-
protected_keys.update(feature_options.get(DefaultOptionKeys.
|
|
70
|
+
if feature_options.get(DefaultOptionKeys.feature_chainer_parser_key):
|
|
71
|
+
protected_keys.update(feature_options.get(DefaultOptionKeys.feature_chainer_parser_key))
|
|
72
72
|
|
|
73
73
|
# Check for conflicts in non-protected keys only
|
|
74
74
|
for key_child, value_child in child_options.items():
|
|
@@ -87,7 +87,7 @@ class Features:
|
|
|
87
87
|
)
|
|
88
88
|
|
|
89
89
|
# Merge child options into parent, excluding protected keys
|
|
90
|
-
# update_with_protected_keys will read
|
|
90
|
+
# update_with_protected_keys will read feature_chainer_parser_key dynamically
|
|
91
91
|
feature_options.update_with_protected_keys(child_options)
|
|
92
92
|
|
|
93
93
|
def check_duplicate_feature(self, feature: Feature) -> None:
|
|
@@ -25,11 +25,11 @@ class FeatureGroupVersion(ABC):
|
|
|
25
25
|
Returns a SHA-256 hash of the target class's source code.
|
|
26
26
|
"""
|
|
27
27
|
|
|
28
|
-
# Import
|
|
29
|
-
from
|
|
28
|
+
# Import FeatureGroup locally to avoid circular import.
|
|
29
|
+
from mloda.core.abstract_plugins.feature_group import FeatureGroup
|
|
30
30
|
|
|
31
|
-
if not issubclass(target_class,
|
|
32
|
-
raise ValueError(f"target_class must be a subclass of
|
|
31
|
+
if not issubclass(target_class, FeatureGroup):
|
|
32
|
+
raise ValueError(f"target_class must be a subclass of FeatureGroup: {target_class}")
|
|
33
33
|
|
|
34
34
|
source = inspect.getsource(target_class)
|
|
35
35
|
return hashlib.sha256(source.encode("utf-8")).hexdigest()
|
|
@@ -52,10 +52,10 @@ class FeatureGroupVersion(ABC):
|
|
|
52
52
|
- a SHA-256 hash of the target class's source code.
|
|
53
53
|
"""
|
|
54
54
|
|
|
55
|
-
# Import
|
|
56
|
-
from
|
|
55
|
+
# Import FeatureGroup locally to avoid circular import.
|
|
56
|
+
from mloda.core.abstract_plugins.feature_group import FeatureGroup
|
|
57
57
|
|
|
58
|
-
if not issubclass(target_class,
|
|
59
|
-
raise ValueError(f"target_class must be a subclass of
|
|
58
|
+
if not issubclass(target_class, FeatureGroup):
|
|
59
|
+
raise ValueError(f"target_class must be a subclass of FeatureGroup: {target_class}")
|
|
60
60
|
|
|
61
61
|
return f"{cls.mloda_version()}-{cls.module_name(target_class)}-{cls.class_source_hash(target_class)}"
|
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
from typing import Any, Optional, Set, Type
|
|
2
2
|
from uuid import UUID
|
|
3
3
|
|
|
4
|
-
from
|
|
5
|
-
from
|
|
6
|
-
from
|
|
7
|
-
from
|
|
8
|
-
from
|
|
4
|
+
from mloda.core.abstract_plugins.components.feature_name import FeatureName
|
|
5
|
+
from mloda.core.abstract_plugins.components.feature import Feature
|
|
6
|
+
from mloda.core.abstract_plugins.components.options import Options
|
|
7
|
+
from mloda.core.abstract_plugins.components.validators.feature_set_validator import FeatureSetValidator
|
|
8
|
+
from mloda.core.filter.filter_engine import BaseFilterEngine
|
|
9
|
+
from mloda.core.filter.single_filter import SingleFilter
|
|
9
10
|
|
|
10
11
|
|
|
11
12
|
class FeatureSet:
|
|
@@ -22,8 +23,8 @@ class FeatureSet:
|
|
|
22
23
|
self.filter_engine: Type[BaseFilterEngine] = BaseFilterEngine
|
|
23
24
|
|
|
24
25
|
def add_artifact_name(self) -> None:
|
|
25
|
-
|
|
26
|
-
|
|
26
|
+
FeatureSetValidator.validate_options_initialized(self.options, "add_artifact_name")
|
|
27
|
+
assert self.options is not None # Type narrowing for mypy
|
|
27
28
|
|
|
28
29
|
for feature_name in self.get_all_names():
|
|
29
30
|
if feature_name in self.options.keys():
|
|
@@ -72,35 +73,28 @@ class FeatureSet:
|
|
|
72
73
|
Prefer accessing options directly from individual features when possible.
|
|
73
74
|
Only use this when you need to ensure all features share the same option value.
|
|
74
75
|
"""
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
76
|
+
FeatureSetValidator.validate_options_initialized(self.options, "get_options_key")
|
|
77
|
+
assert self.options is not None # Type narrowing for mypy
|
|
78
|
+
FeatureSetValidator.validate_equal_options(self.features)
|
|
78
79
|
return self.options.get(key)
|
|
79
80
|
|
|
80
81
|
def validate_equal_options(self) -> None:
|
|
81
82
|
"""Checks if all features have the same options."""
|
|
82
|
-
|
|
83
|
-
for feature in self.features:
|
|
84
|
-
if _options:
|
|
85
|
-
if _options != feature.options:
|
|
86
|
-
raise ValueError("All features must have the same options.")
|
|
87
|
-
_options = feature.options
|
|
83
|
+
FeatureSetValidator.validate_equal_options(self.features)
|
|
88
84
|
|
|
89
85
|
def get_initial_requested_features(self) -> Set[FeatureName]:
|
|
90
86
|
return {feature.name for feature in self.features if feature.initial_requested_data}
|
|
91
87
|
|
|
92
88
|
def get_name_of_one_feature(self) -> FeatureName:
|
|
93
|
-
|
|
94
|
-
|
|
89
|
+
FeatureSetValidator.validate_feature_added(
|
|
90
|
+
self.name_of_one_feature.name if self.name_of_one_feature else None, "get_name_of_one_feature"
|
|
91
|
+
)
|
|
92
|
+
assert self.name_of_one_feature is not None # Type narrowing for mypy
|
|
95
93
|
return self.name_of_one_feature
|
|
96
94
|
|
|
97
95
|
def add_filters(self, single_filters: Set[SingleFilter]) -> None:
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
if not isinstance(single_filters, Set):
|
|
102
|
-
raise ValueError("Filters should be a set.")
|
|
103
|
-
|
|
96
|
+
FeatureSetValidator.validate_filters_not_set(self.filters)
|
|
97
|
+
FeatureSetValidator.validate_filters_is_set_type(single_filters)
|
|
104
98
|
self.filters = single_filters
|
|
105
99
|
|
|
106
100
|
def get_artifact(self, config: Options) -> Any:
|