mloda 0.3.3__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mloda/__init__.py +17 -0
- {mloda_core → mloda/core}/abstract_plugins/components/base_artifact.py +2 -2
- {mloda_core → mloda/core}/abstract_plugins/components/base_validator.py +13 -0
- {mloda_core → mloda/core}/abstract_plugins/components/data_access_collection.py +1 -1
- {mloda_core → mloda/core}/abstract_plugins/components/data_types.py +39 -0
- {mloda_core → mloda/core}/abstract_plugins/components/feature.py +39 -33
- {mloda_core → mloda/core}/abstract_plugins/components/feature_chainer/feature_chain_parser.py +19 -19
- mloda/core/abstract_plugins/components/feature_chainer/feature_chain_parser_mixin.py +197 -0
- {mloda_core → mloda/core}/abstract_plugins/components/feature_collection.py +6 -6
- {mloda_core → mloda/core}/abstract_plugins/components/feature_group_version.py +8 -8
- {mloda_core → mloda/core}/abstract_plugins/components/feature_set.py +18 -24
- {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/cfw_transformer.py +2 -2
- {mloda_core → mloda/core}/abstract_plugins/components/index/add_index_feature.py +4 -4
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/api_input_data.py +3 -3
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/api_input_data_collection.py +2 -2
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/base_api_data.py +1 -1
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/base_input_data.py +6 -6
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/creator/data_creator.py +3 -3
- mloda/core/abstract_plugins/components/link.py +437 -0
- {mloda_core → mloda/core}/abstract_plugins/components/match_data/match_data.py +3 -3
- {mloda_core → mloda/core}/abstract_plugins/components/merge/base_merge_engine.py +2 -2
- {mloda_core → mloda/core}/abstract_plugins/components/options.py +12 -36
- {mloda_core → mloda/core}/abstract_plugins/components/parallelization_modes.py +1 -1
- {mloda_core → mloda/core}/abstract_plugins/components/plugin_option/plugin_collector.py +14 -14
- mloda/core/abstract_plugins/components/validators/datatype_validator.py +96 -0
- mloda/core/abstract_plugins/components/validators/feature_set_validator.py +38 -0
- mloda/core/abstract_plugins/components/validators/feature_validator.py +23 -0
- mloda/core/abstract_plugins/components/validators/link_validator.py +79 -0
- mloda/core/abstract_plugins/components/validators/options_validator.py +57 -0
- mloda_core/abstract_plugins/compute_frame_work.py → mloda/core/abstract_plugins/compute_framework.py +46 -37
- mloda_core/abstract_plugins/abstract_feature_group.py → mloda/core/abstract_plugins/feature_group.py +56 -33
- mloda/core/abstract_plugins/function_extender.py +78 -0
- mloda/core/api/plugin_docs.py +220 -0
- mloda/core/api/plugin_info.py +32 -0
- {mloda_core → mloda/core}/api/prepare/setup_compute_framework.py +11 -11
- {mloda_core → mloda/core}/api/request.py +42 -33
- {mloda_core → mloda/core}/core/cfw_manager.py +8 -8
- {mloda_core → mloda/core}/core/engine.py +47 -46
- {mloda_core → mloda/core}/core/step/abstract_step.py +7 -7
- {mloda_core → mloda/core}/core/step/feature_group_step.py +12 -12
- {mloda_core → mloda/core}/core/step/join_step.py +14 -14
- {mloda_core → mloda/core}/core/step/transform_frame_work_step.py +16 -16
- {mloda_core → mloda/core}/filter/filter_engine.py +1 -1
- {mloda_core → mloda/core}/filter/filter_type_enum.py +1 -1
- {mloda_core → mloda/core}/filter/global_filter.py +23 -23
- {mloda_core → mloda/core}/filter/single_filter.py +6 -6
- {mloda_core → mloda/core}/prepare/accessible_plugins.py +16 -18
- {mloda_core → mloda/core}/prepare/execution_plan.py +65 -39
- {mloda_core → mloda/core}/prepare/graph/build_graph.py +6 -6
- {mloda_core → mloda/core}/prepare/graph/graph.py +1 -1
- {mloda_core → mloda/core}/prepare/graph/properties.py +5 -5
- {mloda_core → mloda/core}/prepare/identify_feature_group.py +12 -14
- {mloda_core → mloda/core}/prepare/joinstep_collection.py +3 -3
- {mloda_core → mloda/core}/prepare/resolve_compute_frameworks.py +6 -6
- {mloda_core → mloda/core}/prepare/resolve_graph.py +11 -11
- {mloda_core → mloda/core}/prepare/resolve_links.py +11 -31
- mloda/core/prepare/validators/resolve_link_validator.py +32 -0
- mloda/core/runtime/compute_framework_executor.py +271 -0
- mloda/core/runtime/data_lifecycle_manager.py +160 -0
- mloda/core/runtime/flight/__init__.py +0 -0
- {mloda_core → mloda/core}/runtime/flight/runner_flight_server.py +1 -1
- mloda/core/runtime/run.py +317 -0
- mloda/core/runtime/worker/__init__.py +0 -0
- {mloda_core → mloda/core}/runtime/worker/multiprocessing_worker.py +15 -10
- {mloda_core → mloda/core}/runtime/worker/thread_worker.py +2 -2
- mloda/core/runtime/worker_manager.py +96 -0
- mloda/provider/__init__.py +101 -0
- mloda/steward/__init__.py +25 -0
- mloda/user/__init__.py +57 -0
- {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/METADATA +18 -22
- mloda-0.4.0.dist-info/RECORD +248 -0
- {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/top_level.txt +1 -1
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_framework.py +15 -13
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_merge_engine.py +3 -3
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_framework.py +12 -10
- mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/pandas/dataframe.py +18 -16
- mloda_plugins/compute_framework/base_implementations/pandas/pandas_filter_engine.py +36 -13
- mloda_plugins/compute_framework/base_implementations/pandas/pandas_merge_engine.py +7 -7
- mloda_plugins/compute_framework/base_implementations/pandas/pandaspyarrowtransformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/polars/dataframe.py +16 -14
- mloda_plugins/compute_framework/base_implementations/polars/lazy_dataframe.py +13 -12
- mloda_plugins/compute_framework/base_implementations/polars/polars_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/polars/polars_lazy_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/polars/polars_merge_engine.py +3 -3
- mloda_plugins/compute_framework/base_implementations/polars/polars_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_merge_engine.py +3 -3
- mloda_plugins/compute_framework/base_implementations/pyarrow/table.py +12 -10
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_framework.py +11 -9
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_merge_engine.py +3 -3
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/spark/spark_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/spark/spark_framework.py +17 -15
- mloda_plugins/compute_framework/base_implementations/spark/spark_merge_engine.py +3 -3
- mloda_plugins/compute_framework/base_implementations/spark/spark_pyarrow_transformer.py +1 -1
- mloda_plugins/config/feature/loader.py +2 -2
- mloda_plugins/feature_group/experimental/aggregated_feature_group/base.py +45 -62
- mloda_plugins/feature_group/experimental/aggregated_feature_group/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/aggregated_feature_group/polars_lazy.py +2 -2
- mloda_plugins/feature_group/experimental/aggregated_feature_group/pyarrow.py +2 -2
- mloda_plugins/feature_group/experimental/clustering/base.py +69 -97
- mloda_plugins/feature_group/experimental/clustering/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/data_quality/missing_value/base.py +58 -79
- mloda_plugins/feature_group/experimental/data_quality/missing_value/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/data_quality/missing_value/pyarrow.py +2 -2
- mloda_plugins/feature_group/experimental/data_quality/missing_value/python_dict.py +2 -2
- mloda_plugins/feature_group/experimental/default_options_key.py +16 -19
- mloda_plugins/feature_group/experimental/dimensionality_reduction/base.py +80 -94
- mloda_plugins/feature_group/experimental/dimensionality_reduction/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/dynamic_feature_group_factory/dynamic_feature_group_factory.py +24 -24
- mloda_plugins/feature_group/experimental/forecasting/base.py +106 -104
- mloda_plugins/feature_group/experimental/forecasting/forecasting_artifact.py +2 -2
- mloda_plugins/feature_group/experimental/forecasting/pandas.py +15 -15
- mloda_plugins/feature_group/experimental/geo_distance/base.py +50 -42
- mloda_plugins/feature_group/experimental/geo_distance/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/llm/cli.py +4 -4
- mloda_plugins/feature_group/experimental/llm/cli_features/refactor_git_cached.py +19 -19
- mloda_plugins/feature_group/experimental/llm/installed_packages_feature_group.py +8 -8
- mloda_plugins/feature_group/experimental/llm/list_directory_feature_group.py +5 -5
- mloda_plugins/feature_group/experimental/llm/llm_api/claude.py +3 -3
- mloda_plugins/feature_group/experimental/llm/llm_api/gemini.py +3 -3
- mloda_plugins/feature_group/experimental/llm/llm_api/llm_base_request.py +5 -5
- mloda_plugins/feature_group/experimental/llm/llm_api/openai.py +3 -3
- mloda_plugins/feature_group/experimental/llm/llm_api/request_loop.py +6 -6
- mloda_plugins/feature_group/experimental/llm/llm_file_selector.py +10 -10
- mloda_plugins/feature_group/experimental/llm/tools/tool_collection.py +1 -1
- mloda_plugins/feature_group/experimental/node_centrality/base.py +46 -72
- mloda_plugins/feature_group/experimental/node_centrality/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/sklearn/encoding/base.py +51 -51
- mloda_plugins/feature_group/experimental/sklearn/encoding/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/sklearn/pipeline/base.py +52 -39
- mloda_plugins/feature_group/experimental/sklearn/pipeline/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/sklearn/scaling/base.py +44 -58
- mloda_plugins/feature_group/experimental/sklearn/scaling/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/sklearn/sklearn_artifact.py +2 -2
- mloda_plugins/feature_group/experimental/source_input_feature.py +15 -15
- mloda_plugins/feature_group/experimental/text_cleaning/base.py +38 -61
- mloda_plugins/feature_group/experimental/text_cleaning/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/text_cleaning/python_dict.py +2 -2
- mloda_plugins/feature_group/experimental/time_window/base.py +106 -93
- mloda_plugins/feature_group/experimental/time_window/pandas.py +13 -13
- mloda_plugins/feature_group/experimental/time_window/pyarrow.py +12 -12
- mloda_plugins/feature_group/input_data/api_data/api_data.py +9 -11
- mloda_plugins/feature_group/input_data/read_context_files.py +7 -7
- mloda_plugins/feature_group/input_data/read_db.py +7 -9
- mloda_plugins/feature_group/input_data/read_db_feature.py +4 -4
- mloda_plugins/feature_group/input_data/read_dbs/sqlite.py +23 -13
- mloda_plugins/feature_group/input_data/read_file.py +8 -8
- mloda_plugins/feature_group/input_data/read_file_feature.py +4 -4
- mloda_plugins/feature_group/input_data/read_files/csv.py +6 -6
- mloda_plugins/feature_group/input_data/read_files/feather.py +5 -5
- mloda_plugins/feature_group/input_data/read_files/json.py +5 -5
- mloda_plugins/feature_group/input_data/read_files/orc.py +5 -5
- mloda_plugins/feature_group/input_data/read_files/parquet.py +5 -5
- mloda_plugins/feature_group/input_data/read_files/text_file_reader.py +5 -5
- mloda_plugins/function_extender/base_implementations/otel/otel_extender.py +4 -4
- mloda-0.3.3.dist-info/RECORD +0 -230
- mloda_core/abstract_plugins/components/link.py +0 -286
- mloda_core/abstract_plugins/function_extender.py +0 -34
- mloda_core/runtime/run.py +0 -617
- {mloda_core → mloda/core}/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/domain.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/feature_chainer/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/feature_name.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/base_transformer.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/hashable_dict.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/index/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/index/index.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/creator/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/match_data/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/merge/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/plugin_option/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/utils.py +0 -0
- {mloda_core/abstract_plugins/plugin_loader → mloda/core/abstract_plugins/components/validators}/__init__.py +0 -0
- {mloda_core/api → mloda/core/abstract_plugins/plugin_loader}/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/plugin_loader/plugin_loader.py +0 -0
- {mloda_core/api/prepare → mloda/core/api}/__init__.py +0 -0
- {mloda_core/core → mloda/core/api/prepare}/__init__.py +0 -0
- {mloda_core/core/step → mloda/core/core}/__init__.py +0 -0
- {mloda_core/filter → mloda/core/core/step}/__init__.py +0 -0
- {mloda_core/prepare → mloda/core/filter}/__init__.py +0 -0
- {mloda_core → mloda/core}/filter/filter_parameter.py +0 -0
- {mloda_core/prepare/graph → mloda/core/prepare}/__init__.py +0 -0
- {mloda_core/runtime → mloda/core/prepare/graph}/__init__.py +0 -0
- {mloda_core/runtime/flight → mloda/core/prepare/validators}/__init__.py +0 -0
- {mloda_core/runtime/worker → mloda/core/runtime}/__init__.py +0 -0
- {mloda_core → mloda/core}/runtime/flight/flight_server.py +0 -0
- {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/WHEEL +0 -0
- {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/entry_points.txt +0 -0
- {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/licenses/LICENSE.TXT +0 -0
- {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/licenses/NOTICE.md +0 -0
mloda_core/abstract_plugins/abstract_feature_group.py → mloda/core/abstract_plugins/feature_group.py
RENAMED
|
@@ -3,26 +3,26 @@ from __future__ import annotations
|
|
|
3
3
|
from typing import Any, Callable, Dict, List, Optional, Set, Type, Union, final
|
|
4
4
|
from abc import ABC
|
|
5
5
|
|
|
6
|
-
from
|
|
7
|
-
from
|
|
8
|
-
from
|
|
9
|
-
|
|
10
|
-
from
|
|
11
|
-
from
|
|
12
|
-
from
|
|
13
|
-
from
|
|
14
|
-
from
|
|
15
|
-
from
|
|
16
|
-
from
|
|
17
|
-
from
|
|
18
|
-
from
|
|
19
|
-
from
|
|
20
|
-
from
|
|
21
|
-
from
|
|
22
|
-
from
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
class
|
|
6
|
+
from mloda.core.abstract_plugins.components.base_artifact import BaseArtifact
|
|
7
|
+
from mloda.core.abstract_plugins.components.data_access_collection import DataAccessCollection
|
|
8
|
+
from mloda.core.abstract_plugins.components.data_types import DataType
|
|
9
|
+
|
|
10
|
+
from mloda.core.abstract_plugins.components.domain import Domain
|
|
11
|
+
from mloda.core.abstract_plugins.components.feature_group_version import FeatureGroupVersion
|
|
12
|
+
from mloda.core.abstract_plugins.components.feature_name import FeatureName
|
|
13
|
+
from mloda.core.abstract_plugins.components.input_data.api.api_input_data import ApiInputData
|
|
14
|
+
from mloda.core.abstract_plugins.components.input_data.base_input_data import BaseInputData
|
|
15
|
+
from mloda.core.abstract_plugins.components.input_data.creator.data_creator import DataCreator
|
|
16
|
+
from mloda.core.abstract_plugins.components.match_data.match_data import MatchData
|
|
17
|
+
from mloda.core.abstract_plugins.compute_framework import ComputeFramework
|
|
18
|
+
from mloda.core.abstract_plugins.components.feature import Feature
|
|
19
|
+
from mloda.core.abstract_plugins.components.feature_set import FeatureSet
|
|
20
|
+
from mloda.core.abstract_plugins.components.options import Options
|
|
21
|
+
from mloda.core.abstract_plugins.components.index.index import Index
|
|
22
|
+
from mloda.core.abstract_plugins.components.utils import get_all_subclasses
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class FeatureGroup(ABC):
|
|
26
26
|
"""
|
|
27
27
|
Mostly implement:
|
|
28
28
|
input_features, except it is a primary source
|
|
@@ -47,7 +47,7 @@ class AbstractFeatureGroup(ABC):
|
|
|
47
47
|
the base class's docstring. Otherwise, it falls back to the class name.
|
|
48
48
|
This behavior allows subclasses to easily customize their description.
|
|
49
49
|
"""
|
|
50
|
-
base_doc = (
|
|
50
|
+
base_doc = (FeatureGroup.__doc__ or "").strip()
|
|
51
51
|
current_doc = (cls.__doc__ or "").strip()
|
|
52
52
|
|
|
53
53
|
if current_doc and current_doc != base_doc:
|
|
@@ -82,14 +82,30 @@ class AbstractFeatureGroup(ABC):
|
|
|
82
82
|
@classmethod
|
|
83
83
|
def validate_input_features(cls, data: Any, features: FeatureSet) -> Optional[bool]:
|
|
84
84
|
"""
|
|
85
|
-
|
|
85
|
+
Validate the input data before feature calculation.
|
|
86
|
+
|
|
87
|
+
Override this method to implement custom input validation logic.
|
|
88
|
+
The base implementation returns None (no validation).
|
|
89
|
+
|
|
90
|
+
Returns:
|
|
91
|
+
None: No validation needed/not implemented (neutral - validation passes by default)
|
|
92
|
+
True: Validation explicitly passed
|
|
93
|
+
False: Validation failed
|
|
86
94
|
"""
|
|
87
95
|
return None
|
|
88
96
|
|
|
89
97
|
@classmethod
|
|
90
98
|
def validate_output_features(cls, data: Any, features: FeatureSet) -> Optional[bool]:
|
|
91
99
|
"""
|
|
92
|
-
|
|
100
|
+
Validate the output data after feature calculation.
|
|
101
|
+
|
|
102
|
+
Override this method to implement custom output validation logic.
|
|
103
|
+
The base implementation returns None (no validation).
|
|
104
|
+
|
|
105
|
+
Returns:
|
|
106
|
+
None: No validation needed/not implemented (neutral - validation passes by default)
|
|
107
|
+
True: Validation explicitly passed
|
|
108
|
+
False: Validation failed
|
|
93
109
|
"""
|
|
94
110
|
return None
|
|
95
111
|
|
|
@@ -238,11 +254,18 @@ class AbstractFeatureGroup(ABC):
|
|
|
238
254
|
@classmethod
|
|
239
255
|
def supports_index(cls, index: Index) -> Optional[bool]:
|
|
240
256
|
"""
|
|
241
|
-
|
|
257
|
+
Check if this feature group supports the given index.
|
|
258
|
+
|
|
259
|
+
This method checks the index against the feature group's supported index columns.
|
|
260
|
+
If no index columns are defined, any index is accepted.
|
|
261
|
+
|
|
262
|
+
Args:
|
|
263
|
+
index: The index to check for support.
|
|
242
264
|
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
265
|
+
Returns:
|
|
266
|
+
None: No index constraint defined (accepts any index)
|
|
267
|
+
True: Index is supported
|
|
268
|
+
False: Index is not supported
|
|
246
269
|
"""
|
|
247
270
|
supported_index_columns = cls.index_columns()
|
|
248
271
|
|
|
@@ -355,7 +378,7 @@ class AbstractFeatureGroup(ABC):
|
|
|
355
378
|
return Domain.get_default_domain()
|
|
356
379
|
|
|
357
380
|
@classmethod
|
|
358
|
-
def compute_framework_rule(cls) -> Union[bool, Set[Type[
|
|
381
|
+
def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFramework]]]:
|
|
359
382
|
"""
|
|
360
383
|
Defines the rule for determining the compute framework to use for this feature group.
|
|
361
384
|
|
|
@@ -366,7 +389,7 @@ class AbstractFeatureGroup(ABC):
|
|
|
366
389
|
|
|
367
390
|
@final
|
|
368
391
|
@classmethod
|
|
369
|
-
def compute_framework_definition(cls) -> Set[Type[
|
|
392
|
+
def compute_framework_definition(cls) -> Set[Type[ComputeFramework]]:
|
|
370
393
|
"""
|
|
371
394
|
Determines the set of compute frameworks supported by this feature group based on the
|
|
372
395
|
`compute_framework_rule`.
|
|
@@ -376,7 +399,7 @@ class AbstractFeatureGroup(ABC):
|
|
|
376
399
|
|
|
377
400
|
"""If FG creator does not care, we allow every framework."""
|
|
378
401
|
if rule is True:
|
|
379
|
-
return get_all_subclasses(
|
|
402
|
+
return get_all_subclasses(ComputeFramework)
|
|
380
403
|
if isinstance(rule, bool):
|
|
381
404
|
raise Exception("Compute framework rule for is not a set of compute frameworks.")
|
|
382
405
|
return rule
|
|
@@ -393,9 +416,9 @@ class AbstractFeatureGroup(ABC):
|
|
|
393
416
|
"""
|
|
394
417
|
Checks if this feature group is equal to another object.
|
|
395
418
|
"""
|
|
396
|
-
if isinstance(another,
|
|
419
|
+
if isinstance(another, FeatureGroup):
|
|
397
420
|
return self.get_class_name() == another.get_class_name()
|
|
398
|
-
raise Exception(f"Cannot compare
|
|
421
|
+
raise Exception(f"Cannot compare FeatureGroup with another type. {another} ")
|
|
399
422
|
|
|
400
423
|
def __hash__(self) -> int:
|
|
401
424
|
"""
|
|
@@ -463,7 +486,7 @@ class AbstractFeatureGroup(ABC):
|
|
|
463
486
|
|
|
464
487
|
To be used, create a class like this:
|
|
465
488
|
|
|
466
|
-
class MyMatchData(
|
|
489
|
+
class MyMatchData(FeatureGroup, MatchData):
|
|
467
490
|
...
|
|
468
491
|
|
|
469
492
|
and then create the function match_data_access.
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from enum import Enum
|
|
3
|
+
from typing import Any, List, Optional, Set
|
|
4
|
+
import logging
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class ExtenderHook(Enum):
|
|
8
|
+
FEATURE_GROUP_CALCULATE_FEATURE = "feature_group_calculate_feature"
|
|
9
|
+
VALIDATE_INPUT_FEATURE = "validate_input_feature"
|
|
10
|
+
VALIDATE_OUTPUT_FEATURE = "validate_output_feature"
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class Extender(ABC):
|
|
14
|
+
"""
|
|
15
|
+
- Automated Metadata harvestor connector
|
|
16
|
+
- Messaging Integration ( email )
|
|
17
|
+
- Automation Tools
|
|
18
|
+
- data lineage mapping
|
|
19
|
+
- Impact Analysis
|
|
20
|
+
- Audit Trail
|
|
21
|
+
- Monitoring alerts
|
|
22
|
+
- metadata capture
|
|
23
|
+
- Event logging
|
|
24
|
+
- metrics on feature calculation
|
|
25
|
+
- visibility / observibility
|
|
26
|
+
- Performance
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
@property
|
|
30
|
+
def priority(self) -> int:
|
|
31
|
+
"""Lower priority runs first. Default is 100."""
|
|
32
|
+
if hasattr(self, "_priority"):
|
|
33
|
+
return self._priority
|
|
34
|
+
return 100
|
|
35
|
+
|
|
36
|
+
@priority.setter
|
|
37
|
+
def priority(self, value: int) -> None:
|
|
38
|
+
self._priority = value
|
|
39
|
+
|
|
40
|
+
@abstractmethod
|
|
41
|
+
def wraps(self) -> Set[ExtenderHook]:
|
|
42
|
+
pass
|
|
43
|
+
|
|
44
|
+
@abstractmethod
|
|
45
|
+
def __call__(self, func: Any, *args: Any, **kwargs: Any) -> Any:
|
|
46
|
+
pass
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class _CompositeExtender(Extender):
|
|
50
|
+
"""Internal class that chains multiple Extenders in priority order."""
|
|
51
|
+
|
|
52
|
+
def __init__(self, extenders: List[Extender], function_type: Optional[ExtenderHook] = None):
|
|
53
|
+
self.extenders = sorted(extenders, key=lambda e: e.priority)
|
|
54
|
+
self.function_type = function_type
|
|
55
|
+
|
|
56
|
+
def wraps(self) -> Set[ExtenderHook]:
|
|
57
|
+
if self.function_type:
|
|
58
|
+
return {self.function_type}
|
|
59
|
+
result = set()
|
|
60
|
+
for extender in self.extenders:
|
|
61
|
+
result.update(extender.wraps())
|
|
62
|
+
return result
|
|
63
|
+
|
|
64
|
+
def __call__(self, func: Any, *args: Any, **kwargs: Any) -> Any:
|
|
65
|
+
def make_wrapper(ext: Extender, inner_func: Any) -> Any:
|
|
66
|
+
def wrapper(*a: Any, **kw: Any) -> Any:
|
|
67
|
+
try:
|
|
68
|
+
return ext.__call__(inner_func, *a, **kw)
|
|
69
|
+
except Exception as e:
|
|
70
|
+
logging.error(f"{ext.__class__.__name__} {ext.name if hasattr(ext, 'name') else ''} {str(e)}")
|
|
71
|
+
return inner_func(*a, **kw)
|
|
72
|
+
|
|
73
|
+
return wrapper
|
|
74
|
+
|
|
75
|
+
wrapped_func = func
|
|
76
|
+
for extender in reversed(self.extenders):
|
|
77
|
+
wrapped_func = make_wrapper(extender, wrapped_func)
|
|
78
|
+
return wrapped_func(*args, **kwargs)
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Plugin documentation discovery functions.
|
|
3
|
+
|
|
4
|
+
These functions return documentation and metadata for currently loaded plugins.
|
|
5
|
+
They report the current state - ensure plugins are loaded before calling.
|
|
6
|
+
|
|
7
|
+
Example:
|
|
8
|
+
from mloda.core.abstract_plugins.plugin_loader.plugin_loader import PluginLoader
|
|
9
|
+
from mloda.core.api.plugin_docs import get_feature_group_docs
|
|
10
|
+
|
|
11
|
+
# Load plugins first
|
|
12
|
+
PluginLoader.all()
|
|
13
|
+
|
|
14
|
+
# Then get documentation
|
|
15
|
+
docs = get_feature_group_docs()
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from typing import List, Optional, Type, Union
|
|
19
|
+
|
|
20
|
+
from mloda.core.abstract_plugins.feature_group import FeatureGroup
|
|
21
|
+
from mloda.core.abstract_plugins.components.plugin_option.plugin_collector import PluginCollector
|
|
22
|
+
from mloda.core.abstract_plugins.components.utils import get_all_subclasses
|
|
23
|
+
from mloda.core.abstract_plugins.compute_framework import ComputeFramework
|
|
24
|
+
from mloda.core.abstract_plugins.function_extender import Extender
|
|
25
|
+
from mloda.core.api.plugin_info import ComputeFrameworkInfo, ExtenderInfo, FeatureGroupInfo
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def get_feature_group_docs(
|
|
29
|
+
name: Optional[str] = None,
|
|
30
|
+
search: Optional[str] = None,
|
|
31
|
+
compute_framework: Optional[Union[str, Type[ComputeFramework]]] = None,
|
|
32
|
+
version_contains: Optional[str] = None,
|
|
33
|
+
plugin_collector: Optional[PluginCollector] = None,
|
|
34
|
+
) -> List[FeatureGroupInfo]:
|
|
35
|
+
"""
|
|
36
|
+
Get documentation for feature groups with optional filtering.
|
|
37
|
+
|
|
38
|
+
Returns the current state of loaded feature groups. Ensure plugins are loaded
|
|
39
|
+
before calling this function (e.g., via PluginLoader.all()).
|
|
40
|
+
|
|
41
|
+
Args:
|
|
42
|
+
name: Filter by feature group name (case-insensitive partial match).
|
|
43
|
+
search: Search in feature group description (case-insensitive partial match).
|
|
44
|
+
compute_framework: Filter by compute framework name or class.
|
|
45
|
+
version_contains: Filter by version substring.
|
|
46
|
+
plugin_collector: Filter using plugin collector's applicability check.
|
|
47
|
+
|
|
48
|
+
Returns:
|
|
49
|
+
List of FeatureGroupInfo objects sorted by name.
|
|
50
|
+
"""
|
|
51
|
+
all_feature_groups = get_all_subclasses(FeatureGroup)
|
|
52
|
+
results = []
|
|
53
|
+
|
|
54
|
+
for fg_class in all_feature_groups:
|
|
55
|
+
fg_name = fg_class.get_class_name()
|
|
56
|
+
description = fg_class.description()
|
|
57
|
+
version = fg_class.version()
|
|
58
|
+
module = fg_class.__module__
|
|
59
|
+
compute_frameworks = [cfw.__name__ for cfw in fg_class.compute_framework_definition()]
|
|
60
|
+
supported_feature_names = fg_class.feature_names_supported()
|
|
61
|
+
prefix = fg_class.prefix()
|
|
62
|
+
|
|
63
|
+
if name is not None and name.lower() not in fg_name.lower():
|
|
64
|
+
continue
|
|
65
|
+
|
|
66
|
+
if search is not None and search.lower() not in description.lower():
|
|
67
|
+
continue
|
|
68
|
+
|
|
69
|
+
if compute_framework is not None:
|
|
70
|
+
cfw_name = compute_framework if isinstance(compute_framework, str) else compute_framework.__name__
|
|
71
|
+
if cfw_name not in compute_frameworks:
|
|
72
|
+
continue
|
|
73
|
+
|
|
74
|
+
if version_contains is not None and version_contains not in version:
|
|
75
|
+
continue
|
|
76
|
+
|
|
77
|
+
if plugin_collector is not None and not plugin_collector.applicable_feature_group_class(fg_class):
|
|
78
|
+
continue
|
|
79
|
+
|
|
80
|
+
results.append(
|
|
81
|
+
FeatureGroupInfo(
|
|
82
|
+
name=fg_name,
|
|
83
|
+
description=description,
|
|
84
|
+
version=version,
|
|
85
|
+
module=module,
|
|
86
|
+
compute_frameworks=compute_frameworks,
|
|
87
|
+
supported_feature_names=supported_feature_names,
|
|
88
|
+
prefix=prefix,
|
|
89
|
+
)
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
return sorted(results, key=lambda x: x.name)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def get_compute_framework_docs(
|
|
96
|
+
name: Optional[str] = None,
|
|
97
|
+
search: Optional[str] = None,
|
|
98
|
+
available_only: bool = True,
|
|
99
|
+
) -> List[ComputeFrameworkInfo]:
|
|
100
|
+
"""
|
|
101
|
+
Get documentation for compute frameworks with optional filtering.
|
|
102
|
+
|
|
103
|
+
Returns the current state of loaded compute frameworks. Ensure plugins are loaded
|
|
104
|
+
before calling this function (e.g., via PluginLoader.all()).
|
|
105
|
+
|
|
106
|
+
Args:
|
|
107
|
+
name: Filter by compute framework name (case-insensitive partial match).
|
|
108
|
+
search: Search in compute framework description (case-insensitive partial match).
|
|
109
|
+
available_only: If True, only return available frameworks (default True).
|
|
110
|
+
|
|
111
|
+
Returns:
|
|
112
|
+
List of ComputeFrameworkInfo objects sorted by name.
|
|
113
|
+
"""
|
|
114
|
+
all_compute_frameworks = get_all_subclasses(ComputeFramework)
|
|
115
|
+
results = []
|
|
116
|
+
|
|
117
|
+
for cfw_class in all_compute_frameworks:
|
|
118
|
+
cfw_name = cfw_class.__name__
|
|
119
|
+
description = (cfw_class.__doc__ or "").strip() or cfw_class.__name__
|
|
120
|
+
module = cfw_class.__module__
|
|
121
|
+
|
|
122
|
+
is_available = cfw_class.is_available()
|
|
123
|
+
|
|
124
|
+
try:
|
|
125
|
+
expected_data_framework = str(cfw_class.expected_data_framework())
|
|
126
|
+
except Exception: # nosec
|
|
127
|
+
expected_data_framework = "unavailable"
|
|
128
|
+
|
|
129
|
+
try:
|
|
130
|
+
has_merge_engine = cfw_class.merge_engine() is not None
|
|
131
|
+
except Exception: # nosec
|
|
132
|
+
has_merge_engine = False
|
|
133
|
+
|
|
134
|
+
try:
|
|
135
|
+
has_filter_engine = cfw_class.filter_engine() is not None
|
|
136
|
+
except Exception: # nosec
|
|
137
|
+
has_filter_engine = False
|
|
138
|
+
|
|
139
|
+
if available_only and not is_available:
|
|
140
|
+
continue
|
|
141
|
+
|
|
142
|
+
if name is not None and name.lower() not in cfw_name.lower():
|
|
143
|
+
continue
|
|
144
|
+
|
|
145
|
+
if search is not None and search.lower() not in description.lower():
|
|
146
|
+
continue
|
|
147
|
+
|
|
148
|
+
results.append(
|
|
149
|
+
ComputeFrameworkInfo(
|
|
150
|
+
name=cfw_name,
|
|
151
|
+
description=description,
|
|
152
|
+
module=module,
|
|
153
|
+
is_available=is_available,
|
|
154
|
+
expected_data_framework=expected_data_framework,
|
|
155
|
+
has_merge_engine=has_merge_engine,
|
|
156
|
+
has_filter_engine=has_filter_engine,
|
|
157
|
+
)
|
|
158
|
+
)
|
|
159
|
+
|
|
160
|
+
return sorted(results, key=lambda x: x.name)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def get_extender_docs(
    name: Optional[str] = None,
    search: Optional[str] = None,
    wraps: Optional[str] = None,
) -> List[ExtenderInfo]:
    """
    Get documentation for extenders with optional filtering.

    Returns the current state of loaded extenders. Ensure plugins are loaded
    before calling this function (e.g., via PluginLoader.all()).

    Args:
        name: Filter by extender name (case-insensitive partial match).
        search: Search in extender description (case-insensitive partial match).
        wraps: Filter by wrapped function type (case-insensitive exact match).

    Returns:
        List of ExtenderInfo objects sorted by name.
    """
    # Normalise the filters once instead of on every loop iteration.
    name_filter = None if name is None else name.lower()
    search_filter = None if search is None else search.lower()
    wraps_filter = None if wraps is None else wraps.lower()

    results: List[ExtenderInfo] = []

    for ext_class in get_all_subclasses(Extender):
        ext_name = ext_class.__name__

        # Classes with these names are never reported
        # (presumably the abstract base and an internal composite -- confirm).
        if ext_name in ("Extender", "_CompositeExtender"):
            continue

        # Apply the cheap string filters first so that classes which are
        # going to be discarded anyway are never instantiated below.
        if name_filter is not None and name_filter not in ext_name.lower():
            continue

        # Fall back to the class name when the docstring is missing or blank.
        description = (ext_class.__doc__ or "").strip() or ext_name
        if search_filter is not None and search_filter not in description.lower():
            continue

        # Best effort: an extender that cannot be built without arguments, or
        # whose wraps() fails, is still listed -- just with no wrapped hooks.
        wraps_list: List[str] = []
        try:
            wraps_list = [w.value for w in ext_class().wraps()]
        except Exception:  # nosec
            pass

        if wraps_filter is not None and not any(wraps_filter == w.lower() for w in wraps_list):
            continue

        results.append(
            ExtenderInfo(
                name=ext_name,
                description=description,
                module=ext_class.__module__,
                wraps=wraps_list,
            )
        )

    return sorted(results, key=lambda x: x.name)
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from typing import List, Set
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
@dataclass
class FeatureGroupInfo:
    """Plain record describing one feature group for documentation listings."""

    # Identification and free-text description of the feature group.
    name: str
    description: str
    # Version string and defining module (presumably the class's __module__ -- confirm).
    version: str
    module: str
    # Names of the compute frameworks associated with the feature group.
    compute_frameworks: List[str]
    # Feature names the group reports as supported.
    supported_feature_names: Set[str]
    # Prefix string of the feature group.
    prefix: str
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class ComputeFrameworkInfo:
    """Plain record describing one compute framework for documentation listings."""

    # Class name of the compute framework.
    name: str
    # Stripped class docstring, or the class name when there is no docstring.
    description: str
    # Module in which the compute framework class is defined.
    module: str
    # Result of the framework's is_available() check.
    is_available: bool
    # String form of the expected data framework ("unavailable" if it cannot be resolved).
    expected_data_framework: str
    # Whether the framework provides a merge engine / a filter engine.
    has_merge_engine: bool
    has_filter_engine: bool
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class ExtenderInfo:
    """Plain record describing one extender for documentation listings."""

    # Class name of the extender.
    name: str
    # Stripped class docstring, or the class name when there is no docstring.
    description: str
    # Module in which the extender class is defined.
    module: str
    # Values of the hooks returned by the extender's wraps(); empty if they could not be read.
    wraps: List[str]
|
|
@@ -1,22 +1,22 @@
|
|
|
1
1
|
from typing import Optional, Set, Type, Union, cast
|
|
2
|
-
from
|
|
3
|
-
from
|
|
4
|
-
from
|
|
2
|
+
from mloda.core.abstract_plugins.compute_framework import ComputeFramework
|
|
3
|
+
from mloda.core.abstract_plugins.components.feature_collection import Features
|
|
4
|
+
from mloda.core.abstract_plugins.components.utils import get_all_subclasses
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
class SetupComputeFramework:
|
|
8
8
|
"""A class to create the compute framework and do basic validation."""
|
|
9
9
|
|
|
10
10
|
def __init__(
|
|
11
|
-
self, user_compute_frameworks: Union[Set[Type[
|
|
11
|
+
self, user_compute_frameworks: Union[Set[Type[ComputeFramework]], Optional[list[str]]], features: Features
|
|
12
12
|
) -> None:
|
|
13
|
-
available_compute_frameworks = get_all_subclasses(
|
|
13
|
+
available_compute_frameworks = get_all_subclasses(ComputeFramework)
|
|
14
14
|
|
|
15
15
|
if user_compute_frameworks:
|
|
16
16
|
if isinstance(user_compute_frameworks, list):
|
|
17
|
-
user_set_compute_frameworks: set[str | Type[
|
|
17
|
+
user_set_compute_frameworks: set[str | Type[ComputeFramework]] = set(user_compute_frameworks)
|
|
18
18
|
else:
|
|
19
|
-
user_set_compute_frameworks = cast(set[str | Type[
|
|
19
|
+
user_set_compute_frameworks = cast(set[str | Type[ComputeFramework]], user_compute_frameworks)
|
|
20
20
|
|
|
21
21
|
available_compute_frameworks = self.filter_user_set_in_available_sub_classes(
|
|
22
22
|
user_set_compute_frameworks, available_compute_frameworks
|
|
@@ -29,7 +29,7 @@ class SetupComputeFramework:
|
|
|
29
29
|
self.compute_frameworks = available_compute_frameworks
|
|
30
30
|
|
|
31
31
|
def validate_if_at_least_one_feature_compute_framework_is_in_available_compute_framework(
|
|
32
|
-
self, features: Features, available_compute_frameworks: set[type[
|
|
32
|
+
self, features: Features, available_compute_frameworks: set[type[ComputeFramework]]
|
|
33
33
|
) -> None:
|
|
34
34
|
for feature in features.collection:
|
|
35
35
|
if feature.compute_frameworks and not any(
|
|
@@ -41,9 +41,9 @@ class SetupComputeFramework:
|
|
|
41
41
|
|
|
42
42
|
def filter_user_set_in_available_sub_classes(
|
|
43
43
|
self,
|
|
44
|
-
api_request_compute_frameworks: set[str | Type[
|
|
45
|
-
sub_classes: set[type[
|
|
46
|
-
) -> set[type[
|
|
44
|
+
api_request_compute_frameworks: set[str | Type[ComputeFramework]],
|
|
45
|
+
sub_classes: set[type[ComputeFramework]],
|
|
46
|
+
) -> set[type[ComputeFramework]]:
|
|
47
47
|
compute_frameworks = set()
|
|
48
48
|
compute_frameworks = {
|
|
49
49
|
sub
|