mloda 0.3.3__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mloda/__init__.py +17 -0
- {mloda_core → mloda/core}/abstract_plugins/components/base_artifact.py +2 -2
- {mloda_core → mloda/core}/abstract_plugins/components/base_validator.py +13 -0
- {mloda_core → mloda/core}/abstract_plugins/components/data_access_collection.py +1 -1
- {mloda_core → mloda/core}/abstract_plugins/components/data_types.py +39 -0
- {mloda_core → mloda/core}/abstract_plugins/components/feature.py +39 -33
- {mloda_core → mloda/core}/abstract_plugins/components/feature_chainer/feature_chain_parser.py +19 -19
- mloda/core/abstract_plugins/components/feature_chainer/feature_chain_parser_mixin.py +197 -0
- {mloda_core → mloda/core}/abstract_plugins/components/feature_collection.py +6 -6
- {mloda_core → mloda/core}/abstract_plugins/components/feature_group_version.py +8 -8
- {mloda_core → mloda/core}/abstract_plugins/components/feature_set.py +18 -24
- {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/cfw_transformer.py +2 -2
- {mloda_core → mloda/core}/abstract_plugins/components/index/add_index_feature.py +4 -4
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/api_input_data.py +3 -3
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/api_input_data_collection.py +2 -2
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/base_api_data.py +1 -1
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/base_input_data.py +6 -6
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/creator/data_creator.py +3 -3
- mloda/core/abstract_plugins/components/link.py +437 -0
- {mloda_core → mloda/core}/abstract_plugins/components/match_data/match_data.py +3 -3
- {mloda_core → mloda/core}/abstract_plugins/components/merge/base_merge_engine.py +2 -2
- {mloda_core → mloda/core}/abstract_plugins/components/options.py +12 -36
- {mloda_core → mloda/core}/abstract_plugins/components/parallelization_modes.py +1 -1
- {mloda_core → mloda/core}/abstract_plugins/components/plugin_option/plugin_collector.py +14 -14
- mloda/core/abstract_plugins/components/validators/datatype_validator.py +96 -0
- mloda/core/abstract_plugins/components/validators/feature_set_validator.py +38 -0
- mloda/core/abstract_plugins/components/validators/feature_validator.py +23 -0
- mloda/core/abstract_plugins/components/validators/link_validator.py +79 -0
- mloda/core/abstract_plugins/components/validators/options_validator.py +57 -0
- mloda_core/abstract_plugins/compute_frame_work.py → mloda/core/abstract_plugins/compute_framework.py +45 -37
- mloda_core/abstract_plugins/abstract_feature_group.py → mloda/core/abstract_plugins/feature_group.py +56 -33
- mloda/core/abstract_plugins/function_extender.py +78 -0
- mloda/core/api/plugin_docs.py +220 -0
- mloda/core/api/plugin_info.py +32 -0
- {mloda_core → mloda/core}/api/prepare/setup_compute_framework.py +11 -11
- {mloda_core → mloda/core}/api/request.py +42 -33
- {mloda_core → mloda/core}/core/cfw_manager.py +8 -8
- {mloda_core → mloda/core}/core/engine.py +47 -47
- {mloda_core → mloda/core}/core/step/abstract_step.py +7 -7
- {mloda_core → mloda/core}/core/step/feature_group_step.py +12 -12
- {mloda_core → mloda/core}/core/step/join_step.py +14 -14
- {mloda_core → mloda/core}/core/step/transform_frame_work_step.py +16 -16
- {mloda_core → mloda/core}/filter/filter_engine.py +1 -1
- {mloda_core → mloda/core}/filter/filter_type_enum.py +1 -1
- {mloda_core → mloda/core}/filter/global_filter.py +23 -23
- {mloda_core → mloda/core}/filter/single_filter.py +6 -6
- {mloda_core → mloda/core}/prepare/accessible_plugins.py +15 -18
- {mloda_core → mloda/core}/prepare/execution_plan.py +65 -39
- {mloda_core → mloda/core}/prepare/graph/build_graph.py +6 -6
- {mloda_core → mloda/core}/prepare/graph/graph.py +1 -1
- {mloda_core → mloda/core}/prepare/graph/properties.py +5 -5
- {mloda_core → mloda/core}/prepare/identify_feature_group.py +12 -14
- {mloda_core → mloda/core}/prepare/joinstep_collection.py +3 -3
- {mloda_core → mloda/core}/prepare/resolve_compute_frameworks.py +6 -6
- {mloda_core → mloda/core}/prepare/resolve_graph.py +11 -11
- {mloda_core → mloda/core}/prepare/resolve_links.py +11 -31
- mloda/core/prepare/validators/resolve_link_validator.py +32 -0
- mloda/core/runtime/compute_framework_executor.py +271 -0
- mloda/core/runtime/data_lifecycle_manager.py +160 -0
- mloda/core/runtime/flight/__init__.py +0 -0
- {mloda_core → mloda/core}/runtime/flight/runner_flight_server.py +1 -1
- mloda/core/runtime/run.py +317 -0
- mloda/core/runtime/worker/__init__.py +0 -0
- {mloda_core → mloda/core}/runtime/worker/multiprocessing_worker.py +15 -10
- {mloda_core → mloda/core}/runtime/worker/thread_worker.py +2 -2
- mloda/core/runtime/worker_manager.py +96 -0
- mloda/provider/__init__.py +101 -0
- mloda/steward/__init__.py +25 -0
- mloda/user/__init__.py +57 -0
- {mloda-0.3.3.dist-info → mloda-0.4.1.dist-info}/METADATA +24 -31
- mloda-0.4.1.dist-info/RECORD +248 -0
- {mloda-0.3.3.dist-info → mloda-0.4.1.dist-info}/top_level.txt +1 -1
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_framework.py +15 -13
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_merge_engine.py +3 -3
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_framework.py +12 -10
- mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/pandas/dataframe.py +18 -16
- mloda_plugins/compute_framework/base_implementations/pandas/pandas_filter_engine.py +36 -13
- mloda_plugins/compute_framework/base_implementations/pandas/pandas_merge_engine.py +7 -7
- mloda_plugins/compute_framework/base_implementations/pandas/pandaspyarrowtransformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/polars/dataframe.py +16 -14
- mloda_plugins/compute_framework/base_implementations/polars/lazy_dataframe.py +13 -12
- mloda_plugins/compute_framework/base_implementations/polars/polars_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/polars/polars_lazy_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/polars/polars_merge_engine.py +3 -3
- mloda_plugins/compute_framework/base_implementations/polars/polars_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_merge_engine.py +3 -3
- mloda_plugins/compute_framework/base_implementations/pyarrow/table.py +12 -11
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_framework.py +11 -9
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_merge_engine.py +3 -3
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/spark/spark_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/spark/spark_framework.py +17 -15
- mloda_plugins/compute_framework/base_implementations/spark/spark_merge_engine.py +2 -3
- mloda_plugins/compute_framework/base_implementations/spark/spark_pyarrow_transformer.py +1 -1
- mloda_plugins/config/feature/loader.py +2 -2
- mloda_plugins/feature_group/experimental/aggregated_feature_group/base.py +45 -64
- mloda_plugins/feature_group/experimental/aggregated_feature_group/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/aggregated_feature_group/polars_lazy.py +2 -2
- mloda_plugins/feature_group/experimental/aggregated_feature_group/pyarrow.py +2 -2
- mloda_plugins/feature_group/experimental/clustering/base.py +67 -97
- mloda_plugins/feature_group/experimental/clustering/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/data_quality/missing_value/base.py +58 -82
- mloda_plugins/feature_group/experimental/data_quality/missing_value/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/data_quality/missing_value/pyarrow.py +2 -2
- mloda_plugins/feature_group/experimental/data_quality/missing_value/python_dict.py +2 -2
- mloda_plugins/feature_group/experimental/default_options_key.py +16 -19
- mloda_plugins/feature_group/experimental/dimensionality_reduction/base.py +81 -96
- mloda_plugins/feature_group/experimental/dimensionality_reduction/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/dynamic_feature_group_factory/dynamic_feature_group_factory.py +24 -24
- mloda_plugins/feature_group/experimental/forecasting/base.py +108 -106
- mloda_plugins/feature_group/experimental/forecasting/forecasting_artifact.py +2 -2
- mloda_plugins/feature_group/experimental/forecasting/pandas.py +15 -15
- mloda_plugins/feature_group/experimental/geo_distance/base.py +52 -44
- mloda_plugins/feature_group/experimental/geo_distance/pandas.py +2 -3
- mloda_plugins/feature_group/experimental/llm/cli.py +4 -4
- mloda_plugins/feature_group/experimental/llm/cli_features/refactor_git_cached.py +19 -19
- mloda_plugins/feature_group/experimental/llm/installed_packages_feature_group.py +8 -8
- mloda_plugins/feature_group/experimental/llm/list_directory_feature_group.py +5 -5
- mloda_plugins/feature_group/experimental/llm/llm_api/claude.py +3 -3
- mloda_plugins/feature_group/experimental/llm/llm_api/gemini.py +3 -3
- mloda_plugins/feature_group/experimental/llm/llm_api/llm_base_request.py +5 -5
- mloda_plugins/feature_group/experimental/llm/llm_api/openai.py +3 -3
- mloda_plugins/feature_group/experimental/llm/llm_api/request_loop.py +6 -6
- mloda_plugins/feature_group/experimental/llm/llm_file_selector.py +10 -10
- mloda_plugins/feature_group/experimental/llm/tools/tool_collection.py +1 -1
- mloda_plugins/feature_group/experimental/node_centrality/base.py +46 -74
- mloda_plugins/feature_group/experimental/node_centrality/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/sklearn/encoding/base.py +53 -53
- mloda_plugins/feature_group/experimental/sklearn/encoding/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/sklearn/pipeline/base.py +52 -39
- mloda_plugins/feature_group/experimental/sklearn/pipeline/pandas.py +3 -4
- mloda_plugins/feature_group/experimental/sklearn/scaling/base.py +44 -60
- mloda_plugins/feature_group/experimental/sklearn/scaling/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/sklearn/sklearn_artifact.py +2 -3
- mloda_plugins/feature_group/experimental/source_input_feature.py +15 -15
- mloda_plugins/feature_group/experimental/text_cleaning/base.py +38 -63
- mloda_plugins/feature_group/experimental/text_cleaning/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/text_cleaning/python_dict.py +2 -2
- mloda_plugins/feature_group/experimental/time_window/base.py +108 -95
- mloda_plugins/feature_group/experimental/time_window/pandas.py +13 -13
- mloda_plugins/feature_group/experimental/time_window/pyarrow.py +12 -12
- mloda_plugins/feature_group/input_data/api_data/api_data.py +9 -11
- mloda_plugins/feature_group/input_data/read_context_files.py +7 -7
- mloda_plugins/feature_group/input_data/read_db.py +7 -9
- mloda_plugins/feature_group/input_data/read_db_feature.py +4 -4
- mloda_plugins/feature_group/input_data/read_dbs/sqlite.py +23 -13
- mloda_plugins/feature_group/input_data/read_file.py +8 -8
- mloda_plugins/feature_group/input_data/read_file_feature.py +4 -4
- mloda_plugins/feature_group/input_data/read_files/csv.py +6 -6
- mloda_plugins/feature_group/input_data/read_files/feather.py +5 -5
- mloda_plugins/feature_group/input_data/read_files/json.py +5 -5
- mloda_plugins/feature_group/input_data/read_files/orc.py +5 -5
- mloda_plugins/feature_group/input_data/read_files/parquet.py +5 -5
- mloda_plugins/feature_group/input_data/read_files/text_file_reader.py +5 -5
- mloda_plugins/function_extender/base_implementations/otel/otel_extender.py +4 -4
- mloda-0.3.3.dist-info/RECORD +0 -230
- mloda_core/abstract_plugins/components/link.py +0 -286
- mloda_core/abstract_plugins/function_extender.py +0 -34
- mloda_core/runtime/run.py +0 -617
- {mloda_core → mloda/core}/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/domain.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/feature_chainer/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/feature_name.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/base_transformer.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/hashable_dict.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/index/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/index/index.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/creator/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/match_data/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/merge/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/plugin_option/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/utils.py +0 -0
- {mloda_core/abstract_plugins/plugin_loader → mloda/core/abstract_plugins/components/validators}/__init__.py +0 -0
- {mloda_core/api → mloda/core/abstract_plugins/plugin_loader}/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/plugin_loader/plugin_loader.py +0 -0
- {mloda_core/api/prepare → mloda/core/api}/__init__.py +0 -0
- {mloda_core/core → mloda/core/api/prepare}/__init__.py +0 -0
- {mloda_core/core/step → mloda/core/core}/__init__.py +0 -0
- {mloda_core/filter → mloda/core/core/step}/__init__.py +0 -0
- {mloda_core/prepare → mloda/core/filter}/__init__.py +0 -0
- {mloda_core → mloda/core}/filter/filter_parameter.py +0 -0
- {mloda_core/prepare/graph → mloda/core/prepare}/__init__.py +0 -0
- {mloda_core/runtime → mloda/core/prepare/graph}/__init__.py +0 -0
- {mloda_core/runtime/flight → mloda/core/prepare/validators}/__init__.py +0 -0
- {mloda_core/runtime/worker → mloda/core/runtime}/__init__.py +0 -0
- {mloda_core → mloda/core}/runtime/flight/flight_server.py +0 -0
- {mloda-0.3.3.dist-info → mloda-0.4.1.dist-info}/WHEEL +0 -0
- {mloda-0.3.3.dist-info → mloda-0.4.1.dist-info}/entry_points.txt +0 -0
- {mloda-0.3.3.dist-info → mloda-0.4.1.dist-info}/licenses/LICENSE.TXT +0 -0
- {mloda-0.3.3.dist-info → mloda-0.4.1.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -1,34 +1,36 @@
|
|
|
1
1
|
from copy import deepcopy
|
|
2
2
|
from typing import Any, Dict, List, Optional, Set, Type, Union
|
|
3
3
|
|
|
4
|
-
from
|
|
4
|
+
from mloda.core.abstract_plugins.components.input_data.api.api_input_data_collection import (
|
|
5
5
|
ApiInputDataCollection,
|
|
6
6
|
)
|
|
7
|
-
from
|
|
8
|
-
from
|
|
9
|
-
from
|
|
10
|
-
from
|
|
11
|
-
from
|
|
12
|
-
from
|
|
13
|
-
from
|
|
14
|
-
from
|
|
15
|
-
from
|
|
16
|
-
from
|
|
17
|
-
from
|
|
18
|
-
from
|
|
7
|
+
from mloda.core.abstract_plugins.components.plugin_option.plugin_collector import PluginCollector
|
|
8
|
+
from mloda.core.core.engine import Engine
|
|
9
|
+
from mloda.core.api.prepare.setup_compute_framework import SetupComputeFramework
|
|
10
|
+
from mloda.core.filter.global_filter import GlobalFilter
|
|
11
|
+
from mloda.core.runtime.run import ExecutionOrchestrator
|
|
12
|
+
from mloda.core.abstract_plugins.compute_framework import ComputeFramework
|
|
13
|
+
from mloda.core.abstract_plugins.function_extender import Extender
|
|
14
|
+
from mloda.core.abstract_plugins.components.data_access_collection import DataAccessCollection
|
|
15
|
+
from mloda.core.abstract_plugins.components.parallelization_modes import ParallelizationMode
|
|
16
|
+
from mloda.core.abstract_plugins.components.feature_collection import Features
|
|
17
|
+
from mloda.core.abstract_plugins.components.feature import Feature
|
|
18
|
+
from mloda.core.abstract_plugins.components.link import Link
|
|
19
|
+
from mloda_plugins.feature_group.experimental.default_options_key import DefaultOptionKeys
|
|
19
20
|
|
|
20
21
|
|
|
21
22
|
class mlodaAPI:
|
|
22
23
|
def __init__(
|
|
23
24
|
self,
|
|
24
25
|
requested_features: Union[Features, list[Union[Feature, str]]],
|
|
25
|
-
compute_frameworks: Union[Set[Type[
|
|
26
|
+
compute_frameworks: Union[Set[Type[ComputeFramework]], Optional[list[str]]] = None,
|
|
26
27
|
links: Optional[Set[Link]] = None,
|
|
27
28
|
data_access_collection: Optional[DataAccessCollection] = None,
|
|
28
29
|
global_filter: Optional[GlobalFilter] = None,
|
|
29
30
|
api_data: Optional[Dict[str, Dict[str, Any]]] = None,
|
|
30
|
-
plugin_collector: Optional[
|
|
31
|
+
plugin_collector: Optional[PluginCollector] = None,
|
|
31
32
|
copy_features: Optional[bool] = True,
|
|
33
|
+
strict_type_enforcement: bool = False,
|
|
32
34
|
) -> None:
|
|
33
35
|
# The features object is potentially changed during the run, so we need to deepcopy it by default, so that follow up runs with the same object are not affected.
|
|
34
36
|
# Set copy_features=False to disable deep copying for use cases where features contain non-copyable objects.
|
|
@@ -41,6 +43,7 @@ class mlodaAPI:
|
|
|
41
43
|
for key_name, key_data in api_data.items():
|
|
42
44
|
api_input_data_collection.setup_key_class(key_name, list(key_data.keys()))
|
|
43
45
|
|
|
46
|
+
self.strict_type_enforcement = strict_type_enforcement
|
|
44
47
|
self.features = self._process_features(_requested_features, api_input_data_collection)
|
|
45
48
|
self.compute_framework = SetupComputeFramework(compute_frameworks, self.features).compute_frameworks
|
|
46
49
|
self.links = links
|
|
@@ -50,7 +53,7 @@ class mlodaAPI:
|
|
|
50
53
|
self.api_data = api_data
|
|
51
54
|
self.plugin_collector = plugin_collector
|
|
52
55
|
|
|
53
|
-
self.runner: None |
|
|
56
|
+
self.runner: None | ExecutionOrchestrator = None
|
|
54
57
|
self.engine: None | Engine = None
|
|
55
58
|
|
|
56
59
|
self.engine = self._create_engine()
|
|
@@ -66,22 +69,26 @@ class mlodaAPI:
|
|
|
66
69
|
for feature in features:
|
|
67
70
|
feature.initial_requested_data = True
|
|
68
71
|
self._add_api_input_data(feature, api_input_data_collection)
|
|
72
|
+
# Propagate strict_type_enforcement to typed features only
|
|
73
|
+
if self.strict_type_enforcement and feature.data_type is not None:
|
|
74
|
+
feature.options.add(DefaultOptionKeys.strict_type_enforcement, True)
|
|
69
75
|
|
|
70
76
|
return features
|
|
71
77
|
|
|
72
78
|
@staticmethod
|
|
73
79
|
def run_all(
|
|
74
80
|
features: Union[Features, list[Union[Feature, str]]],
|
|
75
|
-
compute_frameworks: Union[Set[Type[
|
|
81
|
+
compute_frameworks: Union[Set[Type[ComputeFramework]], Optional[list[str]]] = None,
|
|
76
82
|
links: Optional[Set[Link]] = None,
|
|
77
83
|
data_access_collection: Optional[DataAccessCollection] = None,
|
|
78
|
-
parallelization_modes: Set[
|
|
84
|
+
parallelization_modes: Set[ParallelizationMode] = {ParallelizationMode.SYNC},
|
|
79
85
|
flight_server: Optional[Any] = None,
|
|
80
|
-
function_extender: Optional[Set[
|
|
86
|
+
function_extender: Optional[Set[Extender]] = None,
|
|
81
87
|
global_filter: Optional[GlobalFilter] = None,
|
|
82
88
|
api_data: Optional[Dict[str, Dict[str, Any]]] = None,
|
|
83
|
-
plugin_collector: Optional[
|
|
89
|
+
plugin_collector: Optional[PluginCollector] = None,
|
|
84
90
|
copy_features: Optional[bool] = True,
|
|
91
|
+
strict_type_enforcement: bool = False,
|
|
85
92
|
) -> List[Any]:
|
|
86
93
|
"""
|
|
87
94
|
Run feature computation in one step.
|
|
@@ -99,6 +106,7 @@ class mlodaAPI:
|
|
|
99
106
|
Auto-creates ApiInputDataCollection internally.
|
|
100
107
|
plugin_collector: Plugin collector.
|
|
101
108
|
copy_features: Whether to deep copy features (default True).
|
|
109
|
+
strict_type_enforcement: If True, enforce strict type matching for typed features.
|
|
102
110
|
|
|
103
111
|
Returns:
|
|
104
112
|
List of computed results.
|
|
@@ -118,14 +126,15 @@ class mlodaAPI:
|
|
|
118
126
|
api_data=api_data,
|
|
119
127
|
plugin_collector=plugin_collector,
|
|
120
128
|
copy_features=copy_features,
|
|
129
|
+
strict_type_enforcement=strict_type_enforcement,
|
|
121
130
|
)
|
|
122
131
|
return api._execute_batch_run(parallelization_modes, flight_server, function_extender)
|
|
123
132
|
|
|
124
133
|
def _execute_batch_run(
|
|
125
134
|
self,
|
|
126
|
-
parallelization_modes: Set[
|
|
135
|
+
parallelization_modes: Set[ParallelizationMode] = {ParallelizationMode.SYNC},
|
|
127
136
|
flight_server: Optional[Any] = None,
|
|
128
|
-
function_extender: Optional[Set[
|
|
137
|
+
function_extender: Optional[Set[Extender]] = None,
|
|
129
138
|
) -> List[Any]:
|
|
130
139
|
"""Encapsulates the batch run execution flow."""
|
|
131
140
|
self._batch_run(parallelization_modes, flight_server, function_extender)
|
|
@@ -133,9 +142,9 @@ class mlodaAPI:
|
|
|
133
142
|
|
|
134
143
|
def _batch_run(
|
|
135
144
|
self,
|
|
136
|
-
parallelization_modes: Set[
|
|
145
|
+
parallelization_modes: Set[ParallelizationMode] = {ParallelizationMode.SYNC},
|
|
137
146
|
flight_server: Optional[Any] = None,
|
|
138
|
-
function_extender: Optional[Set[
|
|
147
|
+
function_extender: Optional[Set[Extender]] = None,
|
|
139
148
|
api_data: Optional[Dict[str, Any]] = None,
|
|
140
149
|
) -> None:
|
|
141
150
|
"""Sets up the engine runner and runs the engine computation."""
|
|
@@ -146,12 +155,12 @@ class mlodaAPI:
|
|
|
146
155
|
|
|
147
156
|
def _run_engine_computation(
|
|
148
157
|
self,
|
|
149
|
-
parallelization_modes: Set[
|
|
150
|
-
function_extender: Optional[Set[
|
|
158
|
+
parallelization_modes: Set[ParallelizationMode] = {ParallelizationMode.SYNC},
|
|
159
|
+
function_extender: Optional[Set[Extender]] = None,
|
|
151
160
|
api_data: Optional[Dict[str, Any]] = None,
|
|
152
161
|
) -> None:
|
|
153
162
|
"""Runs the engine computation within a context manager."""
|
|
154
|
-
if not isinstance(self.runner,
|
|
163
|
+
if not isinstance(self.runner, ExecutionOrchestrator):
|
|
155
164
|
raise ValueError("You need to run setup_engine_runner beforehand.")
|
|
156
165
|
|
|
157
166
|
try:
|
|
@@ -162,8 +171,8 @@ class mlodaAPI:
|
|
|
162
171
|
|
|
163
172
|
def _enter_runner_context(
|
|
164
173
|
self,
|
|
165
|
-
parallelization_modes: Set[
|
|
166
|
-
function_extender: Optional[Set[
|
|
174
|
+
parallelization_modes: Set[ParallelizationMode],
|
|
175
|
+
function_extender: Optional[Set[Extender]],
|
|
167
176
|
api_data: Optional[Dict[str, Any]],
|
|
168
177
|
) -> None:
|
|
169
178
|
"""Enters the runner context."""
|
|
@@ -206,7 +215,7 @@ class mlodaAPI:
|
|
|
206
215
|
|
|
207
216
|
def _setup_engine_runner(
|
|
208
217
|
self,
|
|
209
|
-
parallelization_modes: Set[
|
|
218
|
+
parallelization_modes: Set[ParallelizationMode] = {ParallelizationMode.SYNC},
|
|
210
219
|
flight_server: Optional[Any] = None,
|
|
211
220
|
) -> None:
|
|
212
221
|
"""Sets up the engine runner based on parallelization mode."""
|
|
@@ -215,12 +224,12 @@ class mlodaAPI:
|
|
|
215
224
|
|
|
216
225
|
self.runner = (
|
|
217
226
|
self.engine.compute(flight_server)
|
|
218
|
-
if
|
|
227
|
+
if ParallelizationMode.MULTIPROCESSING in parallelization_modes
|
|
219
228
|
else self.engine.compute()
|
|
220
229
|
)
|
|
221
230
|
|
|
222
|
-
if not isinstance(self.runner,
|
|
223
|
-
raise ValueError("
|
|
231
|
+
if not isinstance(self.runner, ExecutionOrchestrator):
|
|
232
|
+
raise ValueError("ExecutionOrchestrator initialization failed.")
|
|
224
233
|
|
|
225
234
|
def get_result(self) -> List[Any]:
|
|
226
235
|
if self.runner is None:
|
|
@@ -2,9 +2,9 @@ from multiprocessing.managers import BaseManager
|
|
|
2
2
|
from typing import Any, Dict, Optional, Set, Tuple, Type
|
|
3
3
|
from uuid import UUID
|
|
4
4
|
|
|
5
|
-
from
|
|
6
|
-
from
|
|
7
|
-
from
|
|
5
|
+
from mloda.core.abstract_plugins.compute_framework import ComputeFramework
|
|
6
|
+
from mloda.core.abstract_plugins.function_extender import Extender
|
|
7
|
+
from mloda.core.abstract_plugins.components.parallelization_modes import ParallelizationMode
|
|
8
8
|
|
|
9
9
|
import logging
|
|
10
10
|
|
|
@@ -27,8 +27,8 @@ class CfwManager:
|
|
|
27
27
|
|
|
28
28
|
def __init__(
|
|
29
29
|
self,
|
|
30
|
-
parallelization_modes: Set[
|
|
31
|
-
function_extender: Optional[Set[
|
|
30
|
+
parallelization_modes: Set[ParallelizationMode],
|
|
31
|
+
function_extender: Optional[Set[Extender]] = None,
|
|
32
32
|
) -> None:
|
|
33
33
|
"""
|
|
34
34
|
Initializes the CfwManager.
|
|
@@ -147,7 +147,7 @@ class CfwManager:
|
|
|
147
147
|
raise ValueError(f"UUID {uuid} already exists in compute_frameworks")
|
|
148
148
|
self.compute_frameworks[uuid] = (cls_name, children_if_root)
|
|
149
149
|
|
|
150
|
-
def get_initialized_compute_framework_uuid(self, cf_class: Type[
|
|
150
|
+
def get_initialized_compute_framework_uuid(self, cf_class: Type[ComputeFramework], feature_uuid: UUID) -> UUID:
|
|
151
151
|
"""
|
|
152
152
|
Retrieves the UUID of an initialized Compute Framework.
|
|
153
153
|
|
|
@@ -173,7 +173,7 @@ class CfwManager:
|
|
|
173
173
|
"""Retrieves the location for multiprocessing."""
|
|
174
174
|
return self.location
|
|
175
175
|
|
|
176
|
-
def get_parallelization_modes(self) -> Set[
|
|
176
|
+
def get_parallelization_modes(self) -> Set[ParallelizationMode]:
|
|
177
177
|
"""Retrieves the set of parallelization modes."""
|
|
178
178
|
return self.parallelization_modes
|
|
179
179
|
|
|
@@ -199,7 +199,7 @@ class CfwManager:
|
|
|
199
199
|
"""Retrieves the dictionary of compute frameworks."""
|
|
200
200
|
return self.compute_frameworks
|
|
201
201
|
|
|
202
|
-
def get_function_extender(self) -> Optional[Set[
|
|
202
|
+
def get_function_extender(self) -> Optional[Set[Extender]]:
|
|
203
203
|
"""Retrieves the optional set of function extenders."""
|
|
204
204
|
return self.function_extender
|
|
205
205
|
|
|
@@ -4,45 +4,45 @@ from typing import Dict, Optional, Set, Type
|
|
|
4
4
|
from uuid import UUID
|
|
5
5
|
import uuid
|
|
6
6
|
|
|
7
|
-
from
|
|
8
|
-
from
|
|
9
|
-
from
|
|
7
|
+
from mloda.core.abstract_plugins.components.index.add_index_feature import create_index_feature
|
|
8
|
+
from mloda.core.abstract_plugins.components.index.index import Index
|
|
9
|
+
from mloda.core.abstract_plugins.components.input_data.api.api_input_data_collection import (
|
|
10
10
|
ApiInputDataCollection,
|
|
11
11
|
)
|
|
12
|
-
from
|
|
13
|
-
from
|
|
14
|
-
from
|
|
15
|
-
from
|
|
16
|
-
from
|
|
17
|
-
from
|
|
18
|
-
from
|
|
19
|
-
from
|
|
20
|
-
from
|
|
21
|
-
from
|
|
22
|
-
from
|
|
23
|
-
from
|
|
24
|
-
from
|
|
25
|
-
from
|
|
26
|
-
from
|
|
27
|
-
from
|
|
28
|
-
from
|
|
29
|
-
from
|
|
30
|
-
from
|
|
12
|
+
from mloda.core.abstract_plugins.components.plugin_option.plugin_collector import PluginCollector
|
|
13
|
+
from mloda.core.filter.global_filter import GlobalFilter
|
|
14
|
+
from mloda.core.prepare.accessible_plugins import PreFilterPlugins
|
|
15
|
+
from mloda.core.abstract_plugins.components.feature_name import FeatureName
|
|
16
|
+
from mloda.core.abstract_plugins.components.data_access_collection import DataAccessCollection
|
|
17
|
+
from mloda.core.abstract_plugins.components.data_types import DataType
|
|
18
|
+
from mloda.core.abstract_plugins.compute_framework import ComputeFramework
|
|
19
|
+
from mloda.core.prepare.execution_plan import ExecutionPlan
|
|
20
|
+
from mloda.core.prepare.graph.build_graph import BuildGraph
|
|
21
|
+
from mloda.core.prepare.resolve_graph import ResolveGraph
|
|
22
|
+
from mloda.core.runtime.run import ExecutionOrchestrator
|
|
23
|
+
from mloda.core.prepare.identify_feature_group import IdentifyFeatureGroupClass
|
|
24
|
+
from mloda.core.runtime.flight.runner_flight_server import ParallelRunnerFlightServer
|
|
25
|
+
from mloda.core.abstract_plugins.feature_group import FeatureGroup
|
|
26
|
+
from mloda.core.abstract_plugins.components.feature import Feature
|
|
27
|
+
from mloda.core.abstract_plugins.components.feature_collection import Features
|
|
28
|
+
from mloda.core.abstract_plugins.components.options import Options
|
|
29
|
+
from mloda.core.abstract_plugins.components.link import Link
|
|
30
|
+
from mloda.core.abstract_plugins.components.validators.link_validator import LinkValidator
|
|
31
31
|
|
|
32
32
|
|
|
33
33
|
class Engine:
|
|
34
34
|
def __init__(
|
|
35
35
|
self,
|
|
36
36
|
features: Features,
|
|
37
|
-
compute_frameworks: Set[Type[
|
|
37
|
+
compute_frameworks: Set[Type[ComputeFramework]],
|
|
38
38
|
links: Optional[Set[Link]],
|
|
39
39
|
data_access_collection: Optional[DataAccessCollection] = None,
|
|
40
40
|
global_filter: Optional[GlobalFilter] = None,
|
|
41
41
|
api_input_data_collection: Optional[ApiInputDataCollection] = None,
|
|
42
|
-
plugin_collector: Optional[
|
|
42
|
+
plugin_collector: Optional[PluginCollector] = None,
|
|
43
43
|
) -> None:
|
|
44
44
|
# setup variables which track the primary sources and the compute platforms
|
|
45
|
-
self.feature_group_collection: Dict[Type[
|
|
45
|
+
self.feature_group_collection: Dict[Type[FeatureGroup], Set[Feature]] = defaultdict(set)
|
|
46
46
|
|
|
47
47
|
# use global filters
|
|
48
48
|
self.global_filter = global_filter
|
|
@@ -53,7 +53,7 @@ class Engine:
|
|
|
53
53
|
# get accessible feature groups and their compute platforms
|
|
54
54
|
self.accessible_plugins = PreFilterPlugins(compute_frameworks, plugin_collector).get_accessible_plugins()
|
|
55
55
|
# get links
|
|
56
|
-
|
|
56
|
+
LinkValidator.validate_links(links)
|
|
57
57
|
self.links = links
|
|
58
58
|
|
|
59
59
|
# set api input collection if relevant
|
|
@@ -66,11 +66,11 @@ class Engine:
|
|
|
66
66
|
self.data_access_collection = data_access_collection
|
|
67
67
|
self.execution_planner = self.create_setup_execution_plan(features)
|
|
68
68
|
|
|
69
|
-
def compute(self, flight_server: Optional[ParallelRunnerFlightServer] = None) ->
|
|
70
|
-
|
|
71
|
-
if isinstance(
|
|
72
|
-
return
|
|
73
|
-
raise ValueError("
|
|
69
|
+
def compute(self, flight_server: Optional[ParallelRunnerFlightServer] = None) -> ExecutionOrchestrator:
|
|
70
|
+
orchestrator = ExecutionOrchestrator(self.execution_planner, flight_server)
|
|
71
|
+
if isinstance(orchestrator, ExecutionOrchestrator):
|
|
72
|
+
return orchestrator
|
|
73
|
+
raise ValueError("ExecutionOrchestrator setup failed.")
|
|
74
74
|
|
|
75
75
|
def create_setup_execution_plan(self, features: Features) -> ExecutionPlan:
|
|
76
76
|
self.setup_features_recursion(features)
|
|
@@ -124,15 +124,15 @@ class Engine:
|
|
|
124
124
|
if feature_group.index_columns():
|
|
125
125
|
self._add_index_feature(feature_group_class, feature_group, feature, features)
|
|
126
126
|
|
|
127
|
-
def _set_feature_name(self, feature: Feature, feature_group:
|
|
127
|
+
def _set_feature_name(self, feature: Feature, feature_group: FeatureGroup) -> None:
|
|
128
128
|
"""Sets the feature name using the feature group's logic."""
|
|
129
129
|
feature.name = feature_group.set_feature_name(feature.options, feature.name)
|
|
130
130
|
|
|
131
131
|
def _set_compute_framework_and_data_type(
|
|
132
132
|
self,
|
|
133
133
|
feature: Feature,
|
|
134
|
-
compute_frameworks: Set[Type[
|
|
135
|
-
feature_group_class: Type[
|
|
134
|
+
compute_frameworks: Set[Type[ComputeFramework]],
|
|
135
|
+
feature_group_class: Type[FeatureGroup],
|
|
136
136
|
) -> None:
|
|
137
137
|
"""Sets the compute framework and data type for the feature."""
|
|
138
138
|
feature = self.set_compute_framework(feature, compute_frameworks)
|
|
@@ -140,7 +140,7 @@ class Engine:
|
|
|
140
140
|
|
|
141
141
|
def _identify_feature_group_and_frameworks(
|
|
142
142
|
self, feature: Feature
|
|
143
|
-
) -> tuple[Type[
|
|
143
|
+
) -> tuple[Type[FeatureGroup], Set[Type[ComputeFramework]]]:
|
|
144
144
|
"""Identifies the feature group class and compute frameworks for a given feature."""
|
|
145
145
|
identifier = IdentifyFeatureGroupClass(
|
|
146
146
|
feature, self.accessible_plugins, self.links, self.data_access_collection
|
|
@@ -149,8 +149,8 @@ class Engine:
|
|
|
149
149
|
|
|
150
150
|
def _add_index_feature(
|
|
151
151
|
self,
|
|
152
|
-
feature_group_class: Type[
|
|
153
|
-
feature_group:
|
|
152
|
+
feature_group_class: Type[FeatureGroup],
|
|
153
|
+
feature_group: FeatureGroup,
|
|
154
154
|
feature: Feature,
|
|
155
155
|
features: Features,
|
|
156
156
|
) -> None:
|
|
@@ -166,8 +166,8 @@ class Engine:
|
|
|
166
166
|
|
|
167
167
|
def _process_index_feature(
|
|
168
168
|
self,
|
|
169
|
-
feature_group_class: Type[
|
|
170
|
-
feature_group:
|
|
169
|
+
feature_group_class: Type[FeatureGroup],
|
|
170
|
+
feature_group: FeatureGroup,
|
|
171
171
|
feature: Feature,
|
|
172
172
|
features: Features,
|
|
173
173
|
index: Index,
|
|
@@ -185,8 +185,8 @@ class Engine:
|
|
|
185
185
|
|
|
186
186
|
def _create_and_add_index_feature(
|
|
187
187
|
self,
|
|
188
|
-
feature_group_class: Type[
|
|
189
|
-
feature_group:
|
|
188
|
+
feature_group_class: Type[FeatureGroup],
|
|
189
|
+
feature_group: FeatureGroup,
|
|
190
190
|
feature: Feature,
|
|
191
191
|
features: Features,
|
|
192
192
|
index: Index,
|
|
@@ -197,8 +197,8 @@ class Engine:
|
|
|
197
197
|
|
|
198
198
|
def _add_filter_feature(
|
|
199
199
|
self,
|
|
200
|
-
feature_group_class: Type[
|
|
201
|
-
feature_group:
|
|
200
|
+
feature_group_class: Type[FeatureGroup],
|
|
201
|
+
feature_group: FeatureGroup,
|
|
202
202
|
feature: Feature,
|
|
203
203
|
features: Features,
|
|
204
204
|
) -> None:
|
|
@@ -230,7 +230,7 @@ class Engine:
|
|
|
230
230
|
|
|
231
231
|
def add_feature_to_collection(
|
|
232
232
|
self,
|
|
233
|
-
feature_group_class: Type[
|
|
233
|
+
feature_group_class: Type[FeatureGroup],
|
|
234
234
|
feature: Feature,
|
|
235
235
|
child_uuid: Optional[UUID],
|
|
236
236
|
if_index_feature: bool = False,
|
|
@@ -265,7 +265,7 @@ class Engine:
|
|
|
265
265
|
self.feature_link_parents[child_uuid].add(wanted_uuid)
|
|
266
266
|
|
|
267
267
|
def _handle_input_features_recursion(
|
|
268
|
-
self, feature_group_class: Type[
|
|
268
|
+
self, feature_group_class: Type[FeatureGroup], uuid: UUID, options: Options, feature_name: FeatureName
|
|
269
269
|
) -> None:
|
|
270
270
|
"""Handles recursion for input features of a feature group."""
|
|
271
271
|
feature_group = feature_group_class()
|
|
@@ -284,7 +284,7 @@ class Engine:
|
|
|
284
284
|
self.feature_link_parents[features.child_uuid] = features.parent_uuids
|
|
285
285
|
self.setup_features_recursion(features)
|
|
286
286
|
|
|
287
|
-
def set_compute_framework(self, feature: Feature, compute_frameworks: Set[Type[
|
|
287
|
+
def set_compute_framework(self, feature: Feature, compute_frameworks: Set[Type[ComputeFramework]]) -> Feature:
|
|
288
288
|
"""
|
|
289
289
|
This function leads to that the feature has always a compute framework set!
|
|
290
290
|
"""
|
|
@@ -297,7 +297,7 @@ class Engine:
|
|
|
297
297
|
feature.compute_frameworks = compute_frameworks
|
|
298
298
|
return feature
|
|
299
299
|
|
|
300
|
-
def set_data_type(self, feature: Feature, feature_group_class: Type[
|
|
300
|
+
def set_data_type(self, feature: Feature, feature_group_class: Type[FeatureGroup]) -> Optional[DataType]:
|
|
301
301
|
fg_data_type = feature_group_class.return_data_type_rule(feature)
|
|
302
302
|
if feature.data_type and fg_data_type:
|
|
303
303
|
if feature.data_type != fg_data_type:
|
|
@@ -2,9 +2,9 @@ from abc import ABC, abstractmethod
|
|
|
2
2
|
from typing import Any, Optional, Set, Union, final
|
|
3
3
|
from uuid import UUID, uuid4
|
|
4
4
|
|
|
5
|
-
from
|
|
6
|
-
from
|
|
7
|
-
from
|
|
5
|
+
from mloda.core.abstract_plugins.compute_framework import ComputeFramework
|
|
6
|
+
from mloda.core.core.cfw_manager import CfwManager
|
|
7
|
+
from mloda.core.abstract_plugins.components.parallelization_modes import ParallelizationMode
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
class Step(ABC):
|
|
@@ -17,8 +17,8 @@ class Step(ABC):
|
|
|
17
17
|
def execute(
|
|
18
18
|
self,
|
|
19
19
|
cfw_register: CfwManager,
|
|
20
|
-
cfw:
|
|
21
|
-
from_cfw: Optional[Union[
|
|
20
|
+
cfw: ComputeFramework,
|
|
21
|
+
from_cfw: Optional[Union[ComputeFramework, UUID]] = None,
|
|
22
22
|
data: Optional[Any] = None,
|
|
23
23
|
) -> Optional[Any]:
|
|
24
24
|
"""Define what executing this step involves."""
|
|
@@ -29,9 +29,9 @@ class Step(ABC):
|
|
|
29
29
|
"""Return result uuids of this step"""
|
|
30
30
|
return set()
|
|
31
31
|
|
|
32
|
-
def get_parallelization_mode(self) -> Set[
|
|
32
|
+
def get_parallelization_mode(self) -> Set[ParallelizationMode]:
|
|
33
33
|
# TODO: This is a placeholder. We will need to add this to feature group later.
|
|
34
|
-
return {
|
|
34
|
+
return {ParallelizationMode.SYNC, ParallelizationMode.THREADING, ParallelizationMode.MULTIPROCESSING}
|
|
35
35
|
|
|
36
36
|
@final
|
|
37
37
|
def get_result_uuid(self) -> UUID:
|
|
@@ -1,21 +1,21 @@
|
|
|
1
1
|
from typing import Any, Optional, Set, Type, Union
|
|
2
2
|
from uuid import UUID, uuid4
|
|
3
|
-
from
|
|
4
|
-
from
|
|
5
|
-
from
|
|
6
|
-
from
|
|
7
|
-
from
|
|
8
|
-
from
|
|
9
|
-
from
|
|
3
|
+
from mloda.core.abstract_plugins.components.base_artifact import BaseArtifact
|
|
4
|
+
from mloda.core.abstract_plugins.components.input_data.api.base_api_data import BaseApiData
|
|
5
|
+
from mloda.core.abstract_plugins.compute_framework import ComputeFramework
|
|
6
|
+
from mloda.core.core.cfw_manager import CfwManager
|
|
7
|
+
from mloda.core.core.step.abstract_step import Step
|
|
8
|
+
from mloda.core.abstract_plugins.feature_group import FeatureGroup
|
|
9
|
+
from mloda.core.abstract_plugins.components.feature_set import FeatureSet
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
class FeatureGroupStep(Step):
|
|
13
13
|
def __init__(
|
|
14
14
|
self,
|
|
15
|
-
feature_group: Type[
|
|
15
|
+
feature_group: Type[FeatureGroup],
|
|
16
16
|
features: FeatureSet,
|
|
17
17
|
required_uuids: Set[UUID],
|
|
18
|
-
compute_framework: Type[
|
|
18
|
+
compute_framework: Type[ComputeFramework],
|
|
19
19
|
children_if_root: set[UUID] = set(),
|
|
20
20
|
api_input_data: Union[BaseApiData, bool] = False,
|
|
21
21
|
) -> None:
|
|
@@ -41,8 +41,8 @@ class FeatureGroupStep(Step):
|
|
|
41
41
|
def execute(
|
|
42
42
|
self,
|
|
43
43
|
cfw_register: CfwManager,
|
|
44
|
-
cfw:
|
|
45
|
-
from_cfw: Optional[Union[
|
|
44
|
+
cfw: ComputeFramework,
|
|
45
|
+
from_cfw: Optional[Union[ComputeFramework, UUID]] = None, # Not used in this implementation
|
|
46
46
|
data: Optional[Any] = None,
|
|
47
47
|
) -> Optional[Any]:
|
|
48
48
|
self.location = cfw_register.get_location()
|
|
@@ -61,7 +61,7 @@ class FeatureGroupStep(Step):
|
|
|
61
61
|
return data
|
|
62
62
|
return None
|
|
63
63
|
|
|
64
|
-
def run_calculate_feature(self, cfw:
|
|
64
|
+
def run_calculate_feature(self, cfw: ComputeFramework, data: Optional[Any] = None) -> Any:
|
|
65
65
|
if self.feature_group.calculate_feature is None:
|
|
66
66
|
raise ValueError("FeatureGroup calculate_feature is not implemented")
|
|
67
67
|
|
|
@@ -1,20 +1,20 @@
|
|
|
1
1
|
from typing import Optional, Set, Type, Any, Union
|
|
2
2
|
from uuid import UUID, uuid4
|
|
3
|
-
from
|
|
4
|
-
from
|
|
5
|
-
from
|
|
3
|
+
from mloda.core.abstract_plugins.components.framework_transformer.cfw_transformer import ComputeFrameworkTransformer
|
|
4
|
+
from mloda.core.abstract_plugins.compute_framework import ComputeFramework
|
|
5
|
+
from mloda.core.core.cfw_manager import CfwManager
|
|
6
6
|
|
|
7
|
-
from
|
|
8
|
-
from
|
|
9
|
-
from
|
|
7
|
+
from mloda.core.core.step.abstract_step import Step
|
|
8
|
+
from mloda.core.abstract_plugins.components.link import Link
|
|
9
|
+
from mloda.core.runtime.flight.flight_server import FlightServer
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
class JoinStep(Step):
|
|
13
13
|
def __init__(
|
|
14
14
|
self,
|
|
15
15
|
link: Link,
|
|
16
|
-
left_framework: Type[
|
|
17
|
-
right_framework: Type[
|
|
16
|
+
left_framework: Type[ComputeFramework],
|
|
17
|
+
right_framework: Type[ComputeFramework],
|
|
18
18
|
required_uuids: Set[UUID],
|
|
19
19
|
left_framework_uuids: Set[UUID],
|
|
20
20
|
right_framework_uuids: Set[UUID],
|
|
@@ -31,7 +31,7 @@ class JoinStep(Step):
|
|
|
31
31
|
def get_uuids(self) -> Set[UUID]:
|
|
32
32
|
return {self.uuid, self.link.uuid}
|
|
33
33
|
|
|
34
|
-
def _merge_data(self, cfw:
|
|
34
|
+
def _merge_data(self, cfw: ComputeFramework, from_cfw_data: Any) -> None:
|
|
35
35
|
"""Merges data from another ComputeFramework into the current one."""
|
|
36
36
|
merge_engine_class = cfw.merge_engine()
|
|
37
37
|
framework_connection = cfw.get_framework_connection_object()
|
|
@@ -42,7 +42,7 @@ class JoinStep(Step):
|
|
|
42
42
|
)
|
|
43
43
|
cfw.set_column_names()
|
|
44
44
|
|
|
45
|
-
def _upload_data_if_needed(self, cfw:
|
|
45
|
+
def _upload_data_if_needed(self, cfw: ComputeFramework, cfw_register: CfwManager) -> None:
|
|
46
46
|
"""Uploads the merged data to Flyway if a location is configured."""
|
|
47
47
|
if self.location:
|
|
48
48
|
if cfw_register.get_uuid_flyway_datasets(cfw.uuid):
|
|
@@ -51,8 +51,8 @@ class JoinStep(Step):
|
|
|
51
51
|
def execute(
|
|
52
52
|
self,
|
|
53
53
|
cfw_register: CfwManager,
|
|
54
|
-
cfw:
|
|
55
|
-
from_cfw: Optional[Union[
|
|
54
|
+
cfw: ComputeFramework,
|
|
55
|
+
from_cfw: Optional[Union[ComputeFramework, UUID]] = None,
|
|
56
56
|
data: Optional[Any] = None,
|
|
57
57
|
) -> Optional[Any]:
|
|
58
58
|
self.location = cfw_register.get_location()
|
|
@@ -69,7 +69,7 @@ class JoinStep(Step):
|
|
|
69
69
|
|
|
70
70
|
return None
|
|
71
71
|
|
|
72
|
-
def get_data(self, from_cfw: Union[UUID,
|
|
72
|
+
def get_data(self, from_cfw: Union[UUID, ComputeFramework], cfw: ComputeFramework) -> Any:
|
|
73
73
|
"""
|
|
74
74
|
This method is used to get the data from the compute framework.
|
|
75
75
|
If we are using multiprocessing, we use flightserver to transport the data.
|
|
@@ -86,7 +86,7 @@ class JoinStep(Step):
|
|
|
86
86
|
raise ValueError("From_cfw is a UUID, but we are not using flightserver.")
|
|
87
87
|
return from_cfw.get_data(), from_cfw.uuid
|
|
88
88
|
|
|
89
|
-
def matched(self, other_framework: Type[
|
|
89
|
+
def matched(self, other_framework: Type[ComputeFramework], uuid: UUID) -> Optional[UUID]:
|
|
90
90
|
"""
|
|
91
91
|
If matched, return the uuid of the join step.
|
|
92
92
|
"""
|