mloda 0.3.3__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mloda/__init__.py +17 -0
- {mloda_core → mloda/core}/abstract_plugins/components/base_artifact.py +2 -2
- {mloda_core → mloda/core}/abstract_plugins/components/base_validator.py +13 -0
- {mloda_core → mloda/core}/abstract_plugins/components/data_access_collection.py +1 -1
- {mloda_core → mloda/core}/abstract_plugins/components/data_types.py +39 -0
- {mloda_core → mloda/core}/abstract_plugins/components/feature.py +39 -33
- {mloda_core → mloda/core}/abstract_plugins/components/feature_chainer/feature_chain_parser.py +19 -19
- mloda/core/abstract_plugins/components/feature_chainer/feature_chain_parser_mixin.py +197 -0
- {mloda_core → mloda/core}/abstract_plugins/components/feature_collection.py +6 -6
- {mloda_core → mloda/core}/abstract_plugins/components/feature_group_version.py +8 -8
- {mloda_core → mloda/core}/abstract_plugins/components/feature_set.py +18 -24
- {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/cfw_transformer.py +2 -2
- {mloda_core → mloda/core}/abstract_plugins/components/index/add_index_feature.py +4 -4
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/api_input_data.py +3 -3
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/api_input_data_collection.py +2 -2
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/base_api_data.py +1 -1
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/base_input_data.py +6 -6
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/creator/data_creator.py +3 -3
- mloda/core/abstract_plugins/components/link.py +437 -0
- {mloda_core → mloda/core}/abstract_plugins/components/match_data/match_data.py +3 -3
- {mloda_core → mloda/core}/abstract_plugins/components/merge/base_merge_engine.py +2 -2
- {mloda_core → mloda/core}/abstract_plugins/components/options.py +12 -36
- {mloda_core → mloda/core}/abstract_plugins/components/parallelization_modes.py +1 -1
- {mloda_core → mloda/core}/abstract_plugins/components/plugin_option/plugin_collector.py +14 -14
- mloda/core/abstract_plugins/components/validators/datatype_validator.py +96 -0
- mloda/core/abstract_plugins/components/validators/feature_set_validator.py +38 -0
- mloda/core/abstract_plugins/components/validators/feature_validator.py +23 -0
- mloda/core/abstract_plugins/components/validators/link_validator.py +79 -0
- mloda/core/abstract_plugins/components/validators/options_validator.py +57 -0
- mloda_core/abstract_plugins/compute_frame_work.py → mloda/core/abstract_plugins/compute_framework.py +45 -37
- mloda_core/abstract_plugins/abstract_feature_group.py → mloda/core/abstract_plugins/feature_group.py +56 -33
- mloda/core/abstract_plugins/function_extender.py +78 -0
- mloda/core/api/plugin_docs.py +220 -0
- mloda/core/api/plugin_info.py +32 -0
- {mloda_core → mloda/core}/api/prepare/setup_compute_framework.py +11 -11
- {mloda_core → mloda/core}/api/request.py +42 -33
- {mloda_core → mloda/core}/core/cfw_manager.py +8 -8
- {mloda_core → mloda/core}/core/engine.py +47 -47
- {mloda_core → mloda/core}/core/step/abstract_step.py +7 -7
- {mloda_core → mloda/core}/core/step/feature_group_step.py +12 -12
- {mloda_core → mloda/core}/core/step/join_step.py +14 -14
- {mloda_core → mloda/core}/core/step/transform_frame_work_step.py +16 -16
- {mloda_core → mloda/core}/filter/filter_engine.py +1 -1
- {mloda_core → mloda/core}/filter/filter_type_enum.py +1 -1
- {mloda_core → mloda/core}/filter/global_filter.py +23 -23
- {mloda_core → mloda/core}/filter/single_filter.py +6 -6
- {mloda_core → mloda/core}/prepare/accessible_plugins.py +15 -18
- {mloda_core → mloda/core}/prepare/execution_plan.py +65 -39
- {mloda_core → mloda/core}/prepare/graph/build_graph.py +6 -6
- {mloda_core → mloda/core}/prepare/graph/graph.py +1 -1
- {mloda_core → mloda/core}/prepare/graph/properties.py +5 -5
- {mloda_core → mloda/core}/prepare/identify_feature_group.py +12 -14
- {mloda_core → mloda/core}/prepare/joinstep_collection.py +3 -3
- {mloda_core → mloda/core}/prepare/resolve_compute_frameworks.py +6 -6
- {mloda_core → mloda/core}/prepare/resolve_graph.py +11 -11
- {mloda_core → mloda/core}/prepare/resolve_links.py +11 -31
- mloda/core/prepare/validators/resolve_link_validator.py +32 -0
- mloda/core/runtime/compute_framework_executor.py +271 -0
- mloda/core/runtime/data_lifecycle_manager.py +160 -0
- mloda/core/runtime/flight/__init__.py +0 -0
- {mloda_core → mloda/core}/runtime/flight/runner_flight_server.py +1 -1
- mloda/core/runtime/run.py +317 -0
- mloda/core/runtime/worker/__init__.py +0 -0
- {mloda_core → mloda/core}/runtime/worker/multiprocessing_worker.py +15 -10
- {mloda_core → mloda/core}/runtime/worker/thread_worker.py +2 -2
- mloda/core/runtime/worker_manager.py +96 -0
- mloda/provider/__init__.py +101 -0
- mloda/steward/__init__.py +25 -0
- mloda/user/__init__.py +57 -0
- {mloda-0.3.3.dist-info → mloda-0.4.1.dist-info}/METADATA +24 -31
- mloda-0.4.1.dist-info/RECORD +248 -0
- {mloda-0.3.3.dist-info → mloda-0.4.1.dist-info}/top_level.txt +1 -1
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_framework.py +15 -13
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_merge_engine.py +3 -3
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_framework.py +12 -10
- mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/pandas/dataframe.py +18 -16
- mloda_plugins/compute_framework/base_implementations/pandas/pandas_filter_engine.py +36 -13
- mloda_plugins/compute_framework/base_implementations/pandas/pandas_merge_engine.py +7 -7
- mloda_plugins/compute_framework/base_implementations/pandas/pandaspyarrowtransformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/polars/dataframe.py +16 -14
- mloda_plugins/compute_framework/base_implementations/polars/lazy_dataframe.py +13 -12
- mloda_plugins/compute_framework/base_implementations/polars/polars_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/polars/polars_lazy_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/polars/polars_merge_engine.py +3 -3
- mloda_plugins/compute_framework/base_implementations/polars/polars_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_merge_engine.py +3 -3
- mloda_plugins/compute_framework/base_implementations/pyarrow/table.py +12 -11
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_framework.py +11 -9
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_merge_engine.py +3 -3
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/spark/spark_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/spark/spark_framework.py +17 -15
- mloda_plugins/compute_framework/base_implementations/spark/spark_merge_engine.py +2 -3
- mloda_plugins/compute_framework/base_implementations/spark/spark_pyarrow_transformer.py +1 -1
- mloda_plugins/config/feature/loader.py +2 -2
- mloda_plugins/feature_group/experimental/aggregated_feature_group/base.py +45 -64
- mloda_plugins/feature_group/experimental/aggregated_feature_group/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/aggregated_feature_group/polars_lazy.py +2 -2
- mloda_plugins/feature_group/experimental/aggregated_feature_group/pyarrow.py +2 -2
- mloda_plugins/feature_group/experimental/clustering/base.py +67 -97
- mloda_plugins/feature_group/experimental/clustering/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/data_quality/missing_value/base.py +58 -82
- mloda_plugins/feature_group/experimental/data_quality/missing_value/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/data_quality/missing_value/pyarrow.py +2 -2
- mloda_plugins/feature_group/experimental/data_quality/missing_value/python_dict.py +2 -2
- mloda_plugins/feature_group/experimental/default_options_key.py +16 -19
- mloda_plugins/feature_group/experimental/dimensionality_reduction/base.py +81 -96
- mloda_plugins/feature_group/experimental/dimensionality_reduction/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/dynamic_feature_group_factory/dynamic_feature_group_factory.py +24 -24
- mloda_plugins/feature_group/experimental/forecasting/base.py +108 -106
- mloda_plugins/feature_group/experimental/forecasting/forecasting_artifact.py +2 -2
- mloda_plugins/feature_group/experimental/forecasting/pandas.py +15 -15
- mloda_plugins/feature_group/experimental/geo_distance/base.py +52 -44
- mloda_plugins/feature_group/experimental/geo_distance/pandas.py +2 -3
- mloda_plugins/feature_group/experimental/llm/cli.py +4 -4
- mloda_plugins/feature_group/experimental/llm/cli_features/refactor_git_cached.py +19 -19
- mloda_plugins/feature_group/experimental/llm/installed_packages_feature_group.py +8 -8
- mloda_plugins/feature_group/experimental/llm/list_directory_feature_group.py +5 -5
- mloda_plugins/feature_group/experimental/llm/llm_api/claude.py +3 -3
- mloda_plugins/feature_group/experimental/llm/llm_api/gemini.py +3 -3
- mloda_plugins/feature_group/experimental/llm/llm_api/llm_base_request.py +5 -5
- mloda_plugins/feature_group/experimental/llm/llm_api/openai.py +3 -3
- mloda_plugins/feature_group/experimental/llm/llm_api/request_loop.py +6 -6
- mloda_plugins/feature_group/experimental/llm/llm_file_selector.py +10 -10
- mloda_plugins/feature_group/experimental/llm/tools/tool_collection.py +1 -1
- mloda_plugins/feature_group/experimental/node_centrality/base.py +46 -74
- mloda_plugins/feature_group/experimental/node_centrality/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/sklearn/encoding/base.py +53 -53
- mloda_plugins/feature_group/experimental/sklearn/encoding/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/sklearn/pipeline/base.py +52 -39
- mloda_plugins/feature_group/experimental/sklearn/pipeline/pandas.py +3 -4
- mloda_plugins/feature_group/experimental/sklearn/scaling/base.py +44 -60
- mloda_plugins/feature_group/experimental/sklearn/scaling/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/sklearn/sklearn_artifact.py +2 -3
- mloda_plugins/feature_group/experimental/source_input_feature.py +15 -15
- mloda_plugins/feature_group/experimental/text_cleaning/base.py +38 -63
- mloda_plugins/feature_group/experimental/text_cleaning/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/text_cleaning/python_dict.py +2 -2
- mloda_plugins/feature_group/experimental/time_window/base.py +108 -95
- mloda_plugins/feature_group/experimental/time_window/pandas.py +13 -13
- mloda_plugins/feature_group/experimental/time_window/pyarrow.py +12 -12
- mloda_plugins/feature_group/input_data/api_data/api_data.py +9 -11
- mloda_plugins/feature_group/input_data/read_context_files.py +7 -7
- mloda_plugins/feature_group/input_data/read_db.py +7 -9
- mloda_plugins/feature_group/input_data/read_db_feature.py +4 -4
- mloda_plugins/feature_group/input_data/read_dbs/sqlite.py +23 -13
- mloda_plugins/feature_group/input_data/read_file.py +8 -8
- mloda_plugins/feature_group/input_data/read_file_feature.py +4 -4
- mloda_plugins/feature_group/input_data/read_files/csv.py +6 -6
- mloda_plugins/feature_group/input_data/read_files/feather.py +5 -5
- mloda_plugins/feature_group/input_data/read_files/json.py +5 -5
- mloda_plugins/feature_group/input_data/read_files/orc.py +5 -5
- mloda_plugins/feature_group/input_data/read_files/parquet.py +5 -5
- mloda_plugins/feature_group/input_data/read_files/text_file_reader.py +5 -5
- mloda_plugins/function_extender/base_implementations/otel/otel_extender.py +4 -4
- mloda-0.3.3.dist-info/RECORD +0 -230
- mloda_core/abstract_plugins/components/link.py +0 -286
- mloda_core/abstract_plugins/function_extender.py +0 -34
- mloda_core/runtime/run.py +0 -617
- {mloda_core → mloda/core}/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/domain.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/feature_chainer/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/feature_name.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/base_transformer.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/hashable_dict.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/index/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/index/index.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/creator/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/match_data/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/merge/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/plugin_option/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/utils.py +0 -0
- {mloda_core/abstract_plugins/plugin_loader → mloda/core/abstract_plugins/components/validators}/__init__.py +0 -0
- {mloda_core/api → mloda/core/abstract_plugins/plugin_loader}/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/plugin_loader/plugin_loader.py +0 -0
- {mloda_core/api/prepare → mloda/core/api}/__init__.py +0 -0
- {mloda_core/core → mloda/core/api/prepare}/__init__.py +0 -0
- {mloda_core/core/step → mloda/core/core}/__init__.py +0 -0
- {mloda_core/filter → mloda/core/core/step}/__init__.py +0 -0
- {mloda_core/prepare → mloda/core/filter}/__init__.py +0 -0
- {mloda_core → mloda/core}/filter/filter_parameter.py +0 -0
- {mloda_core/prepare/graph → mloda/core/prepare}/__init__.py +0 -0
- {mloda_core/runtime → mloda/core/prepare/graph}/__init__.py +0 -0
- {mloda_core/runtime/flight → mloda/core/prepare/validators}/__init__.py +0 -0
- {mloda_core/runtime/worker → mloda/core/runtime}/__init__.py +0 -0
- {mloda_core → mloda/core}/runtime/flight/flight_server.py +0 -0
- {mloda-0.3.3.dist-info → mloda-0.4.1.dist-info}/WHEEL +0 -0
- {mloda-0.3.3.dist-info → mloda-0.4.1.dist-info}/entry_points.txt +0 -0
- {mloda-0.3.3.dist-info → mloda-0.4.1.dist-info}/licenses/LICENSE.TXT +0 -0
- {mloda-0.3.3.dist-info → mloda-0.4.1.dist-info}/licenses/NOTICE.md +0 -0
mloda_core/runtime/run.py
DELETED
|
@@ -1,617 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from collections import defaultdict
|
|
4
|
-
import multiprocessing
|
|
5
|
-
import queue
|
|
6
|
-
import threading
|
|
7
|
-
import time
|
|
8
|
-
import traceback
|
|
9
|
-
from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Type, Union
|
|
10
|
-
from uuid import UUID, uuid4
|
|
11
|
-
import logging
|
|
12
|
-
|
|
13
|
-
from mloda_core.abstract_plugins.components.framework_transformer.cfw_transformer import ComputeFrameworkTransformer
|
|
14
|
-
from mloda_core.abstract_plugins.function_extender import WrapperFunctionExtender
|
|
15
|
-
from mloda_core.abstract_plugins.components.feature_name import FeatureName
|
|
16
|
-
from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
|
|
17
|
-
from mloda_core.prepare.execution_plan import ExecutionPlan
|
|
18
|
-
from mloda_core.runtime.worker.multiprocessing_worker import worker
|
|
19
|
-
from mloda_core.runtime.worker.thread_worker import thread_worker
|
|
20
|
-
from mloda_core.core.cfw_manager import CfwManager, MyManager
|
|
21
|
-
from mloda_core.abstract_plugins.components.parallelization_modes import ParallelizationModes
|
|
22
|
-
from mloda_core.runtime.flight.runner_flight_server import ParallelRunnerFlightServer
|
|
23
|
-
from mloda_core.core.step.feature_group_step import FeatureGroupStep
|
|
24
|
-
from mloda_core.core.step.join_step import JoinStep
|
|
25
|
-
from mloda_core.core.step.transform_frame_work_step import TransformFrameworkStep
|
|
26
|
-
from mloda_core.abstract_plugins.components.feature_set import FeatureSet
|
|
27
|
-
from mloda_core.runtime.flight.flight_server import FlightServer
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
logger = logging.getLogger(__name__)
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
class Runner:
|
|
34
|
-
"""
|
|
35
|
-
Orchestrates the execution of an mloda based on a given execution plan.
|
|
36
|
-
|
|
37
|
-
This class manages compute frameworks (CFWs), data dependencies, and parallel execution
|
|
38
|
-
using threads or multiprocessing. It handles the execution of feature group steps,
|
|
39
|
-
transform framework steps, and join steps, while also managing data dropping and result collection.
|
|
40
|
-
"""
|
|
41
|
-
|
|
42
|
-
def __init__(
    self,
    execution_planner: ExecutionPlan,
    flight_server: Optional[ParallelRunnerFlightServer] = None,
) -> None:
    """
    Set up the bookkeeping a Runner needs before any step is executed.

    Args:
        execution_planner: The execution plan whose steps this runner walks through.
        flight_server: Optional flight server; any falsy value is stored as ``None``.
    """
    self.execution_planner = execution_planner

    # Only annotated here; no value is assigned in this method.
    self.cfw_register: CfwManager

    # Result / artifact bookkeeping.
    self.artifacts: Dict[str, Any] = {}
    self.track_data_to_drop: Dict[UUID, Set[UUID]] = {}
    self.result_data_collection: Dict[UUID, Any] = {}

    # multiprocessing
    self.location: Optional[str] = None
    self.result_uuids_collection: Set[UUID] = set()
    self.result_queues_collection: Set[multiprocessing.Queue[Any]] = set()
    self.tasks: List[Union[threading.Thread, multiprocessing.Process]] = []
    self.process_register: Dict[
        UUID, Tuple[multiprocessing.Process, multiprocessing.Queue[Any], multiprocessing.Queue[Any]]
    ] = defaultdict()

    # Initialize framework transformer
    self.transformer = ComputeFrameworkTransformer()

    # Keep the server only when a truthy one was handed in.
    self.flight_server = flight_server if flight_server else None

    # Pause used after asking a worker to drop data. Kept as a plain attribute
    # at this level for convenience; it can be lowered at runtime.
    self.wait_for_drop_data = 0.01
|
|
80
|
-
|
|
81
|
-
def _is_step_done(self, step_uuids: Set[UUID], finished_ids: Set[UUID]) -> bool:
|
|
82
|
-
"""
|
|
83
|
-
Checks if all steps identified by the given UUIDs have already been finished.
|
|
84
|
-
"""
|
|
85
|
-
return all(uuid in finished_ids for uuid in step_uuids)
|
|
86
|
-
|
|
87
|
-
def _drop_data_for_finished_cfws(self, finished_ids: Set[UUID]) -> None:
|
|
88
|
-
"""
|
|
89
|
-
Handles the dropping of intermediate data based on finished steps.
|
|
90
|
-
"""
|
|
91
|
-
if not finished_ids:
|
|
92
|
-
return
|
|
93
|
-
|
|
94
|
-
cfw_to_delete = set()
|
|
95
|
-
for cfw_uuid, step_uuids in self.track_data_to_drop.items():
|
|
96
|
-
if all(step_id in finished_ids for step_id in step_uuids):
|
|
97
|
-
self._drop_cfw_data(cfw_uuid)
|
|
98
|
-
cfw_to_delete.add(cfw_uuid)
|
|
99
|
-
|
|
100
|
-
for cfw_uuid in cfw_to_delete:
|
|
101
|
-
del self.track_data_to_drop[cfw_uuid]
|
|
102
|
-
|
|
103
|
-
def _drop_cfw_data(self, cfw_uuid: UUID) -> None:
|
|
104
|
-
"""Drops data associated with a CFW."""
|
|
105
|
-
if self.location:
|
|
106
|
-
# FlightServer.drop_tables(self.location, {str(self.cfw_collection[cfw_uuid].uuid)})
|
|
107
|
-
pass
|
|
108
|
-
else:
|
|
109
|
-
self.cfw_collection[cfw_uuid].drop_last_data()
|
|
110
|
-
|
|
111
|
-
def compute(self) -> None:
    """
    Executes the mloda pipeline based on the execution plan.

    This method iterates through the execution plan, checks dependencies,
    and executes steps using the appropriate parallelization mode.
    It also handles errors, result collection, and data dropping.

    The plan is re-scanned in a polling loop until the set of step UUIDs seen in
    the plan equals the set of finished UUIDs. On every exit path the artifacts
    are read from ``cfw_register`` and ``join()`` is called.

    Raises:
        ValueError: If ``cfw_register`` is ``None``.
        Exception: If ``cfw_register`` reports an error; it carries the recorded
            exception info and error message.
    """
    # NOTE(review): block nesting below was reconstructed from a flattened listing —
    # confirm the placement of `continue` and `time.sleep` against the upstream file.

    # NOTE(review): __init__ only annotates cfw_register; this presumably relies on
    # it having been assigned before compute() is called — confirm.
    if self.cfw_register is None:
        raise ValueError("CfwManager not initialized")

    finished_ids: Set[UUID] = set()
    to_finish_ids: Set[UUID] = set()
    currently_running_steps: Set[UUID] = set()

    # Fresh CFW collection for this run.
    self.cfw_collection: Dict[UUID, ComputeFrameWork] = {}

    try:
        # `len(finished_ids) == 0` keeps the loop alive while nothing has finished yet,
        # including the first pass where both sets are still empty.
        while to_finish_ids != finished_ids or len(finished_ids) == 0:
            if self.cfw_register:
                # Surface an error that was recorded on the register.
                error = self.cfw_register.get_error()
                if error:
                    logger.error(self.cfw_register.get_error_exc_info())
                    raise Exception(self.cfw_register.get_error_exc_info(), self.cfw_register.get_error_msg())
            else:
                # A falsy register ends the polling loop.
                break

            for step in self.execution_planner:
                # Every UUID of every step in the plan has to finish eventually.
                to_finish_ids.update(step.get_uuids())

                if isinstance(step, FeatureGroupStep):
                    self._drop_data_for_finished_cfws(finished_ids)

                if self._is_step_done(step.get_uuids(), finished_ids):
                    continue

                # check if step is currently running
                if self.currently_running_step(step.get_uuids(), currently_running_steps):
                    if self._process_step_result(step):
                        self._mark_step_as_finished(step.get_uuids(), finished_ids, currently_running_steps)
                    continue

                if not self._can_run_step(
                    step.required_uuids, step.get_uuids(), finished_ids, currently_running_steps
                ):
                    continue
                self._execute_step(step)

            # Short pause between scans of the plan.
            time.sleep(0.01)

    finally:
        # Runs on success and on error alike.
        self.artifacts = self.cfw_register.get_artifacts()
        self.join()
|
|
164
|
-
|
|
165
|
-
def get_done_steps_of_multiprocessing_result_queue(self) -> None:
|
|
166
|
-
"""
|
|
167
|
-
Retrieves UUIDs of finished steps from multiprocessing result queues.
|
|
168
|
-
|
|
169
|
-
This method iterates through the result queues and adds any available UUIDs
|
|
170
|
-
to the collection of finished UUIDs.
|
|
171
|
-
"""
|
|
172
|
-
for r_queue in self.result_queues_collection:
|
|
173
|
-
try:
|
|
174
|
-
result_uuid = r_queue.get(block=False)
|
|
175
|
-
self.result_uuids_collection.add(UUID(result_uuid))
|
|
176
|
-
except queue.Empty:
|
|
177
|
-
continue
|
|
178
|
-
|
|
179
|
-
def _process_step_result(self, step: Any) -> Union[Any, bool]:
|
|
180
|
-
"""
|
|
181
|
-
Handles the result of a step based on its type.
|
|
182
|
-
|
|
183
|
-
This method checks if a step is done, then performs specific actions based
|
|
184
|
-
on the step's type, such as adding results to the data collection or dropping data.
|
|
185
|
-
"""
|
|
186
|
-
# set step.is_done from other processes via result queue
|
|
187
|
-
self.get_done_steps_of_multiprocessing_result_queue()
|
|
188
|
-
if step.uuid in self.result_uuids_collection:
|
|
189
|
-
step.step_is_done = True
|
|
190
|
-
|
|
191
|
-
if not step.step_is_done:
|
|
192
|
-
return False
|
|
193
|
-
|
|
194
|
-
if isinstance(step, (TransformFrameworkStep, JoinStep)):
|
|
195
|
-
return True
|
|
196
|
-
|
|
197
|
-
if isinstance(step, FeatureGroupStep):
|
|
198
|
-
if step.features.any_uuid is None:
|
|
199
|
-
raise ValueError(f"from_feature_uuid should not be none. {step}")
|
|
200
|
-
|
|
201
|
-
cfw = self.get_cfw(step.compute_framework, step.features.any_uuid)
|
|
202
|
-
self.add_to_result_data_collection(cfw, step.features, step.uuid)
|
|
203
|
-
self._drop_data_if_possible(cfw, step)
|
|
204
|
-
|
|
205
|
-
return True
|
|
206
|
-
|
|
207
|
-
def _drop_data_if_possible(self, cfw: ComputeFrameWork, step: Any) -> None:
    """
    Drops data associated with a compute framework if possible.

    This method checks if data can be dropped based on the CFW's dependencies
    and either drops the data directly or sends a command to a worker process to do so.

    Args:
        cfw: Compute framework whose data may be dropped.
        step: Step whose ``features.features`` supply the candidate feature UUIDs.
    """
    # NOTE(review): the if/else nesting below was reconstructed from a flattened
    # listing — confirm against the upstream file.

    # Entries in process_register are (process, command_queue, result_queue);
    # a missing entry yields a None command queue.
    process, command_queue, result_queue = self.process_register.get(cfw.uuid, (None, None, None))

    feature_uuids_to_possible_drop = {f.uuid for f in step.features.features}

    if command_queue is None:
        # No command queue registered for this CFW: ask the CFW object directly.
        data_to_drop = cfw.add_already_calculated_children_and_drop_if_possible(
            feature_uuids_to_possible_drop, self.location
        )
        # Only a frozenset result is tracked for a later drop.
        if isinstance(data_to_drop, frozenset):
            self.track_data_to_drop[cfw.uuid] = set(data_to_drop)
    else:
        # Hand the candidate UUIDs to the registered command queue.
        command_queue.put(feature_uuids_to_possible_drop)

        flyway_datasets = self.cfw_register.get_uuid_flyway_datasets(cfw.uuid)
        if flyway_datasets:
            self.track_data_to_drop[cfw.uuid] = flyway_datasets

        # Pause for `wait_for_drop_data` seconds — presumably to give the worker
        # time to act on the command; confirm.
        time.sleep(self.wait_for_drop_data)
|
|
232
|
-
|
|
233
|
-
def get_cfw(self, compute_framework: Type[ComputeFrameWork], feature_uuid: UUID) -> ComputeFrameWork:
|
|
234
|
-
"""
|
|
235
|
-
Retrieves a compute framework based on its type and a feature UUID.
|
|
236
|
-
|
|
237
|
-
Args:
|
|
238
|
-
compute_framework: The type of compute framework to retrieve.
|
|
239
|
-
feature_uuid: The UUID of the feature associated with the compute framework.
|
|
240
|
-
"""
|
|
241
|
-
cfw_uuid = self.cfw_register.get_initialized_compute_framework_uuid(
|
|
242
|
-
compute_framework, feature_uuid=feature_uuid
|
|
243
|
-
)
|
|
244
|
-
if cfw_uuid is None:
|
|
245
|
-
raise ValueError(f"cfw_uuid should not be none: {compute_framework}.")
|
|
246
|
-
return self.cfw_collection[cfw_uuid]
|
|
247
|
-
|
|
248
|
-
def prepare_execute_step(self, step: Any, parallelization_mode: ParallelizationModes) -> UUID:
    """
    Prepares a step for execution by initializing or retrieving the associated CFW.

    Args:
        step: A FeatureGroupStep, TransformFrameworkStep or JoinStep.
        parallelization_mode: Mode passed on when a compute framework has to be created.

    Returns:
        The UUID of the compute framework resolved for the step.

    Raises:
        ValueError: If a required feature/CFW UUID cannot be resolved, or if no
            CFW UUID was determined for the step.
    """
    # NOTE(review): block nesting below was reconstructed from a flattened listing —
    # confirm against the upstream file (in particular the `return` inside the first
    # loop and the `childrens.add(...)` under `if step.link_id`).
    cfw_uuid: Optional[UUID] = None

    if isinstance(step, FeatureGroupStep):
        # Reuse a CFW already registered for one of the step's tfs_ids.
        for tfs_id in step.tfs_ids:
            cfw_uuid = self.cfw_register.get_cfw_uuid(step.compute_framework.get_class_name(), tfs_id)
            if cfw_uuid:
                return cfw_uuid

        feature_uuid = step.features.any_uuid

        if feature_uuid is None:
            raise ValueError(f"from_feature_uuid should not be none. {step, feature_uuid}")

        # Nothing to reuse: add a compute framework for this step.
        cfw_uuid = self.add_compute_framework(step, parallelization_mode, feature_uuid, set(step.children_if_root))
    elif isinstance(step, TransformFrameworkStep):
        # Find the first required UUID for which the source framework has a registered CFW.
        from_feature_uuid, from_cfw_uuid = None, None
        for r_f in step.required_uuids:
            from_cfw_uuid = self.cfw_register.get_cfw_uuid(step.from_framework.get_class_name(), r_f)
            if from_cfw_uuid:
                from_feature_uuid = r_f
                break

        if from_feature_uuid is None or from_cfw_uuid is None:
            raise ValueError(
                f"from_feature_uuid or from_cfw_uuid should not be none. {step, from_feature_uuid, from_cfw_uuid}"
            )

        from_cfw = self.cfw_collection[from_cfw_uuid]
        # Copy so the source CFW's own children_if_root is not mutated below.
        childrens = set(from_cfw.children_if_root)

        if step.link_id:
            from_feature_uuid = step.link_id
            childrens.add(from_feature_uuid)

        # NOTE(review): a new Lock object is created on every call, so it is not shared
        # with any other caller — confirm whether a shared lock was intended.
        with multiprocessing.Lock():
            cfw_uuid = self.init_compute_framework(step.to_framework, parallelization_mode, childrens, step.uuid)

    elif isinstance(step, JoinStep):
        # Look up the left framework's CFW via one of its UUIDs.
        cfw_uuid = self.cfw_register.get_cfw_uuid(
            step.left_framework.get_class_name(), next(iter(step.left_framework_uuids))
        )

    if cfw_uuid is None:
        raise ValueError(f"This should not occur. {step}")

    return cfw_uuid
|
|
298
|
-
|
|
299
|
-
def prepare_tfs_right_cfw(self, step: TransformFrameworkStep) -> UUID:
|
|
300
|
-
"""
|
|
301
|
-
Prepares the right CFW for a TransformFrameworkStep.
|
|
302
|
-
"""
|
|
303
|
-
uuid = step.right_framework_uuid if step.right_framework_uuid else next(iter(step.required_uuids))
|
|
304
|
-
|
|
305
|
-
cfw_uuid = self.cfw_register.get_cfw_uuid(step.from_framework.get_class_name(), uuid)
|
|
306
|
-
|
|
307
|
-
if cfw_uuid is None or isinstance(cfw_uuid, UUID) is False:
|
|
308
|
-
raise ValueError(
|
|
309
|
-
f"cfw_uuid should not be none in prepare_tfs: {step.from_framework.get_class_name()}, {uuid}"
|
|
310
|
-
)
|
|
311
|
-
|
|
312
|
-
return cfw_uuid
|
|
313
|
-
|
|
314
|
-
def prepare_tfs_and_joinstep(self, step: Any) -> Any:
    """
    Return the CFW a TransformFrameworkStep or JoinStep reads from, else ``None``.

    For a join step the left framework is used, because it is already transformed
    beforehand: the lookup first tries the link UUID and then falls back to one of
    the right framework UUIDs.

    Raises:
        ValueError: If neither lookup yields a CFW UUID for a join step.
    """
    if isinstance(step, TransformFrameworkStep):
        return self.cfw_collection[self.prepare_tfs_right_cfw(step)]

    if not isinstance(step, JoinStep):
        return None

    source_uuid = self.cfw_register.get_cfw_uuid(step.left_framework.get_class_name(), step.link.uuid)
    if source_uuid is None:
        # Fall back to a UUID of the right framework.
        source_uuid = self.cfw_register.get_cfw_uuid(
            step.left_framework.get_class_name(), next(iter(step.right_framework_uuids))
        )
        if source_uuid is None:
            raise ValueError(
                f"from_cfw_uuid should not be none: {step.left_framework.get_class_name()}, {step.link.uuid}"
            )

    return self.cfw_collection[source_uuid]
|
|
338
|
-
|
|
339
|
-
def _execute_step(self, step: Any) -> None:
|
|
340
|
-
"""
|
|
341
|
-
Executes a step based on its parallelization mode.
|
|
342
|
-
"""
|
|
343
|
-
execution_function = self._get_execution_function(
|
|
344
|
-
self.cfw_register.get_parallelization_modes(), step.get_parallelization_mode()
|
|
345
|
-
)
|
|
346
|
-
execution_function(step)
|
|
347
|
-
|
|
348
|
-
def sync_execute_step(self, step: Any) -> None:
    """
    Executes a step synchronously.

    The CFW for the step is prepared first; an exception raised there propagates.
    Any exception raised while resolving the source CFW or executing the step is
    caught, logged through the module logger and recorded on ``cfw_register``
    via ``set_error`` instead of being re-raised.

    Args:
        step: The step to execute; ``step_is_done`` is set to True on success.
    """
    cfw_uuid = self.prepare_execute_step(step, ParallelizationModes.SYNC)

    try:
        from_cfw = self.prepare_tfs_and_joinstep(step) or None
        step.execute(self.cfw_register, self.cfw_collection[cfw_uuid], from_cfw=from_cfw)
        step.step_is_done = True

    except Exception as e:
        # Format the traceback once and reuse it for both the log message and the register.
        exc_info = traceback.format_exc()
        error_message = f"An error occurred: {e}"
        msg = f"{error_message}\nFull traceback:\n{exc_info}"
        # Use the module-level logger (as compute() does) rather than the root logger.
        logger.error(msg)
        self.cfw_register.set_error(msg, exc_info)
|
|
365
|
-
|
|
366
|
-
def thread_execute_step(self, step: Any) -> None:
|
|
367
|
-
"""
|
|
368
|
-
Executes a step in a separate thread.
|
|
369
|
-
"""
|
|
370
|
-
cfw_uuid = self.prepare_execute_step(step, ParallelizationModes.THREADING)
|
|
371
|
-
from_cfw = self.prepare_tfs_and_joinstep(step) or None
|
|
372
|
-
|
|
373
|
-
task = threading.Thread(
|
|
374
|
-
target=thread_worker,
|
|
375
|
-
args=(step, self.cfw_register, self.cfw_collection[cfw_uuid], from_cfw),
|
|
376
|
-
)
|
|
377
|
-
|
|
378
|
-
self.tasks.append(task)
|
|
379
|
-
task.start()
|
|
380
|
-
|
|
381
|
-
def multi_execute_step(self, step: Any) -> None:
|
|
382
|
-
"""
|
|
383
|
-
Executes a step in a separate process.
|
|
384
|
-
"""
|
|
385
|
-
cfw_uuid = self.prepare_execute_step(step, ParallelizationModes.MULTIPROCESSING)
|
|
386
|
-
|
|
387
|
-
from_cfw = None
|
|
388
|
-
if isinstance(step, TransformFrameworkStep):
|
|
389
|
-
from_cfw = self.prepare_tfs_right_cfw(step)
|
|
390
|
-
|
|
391
|
-
process, command_queue, result_queue = self.process_register.get(
|
|
392
|
-
cfw_uuid, (None, multiprocessing.Queue(), multiprocessing.Queue())
|
|
393
|
-
)
|
|
394
|
-
|
|
395
|
-
if process is None:
|
|
396
|
-
process = multiprocessing.Process(
|
|
397
|
-
target=worker,
|
|
398
|
-
args=(command_queue, result_queue, self.cfw_register, self.cfw_collection[cfw_uuid], from_cfw),
|
|
399
|
-
)
|
|
400
|
-
self.process_register[cfw_uuid] = (process, command_queue, result_queue)
|
|
401
|
-
process.start()
|
|
402
|
-
self.tasks.append(process)
|
|
403
|
-
self.result_queues_collection.add(result_queue)
|
|
404
|
-
|
|
405
|
-
if command_queue:
|
|
406
|
-
command_queue.put(step)
|
|
407
|
-
else:
|
|
408
|
-
raise ValueError("Command queue should not be None.")
|
|
409
|
-
|
|
410
|
-
def join(self) -> None:
|
|
411
|
-
"""
|
|
412
|
-
Joins all tasks (threads or processes) and terminates multiprocessing processes.
|
|
413
|
-
"""
|
|
414
|
-
failed = False
|
|
415
|
-
for task in self.tasks:
|
|
416
|
-
try:
|
|
417
|
-
if isinstance(task, multiprocessing.Process):
|
|
418
|
-
task.terminate()
|
|
419
|
-
|
|
420
|
-
task.join()
|
|
421
|
-
except Exception as e:
|
|
422
|
-
logger.error(f"Error joining task: {e}")
|
|
423
|
-
failed = True
|
|
424
|
-
|
|
425
|
-
if failed:
|
|
426
|
-
raise Exception("Error while joining tasks")
|
|
427
|
-
|
|
428
|
-
def add_to_result_data_collection(self, cfw: ComputeFrameWork, features: FeatureSet, step_uuid: UUID) -> None:
|
|
429
|
-
"""
|
|
430
|
-
Adds the result data to the result data collection.
|
|
431
|
-
"""
|
|
432
|
-
if initial_requested_features := features.get_initial_requested_features():
|
|
433
|
-
result = None
|
|
434
|
-
result = self.get_result_data(cfw, initial_requested_features, self.location)
|
|
435
|
-
if result is not None:
|
|
436
|
-
self.result_data_collection[step_uuid] = result
|
|
437
|
-
|
|
438
|
-
def get_result_data(
|
|
439
|
-
self, cfw: ComputeFrameWork, selected_feature_names: Set[FeatureName], location: Optional[str] = None
|
|
440
|
-
) -> Any:
|
|
441
|
-
"""
|
|
442
|
-
Gets result data from the compute framework.
|
|
443
|
-
"""
|
|
444
|
-
if cfw.data is not None:
|
|
445
|
-
data = cfw.data
|
|
446
|
-
elif location:
|
|
447
|
-
data = FlightServer.download_table(location, str(cfw.uuid))
|
|
448
|
-
data = cfw.convert_flyserver_data_back(data, self.transformer)
|
|
449
|
-
else:
|
|
450
|
-
raise ValueError("Not implemented.")
|
|
451
|
-
|
|
452
|
-
return cfw.select_data_by_column_names(data, selected_feature_names)
|
|
453
|
-
|
|
454
|
-
def add_compute_framework(
|
|
455
|
-
self,
|
|
456
|
-
step: Any,
|
|
457
|
-
parallelization_mode: ParallelizationModes,
|
|
458
|
-
feature_uuid: UUID,
|
|
459
|
-
children_if_root: Set[UUID],
|
|
460
|
-
) -> UUID:
|
|
461
|
-
"""
|
|
462
|
-
Adds a compute framework to the CFW register and CFW collection.
|
|
463
|
-
|
|
464
|
-
Returns:
|
|
465
|
-
The UUID of the compute framework.
|
|
466
|
-
"""
|
|
467
|
-
with multiprocessing.Lock():
|
|
468
|
-
cfw_uuid = self.cfw_register.get_cfw_uuid(step.compute_framework.get_class_name(), feature_uuid)
|
|
469
|
-
# if cfw does not exist, create a new one
|
|
470
|
-
if cfw_uuid is None:
|
|
471
|
-
cfw_uuid = self.init_compute_framework(step.compute_framework, parallelization_mode, children_if_root)
|
|
472
|
-
|
|
473
|
-
return cfw_uuid
|
|
474
|
-
|
|
475
|
-
def init_compute_framework(
|
|
476
|
-
self,
|
|
477
|
-
cf_class: Type[ComputeFrameWork],
|
|
478
|
-
parallelization_mode: ParallelizationModes,
|
|
479
|
-
children_if_root: Set[UUID],
|
|
480
|
-
uuid: Optional[UUID] = None,
|
|
481
|
-
) -> UUID:
|
|
482
|
-
"""
|
|
483
|
-
Initializes a compute framework.
|
|
484
|
-
|
|
485
|
-
Returns:
|
|
486
|
-
The UUID of the compute framework.
|
|
487
|
-
"""
|
|
488
|
-
# get function_extender
|
|
489
|
-
function_extender = self.cfw_register.get_function_extender()
|
|
490
|
-
|
|
491
|
-
# init framework
|
|
492
|
-
new_cfw = cf_class(
|
|
493
|
-
parallelization_mode,
|
|
494
|
-
frozenset(children_if_root),
|
|
495
|
-
uuid or uuid4(),
|
|
496
|
-
function_extender=function_extender,
|
|
497
|
-
)
|
|
498
|
-
|
|
499
|
-
# add to register
|
|
500
|
-
self.cfw_register.add_cfw_to_compute_frameworks(new_cfw.get_uuid(), cf_class.get_class_name(), children_if_root)
|
|
501
|
-
|
|
502
|
-
# add to collection
|
|
503
|
-
self.cfw_collection[new_cfw.get_uuid()] = new_cfw
|
|
504
|
-
|
|
505
|
-
return new_cfw.get_uuid()
|
|
506
|
-
|
|
507
|
-
def currently_running_step(self, step_uuids: Set[UUID], currently_running_steps: Set[UUID]) -> bool:
|
|
508
|
-
"""
|
|
509
|
-
Checks if a step is currently running.
|
|
510
|
-
|
|
511
|
-
Returns:
|
|
512
|
-
True if the step is currently running, False otherwise.
|
|
513
|
-
"""
|
|
514
|
-
if next(iter(step_uuids)) not in currently_running_steps:
|
|
515
|
-
return False
|
|
516
|
-
return True
|
|
517
|
-
|
|
518
|
-
def __enter__(
|
|
519
|
-
self,
|
|
520
|
-
parallelization_modes: Set[ParallelizationModes] = {ParallelizationModes.SYNC},
|
|
521
|
-
function_extender: Optional[Set[WrapperFunctionExtender]] = None,
|
|
522
|
-
api_data: Optional[Dict[str, Any]] = None,
|
|
523
|
-
) -> None:
|
|
524
|
-
"""
|
|
525
|
-
Enters the context of the Runner.
|
|
526
|
-
"""
|
|
527
|
-
MyManager.register("CfwManager", CfwManager)
|
|
528
|
-
self.manager = MyManager().__enter__()
|
|
529
|
-
self.cfw_register = self.manager.CfwManager(parallelization_modes, function_extender) # type: ignore[attr-defined]
|
|
530
|
-
|
|
531
|
-
if self.flight_server:
|
|
532
|
-
if self.flight_server.flight_server_process is None:
|
|
533
|
-
self.flight_server.start_flight_server_process()
|
|
534
|
-
|
|
535
|
-
if self.flight_server:
|
|
536
|
-
self.location = self.flight_server.get_location()
|
|
537
|
-
|
|
538
|
-
if self.location is None:
|
|
539
|
-
raise ValueError("Location should not be None.")
|
|
540
|
-
|
|
541
|
-
self.cfw_register.set_location(self.location)
|
|
542
|
-
|
|
543
|
-
if api_data:
|
|
544
|
-
self.cfw_register.set_api_data(api_data)
|
|
545
|
-
|
|
546
|
-
def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
|
|
547
|
-
"""
|
|
548
|
-
Exits the context of the Runner.
|
|
549
|
-
|
|
550
|
-
Args:
|
|
551
|
-
exc_type: The exception type.
|
|
552
|
-
exc_val: The exception value.
|
|
553
|
-
exc_tb: The exception traceback.
|
|
554
|
-
"""
|
|
555
|
-
self.manager.shutdown()
|
|
556
|
-
|
|
557
|
-
def get_artifacts(self) -> Dict[str, Any]:
|
|
558
|
-
"""
|
|
559
|
-
Gets the artifacts.
|
|
560
|
-
"""
|
|
561
|
-
return self.artifacts
|
|
562
|
-
|
|
563
|
-
def _can_run_step(
|
|
564
|
-
self,
|
|
565
|
-
required_uuids: Set[UUID],
|
|
566
|
-
step_uuid: Set[UUID],
|
|
567
|
-
finished_steps: Set[UUID],
|
|
568
|
-
currently_running_steps: Set[UUID],
|
|
569
|
-
) -> bool:
|
|
570
|
-
"""
|
|
571
|
-
Checks if a step can be run. If it can, add it to the currently_running_steps set.
|
|
572
|
-
"""
|
|
573
|
-
|
|
574
|
-
with threading.Lock():
|
|
575
|
-
if required_uuids.issubset(finished_steps) and not step_uuid.intersection(currently_running_steps):
|
|
576
|
-
currently_running_steps.update(step_uuid)
|
|
577
|
-
return True
|
|
578
|
-
return False
|
|
579
|
-
|
|
580
|
-
def _mark_step_as_finished(
|
|
581
|
-
self, step_uuid: Set[UUID], finished_steps: Set[UUID], currently_running_steps: Set[UUID]
|
|
582
|
-
) -> None:
|
|
583
|
-
"""
|
|
584
|
-
Marks a step as finished.
|
|
585
|
-
"""
|
|
586
|
-
with threading.Lock():
|
|
587
|
-
currently_running_steps.difference_update(step_uuid)
|
|
588
|
-
finished_steps.update(step_uuid)
|
|
589
|
-
|
|
590
|
-
def _get_execution_function(
|
|
591
|
-
self, mode_by_cfw_register: Set[ParallelizationModes], mode_by_step: Set[ParallelizationModes]
|
|
592
|
-
) -> Callable[[Any], None]:
|
|
593
|
-
"""
|
|
594
|
-
Identifies the execution mode and returns the corresponding execute step function.
|
|
595
|
-
|
|
596
|
-
Returns:
|
|
597
|
-
The execute step function corresponding to the identified mode.
|
|
598
|
-
"""
|
|
599
|
-
modes = mode_by_cfw_register.intersection(mode_by_step)
|
|
600
|
-
|
|
601
|
-
if ParallelizationModes.MULTIPROCESSING in modes:
|
|
602
|
-
return self.multi_execute_step
|
|
603
|
-
elif ParallelizationModes.THREADING in modes:
|
|
604
|
-
return self.thread_execute_step
|
|
605
|
-
return self.sync_execute_step
|
|
606
|
-
|
|
607
|
-
def get_result(self) -> List[Any]:
|
|
608
|
-
"""
|
|
609
|
-
Gets the results.
|
|
610
|
-
"""
|
|
611
|
-
# TODO: This is a temporary solution. We need to return the data in a more structured way.
|
|
612
|
-
# Idea: return a dictionary with the feature name as key and the data as value.
|
|
613
|
-
# Idea: list can keep history for debug more
|
|
614
|
-
results = [v for k, v in self.result_data_collection.items()]
|
|
615
|
-
if len(results) > 0:
|
|
616
|
-
return results
|
|
617
|
-
raise ValueError("No results found.")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/base_transformer.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|