mloda 0.3.3__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mloda/__init__.py +17 -0
- {mloda_core → mloda/core}/abstract_plugins/components/base_artifact.py +2 -2
- {mloda_core → mloda/core}/abstract_plugins/components/base_validator.py +13 -0
- {mloda_core → mloda/core}/abstract_plugins/components/data_access_collection.py +1 -1
- {mloda_core → mloda/core}/abstract_plugins/components/data_types.py +39 -0
- {mloda_core → mloda/core}/abstract_plugins/components/feature.py +39 -33
- {mloda_core → mloda/core}/abstract_plugins/components/feature_chainer/feature_chain_parser.py +19 -19
- mloda/core/abstract_plugins/components/feature_chainer/feature_chain_parser_mixin.py +197 -0
- {mloda_core → mloda/core}/abstract_plugins/components/feature_collection.py +6 -6
- {mloda_core → mloda/core}/abstract_plugins/components/feature_group_version.py +8 -8
- {mloda_core → mloda/core}/abstract_plugins/components/feature_set.py +18 -24
- {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/cfw_transformer.py +2 -2
- {mloda_core → mloda/core}/abstract_plugins/components/index/add_index_feature.py +4 -4
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/api_input_data.py +3 -3
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/api_input_data_collection.py +2 -2
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/base_api_data.py +1 -1
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/base_input_data.py +6 -6
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/creator/data_creator.py +3 -3
- mloda/core/abstract_plugins/components/link.py +437 -0
- {mloda_core → mloda/core}/abstract_plugins/components/match_data/match_data.py +3 -3
- {mloda_core → mloda/core}/abstract_plugins/components/merge/base_merge_engine.py +2 -2
- {mloda_core → mloda/core}/abstract_plugins/components/options.py +12 -36
- {mloda_core → mloda/core}/abstract_plugins/components/parallelization_modes.py +1 -1
- {mloda_core → mloda/core}/abstract_plugins/components/plugin_option/plugin_collector.py +14 -14
- mloda/core/abstract_plugins/components/validators/datatype_validator.py +96 -0
- mloda/core/abstract_plugins/components/validators/feature_set_validator.py +38 -0
- mloda/core/abstract_plugins/components/validators/feature_validator.py +23 -0
- mloda/core/abstract_plugins/components/validators/link_validator.py +79 -0
- mloda/core/abstract_plugins/components/validators/options_validator.py +57 -0
- mloda_core/abstract_plugins/compute_frame_work.py → mloda/core/abstract_plugins/compute_framework.py +46 -37
- mloda_core/abstract_plugins/abstract_feature_group.py → mloda/core/abstract_plugins/feature_group.py +56 -33
- mloda/core/abstract_plugins/function_extender.py +78 -0
- mloda/core/api/plugin_docs.py +220 -0
- mloda/core/api/plugin_info.py +32 -0
- {mloda_core → mloda/core}/api/prepare/setup_compute_framework.py +11 -11
- {mloda_core → mloda/core}/api/request.py +42 -33
- {mloda_core → mloda/core}/core/cfw_manager.py +8 -8
- {mloda_core → mloda/core}/core/engine.py +47 -46
- {mloda_core → mloda/core}/core/step/abstract_step.py +7 -7
- {mloda_core → mloda/core}/core/step/feature_group_step.py +12 -12
- {mloda_core → mloda/core}/core/step/join_step.py +14 -14
- {mloda_core → mloda/core}/core/step/transform_frame_work_step.py +16 -16
- {mloda_core → mloda/core}/filter/filter_engine.py +1 -1
- {mloda_core → mloda/core}/filter/filter_type_enum.py +1 -1
- {mloda_core → mloda/core}/filter/global_filter.py +23 -23
- {mloda_core → mloda/core}/filter/single_filter.py +6 -6
- {mloda_core → mloda/core}/prepare/accessible_plugins.py +16 -18
- {mloda_core → mloda/core}/prepare/execution_plan.py +65 -39
- {mloda_core → mloda/core}/prepare/graph/build_graph.py +6 -6
- {mloda_core → mloda/core}/prepare/graph/graph.py +1 -1
- {mloda_core → mloda/core}/prepare/graph/properties.py +5 -5
- {mloda_core → mloda/core}/prepare/identify_feature_group.py +12 -14
- {mloda_core → mloda/core}/prepare/joinstep_collection.py +3 -3
- {mloda_core → mloda/core}/prepare/resolve_compute_frameworks.py +6 -6
- {mloda_core → mloda/core}/prepare/resolve_graph.py +11 -11
- {mloda_core → mloda/core}/prepare/resolve_links.py +11 -31
- mloda/core/prepare/validators/resolve_link_validator.py +32 -0
- mloda/core/runtime/compute_framework_executor.py +271 -0
- mloda/core/runtime/data_lifecycle_manager.py +160 -0
- mloda/core/runtime/flight/__init__.py +0 -0
- {mloda_core → mloda/core}/runtime/flight/runner_flight_server.py +1 -1
- mloda/core/runtime/run.py +317 -0
- mloda/core/runtime/worker/__init__.py +0 -0
- {mloda_core → mloda/core}/runtime/worker/multiprocessing_worker.py +15 -10
- {mloda_core → mloda/core}/runtime/worker/thread_worker.py +2 -2
- mloda/core/runtime/worker_manager.py +96 -0
- mloda/provider/__init__.py +101 -0
- mloda/steward/__init__.py +25 -0
- mloda/user/__init__.py +57 -0
- {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/METADATA +18 -22
- mloda-0.4.0.dist-info/RECORD +248 -0
- {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/top_level.txt +1 -1
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_framework.py +15 -13
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_merge_engine.py +3 -3
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_framework.py +12 -10
- mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/pandas/dataframe.py +18 -16
- mloda_plugins/compute_framework/base_implementations/pandas/pandas_filter_engine.py +36 -13
- mloda_plugins/compute_framework/base_implementations/pandas/pandas_merge_engine.py +7 -7
- mloda_plugins/compute_framework/base_implementations/pandas/pandaspyarrowtransformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/polars/dataframe.py +16 -14
- mloda_plugins/compute_framework/base_implementations/polars/lazy_dataframe.py +13 -12
- mloda_plugins/compute_framework/base_implementations/polars/polars_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/polars/polars_lazy_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/polars/polars_merge_engine.py +3 -3
- mloda_plugins/compute_framework/base_implementations/polars/polars_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_merge_engine.py +3 -3
- mloda_plugins/compute_framework/base_implementations/pyarrow/table.py +12 -10
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_framework.py +11 -9
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_merge_engine.py +3 -3
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/spark/spark_filter_engine.py +2 -2
- mloda_plugins/compute_framework/base_implementations/spark/spark_framework.py +17 -15
- mloda_plugins/compute_framework/base_implementations/spark/spark_merge_engine.py +3 -3
- mloda_plugins/compute_framework/base_implementations/spark/spark_pyarrow_transformer.py +1 -1
- mloda_plugins/config/feature/loader.py +2 -2
- mloda_plugins/feature_group/experimental/aggregated_feature_group/base.py +45 -62
- mloda_plugins/feature_group/experimental/aggregated_feature_group/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/aggregated_feature_group/polars_lazy.py +2 -2
- mloda_plugins/feature_group/experimental/aggregated_feature_group/pyarrow.py +2 -2
- mloda_plugins/feature_group/experimental/clustering/base.py +69 -97
- mloda_plugins/feature_group/experimental/clustering/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/data_quality/missing_value/base.py +58 -79
- mloda_plugins/feature_group/experimental/data_quality/missing_value/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/data_quality/missing_value/pyarrow.py +2 -2
- mloda_plugins/feature_group/experimental/data_quality/missing_value/python_dict.py +2 -2
- mloda_plugins/feature_group/experimental/default_options_key.py +16 -19
- mloda_plugins/feature_group/experimental/dimensionality_reduction/base.py +80 -94
- mloda_plugins/feature_group/experimental/dimensionality_reduction/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/dynamic_feature_group_factory/dynamic_feature_group_factory.py +24 -24
- mloda_plugins/feature_group/experimental/forecasting/base.py +106 -104
- mloda_plugins/feature_group/experimental/forecasting/forecasting_artifact.py +2 -2
- mloda_plugins/feature_group/experimental/forecasting/pandas.py +15 -15
- mloda_plugins/feature_group/experimental/geo_distance/base.py +50 -42
- mloda_plugins/feature_group/experimental/geo_distance/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/llm/cli.py +4 -4
- mloda_plugins/feature_group/experimental/llm/cli_features/refactor_git_cached.py +19 -19
- mloda_plugins/feature_group/experimental/llm/installed_packages_feature_group.py +8 -8
- mloda_plugins/feature_group/experimental/llm/list_directory_feature_group.py +5 -5
- mloda_plugins/feature_group/experimental/llm/llm_api/claude.py +3 -3
- mloda_plugins/feature_group/experimental/llm/llm_api/gemini.py +3 -3
- mloda_plugins/feature_group/experimental/llm/llm_api/llm_base_request.py +5 -5
- mloda_plugins/feature_group/experimental/llm/llm_api/openai.py +3 -3
- mloda_plugins/feature_group/experimental/llm/llm_api/request_loop.py +6 -6
- mloda_plugins/feature_group/experimental/llm/llm_file_selector.py +10 -10
- mloda_plugins/feature_group/experimental/llm/tools/tool_collection.py +1 -1
- mloda_plugins/feature_group/experimental/node_centrality/base.py +46 -72
- mloda_plugins/feature_group/experimental/node_centrality/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/sklearn/encoding/base.py +51 -51
- mloda_plugins/feature_group/experimental/sklearn/encoding/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/sklearn/pipeline/base.py +52 -39
- mloda_plugins/feature_group/experimental/sklearn/pipeline/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/sklearn/scaling/base.py +44 -58
- mloda_plugins/feature_group/experimental/sklearn/scaling/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/sklearn/sklearn_artifact.py +2 -2
- mloda_plugins/feature_group/experimental/source_input_feature.py +15 -15
- mloda_plugins/feature_group/experimental/text_cleaning/base.py +38 -61
- mloda_plugins/feature_group/experimental/text_cleaning/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/text_cleaning/python_dict.py +2 -2
- mloda_plugins/feature_group/experimental/time_window/base.py +106 -93
- mloda_plugins/feature_group/experimental/time_window/pandas.py +13 -13
- mloda_plugins/feature_group/experimental/time_window/pyarrow.py +12 -12
- mloda_plugins/feature_group/input_data/api_data/api_data.py +9 -11
- mloda_plugins/feature_group/input_data/read_context_files.py +7 -7
- mloda_plugins/feature_group/input_data/read_db.py +7 -9
- mloda_plugins/feature_group/input_data/read_db_feature.py +4 -4
- mloda_plugins/feature_group/input_data/read_dbs/sqlite.py +23 -13
- mloda_plugins/feature_group/input_data/read_file.py +8 -8
- mloda_plugins/feature_group/input_data/read_file_feature.py +4 -4
- mloda_plugins/feature_group/input_data/read_files/csv.py +6 -6
- mloda_plugins/feature_group/input_data/read_files/feather.py +5 -5
- mloda_plugins/feature_group/input_data/read_files/json.py +5 -5
- mloda_plugins/feature_group/input_data/read_files/orc.py +5 -5
- mloda_plugins/feature_group/input_data/read_files/parquet.py +5 -5
- mloda_plugins/feature_group/input_data/read_files/text_file_reader.py +5 -5
- mloda_plugins/function_extender/base_implementations/otel/otel_extender.py +4 -4
- mloda-0.3.3.dist-info/RECORD +0 -230
- mloda_core/abstract_plugins/components/link.py +0 -286
- mloda_core/abstract_plugins/function_extender.py +0 -34
- mloda_core/runtime/run.py +0 -617
- {mloda_core → mloda/core}/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/domain.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/feature_chainer/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/feature_name.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/base_transformer.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/hashable_dict.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/index/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/index/index.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/input_data/creator/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/match_data/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/merge/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/plugin_option/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/components/utils.py +0 -0
- {mloda_core/abstract_plugins/plugin_loader → mloda/core/abstract_plugins/components/validators}/__init__.py +0 -0
- {mloda_core/api → mloda/core/abstract_plugins/plugin_loader}/__init__.py +0 -0
- {mloda_core → mloda/core}/abstract_plugins/plugin_loader/plugin_loader.py +0 -0
- {mloda_core/api/prepare → mloda/core/api}/__init__.py +0 -0
- {mloda_core/core → mloda/core/api/prepare}/__init__.py +0 -0
- {mloda_core/core/step → mloda/core/core}/__init__.py +0 -0
- {mloda_core/filter → mloda/core/core/step}/__init__.py +0 -0
- {mloda_core/prepare → mloda/core/filter}/__init__.py +0 -0
- {mloda_core → mloda/core}/filter/filter_parameter.py +0 -0
- {mloda_core/prepare/graph → mloda/core/prepare}/__init__.py +0 -0
- {mloda_core/runtime → mloda/core/prepare/graph}/__init__.py +0 -0
- {mloda_core/runtime/flight → mloda/core/prepare/validators}/__init__.py +0 -0
- {mloda_core/runtime/worker → mloda/core/runtime}/__init__.py +0 -0
- {mloda_core → mloda/core}/runtime/flight/flight_server.py +0 -0
- {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/WHEEL +0 -0
- {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/entry_points.txt +0 -0
- {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/licenses/LICENSE.TXT +0 -0
- {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -1,286 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from dataclasses import dataclass, FrozenInstanceError
|
|
4
|
-
from enum import Enum
|
|
5
|
-
from uuid import uuid4
|
|
6
|
-
from typing import Any, Dict, Optional, Set, Tuple, Type, Union
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
from mloda_core.abstract_plugins.components.index.index import Index
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
class JoinType(Enum):
|
|
13
|
-
"""
|
|
14
|
-
Enum defining types of dataset merge operations.
|
|
15
|
-
|
|
16
|
-
Attributes:
|
|
17
|
-
INNER: Includes rows with matching keys from both datasets.
|
|
18
|
-
LEFT: Includes all rows from the left dataset, with matches from the right.
|
|
19
|
-
RIGHT: Includes all rows from the right dataset, with matches from the left.
|
|
20
|
-
OUTER: Includes all rows from both datasets, filling unmatched values with nulls.
|
|
21
|
-
APPEND: Stacks datasets vertically, preserving all rows from both.
|
|
22
|
-
UNION: Combines datasets, removing duplicate rows.
|
|
23
|
-
"""
|
|
24
|
-
|
|
25
|
-
INNER = "inner"
|
|
26
|
-
LEFT = "left"
|
|
27
|
-
RIGHT = "right"
|
|
28
|
-
OUTER = "outer"
|
|
29
|
-
APPEND = "append"
|
|
30
|
-
UNION = "union"
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
class JoinSpec:
|
|
34
|
-
"""Specification for one side of a join operation.
|
|
35
|
-
|
|
36
|
-
Args:
|
|
37
|
-
feature_group: The feature group class for this side of the join.
|
|
38
|
-
index: Join column(s) - can be:
|
|
39
|
-
- str: single column name, e.g., "id"
|
|
40
|
-
- Tuple[str, ...]: multiple columns, e.g., ("col1", "col2")
|
|
41
|
-
- Index: explicit Index object
|
|
42
|
-
"""
|
|
43
|
-
|
|
44
|
-
feature_group: Type[Any]
|
|
45
|
-
index: Index
|
|
46
|
-
|
|
47
|
-
def __init__(self, feature_group: Type[Any], index: Union[Index, Tuple[str, ...], str]) -> None:
|
|
48
|
-
"""Create JoinSpec, converting index input to Index if needed."""
|
|
49
|
-
if isinstance(index, str):
|
|
50
|
-
if not index:
|
|
51
|
-
raise ValueError("Index column name cannot be empty")
|
|
52
|
-
index = Index((index,))
|
|
53
|
-
elif isinstance(index, tuple):
|
|
54
|
-
if not index:
|
|
55
|
-
raise ValueError("Index tuple cannot be empty")
|
|
56
|
-
index = Index(index)
|
|
57
|
-
|
|
58
|
-
object.__setattr__(self, "feature_group", feature_group)
|
|
59
|
-
object.__setattr__(self, "index", index)
|
|
60
|
-
|
|
61
|
-
def __setattr__(self, name: str, value: Any) -> None:
|
|
62
|
-
raise FrozenInstanceError(f"cannot assign to field '{name}'")
|
|
63
|
-
|
|
64
|
-
def __eq__(self, other: Any) -> bool:
|
|
65
|
-
if not isinstance(other, JoinSpec):
|
|
66
|
-
return False
|
|
67
|
-
return self.feature_group == other.feature_group and self.index == other.index
|
|
68
|
-
|
|
69
|
-
def __hash__(self) -> int:
|
|
70
|
-
return hash((self.feature_group, self.index))
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
class Link:
|
|
74
|
-
"""
|
|
75
|
-
Defines a join relationship between two feature groups.
|
|
76
|
-
|
|
77
|
-
Args:
|
|
78
|
-
jointype: Type of join operation (inner, left, right, outer, append, union).
|
|
79
|
-
left: JoinSpec for the left side of the join.
|
|
80
|
-
right: JoinSpec for the right side of the join.
|
|
81
|
-
left_pointer: Optional dict to distinguish left instance in self-joins.
|
|
82
|
-
Must match key-value pairs in the left feature's options.
|
|
83
|
-
right_pointer: Optional dict to distinguish right instance in self-joins.
|
|
84
|
-
Must match key-value pairs in the right feature's options.
|
|
85
|
-
|
|
86
|
-
Example:
|
|
87
|
-
>>> # Simple join using string index (single column)
|
|
88
|
-
>>> Link.inner(JoinSpec(UserFG, "user_id"), JoinSpec(OrderFG, "user_id"))
|
|
89
|
-
>>>
|
|
90
|
-
>>> # Multi-column join using tuple index
|
|
91
|
-
>>> Link.inner(JoinSpec(UserFG, ("id", "date")), JoinSpec(OrderFG, ("user_id", "order_date")))
|
|
92
|
-
>>>
|
|
93
|
-
>>> # Self-join with pointers
|
|
94
|
-
>>> Link("inner", JoinSpec(UserFG, "user_id"), JoinSpec(UserFG, "user_id"),
|
|
95
|
-
... left_pointer={"side": "manager"},
|
|
96
|
-
... right_pointer={"side": "employee"})
|
|
97
|
-
|
|
98
|
-
Polymorphic Matching:
|
|
99
|
-
Links support inheritance-based matching, allowing a link defined with base
|
|
100
|
-
classes to automatically apply to subclasses. The matching follows these rules:
|
|
101
|
-
|
|
102
|
-
1. **Exact match first**: If a link's feature groups exactly match the classes
|
|
103
|
-
being joined, it takes priority over any polymorphic matches.
|
|
104
|
-
|
|
105
|
-
2. **Balanced inheritance**: For polymorphic matches, both sides must have the
|
|
106
|
-
same inheritance distance. This prevents sibling class mismatches.
|
|
107
|
-
|
|
108
|
-
Example - Given hierarchy:
|
|
109
|
-
BaseFeatureGroup
|
|
110
|
-
├── ChildA
|
|
111
|
-
└── ChildB
|
|
112
|
-
|
|
113
|
-
Link(BaseFeatureGroup, BaseFeatureGroup) will match:
|
|
114
|
-
- (ChildA, ChildA) ✓ - both sides distance=1
|
|
115
|
-
- (ChildB, ChildB) ✓ - both sides distance=1
|
|
116
|
-
- (ChildA, ChildB) ✗ - rejected: siblings, not balanced inheritance
|
|
117
|
-
|
|
118
|
-
3. **Most specific wins**: Among valid matches, the link closest in the
|
|
119
|
-
inheritance hierarchy is selected.
|
|
120
|
-
"""
|
|
121
|
-
|
|
122
|
-
def __init__(
|
|
123
|
-
self,
|
|
124
|
-
jointype: Union[JoinType, str],
|
|
125
|
-
left: JoinSpec,
|
|
126
|
-
right: JoinSpec,
|
|
127
|
-
left_pointer: Optional[Dict[str, Any]] = None,
|
|
128
|
-
right_pointer: Optional[Dict[str, Any]] = None,
|
|
129
|
-
) -> None:
|
|
130
|
-
self.jointype = JoinType(jointype) if isinstance(jointype, str) else jointype
|
|
131
|
-
self.left_feature_group = left.feature_group
|
|
132
|
-
self.right_feature_group = right.feature_group
|
|
133
|
-
self.left_index = left.index
|
|
134
|
-
self.right_index = right.index
|
|
135
|
-
self.left_pointer = left_pointer
|
|
136
|
-
self.right_pointer = right_pointer
|
|
137
|
-
|
|
138
|
-
self.uuid = uuid4()
|
|
139
|
-
|
|
140
|
-
def __str__(self) -> str:
|
|
141
|
-
return f"{self.jointype.value} {self.left_feature_group.get_class_name()} {self.left_index} {self.right_feature_group.get_class_name()} {self.right_index} {self.uuid}"
|
|
142
|
-
|
|
143
|
-
@classmethod
|
|
144
|
-
def inner(
|
|
145
|
-
cls,
|
|
146
|
-
left: JoinSpec,
|
|
147
|
-
right: JoinSpec,
|
|
148
|
-
) -> Link:
|
|
149
|
-
return cls(JoinType.INNER, left, right)
|
|
150
|
-
|
|
151
|
-
@classmethod
|
|
152
|
-
def left(
|
|
153
|
-
cls,
|
|
154
|
-
left: JoinSpec,
|
|
155
|
-
right: JoinSpec,
|
|
156
|
-
) -> Link:
|
|
157
|
-
return cls(JoinType.LEFT, left, right)
|
|
158
|
-
|
|
159
|
-
@classmethod
|
|
160
|
-
def right(
|
|
161
|
-
cls,
|
|
162
|
-
left: JoinSpec,
|
|
163
|
-
right: JoinSpec,
|
|
164
|
-
) -> Link:
|
|
165
|
-
return cls(JoinType.RIGHT, left, right)
|
|
166
|
-
|
|
167
|
-
@classmethod
|
|
168
|
-
def outer(
|
|
169
|
-
cls,
|
|
170
|
-
left: JoinSpec,
|
|
171
|
-
right: JoinSpec,
|
|
172
|
-
) -> Link:
|
|
173
|
-
return cls(JoinType.OUTER, left, right)
|
|
174
|
-
|
|
175
|
-
@classmethod
|
|
176
|
-
def append(
|
|
177
|
-
cls,
|
|
178
|
-
left: JoinSpec,
|
|
179
|
-
right: JoinSpec,
|
|
180
|
-
) -> Link:
|
|
181
|
-
return cls(JoinType.APPEND, left, right)
|
|
182
|
-
|
|
183
|
-
@classmethod
|
|
184
|
-
def union(
|
|
185
|
-
cls,
|
|
186
|
-
left: JoinSpec,
|
|
187
|
-
right: JoinSpec,
|
|
188
|
-
) -> Link:
|
|
189
|
-
return cls(JoinType.UNION, left, right)
|
|
190
|
-
|
|
191
|
-
def matches_exact(
|
|
192
|
-
self,
|
|
193
|
-
other_left_feature_group: Type[Any],
|
|
194
|
-
other_right_feature_group: Type[Any],
|
|
195
|
-
) -> bool:
|
|
196
|
-
"""Exact class name match only."""
|
|
197
|
-
left_match: bool = self.left_feature_group.get_class_name() == other_left_feature_group.get_class_name()
|
|
198
|
-
right_match: bool = self.right_feature_group.get_class_name() == other_right_feature_group.get_class_name()
|
|
199
|
-
return left_match and right_match
|
|
200
|
-
|
|
201
|
-
def matches_polymorphic(
|
|
202
|
-
self,
|
|
203
|
-
other_left_feature_group: Type[Any],
|
|
204
|
-
other_right_feature_group: Type[Any],
|
|
205
|
-
) -> bool:
|
|
206
|
-
"""Subclass match (inheritance). Returns True if both sides are subclasses."""
|
|
207
|
-
return issubclass(other_left_feature_group, self.left_feature_group) and issubclass(
|
|
208
|
-
other_right_feature_group, self.right_feature_group
|
|
209
|
-
)
|
|
210
|
-
|
|
211
|
-
def matches(
|
|
212
|
-
self,
|
|
213
|
-
other_left_feature_group: Type[Any],
|
|
214
|
-
other_right_feature_group: Type[Any],
|
|
215
|
-
) -> bool:
|
|
216
|
-
"""Combined match: exact OR polymorphic."""
|
|
217
|
-
return self.matches_exact(other_left_feature_group, other_right_feature_group) or self.matches_polymorphic(
|
|
218
|
-
other_left_feature_group, other_right_feature_group
|
|
219
|
-
)
|
|
220
|
-
|
|
221
|
-
def __eq__(self, other: Any) -> bool:
|
|
222
|
-
if not isinstance(other, Link):
|
|
223
|
-
return False
|
|
224
|
-
return (
|
|
225
|
-
self.jointype == other.jointype
|
|
226
|
-
and self.left_feature_group.get_class_name() == other.left_feature_group.get_class_name()
|
|
227
|
-
and self.right_feature_group.get_class_name() == other.right_feature_group.get_class_name()
|
|
228
|
-
and self.left_index == other.left_index
|
|
229
|
-
and self.right_index == other.right_index
|
|
230
|
-
)
|
|
231
|
-
|
|
232
|
-
def __hash__(self) -> int:
|
|
233
|
-
return hash(
|
|
234
|
-
(
|
|
235
|
-
self.jointype,
|
|
236
|
-
self.left_feature_group.get_class_name(),
|
|
237
|
-
self.right_feature_group.get_class_name(),
|
|
238
|
-
self.left_index,
|
|
239
|
-
self.right_index,
|
|
240
|
-
)
|
|
241
|
-
)
|
|
242
|
-
|
|
243
|
-
@staticmethod
|
|
244
|
-
def validate(links: Optional[Set[Link]] = None) -> None:
|
|
245
|
-
if links is None:
|
|
246
|
-
return
|
|
247
|
-
|
|
248
|
-
for i_link in links:
|
|
249
|
-
if i_link.jointype not in JoinType:
|
|
250
|
-
raise ValueError(f"Join type {i_link.jointype} is not supported")
|
|
251
|
-
|
|
252
|
-
for j_link in links:
|
|
253
|
-
if i_link == j_link:
|
|
254
|
-
continue
|
|
255
|
-
|
|
256
|
-
# case: A B and B A -> is not clear which join to use
|
|
257
|
-
# We exclude here append and union, because they are not directional.
|
|
258
|
-
if (
|
|
259
|
-
i_link.left_feature_group == j_link.right_feature_group
|
|
260
|
-
and i_link.right_feature_group == j_link.left_feature_group
|
|
261
|
-
and i_link.jointype not in [JoinType.APPEND, JoinType.UNION]
|
|
262
|
-
):
|
|
263
|
-
raise ValueError(
|
|
264
|
-
f"Link {i_link} and {j_link} have at least two different defined joins. Please remove one."
|
|
265
|
-
)
|
|
266
|
-
|
|
267
|
-
# case: Multiple different join types between two feature groups
|
|
268
|
-
if (
|
|
269
|
-
i_link.left_feature_group == j_link.left_feature_group
|
|
270
|
-
and i_link.right_feature_group == j_link.right_feature_group
|
|
271
|
-
and i_link.jointype != j_link.jointype
|
|
272
|
-
):
|
|
273
|
-
raise ValueError(
|
|
274
|
-
f"Link {i_link} and {j_link} have different join types for the same feature groups. Please remove one."
|
|
275
|
-
)
|
|
276
|
-
|
|
277
|
-
# case: Multiple right joins
|
|
278
|
-
# For now, only small right joins are supported. Lets see if any use case will need this in future.
|
|
279
|
-
if i_link.jointype == JoinType.RIGHT:
|
|
280
|
-
if (
|
|
281
|
-
i_link.left_feature_group == j_link.left_feature_group
|
|
282
|
-
or i_link.left_feature_group == j_link.right_feature_group
|
|
283
|
-
):
|
|
284
|
-
raise ValueError(
|
|
285
|
-
f"Link {i_link} and {j_link} have multiple right joins for the same feature group on the left side or switching from left to right side although using right join. Please reconsider your joinlogic and if possible, use left joins instead of rightjoins. This will currently break the planner or during execution."
|
|
286
|
-
)
|
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
from abc import ABC, abstractmethod
|
|
2
|
-
from enum import Enum
|
|
3
|
-
from typing import Any, Set
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
class WrapperFunctionEnum(Enum):
|
|
7
|
-
FEATURE_GROUP_CALCULATE_FEATURE = "feature_group_calculate_feature"
|
|
8
|
-
VALIDATE_INPUT_FEATURE = "validate_input_feature"
|
|
9
|
-
VALIDATE_OUTPUT_FEATURE = "validate_output_feature"
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
class WrapperFunctionExtender(ABC):
|
|
13
|
-
"""
|
|
14
|
-
- Automated Metadata harvestor connector
|
|
15
|
-
- Messaging Integration ( email )
|
|
16
|
-
- Automation Tools
|
|
17
|
-
- data lineage mapping
|
|
18
|
-
- Impact Analysis
|
|
19
|
-
- Audit Trail
|
|
20
|
-
- Monitoring alerts
|
|
21
|
-
- metadata capture
|
|
22
|
-
- Event logging
|
|
23
|
-
- metrics on feature calculation
|
|
24
|
-
- visibility / observibility
|
|
25
|
-
- Performance
|
|
26
|
-
"""
|
|
27
|
-
|
|
28
|
-
@abstractmethod
|
|
29
|
-
def wraps(self) -> Set[WrapperFunctionEnum]:
|
|
30
|
-
pass
|
|
31
|
-
|
|
32
|
-
@abstractmethod
|
|
33
|
-
def __call__(self, func: Any, *args: Any, **kwargs: Any) -> Any:
|
|
34
|
-
pass
|