mloda 0.3.3__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. mloda/__init__.py +17 -0
  2. {mloda_core → mloda/core}/abstract_plugins/components/base_artifact.py +2 -2
  3. {mloda_core → mloda/core}/abstract_plugins/components/base_validator.py +13 -0
  4. {mloda_core → mloda/core}/abstract_plugins/components/data_access_collection.py +1 -1
  5. {mloda_core → mloda/core}/abstract_plugins/components/data_types.py +39 -0
  6. {mloda_core → mloda/core}/abstract_plugins/components/feature.py +39 -33
  7. {mloda_core → mloda/core}/abstract_plugins/components/feature_chainer/feature_chain_parser.py +19 -19
  8. mloda/core/abstract_plugins/components/feature_chainer/feature_chain_parser_mixin.py +197 -0
  9. {mloda_core → mloda/core}/abstract_plugins/components/feature_collection.py +6 -6
  10. {mloda_core → mloda/core}/abstract_plugins/components/feature_group_version.py +8 -8
  11. {mloda_core → mloda/core}/abstract_plugins/components/feature_set.py +18 -24
  12. {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/cfw_transformer.py +2 -2
  13. {mloda_core → mloda/core}/abstract_plugins/components/index/add_index_feature.py +4 -4
  14. {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/api_input_data.py +3 -3
  15. {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/api_input_data_collection.py +2 -2
  16. {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/base_api_data.py +1 -1
  17. {mloda_core → mloda/core}/abstract_plugins/components/input_data/base_input_data.py +6 -6
  18. {mloda_core → mloda/core}/abstract_plugins/components/input_data/creator/data_creator.py +3 -3
  19. mloda/core/abstract_plugins/components/link.py +437 -0
  20. {mloda_core → mloda/core}/abstract_plugins/components/match_data/match_data.py +3 -3
  21. {mloda_core → mloda/core}/abstract_plugins/components/merge/base_merge_engine.py +2 -2
  22. {mloda_core → mloda/core}/abstract_plugins/components/options.py +12 -36
  23. {mloda_core → mloda/core}/abstract_plugins/components/parallelization_modes.py +1 -1
  24. {mloda_core → mloda/core}/abstract_plugins/components/plugin_option/plugin_collector.py +14 -14
  25. mloda/core/abstract_plugins/components/validators/datatype_validator.py +96 -0
  26. mloda/core/abstract_plugins/components/validators/feature_set_validator.py +38 -0
  27. mloda/core/abstract_plugins/components/validators/feature_validator.py +23 -0
  28. mloda/core/abstract_plugins/components/validators/link_validator.py +79 -0
  29. mloda/core/abstract_plugins/components/validators/options_validator.py +57 -0
  30. mloda_core/abstract_plugins/compute_frame_work.py → mloda/core/abstract_plugins/compute_framework.py +46 -37
  31. mloda_core/abstract_plugins/abstract_feature_group.py → mloda/core/abstract_plugins/feature_group.py +56 -33
  32. mloda/core/abstract_plugins/function_extender.py +78 -0
  33. mloda/core/api/plugin_docs.py +220 -0
  34. mloda/core/api/plugin_info.py +32 -0
  35. {mloda_core → mloda/core}/api/prepare/setup_compute_framework.py +11 -11
  36. {mloda_core → mloda/core}/api/request.py +42 -33
  37. {mloda_core → mloda/core}/core/cfw_manager.py +8 -8
  38. {mloda_core → mloda/core}/core/engine.py +47 -46
  39. {mloda_core → mloda/core}/core/step/abstract_step.py +7 -7
  40. {mloda_core → mloda/core}/core/step/feature_group_step.py +12 -12
  41. {mloda_core → mloda/core}/core/step/join_step.py +14 -14
  42. {mloda_core → mloda/core}/core/step/transform_frame_work_step.py +16 -16
  43. {mloda_core → mloda/core}/filter/filter_engine.py +1 -1
  44. {mloda_core → mloda/core}/filter/filter_type_enum.py +1 -1
  45. {mloda_core → mloda/core}/filter/global_filter.py +23 -23
  46. {mloda_core → mloda/core}/filter/single_filter.py +6 -6
  47. {mloda_core → mloda/core}/prepare/accessible_plugins.py +16 -18
  48. {mloda_core → mloda/core}/prepare/execution_plan.py +65 -39
  49. {mloda_core → mloda/core}/prepare/graph/build_graph.py +6 -6
  50. {mloda_core → mloda/core}/prepare/graph/graph.py +1 -1
  51. {mloda_core → mloda/core}/prepare/graph/properties.py +5 -5
  52. {mloda_core → mloda/core}/prepare/identify_feature_group.py +12 -14
  53. {mloda_core → mloda/core}/prepare/joinstep_collection.py +3 -3
  54. {mloda_core → mloda/core}/prepare/resolve_compute_frameworks.py +6 -6
  55. {mloda_core → mloda/core}/prepare/resolve_graph.py +11 -11
  56. {mloda_core → mloda/core}/prepare/resolve_links.py +11 -31
  57. mloda/core/prepare/validators/resolve_link_validator.py +32 -0
  58. mloda/core/runtime/compute_framework_executor.py +271 -0
  59. mloda/core/runtime/data_lifecycle_manager.py +160 -0
  60. mloda/core/runtime/flight/__init__.py +0 -0
  61. {mloda_core → mloda/core}/runtime/flight/runner_flight_server.py +1 -1
  62. mloda/core/runtime/run.py +317 -0
  63. mloda/core/runtime/worker/__init__.py +0 -0
  64. {mloda_core → mloda/core}/runtime/worker/multiprocessing_worker.py +15 -10
  65. {mloda_core → mloda/core}/runtime/worker/thread_worker.py +2 -2
  66. mloda/core/runtime/worker_manager.py +96 -0
  67. mloda/provider/__init__.py +101 -0
  68. mloda/steward/__init__.py +25 -0
  69. mloda/user/__init__.py +57 -0
  70. {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/METADATA +18 -22
  71. mloda-0.4.0.dist-info/RECORD +248 -0
  72. {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/top_level.txt +1 -1
  73. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_filter_engine.py +2 -2
  74. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_framework.py +15 -13
  75. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_merge_engine.py +3 -3
  76. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_pyarrow_transformer.py +1 -1
  77. mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_filter_engine.py +2 -2
  78. mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_framework.py +12 -10
  79. mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_pyarrow_transformer.py +1 -1
  80. mloda_plugins/compute_framework/base_implementations/pandas/dataframe.py +18 -16
  81. mloda_plugins/compute_framework/base_implementations/pandas/pandas_filter_engine.py +36 -13
  82. mloda_plugins/compute_framework/base_implementations/pandas/pandas_merge_engine.py +7 -7
  83. mloda_plugins/compute_framework/base_implementations/pandas/pandaspyarrowtransformer.py +1 -1
  84. mloda_plugins/compute_framework/base_implementations/polars/dataframe.py +16 -14
  85. mloda_plugins/compute_framework/base_implementations/polars/lazy_dataframe.py +13 -12
  86. mloda_plugins/compute_framework/base_implementations/polars/polars_filter_engine.py +2 -2
  87. mloda_plugins/compute_framework/base_implementations/polars/polars_lazy_pyarrow_transformer.py +1 -1
  88. mloda_plugins/compute_framework/base_implementations/polars/polars_merge_engine.py +3 -3
  89. mloda_plugins/compute_framework/base_implementations/polars/polars_pyarrow_transformer.py +1 -1
  90. mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_filter_engine.py +2 -2
  91. mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_merge_engine.py +3 -3
  92. mloda_plugins/compute_framework/base_implementations/pyarrow/table.py +12 -10
  93. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_filter_engine.py +2 -2
  94. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_framework.py +11 -9
  95. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_merge_engine.py +3 -3
  96. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_pyarrow_transformer.py +1 -1
  97. mloda_plugins/compute_framework/base_implementations/spark/spark_filter_engine.py +2 -2
  98. mloda_plugins/compute_framework/base_implementations/spark/spark_framework.py +17 -15
  99. mloda_plugins/compute_framework/base_implementations/spark/spark_merge_engine.py +3 -3
  100. mloda_plugins/compute_framework/base_implementations/spark/spark_pyarrow_transformer.py +1 -1
  101. mloda_plugins/config/feature/loader.py +2 -2
  102. mloda_plugins/feature_group/experimental/aggregated_feature_group/base.py +45 -62
  103. mloda_plugins/feature_group/experimental/aggregated_feature_group/pandas.py +2 -2
  104. mloda_plugins/feature_group/experimental/aggregated_feature_group/polars_lazy.py +2 -2
  105. mloda_plugins/feature_group/experimental/aggregated_feature_group/pyarrow.py +2 -2
  106. mloda_plugins/feature_group/experimental/clustering/base.py +69 -97
  107. mloda_plugins/feature_group/experimental/clustering/pandas.py +2 -2
  108. mloda_plugins/feature_group/experimental/data_quality/missing_value/base.py +58 -79
  109. mloda_plugins/feature_group/experimental/data_quality/missing_value/pandas.py +2 -2
  110. mloda_plugins/feature_group/experimental/data_quality/missing_value/pyarrow.py +2 -2
  111. mloda_plugins/feature_group/experimental/data_quality/missing_value/python_dict.py +2 -2
  112. mloda_plugins/feature_group/experimental/default_options_key.py +16 -19
  113. mloda_plugins/feature_group/experimental/dimensionality_reduction/base.py +80 -94
  114. mloda_plugins/feature_group/experimental/dimensionality_reduction/pandas.py +2 -2
  115. mloda_plugins/feature_group/experimental/dynamic_feature_group_factory/dynamic_feature_group_factory.py +24 -24
  116. mloda_plugins/feature_group/experimental/forecasting/base.py +106 -104
  117. mloda_plugins/feature_group/experimental/forecasting/forecasting_artifact.py +2 -2
  118. mloda_plugins/feature_group/experimental/forecasting/pandas.py +15 -15
  119. mloda_plugins/feature_group/experimental/geo_distance/base.py +50 -42
  120. mloda_plugins/feature_group/experimental/geo_distance/pandas.py +2 -2
  121. mloda_plugins/feature_group/experimental/llm/cli.py +4 -4
  122. mloda_plugins/feature_group/experimental/llm/cli_features/refactor_git_cached.py +19 -19
  123. mloda_plugins/feature_group/experimental/llm/installed_packages_feature_group.py +8 -8
  124. mloda_plugins/feature_group/experimental/llm/list_directory_feature_group.py +5 -5
  125. mloda_plugins/feature_group/experimental/llm/llm_api/claude.py +3 -3
  126. mloda_plugins/feature_group/experimental/llm/llm_api/gemini.py +3 -3
  127. mloda_plugins/feature_group/experimental/llm/llm_api/llm_base_request.py +5 -5
  128. mloda_plugins/feature_group/experimental/llm/llm_api/openai.py +3 -3
  129. mloda_plugins/feature_group/experimental/llm/llm_api/request_loop.py +6 -6
  130. mloda_plugins/feature_group/experimental/llm/llm_file_selector.py +10 -10
  131. mloda_plugins/feature_group/experimental/llm/tools/tool_collection.py +1 -1
  132. mloda_plugins/feature_group/experimental/node_centrality/base.py +46 -72
  133. mloda_plugins/feature_group/experimental/node_centrality/pandas.py +2 -2
  134. mloda_plugins/feature_group/experimental/sklearn/encoding/base.py +51 -51
  135. mloda_plugins/feature_group/experimental/sklearn/encoding/pandas.py +2 -2
  136. mloda_plugins/feature_group/experimental/sklearn/pipeline/base.py +52 -39
  137. mloda_plugins/feature_group/experimental/sklearn/pipeline/pandas.py +2 -2
  138. mloda_plugins/feature_group/experimental/sklearn/scaling/base.py +44 -58
  139. mloda_plugins/feature_group/experimental/sklearn/scaling/pandas.py +2 -2
  140. mloda_plugins/feature_group/experimental/sklearn/sklearn_artifact.py +2 -2
  141. mloda_plugins/feature_group/experimental/source_input_feature.py +15 -15
  142. mloda_plugins/feature_group/experimental/text_cleaning/base.py +38 -61
  143. mloda_plugins/feature_group/experimental/text_cleaning/pandas.py +2 -2
  144. mloda_plugins/feature_group/experimental/text_cleaning/python_dict.py +2 -2
  145. mloda_plugins/feature_group/experimental/time_window/base.py +106 -93
  146. mloda_plugins/feature_group/experimental/time_window/pandas.py +13 -13
  147. mloda_plugins/feature_group/experimental/time_window/pyarrow.py +12 -12
  148. mloda_plugins/feature_group/input_data/api_data/api_data.py +9 -11
  149. mloda_plugins/feature_group/input_data/read_context_files.py +7 -7
  150. mloda_plugins/feature_group/input_data/read_db.py +7 -9
  151. mloda_plugins/feature_group/input_data/read_db_feature.py +4 -4
  152. mloda_plugins/feature_group/input_data/read_dbs/sqlite.py +23 -13
  153. mloda_plugins/feature_group/input_data/read_file.py +8 -8
  154. mloda_plugins/feature_group/input_data/read_file_feature.py +4 -4
  155. mloda_plugins/feature_group/input_data/read_files/csv.py +6 -6
  156. mloda_plugins/feature_group/input_data/read_files/feather.py +5 -5
  157. mloda_plugins/feature_group/input_data/read_files/json.py +5 -5
  158. mloda_plugins/feature_group/input_data/read_files/orc.py +5 -5
  159. mloda_plugins/feature_group/input_data/read_files/parquet.py +5 -5
  160. mloda_plugins/feature_group/input_data/read_files/text_file_reader.py +5 -5
  161. mloda_plugins/function_extender/base_implementations/otel/otel_extender.py +4 -4
  162. mloda-0.3.3.dist-info/RECORD +0 -230
  163. mloda_core/abstract_plugins/components/link.py +0 -286
  164. mloda_core/abstract_plugins/function_extender.py +0 -34
  165. mloda_core/runtime/run.py +0 -617
  166. {mloda_core → mloda/core}/__init__.py +0 -0
  167. {mloda_core → mloda/core}/abstract_plugins/__init__.py +0 -0
  168. {mloda_core → mloda/core}/abstract_plugins/components/__init__.py +0 -0
  169. {mloda_core → mloda/core}/abstract_plugins/components/domain.py +0 -0
  170. {mloda_core → mloda/core}/abstract_plugins/components/feature_chainer/__init__.py +0 -0
  171. {mloda_core → mloda/core}/abstract_plugins/components/feature_name.py +0 -0
  172. {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/__init__.py +0 -0
  173. {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/base_transformer.py +0 -0
  174. {mloda_core → mloda/core}/abstract_plugins/components/hashable_dict.py +0 -0
  175. {mloda_core → mloda/core}/abstract_plugins/components/index/__init__.py +0 -0
  176. {mloda_core → mloda/core}/abstract_plugins/components/index/index.py +0 -0
  177. {mloda_core → mloda/core}/abstract_plugins/components/input_data/__init__.py +0 -0
  178. {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/__init__.py +0 -0
  179. {mloda_core → mloda/core}/abstract_plugins/components/input_data/creator/__init__.py +0 -0
  180. {mloda_core → mloda/core}/abstract_plugins/components/match_data/__init__.py +0 -0
  181. {mloda_core → mloda/core}/abstract_plugins/components/merge/__init__.py +0 -0
  182. {mloda_core → mloda/core}/abstract_plugins/components/plugin_option/__init__.py +0 -0
  183. {mloda_core → mloda/core}/abstract_plugins/components/utils.py +0 -0
  184. {mloda_core/abstract_plugins/plugin_loader → mloda/core/abstract_plugins/components/validators}/__init__.py +0 -0
  185. {mloda_core/api → mloda/core/abstract_plugins/plugin_loader}/__init__.py +0 -0
  186. {mloda_core → mloda/core}/abstract_plugins/plugin_loader/plugin_loader.py +0 -0
  187. {mloda_core/api/prepare → mloda/core/api}/__init__.py +0 -0
  188. {mloda_core/core → mloda/core/api/prepare}/__init__.py +0 -0
  189. {mloda_core/core/step → mloda/core/core}/__init__.py +0 -0
  190. {mloda_core/filter → mloda/core/core/step}/__init__.py +0 -0
  191. {mloda_core/prepare → mloda/core/filter}/__init__.py +0 -0
  192. {mloda_core → mloda/core}/filter/filter_parameter.py +0 -0
  193. {mloda_core/prepare/graph → mloda/core/prepare}/__init__.py +0 -0
  194. {mloda_core/runtime → mloda/core/prepare/graph}/__init__.py +0 -0
  195. {mloda_core/runtime/flight → mloda/core/prepare/validators}/__init__.py +0 -0
  196. {mloda_core/runtime/worker → mloda/core/runtime}/__init__.py +0 -0
  197. {mloda_core → mloda/core}/runtime/flight/flight_server.py +0 -0
  198. {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/WHEEL +0 -0
  199. {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/entry_points.txt +0 -0
  200. {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/licenses/LICENSE.TXT +0 -0
  201. {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/licenses/NOTICE.md +0 -0
@@ -1,286 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from dataclasses import dataclass, FrozenInstanceError
4
- from enum import Enum
5
- from uuid import uuid4
6
- from typing import Any, Dict, Optional, Set, Tuple, Type, Union
7
-
8
-
9
- from mloda_core.abstract_plugins.components.index.index import Index
10
-
11
-
12
class JoinType(Enum):
    """Kinds of merge operation that can combine two datasets.

    Members:
        INNER: Keep only rows whose keys are present in both datasets.
        LEFT: Keep every row of the left dataset, plus matches from the right.
        RIGHT: Keep every row of the right dataset, plus matches from the left.
        OUTER: Keep every row of both datasets; unmatched values become nulls.
        APPEND: Stack the datasets vertically, preserving all rows of both.
        UNION: Combine the datasets and drop duplicate rows.
    """

    INNER = "inner"
    LEFT = "left"
    RIGHT = "right"
    OUTER = "outer"
    APPEND = "append"
    UNION = "union"
31
-
32
-
33
class JoinSpec:
    """Immutable specification for one side of a join operation.

    Args:
        feature_group: The feature group class for this side of the join.
        index: Join column(s) - can be:
            - str: single column name, e.g., "id"
            - Tuple[str, ...]: multiple columns, e.g., ("col1", "col2")
            - Index: explicit Index object
          A str or tuple is wrapped in an ``Index``; any other value is
          stored unchanged.

    Raises:
        ValueError: If ``index`` is an empty string or an empty tuple.
        FrozenInstanceError: On any attempt to assign or delete an attribute
            after construction.
    """

    feature_group: Type[Any]
    index: Index

    def __init__(self, feature_group: Type[Any], index: Union[Index, Tuple[str, ...], str]) -> None:
        """Create JoinSpec, converting index input to Index if needed."""
        if isinstance(index, str):
            if not index:
                raise ValueError("Index column name cannot be empty")
            index = Index((index,))
        elif isinstance(index, tuple):
            if not index:
                raise ValueError("Index tuple cannot be empty")
            index = Index(index)

        # __setattr__ is blocked on this class, so the two fields are written
        # through object.__setattr__ directly.
        object.__setattr__(self, "feature_group", feature_group)
        object.__setattr__(self, "index", index)

    def __setattr__(self, name: str, value: Any) -> None:
        raise FrozenInstanceError(f"cannot assign to field '{name}'")

    def __delattr__(self, name: str) -> None:
        # Blocking assignment alone is not enough: without this, `del spec.index`
        # would succeed and leave an object whose __eq__/__hash__ raise.
        raise FrozenInstanceError(f"cannot delete field '{name}'")

    def __repr__(self) -> str:
        return f"{type(self).__name__}(feature_group={self.feature_group!r}, index={self.index!r})"

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, JoinSpec):
            return False
        return self.feature_group == other.feature_group and self.index == other.index

    def __hash__(self) -> int:
        # Hashes the same pair of fields that __eq__ compares.
        return hash((self.feature_group, self.index))
71
-
72
-
73
class Link:
    """
    Defines a join relationship between two feature groups.

    Args:
        jointype: Type of join operation (inner, left, right, outer, append, union).
            A string is converted to the matching ``JoinType`` member.
        left: JoinSpec for the left side of the join.
        right: JoinSpec for the right side of the join.
        left_pointer: Optional dict to distinguish left instance in self-joins.
            Must match key-value pairs in the left feature's options.
        right_pointer: Optional dict to distinguish right instance in self-joins.
            Must match key-value pairs in the right feature's options.

    Raises:
        ValueError: If ``jointype`` is a string that is not a ``JoinType`` value.

    Example:
        >>> # Simple join using string index (single column)
        >>> Link.inner(JoinSpec(UserFG, "user_id"), JoinSpec(OrderFG, "user_id"))
        >>>
        >>> # Multi-column join using tuple index
        >>> Link.inner(JoinSpec(UserFG, ("id", "date")), JoinSpec(OrderFG, ("user_id", "order_date")))
        >>>
        >>> # Self-join with pointers
        >>> Link("inner", JoinSpec(UserFG, "user_id"), JoinSpec(UserFG, "user_id"),
        ...      left_pointer={"side": "manager"},
        ...      right_pointer={"side": "employee"})
        >>>
        >>> # The same self-join through the convenience constructor
        >>> Link.inner(JoinSpec(UserFG, "user_id"), JoinSpec(UserFG, "user_id"),
        ...            left_pointer={"side": "manager"},
        ...            right_pointer={"side": "employee"})

    Polymorphic Matching:
        Links support inheritance-based matching, allowing a link defined with base
        classes to automatically apply to subclasses. The matching follows these rules:

        1. **Exact match first**: If a link's feature groups exactly match the classes
           being joined, it takes priority over any polymorphic matches.

        2. **Balanced inheritance**: For polymorphic matches, both sides must have the
           same inheritance distance. This prevents sibling class mismatches.

           Example - Given hierarchy:
               BaseFeatureGroup
               ├── ChildA
               └── ChildB

           Link(BaseFeatureGroup, BaseFeatureGroup) will match:
           - (ChildA, ChildA) ✓ - both sides distance=1
           - (ChildB, ChildB) ✓ - both sides distance=1
           - (ChildA, ChildB) ✗ - rejected: siblings, not balanced inheritance

        3. **Most specific wins**: Among valid matches, the link closest in the
           inheritance hierarchy is selected.

        NOTE(review): ``matches_polymorphic`` in this class only performs the
        ``issubclass`` checks; the priority, balanced-distance and most-specific
        rules are presumably applied by the code that resolves links - confirm.
    """

    def __init__(
        self,
        jointype: Union[JoinType, str],
        left: JoinSpec,
        right: JoinSpec,
        left_pointer: Optional[Dict[str, Any]] = None,
        right_pointer: Optional[Dict[str, Any]] = None,
    ) -> None:
        self.jointype = JoinType(jointype) if isinstance(jointype, str) else jointype
        self.left_feature_group = left.feature_group
        self.right_feature_group = right.feature_group
        self.left_index = left.index
        self.right_index = right.index
        self.left_pointer = left_pointer
        self.right_pointer = right_pointer

        # Fresh per instance; shown by __str__ but ignored by __eq__ and __hash__.
        self.uuid = uuid4()

    def __str__(self) -> str:
        return f"{self.jointype.value} {self.left_feature_group.get_class_name()} {self.left_index} {self.right_feature_group.get_class_name()} {self.right_index} {self.uuid}"

    @classmethod
    def inner(
        cls,
        left: JoinSpec,
        right: JoinSpec,
        left_pointer: Optional[Dict[str, Any]] = None,
        right_pointer: Optional[Dict[str, Any]] = None,
    ) -> Link:
        """Build an INNER link; the optional pointers are passed through to the constructor."""
        return cls(JoinType.INNER, left, right, left_pointer, right_pointer)

    @classmethod
    def left(
        cls,
        left: JoinSpec,
        right: JoinSpec,
        left_pointer: Optional[Dict[str, Any]] = None,
        right_pointer: Optional[Dict[str, Any]] = None,
    ) -> Link:
        """Build a LEFT link; the optional pointers are passed through to the constructor."""
        return cls(JoinType.LEFT, left, right, left_pointer, right_pointer)

    @classmethod
    def right(
        cls,
        left: JoinSpec,
        right: JoinSpec,
        left_pointer: Optional[Dict[str, Any]] = None,
        right_pointer: Optional[Dict[str, Any]] = None,
    ) -> Link:
        """Build a RIGHT link; the optional pointers are passed through to the constructor."""
        return cls(JoinType.RIGHT, left, right, left_pointer, right_pointer)

    @classmethod
    def outer(
        cls,
        left: JoinSpec,
        right: JoinSpec,
        left_pointer: Optional[Dict[str, Any]] = None,
        right_pointer: Optional[Dict[str, Any]] = None,
    ) -> Link:
        """Build an OUTER link; the optional pointers are passed through to the constructor."""
        return cls(JoinType.OUTER, left, right, left_pointer, right_pointer)

    @classmethod
    def append(
        cls,
        left: JoinSpec,
        right: JoinSpec,
        left_pointer: Optional[Dict[str, Any]] = None,
        right_pointer: Optional[Dict[str, Any]] = None,
    ) -> Link:
        """Build an APPEND link; the optional pointers are passed through to the constructor."""
        return cls(JoinType.APPEND, left, right, left_pointer, right_pointer)

    @classmethod
    def union(
        cls,
        left: JoinSpec,
        right: JoinSpec,
        left_pointer: Optional[Dict[str, Any]] = None,
        right_pointer: Optional[Dict[str, Any]] = None,
    ) -> Link:
        """Build a UNION link; the optional pointers are passed through to the constructor."""
        return cls(JoinType.UNION, left, right, left_pointer, right_pointer)

    def matches_exact(
        self,
        other_left_feature_group: Type[Any],
        other_right_feature_group: Type[Any],
    ) -> bool:
        """Exact class name match only."""
        left_match: bool = self.left_feature_group.get_class_name() == other_left_feature_group.get_class_name()
        right_match: bool = self.right_feature_group.get_class_name() == other_right_feature_group.get_class_name()
        return left_match and right_match

    def matches_polymorphic(
        self,
        other_left_feature_group: Type[Any],
        other_right_feature_group: Type[Any],
    ) -> bool:
        """Subclass match (inheritance). Returns True if both sides are subclasses."""
        return issubclass(other_left_feature_group, self.left_feature_group) and issubclass(
            other_right_feature_group, self.right_feature_group
        )

    def matches(
        self,
        other_left_feature_group: Type[Any],
        other_right_feature_group: Type[Any],
    ) -> bool:
        """Combined match: exact OR polymorphic."""
        return self.matches_exact(other_left_feature_group, other_right_feature_group) or self.matches_polymorphic(
            other_left_feature_group, other_right_feature_group
        )

    def __eq__(self, other: Any) -> bool:
        # Compares join type, feature group class names and indexes only.
        # NOTE(review): left_pointer/right_pointer are not compared, so two
        # self-join links that differ only in their pointers are equal (and
        # collapse inside a set) - confirm this is intended.
        if not isinstance(other, Link):
            return False
        return (
            self.jointype == other.jointype
            and self.left_feature_group.get_class_name() == other.left_feature_group.get_class_name()
            and self.right_feature_group.get_class_name() == other.right_feature_group.get_class_name()
            and self.left_index == other.left_index
            and self.right_index == other.right_index
        )

    def __hash__(self) -> int:
        # Hashes the same fields that __eq__ compares.
        return hash(
            (
                self.jointype,
                self.left_feature_group.get_class_name(),
                self.right_feature_group.get_class_name(),
                self.left_index,
                self.right_index,
            )
        )

    @staticmethod
    def validate(links: Optional[Set[Link]] = None) -> None:
        """Check a set of links for unsupported or conflicting join definitions.

        Args:
            links: Links to check against each other; ``None`` skips validation.

        Raises:
            ValueError: If a link has an unsupported join type, if two links join
                the same pair of feature groups in opposite directions (except
                for append/union), if two links join the same ordered pair with
                different join types, or if a right join shares its left feature
                group with either side of another link.
        """
        if links is None:
            return

        for i_link in links:
            # `x not in JoinType` raises TypeError for non-members before
            # Python 3.12. JoinType(...) accepts members and their values and
            # raises ValueError for everything else on every version.
            try:
                JoinType(i_link.jointype)
            except ValueError:
                raise ValueError(f"Join type {i_link.jointype} is not supported") from None

            for j_link in links:
                if i_link == j_link:
                    continue

                # case: A B and B A -> is not clear which join to use
                # We exclude here append and union, because they are not directional.
                if (
                    i_link.left_feature_group == j_link.right_feature_group
                    and i_link.right_feature_group == j_link.left_feature_group
                    and i_link.jointype not in [JoinType.APPEND, JoinType.UNION]
                ):
                    raise ValueError(
                        f"Link {i_link} and {j_link} have at least two different defined joins. Please remove one."
                    )

                # case: Multiple different join types between two feature groups
                if (
                    i_link.left_feature_group == j_link.left_feature_group
                    and i_link.right_feature_group == j_link.right_feature_group
                    and i_link.jointype != j_link.jointype
                ):
                    raise ValueError(
                        f"Link {i_link} and {j_link} have different join types for the same feature groups. Please remove one."
                    )

                # case: Multiple right joins
                # For now, only small right joins are supported. Lets see if any use case will need this in future.
                if i_link.jointype == JoinType.RIGHT:
                    if (
                        i_link.left_feature_group == j_link.left_feature_group
                        or i_link.left_feature_group == j_link.right_feature_group
                    ):
                        raise ValueError(
                            f"Link {i_link} and {j_link} have multiple right joins for the same feature group on the left side or switching from left to right side although using right join. Please reconsider your joinlogic and if possible, use left joins instead of rightjoins. This will currently break the planner or during execution."
                        )
@@ -1,34 +0,0 @@
1
- from abc import ABC, abstractmethod
2
- from enum import Enum
3
- from typing import Any, Set
4
-
5
-
6
class WrapperFunctionEnum(Enum):
    """Names of wrappable functions.

    ``WrapperFunctionExtender.wraps`` returns a set of these members.
    """

    FEATURE_GROUP_CALCULATE_FEATURE = "feature_group_calculate_feature"
    VALIDATE_INPUT_FEATURE = "validate_input_feature"
    VALIDATE_OUTPUT_FEATURE = "validate_output_feature"
10
-
11
-
12
class WrapperFunctionExtender(ABC):
    """Abstract base class for function extenders.

    Subclasses must implement ``wraps`` and ``__call__``.

    Use cases listed for extenders:
    - Automated metadata harvester connector
    - Messaging integration (email)
    - Automation tools
    - Data lineage mapping
    - Impact analysis
    - Audit trail
    - Monitoring alerts
    - Metadata capture
    - Event logging
    - Metrics on feature calculation
    - Visibility / observability
    - Performance
    """

    @abstractmethod
    def wraps(self) -> Set[WrapperFunctionEnum]:
        """Return a set of ``WrapperFunctionEnum`` members.

        Presumably the functions this extender applies to - confirm against callers.
        """

    @abstractmethod
    def __call__(self, func: Any, *args: Any, **kwargs: Any) -> Any:
        """Receive ``func`` together with positional and keyword arguments.

        Presumably invoked in place of calling ``func`` directly - confirm against callers.
        """