mloda 0.3.3__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. mloda/__init__.py +17 -0
  2. {mloda_core → mloda/core}/abstract_plugins/components/base_artifact.py +2 -2
  3. {mloda_core → mloda/core}/abstract_plugins/components/base_validator.py +13 -0
  4. {mloda_core → mloda/core}/abstract_plugins/components/data_access_collection.py +1 -1
  5. {mloda_core → mloda/core}/abstract_plugins/components/data_types.py +39 -0
  6. {mloda_core → mloda/core}/abstract_plugins/components/feature.py +39 -33
  7. {mloda_core → mloda/core}/abstract_plugins/components/feature_chainer/feature_chain_parser.py +19 -19
  8. mloda/core/abstract_plugins/components/feature_chainer/feature_chain_parser_mixin.py +197 -0
  9. {mloda_core → mloda/core}/abstract_plugins/components/feature_collection.py +6 -6
  10. {mloda_core → mloda/core}/abstract_plugins/components/feature_group_version.py +8 -8
  11. {mloda_core → mloda/core}/abstract_plugins/components/feature_set.py +18 -24
  12. {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/cfw_transformer.py +2 -2
  13. {mloda_core → mloda/core}/abstract_plugins/components/index/add_index_feature.py +4 -4
  14. {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/api_input_data.py +3 -3
  15. {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/api_input_data_collection.py +2 -2
  16. {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/base_api_data.py +1 -1
  17. {mloda_core → mloda/core}/abstract_plugins/components/input_data/base_input_data.py +6 -6
  18. {mloda_core → mloda/core}/abstract_plugins/components/input_data/creator/data_creator.py +3 -3
  19. mloda/core/abstract_plugins/components/link.py +437 -0
  20. {mloda_core → mloda/core}/abstract_plugins/components/match_data/match_data.py +3 -3
  21. {mloda_core → mloda/core}/abstract_plugins/components/merge/base_merge_engine.py +2 -2
  22. {mloda_core → mloda/core}/abstract_plugins/components/options.py +12 -36
  23. {mloda_core → mloda/core}/abstract_plugins/components/parallelization_modes.py +1 -1
  24. {mloda_core → mloda/core}/abstract_plugins/components/plugin_option/plugin_collector.py +14 -14
  25. mloda/core/abstract_plugins/components/validators/datatype_validator.py +96 -0
  26. mloda/core/abstract_plugins/components/validators/feature_set_validator.py +38 -0
  27. mloda/core/abstract_plugins/components/validators/feature_validator.py +23 -0
  28. mloda/core/abstract_plugins/components/validators/link_validator.py +79 -0
  29. mloda/core/abstract_plugins/components/validators/options_validator.py +57 -0
  30. mloda_core/abstract_plugins/compute_frame_work.py → mloda/core/abstract_plugins/compute_framework.py +46 -37
  31. mloda_core/abstract_plugins/abstract_feature_group.py → mloda/core/abstract_plugins/feature_group.py +56 -33
  32. mloda/core/abstract_plugins/function_extender.py +78 -0
  33. mloda/core/api/plugin_docs.py +220 -0
  34. mloda/core/api/plugin_info.py +32 -0
  35. {mloda_core → mloda/core}/api/prepare/setup_compute_framework.py +11 -11
  36. {mloda_core → mloda/core}/api/request.py +42 -33
  37. {mloda_core → mloda/core}/core/cfw_manager.py +8 -8
  38. {mloda_core → mloda/core}/core/engine.py +47 -46
  39. {mloda_core → mloda/core}/core/step/abstract_step.py +7 -7
  40. {mloda_core → mloda/core}/core/step/feature_group_step.py +12 -12
  41. {mloda_core → mloda/core}/core/step/join_step.py +14 -14
  42. {mloda_core → mloda/core}/core/step/transform_frame_work_step.py +16 -16
  43. {mloda_core → mloda/core}/filter/filter_engine.py +1 -1
  44. {mloda_core → mloda/core}/filter/filter_type_enum.py +1 -1
  45. {mloda_core → mloda/core}/filter/global_filter.py +23 -23
  46. {mloda_core → mloda/core}/filter/single_filter.py +6 -6
  47. {mloda_core → mloda/core}/prepare/accessible_plugins.py +16 -18
  48. {mloda_core → mloda/core}/prepare/execution_plan.py +65 -39
  49. {mloda_core → mloda/core}/prepare/graph/build_graph.py +6 -6
  50. {mloda_core → mloda/core}/prepare/graph/graph.py +1 -1
  51. {mloda_core → mloda/core}/prepare/graph/properties.py +5 -5
  52. {mloda_core → mloda/core}/prepare/identify_feature_group.py +12 -14
  53. {mloda_core → mloda/core}/prepare/joinstep_collection.py +3 -3
  54. {mloda_core → mloda/core}/prepare/resolve_compute_frameworks.py +6 -6
  55. {mloda_core → mloda/core}/prepare/resolve_graph.py +11 -11
  56. {mloda_core → mloda/core}/prepare/resolve_links.py +11 -31
  57. mloda/core/prepare/validators/resolve_link_validator.py +32 -0
  58. mloda/core/runtime/compute_framework_executor.py +271 -0
  59. mloda/core/runtime/data_lifecycle_manager.py +160 -0
  60. mloda/core/runtime/flight/__init__.py +0 -0
  61. {mloda_core → mloda/core}/runtime/flight/runner_flight_server.py +1 -1
  62. mloda/core/runtime/run.py +317 -0
  63. mloda/core/runtime/worker/__init__.py +0 -0
  64. {mloda_core → mloda/core}/runtime/worker/multiprocessing_worker.py +15 -10
  65. {mloda_core → mloda/core}/runtime/worker/thread_worker.py +2 -2
  66. mloda/core/runtime/worker_manager.py +96 -0
  67. mloda/provider/__init__.py +101 -0
  68. mloda/steward/__init__.py +25 -0
  69. mloda/user/__init__.py +57 -0
  70. {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/METADATA +18 -22
  71. mloda-0.4.0.dist-info/RECORD +248 -0
  72. {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/top_level.txt +1 -1
  73. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_filter_engine.py +2 -2
  74. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_framework.py +15 -13
  75. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_merge_engine.py +3 -3
  76. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_pyarrow_transformer.py +1 -1
  77. mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_filter_engine.py +2 -2
  78. mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_framework.py +12 -10
  79. mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_pyarrow_transformer.py +1 -1
  80. mloda_plugins/compute_framework/base_implementations/pandas/dataframe.py +18 -16
  81. mloda_plugins/compute_framework/base_implementations/pandas/pandas_filter_engine.py +36 -13
  82. mloda_plugins/compute_framework/base_implementations/pandas/pandas_merge_engine.py +7 -7
  83. mloda_plugins/compute_framework/base_implementations/pandas/pandaspyarrowtransformer.py +1 -1
  84. mloda_plugins/compute_framework/base_implementations/polars/dataframe.py +16 -14
  85. mloda_plugins/compute_framework/base_implementations/polars/lazy_dataframe.py +13 -12
  86. mloda_plugins/compute_framework/base_implementations/polars/polars_filter_engine.py +2 -2
  87. mloda_plugins/compute_framework/base_implementations/polars/polars_lazy_pyarrow_transformer.py +1 -1
  88. mloda_plugins/compute_framework/base_implementations/polars/polars_merge_engine.py +3 -3
  89. mloda_plugins/compute_framework/base_implementations/polars/polars_pyarrow_transformer.py +1 -1
  90. mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_filter_engine.py +2 -2
  91. mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_merge_engine.py +3 -3
  92. mloda_plugins/compute_framework/base_implementations/pyarrow/table.py +12 -10
  93. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_filter_engine.py +2 -2
  94. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_framework.py +11 -9
  95. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_merge_engine.py +3 -3
  96. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_pyarrow_transformer.py +1 -1
  97. mloda_plugins/compute_framework/base_implementations/spark/spark_filter_engine.py +2 -2
  98. mloda_plugins/compute_framework/base_implementations/spark/spark_framework.py +17 -15
  99. mloda_plugins/compute_framework/base_implementations/spark/spark_merge_engine.py +3 -3
  100. mloda_plugins/compute_framework/base_implementations/spark/spark_pyarrow_transformer.py +1 -1
  101. mloda_plugins/config/feature/loader.py +2 -2
  102. mloda_plugins/feature_group/experimental/aggregated_feature_group/base.py +45 -62
  103. mloda_plugins/feature_group/experimental/aggregated_feature_group/pandas.py +2 -2
  104. mloda_plugins/feature_group/experimental/aggregated_feature_group/polars_lazy.py +2 -2
  105. mloda_plugins/feature_group/experimental/aggregated_feature_group/pyarrow.py +2 -2
  106. mloda_plugins/feature_group/experimental/clustering/base.py +69 -97
  107. mloda_plugins/feature_group/experimental/clustering/pandas.py +2 -2
  108. mloda_plugins/feature_group/experimental/data_quality/missing_value/base.py +58 -79
  109. mloda_plugins/feature_group/experimental/data_quality/missing_value/pandas.py +2 -2
  110. mloda_plugins/feature_group/experimental/data_quality/missing_value/pyarrow.py +2 -2
  111. mloda_plugins/feature_group/experimental/data_quality/missing_value/python_dict.py +2 -2
  112. mloda_plugins/feature_group/experimental/default_options_key.py +16 -19
  113. mloda_plugins/feature_group/experimental/dimensionality_reduction/base.py +80 -94
  114. mloda_plugins/feature_group/experimental/dimensionality_reduction/pandas.py +2 -2
  115. mloda_plugins/feature_group/experimental/dynamic_feature_group_factory/dynamic_feature_group_factory.py +24 -24
  116. mloda_plugins/feature_group/experimental/forecasting/base.py +106 -104
  117. mloda_plugins/feature_group/experimental/forecasting/forecasting_artifact.py +2 -2
  118. mloda_plugins/feature_group/experimental/forecasting/pandas.py +15 -15
  119. mloda_plugins/feature_group/experimental/geo_distance/base.py +50 -42
  120. mloda_plugins/feature_group/experimental/geo_distance/pandas.py +2 -2
  121. mloda_plugins/feature_group/experimental/llm/cli.py +4 -4
  122. mloda_plugins/feature_group/experimental/llm/cli_features/refactor_git_cached.py +19 -19
  123. mloda_plugins/feature_group/experimental/llm/installed_packages_feature_group.py +8 -8
  124. mloda_plugins/feature_group/experimental/llm/list_directory_feature_group.py +5 -5
  125. mloda_plugins/feature_group/experimental/llm/llm_api/claude.py +3 -3
  126. mloda_plugins/feature_group/experimental/llm/llm_api/gemini.py +3 -3
  127. mloda_plugins/feature_group/experimental/llm/llm_api/llm_base_request.py +5 -5
  128. mloda_plugins/feature_group/experimental/llm/llm_api/openai.py +3 -3
  129. mloda_plugins/feature_group/experimental/llm/llm_api/request_loop.py +6 -6
  130. mloda_plugins/feature_group/experimental/llm/llm_file_selector.py +10 -10
  131. mloda_plugins/feature_group/experimental/llm/tools/tool_collection.py +1 -1
  132. mloda_plugins/feature_group/experimental/node_centrality/base.py +46 -72
  133. mloda_plugins/feature_group/experimental/node_centrality/pandas.py +2 -2
  134. mloda_plugins/feature_group/experimental/sklearn/encoding/base.py +51 -51
  135. mloda_plugins/feature_group/experimental/sklearn/encoding/pandas.py +2 -2
  136. mloda_plugins/feature_group/experimental/sklearn/pipeline/base.py +52 -39
  137. mloda_plugins/feature_group/experimental/sklearn/pipeline/pandas.py +2 -2
  138. mloda_plugins/feature_group/experimental/sklearn/scaling/base.py +44 -58
  139. mloda_plugins/feature_group/experimental/sklearn/scaling/pandas.py +2 -2
  140. mloda_plugins/feature_group/experimental/sklearn/sklearn_artifact.py +2 -2
  141. mloda_plugins/feature_group/experimental/source_input_feature.py +15 -15
  142. mloda_plugins/feature_group/experimental/text_cleaning/base.py +38 -61
  143. mloda_plugins/feature_group/experimental/text_cleaning/pandas.py +2 -2
  144. mloda_plugins/feature_group/experimental/text_cleaning/python_dict.py +2 -2
  145. mloda_plugins/feature_group/experimental/time_window/base.py +106 -93
  146. mloda_plugins/feature_group/experimental/time_window/pandas.py +13 -13
  147. mloda_plugins/feature_group/experimental/time_window/pyarrow.py +12 -12
  148. mloda_plugins/feature_group/input_data/api_data/api_data.py +9 -11
  149. mloda_plugins/feature_group/input_data/read_context_files.py +7 -7
  150. mloda_plugins/feature_group/input_data/read_db.py +7 -9
  151. mloda_plugins/feature_group/input_data/read_db_feature.py +4 -4
  152. mloda_plugins/feature_group/input_data/read_dbs/sqlite.py +23 -13
  153. mloda_plugins/feature_group/input_data/read_file.py +8 -8
  154. mloda_plugins/feature_group/input_data/read_file_feature.py +4 -4
  155. mloda_plugins/feature_group/input_data/read_files/csv.py +6 -6
  156. mloda_plugins/feature_group/input_data/read_files/feather.py +5 -5
  157. mloda_plugins/feature_group/input_data/read_files/json.py +5 -5
  158. mloda_plugins/feature_group/input_data/read_files/orc.py +5 -5
  159. mloda_plugins/feature_group/input_data/read_files/parquet.py +5 -5
  160. mloda_plugins/feature_group/input_data/read_files/text_file_reader.py +5 -5
  161. mloda_plugins/function_extender/base_implementations/otel/otel_extender.py +4 -4
  162. mloda-0.3.3.dist-info/RECORD +0 -230
  163. mloda_core/abstract_plugins/components/link.py +0 -286
  164. mloda_core/abstract_plugins/function_extender.py +0 -34
  165. mloda_core/runtime/run.py +0 -617
  166. {mloda_core → mloda/core}/__init__.py +0 -0
  167. {mloda_core → mloda/core}/abstract_plugins/__init__.py +0 -0
  168. {mloda_core → mloda/core}/abstract_plugins/components/__init__.py +0 -0
  169. {mloda_core → mloda/core}/abstract_plugins/components/domain.py +0 -0
  170. {mloda_core → mloda/core}/abstract_plugins/components/feature_chainer/__init__.py +0 -0
  171. {mloda_core → mloda/core}/abstract_plugins/components/feature_name.py +0 -0
  172. {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/__init__.py +0 -0
  173. {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/base_transformer.py +0 -0
  174. {mloda_core → mloda/core}/abstract_plugins/components/hashable_dict.py +0 -0
  175. {mloda_core → mloda/core}/abstract_plugins/components/index/__init__.py +0 -0
  176. {mloda_core → mloda/core}/abstract_plugins/components/index/index.py +0 -0
  177. {mloda_core → mloda/core}/abstract_plugins/components/input_data/__init__.py +0 -0
  178. {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/__init__.py +0 -0
  179. {mloda_core → mloda/core}/abstract_plugins/components/input_data/creator/__init__.py +0 -0
  180. {mloda_core → mloda/core}/abstract_plugins/components/match_data/__init__.py +0 -0
  181. {mloda_core → mloda/core}/abstract_plugins/components/merge/__init__.py +0 -0
  182. {mloda_core → mloda/core}/abstract_plugins/components/plugin_option/__init__.py +0 -0
  183. {mloda_core → mloda/core}/abstract_plugins/components/utils.py +0 -0
  184. {mloda_core/abstract_plugins/plugin_loader → mloda/core/abstract_plugins/components/validators}/__init__.py +0 -0
  185. {mloda_core/api → mloda/core/abstract_plugins/plugin_loader}/__init__.py +0 -0
  186. {mloda_core → mloda/core}/abstract_plugins/plugin_loader/plugin_loader.py +0 -0
  187. {mloda_core/api/prepare → mloda/core/api}/__init__.py +0 -0
  188. {mloda_core/core → mloda/core/api/prepare}/__init__.py +0 -0
  189. {mloda_core/core/step → mloda/core/core}/__init__.py +0 -0
  190. {mloda_core/filter → mloda/core/core/step}/__init__.py +0 -0
  191. {mloda_core/prepare → mloda/core/filter}/__init__.py +0 -0
  192. {mloda_core → mloda/core}/filter/filter_parameter.py +0 -0
  193. {mloda_core/prepare/graph → mloda/core/prepare}/__init__.py +0 -0
  194. {mloda_core/runtime → mloda/core/prepare/graph}/__init__.py +0 -0
  195. {mloda_core/runtime/flight → mloda/core/prepare/validators}/__init__.py +0 -0
  196. {mloda_core/runtime/worker → mloda/core/runtime}/__init__.py +0 -0
  197. {mloda_core → mloda/core}/runtime/flight/flight_server.py +0 -0
  198. {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/WHEEL +0 -0
  199. {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/entry_points.txt +0 -0
  200. {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/licenses/LICENSE.TXT +0 -0
  201. {mloda-0.3.3.dist-info → mloda-0.4.0.dist-info}/licenses/NOTICE.md +0 -0
@@ -1,10 +1,10 @@
1
1
  import logging
2
2
  from typing import Any, Set, Type, Optional
3
- from mloda_core.abstract_plugins.components.merge.base_merge_engine import BaseMergeEngine
3
+ from mloda.provider import BaseMergeEngine
4
4
  from mloda_plugins.compute_framework.base_implementations.duckdb.duckdb_merge_engine import DuckDBMergeEngine
5
- from mloda_core.abstract_plugins.components.feature_name import FeatureName
6
- from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
7
- from mloda_core.filter.filter_engine import BaseFilterEngine
5
+ from mloda.user import FeatureName
6
+ from mloda import ComputeFramework
7
+ from mloda.provider import BaseFilterEngine
8
8
  from mloda_plugins.compute_framework.base_implementations.duckdb.duckdb_filter_engine import DuckDBFilterEngine
9
9
 
10
10
  try:
@@ -15,8 +15,8 @@ except ImportError:
15
15
  logger = logging.getLogger(__name__)
16
16
 
17
17
 
18
- class DuckDBFramework(ComputeFrameWork):
19
- """DuckDB framework implementation for ComputeFrameWork.
18
+ class DuckDBFramework(ComputeFramework):
19
+ """DuckDB framework implementation for ComputeFramework.
20
20
 
21
21
  This framework does not support multiprocessing, so it should not be used with multiprocessing.
22
22
  """
@@ -43,11 +43,12 @@ class DuckDBFramework(ComputeFrameWork):
43
43
  except ImportError:
44
44
  return False
45
45
 
46
- @staticmethod
47
- def expected_data_framework() -> Any:
48
- return DuckDBFramework.duckdb_relation()
46
+ @classmethod
47
+ def expected_data_framework(cls) -> Any:
48
+ return cls.duckdb_relation()
49
49
 
50
- def merge_engine(self) -> Type[BaseMergeEngine]:
50
+ @classmethod
51
+ def merge_engine(cls) -> Type[BaseMergeEngine]:
51
52
  return DuckDBMergeEngine
52
53
 
53
54
  def select_data_by_column_names(self, data: Any, selected_feature_names: Set[FeatureName]) -> Any:
@@ -61,8 +62,8 @@ class DuckDBFramework(ComputeFrameWork):
61
62
  def set_column_names(self) -> None:
62
63
  self.column_names = set(self.data.columns)
63
64
 
64
- @staticmethod
65
- def duckdb_relation() -> Any:
65
+ @classmethod
66
+ def duckdb_relation(cls) -> Any:
66
67
  if duckdb is None:
67
68
  raise ImportError("DuckDB is not installed. To be able to use this framework, please install duckdb.")
68
69
  return duckdb.DuckDBPyRelation
@@ -117,5 +118,6 @@ class DuckDBFramework(ComputeFrameWork):
117
118
 
118
119
  raise ValueError(f"Data {type(data)} is not supported by {self.__class__.__name__}")
119
120
 
120
- def filter_engine(self) -> Type[BaseFilterEngine]:
121
+ @classmethod
122
+ def filter_engine(cls) -> Type[BaseFilterEngine]:
121
123
  return DuckDBFilterEngine
@@ -1,8 +1,8 @@
1
1
  from typing import Any
2
2
 
3
- from mloda_core.abstract_plugins.components.index.index import Index
4
- from mloda_core.abstract_plugins.components.link import JoinType
5
- from mloda_core.abstract_plugins.components.merge.base_merge_engine import BaseMergeEngine
3
+ from mloda.user import Index
4
+ from mloda.user import JoinType
5
+ from mloda.provider import BaseMergeEngine
6
6
 
7
7
  try:
8
8
  import duckdb
@@ -1,6 +1,6 @@
1
1
  from typing import Any, Optional
2
2
 
3
- from mloda_core.abstract_plugins.components.framework_transformer.base_transformer import BaseTransformer
3
+ from mloda.provider import BaseTransformer
4
4
 
5
5
  try:
6
6
  import duckdb
@@ -1,6 +1,6 @@
1
1
  from typing import Any, Optional, Type
2
- from mloda_core.filter.filter_engine import BaseFilterEngine
3
- from mloda_core.filter.single_filter import SingleFilter
2
+ from mloda.provider import BaseFilterEngine
3
+ from mloda.user import SingleFilter
4
4
 
5
5
  try:
6
6
  from pyiceberg.table import Table as IcebergTable
@@ -1,8 +1,8 @@
1
1
  from typing import Any, Set, Type, Optional
2
- from mloda_core.abstract_plugins.components.merge.base_merge_engine import BaseMergeEngine
3
- from mloda_core.abstract_plugins.components.feature_name import FeatureName
4
- from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
5
- from mloda_core.filter.filter_engine import BaseFilterEngine
2
+ from mloda.provider import BaseMergeEngine
3
+ from mloda.user import FeatureName
4
+ from mloda import ComputeFramework
5
+ from mloda.provider import BaseFilterEngine
6
6
  from mloda_plugins.compute_framework.base_implementations.iceberg.iceberg_filter_engine import IcebergFilterEngine
7
7
 
8
8
  try:
@@ -15,7 +15,7 @@ except ImportError:
15
15
  pa = None
16
16
 
17
17
 
18
- class IcebergFramework(ComputeFrameWork):
18
+ class IcebergFramework(ComputeFramework):
19
19
  """
20
20
  Iceberg compute framework implementation.
21
21
 
@@ -60,17 +60,18 @@ class IcebergFramework(ComputeFrameWork):
60
60
  except ImportError:
61
61
  return False
62
62
 
63
- @staticmethod
64
- def expected_data_framework() -> Any:
63
+ @classmethod
64
+ def expected_data_framework(cls) -> Any:
65
65
  """Return the expected Iceberg table type."""
66
66
  if IcebergTable is None:
67
67
  raise ImportError("PyIceberg is not installed. To use this framework, please install pyiceberg.")
68
68
  return IcebergTable
69
69
 
70
- def merge_engine(self) -> Type[BaseMergeEngine]:
70
+ @classmethod
71
+ def merge_engine(cls) -> Type[BaseMergeEngine]:
71
72
  """Iceberg tables don't support direct merging in this framework context."""
72
73
  raise NotImplementedError(
73
- f"Merge functionality is not implemented for {self.__class__.__name__}. "
74
+ f"Merge functionality is not implemented for {cls.__name__}. "
74
75
  "Iceberg tables are typically used for data lake scenarios where merging "
75
76
  "is handled at the catalog/table/engine level, not at the compute framework level."
76
77
  )
@@ -163,6 +164,7 @@ class IcebergFramework(ComputeFrameWork):
163
164
 
164
165
  raise ValueError(f"Data type {type(self.data)} is not supported by {self.__class__.__name__}")
165
166
 
166
- def filter_engine(self) -> Type[BaseFilterEngine]:
167
+ @classmethod
168
+ def filter_engine(cls) -> Type[BaseFilterEngine]:
167
169
  """Return the Iceberg filter engine."""
168
170
  return IcebergFilterEngine
@@ -1,5 +1,5 @@
1
1
  from typing import Any, Optional
2
- from mloda_core.abstract_plugins.components.framework_transformer.base_transformer import BaseTransformer
2
+ from mloda.provider import BaseTransformer
3
3
 
4
4
  try:
5
5
  from pyiceberg.table import Table as IcebergTable
@@ -1,9 +1,9 @@
1
1
  from typing import Any, Set, Type
2
- from mloda_core.abstract_plugins.components.merge.base_merge_engine import BaseMergeEngine
2
+ from mloda.provider import BaseMergeEngine
3
3
  from mloda_plugins.compute_framework.base_implementations.pandas.pandas_merge_engine import PandasMergeEngine
4
- from mloda_core.abstract_plugins.components.feature_name import FeatureName
5
- from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
6
- from mloda_core.filter.filter_engine import BaseFilterEngine
4
+ from mloda.user import FeatureName
5
+ from mloda import ComputeFramework
6
+ from mloda.provider import BaseFilterEngine
7
7
  from mloda_plugins.compute_framework.base_implementations.pandas.pandas_filter_engine import PandasFilterEngine
8
8
 
9
9
  try:
@@ -12,7 +12,7 @@ except ImportError:
12
12
  pd = None
13
13
 
14
14
 
15
- class PandasDataFrame(ComputeFrameWork):
15
+ class PandasDataFrame(ComputeFramework):
16
16
  @staticmethod
17
17
  def is_available() -> bool:
18
18
  """Check if Pandas is installed and available."""
@@ -23,11 +23,12 @@ class PandasDataFrame(ComputeFrameWork):
23
23
  except ImportError:
24
24
  return False
25
25
 
26
- @staticmethod
27
- def expected_data_framework() -> Any:
28
- return PandasDataFrame.pd_dataframe()
26
+ @classmethod
27
+ def expected_data_framework(cls) -> Any:
28
+ return cls.pd_dataframe()
29
29
 
30
- def merge_engine(self) -> Type[BaseMergeEngine]:
30
+ @classmethod
31
+ def merge_engine(cls) -> Type[BaseMergeEngine]:
31
32
  return PandasMergeEngine
32
33
 
33
34
  def select_data_by_column_names(self, data: Any, selected_feature_names: Set[FeatureName]) -> Any:
@@ -38,20 +39,20 @@ class PandasDataFrame(ComputeFrameWork):
38
39
  def set_column_names(self) -> None:
39
40
  self.column_names = set(self.data.columns)
40
41
 
41
- @staticmethod
42
- def pd_dataframe() -> Any:
42
+ @classmethod
43
+ def pd_dataframe(cls) -> Any:
43
44
  if pd is None:
44
45
  raise ImportError("Pandas is not installed. To be able to use this framework, please install pandas.")
45
46
  return pd.DataFrame
46
47
 
47
- @staticmethod
48
- def pd_series() -> Any:
48
+ @classmethod
49
+ def pd_series(cls) -> Any:
49
50
  if pd is None:
50
51
  raise ImportError("Pandas is not installed. To be able to use this framework, please install pandas.")
51
52
  return pd.Series
52
53
 
53
- @staticmethod
54
- def pd_merge() -> Any:
54
+ @classmethod
55
+ def pd_merge(cls) -> Any:
55
56
  if pd is None:
56
57
  raise ImportError("Pandas is not installed. To be able to use this framework, please install pandas.")
57
58
  return pd.merge
@@ -83,5 +84,6 @@ class PandasDataFrame(ComputeFrameWork):
83
84
 
84
85
  raise ValueError(f"Data {type(data)} is not supported by {self.__class__.__name__}")
85
86
 
86
- def filter_engine(self) -> Type[BaseFilterEngine]:
87
+ @classmethod
88
+ def filter_engine(cls) -> Type[BaseFilterEngine]:
87
89
  return PandasFilterEngine
@@ -1,6 +1,6 @@
1
1
  from typing import Any
2
- from mloda_core.filter.filter_engine import BaseFilterEngine
3
- from mloda_core.filter.single_filter import SingleFilter
2
+ from mloda.provider import BaseFilterEngine
3
+ from mloda.user import SingleFilter
4
4
 
5
5
 
6
6
  class PandasFilterEngine(BaseFilterEngine):
@@ -23,11 +23,19 @@ class PandasFilterEngine(BaseFilterEngine):
23
23
 
24
24
  @classmethod
25
25
  def do_min_filter(cls, data: Any, filter_feature: SingleFilter) -> Any:
26
- return data[data[filter_feature.name] >= filter_feature.parameter]
26
+ value = filter_feature.parameter.value
27
+ if value is None:
28
+ raise ValueError(f"Filter parameter 'value' not found in {filter_feature.parameter}")
29
+ return data[data[filter_feature.name] >= value]
27
30
 
28
31
  @classmethod
29
32
  def do_max_filter(cls, data: Any, filter_feature: SingleFilter) -> Any:
30
- if isinstance(filter_feature.parameter, tuple):
33
+ # Check if this is a complex parameter with max/max_exclusive or a simple one with value
34
+ has_max = filter_feature.parameter.max_value is not None
35
+ has_value = filter_feature.parameter.value is not None
36
+
37
+ if has_max:
38
+ # Complex parameter - use get_min_max_operator
31
39
  min_parameter, max_parameter, max_operator = cls.get_min_max_operator(filter_feature)
32
40
 
33
41
  if min_parameter is not None:
@@ -40,21 +48,36 @@ class PandasFilterEngine(BaseFilterEngine):
40
48
  f"Filter parameter {filter_feature.parameter} is None although expected: {filter_feature.name}"
41
49
  )
42
50
 
43
- return (
44
- data[data[filter_feature.name] < max_parameter]
45
- if max_operator
46
- else data[data[filter_feature.name] <= max_parameter]
47
- )
48
- return data[data[filter_feature.name] <= filter_feature.parameter]
51
+ if max_operator is True:
52
+ return data[data[filter_feature.name] < max_parameter]
53
+ else:
54
+ return data[data[filter_feature.name] <= max_parameter]
55
+ elif has_value:
56
+ # Simple parameter - extract the value
57
+ value = filter_feature.parameter.value
58
+ if value is None:
59
+ raise ValueError(f"Filter parameter 'value' not found in {filter_feature.parameter}")
60
+ return data[data[filter_feature.name] <= value]
61
+ else:
62
+ raise ValueError(f"No valid filter parameter found in {filter_feature.parameter}")
49
63
 
50
64
  @classmethod
51
65
  def do_equal_filter(cls, data: Any, filter_feature: SingleFilter) -> Any:
52
- return data[data[filter_feature.name] == filter_feature.parameter]
66
+ value = filter_feature.parameter.value
67
+ if value is None:
68
+ raise ValueError(f"Filter parameter 'value' not found in {filter_feature.parameter}")
69
+ return data[data[filter_feature.name] == value]
53
70
 
54
71
  @classmethod
55
72
  def do_regex_filter(cls, data: Any, filter_feature: SingleFilter) -> Any:
56
- return data[data[filter_feature.name].astype(str).str.match(filter_feature.parameter)]
73
+ value = filter_feature.parameter.value
74
+ if value is None:
75
+ raise ValueError(f"Filter parameter 'value' not found in {filter_feature.parameter}")
76
+ return data[data[filter_feature.name].astype(str).str.match(value)]
57
77
 
58
78
  @classmethod
59
79
  def do_categorical_inclusion_filter(cls, data: Any, filter_feature: SingleFilter) -> Any:
60
- return data[data[filter_feature.name].isin(filter_feature.parameter)]
80
+ values = filter_feature.parameter.values
81
+ if values is None:
82
+ raise ValueError(f"Filter parameter 'values' not found in {filter_feature.parameter}")
83
+ return data[data[filter_feature.name].isin(values)]
@@ -1,8 +1,8 @@
1
1
  from typing import Any, Union
2
2
 
3
- from mloda_core.abstract_plugins.components.index.index import Index
4
- from mloda_core.abstract_plugins.components.link import JoinType
5
- from mloda_core.abstract_plugins.components.merge.base_merge_engine import BaseMergeEngine
3
+ from mloda.user import Index
4
+ from mloda.user import JoinType
5
+ from mloda.provider import BaseMergeEngine
6
6
 
7
7
  try:
8
8
  import pandas as pd
@@ -49,14 +49,14 @@ class PandasMergeEngine(BaseMergeEngine):
49
49
  left_data = self.pd_merge()(left_data, right_data, left_on=left_idx, right_on=right_idx, how=join_type)
50
50
  return left_data
51
51
 
52
- @staticmethod
53
- def pd_merge() -> Any:
52
+ @classmethod
53
+ def pd_merge(cls) -> Any:
54
54
  if pd is None:
55
55
  raise ImportError("Pandas is not installed. To be able to use this framework, please install pandas.")
56
56
  return pd.merge
57
57
 
58
- @staticmethod
59
- def pd_concat() -> Any:
58
+ @classmethod
59
+ def pd_concat(cls) -> Any:
60
60
  if pd is None:
61
61
  raise ImportError("Pandas is not installed. To be able to use this framework, please install pandas.")
62
62
  return pd.concat
@@ -1,6 +1,6 @@
1
1
  from typing import Any, Optional
2
2
 
3
- from mloda_core.abstract_plugins.components.framework_transformer.base_transformer import BaseTransformer
3
+ from mloda.provider import BaseTransformer
4
4
 
5
5
  try:
6
6
  import pandas as pd
@@ -1,9 +1,9 @@
1
1
  from typing import Any, Set, Type
2
- from mloda_core.abstract_plugins.components.merge.base_merge_engine import BaseMergeEngine
2
+ from mloda.provider import BaseMergeEngine
3
3
  from mloda_plugins.compute_framework.base_implementations.polars.polars_merge_engine import PolarsMergeEngine
4
- from mloda_core.abstract_plugins.components.feature_name import FeatureName
5
- from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
6
- from mloda_core.filter.filter_engine import BaseFilterEngine
4
+ from mloda.user import FeatureName
5
+ from mloda import ComputeFramework
6
+ from mloda.provider import BaseFilterEngine
7
7
  from mloda_plugins.compute_framework.base_implementations.polars.polars_filter_engine import PolarsFilterEngine
8
8
 
9
9
  try:
@@ -12,7 +12,7 @@ except ImportError:
12
12
  pl = None # type: ignore[assignment]
13
13
 
14
14
 
15
- class PolarsDataFrame(ComputeFrameWork):
15
+ class PolarsDataFrame(ComputeFramework):
16
16
  @staticmethod
17
17
  def is_available() -> bool:
18
18
  """Check if Polars is installed and available."""
@@ -23,11 +23,12 @@ class PolarsDataFrame(ComputeFrameWork):
23
23
  except ImportError:
24
24
  return False
25
25
 
26
- @staticmethod
27
- def expected_data_framework() -> Any:
28
- return PolarsDataFrame.pl_dataframe()
26
+ @classmethod
27
+ def expected_data_framework(cls) -> Any:
28
+ return cls.pl_dataframe()
29
29
 
30
- def merge_engine(self) -> Type[BaseMergeEngine]:
30
+ @classmethod
31
+ def merge_engine(cls) -> Type[BaseMergeEngine]:
31
32
  return PolarsMergeEngine
32
33
 
33
34
  def select_data_by_column_names(self, data: Any, selected_feature_names: Set[FeatureName]) -> Any:
@@ -38,14 +39,14 @@ class PolarsDataFrame(ComputeFrameWork):
38
39
  def set_column_names(self) -> None:
39
40
  self.column_names = set(self.data.columns)
40
41
 
41
- @staticmethod
42
- def pl_dataframe() -> Any:
42
+ @classmethod
43
+ def pl_dataframe(cls) -> Any:
43
44
  if pl is None:
44
45
  raise ImportError("Polars is not installed. To be able to use this framework, please install polars.")
45
46
  return pl.DataFrame
46
47
 
47
- @staticmethod
48
- def pl_series() -> Any:
48
+ @classmethod
49
+ def pl_series(cls) -> Any:
49
50
  if pl is None:
50
51
  raise ImportError("Polars is not installed. To be able to use this framework, please install polars.")
51
52
  return pl.Series
@@ -77,5 +78,6 @@ class PolarsDataFrame(ComputeFrameWork):
77
78
 
78
79
  raise ValueError(f"Data {type(data)} is not supported by {self.__class__.__name__}")
79
80
 
80
- def filter_engine(self) -> Type[BaseFilterEngine]:
81
+ @classmethod
82
+ def filter_engine(cls) -> Type[BaseFilterEngine]:
81
83
  return PolarsFilterEngine
@@ -1,7 +1,7 @@
1
1
  from typing import Any, Set, Type
2
- from mloda_core.abstract_plugins.components.feature_name import FeatureName
2
+ from mloda.user import FeatureName
3
3
  from mloda_plugins.compute_framework.base_implementations.polars.dataframe import PolarsDataFrame
4
- from mloda_core.abstract_plugins.components.merge.base_merge_engine import BaseMergeEngine
4
+ from mloda.provider import BaseMergeEngine
5
5
  from mloda_plugins.compute_framework.base_implementations.polars.polars_lazy_merge_engine import PolarsLazyMergeEngine
6
6
 
7
7
  try:
@@ -18,11 +18,12 @@ class PolarsLazyDataFrame(PolarsDataFrame):
18
18
  requested, enabling query optimization and reduced memory usage for large datasets.
19
19
  """
20
20
 
21
- @staticmethod
22
- def expected_data_framework() -> Any:
23
- return PolarsLazyDataFrame.pl_lazy_frame()
21
+ @classmethod
22
+ def expected_data_framework(cls) -> Any:
23
+ return cls.pl_lazy_frame()
24
24
 
25
- def merge_engine(self) -> Type[BaseMergeEngine]:
25
+ @classmethod
26
+ def merge_engine(cls) -> Type[BaseMergeEngine]:
26
27
  return PolarsLazyMergeEngine
27
28
 
28
29
  def select_data_by_column_names(self, data: Any, selected_feature_names: Set[FeatureName]) -> Any:
@@ -39,20 +40,20 @@ class PolarsLazyDataFrame(PolarsDataFrame):
39
40
  else:
40
41
  raise ValueError("Data does not have a collect_schema method, cannot set column names.")
41
42
 
42
- @staticmethod
43
- def pl_lazy_frame() -> Any:
43
+ @classmethod
44
+ def pl_lazy_frame(cls) -> Any:
44
45
  if pl is None:
45
46
  raise ImportError("Polars is not installed. To be able to use this framework, please install polars.")
46
47
  return pl.LazyFrame
47
48
 
48
- @staticmethod
49
- def pl_dataframe() -> Any:
49
+ @classmethod
50
+ def pl_dataframe(cls) -> Any:
50
51
  if pl is None:
51
52
  raise ImportError("Polars is not installed. To be able to use this framework, please install polars.")
52
53
  return pl.DataFrame
53
54
 
54
- @staticmethod
55
- def pl_series() -> Any:
55
+ @classmethod
56
+ def pl_series(cls) -> Any:
56
57
  if pl is None:
57
58
  raise ImportError("Polars is not installed. To be able to use this framework, please install polars.")
58
59
  return pl.Series
@@ -1,6 +1,6 @@
1
1
  from typing import Any
2
- from mloda_core.filter.filter_engine import BaseFilterEngine
3
- from mloda_core.filter.single_filter import SingleFilter
2
+ from mloda.provider import BaseFilterEngine
3
+ from mloda.user import SingleFilter
4
4
 
5
5
  try:
6
6
  import polars as pl
@@ -1,6 +1,6 @@
1
1
  from typing import Any, Optional
2
2
 
3
- from mloda_core.abstract_plugins.components.framework_transformer.base_transformer import BaseTransformer
3
+ from mloda.provider import BaseTransformer
4
4
 
5
5
  try:
6
6
  import polars as pl
@@ -1,8 +1,8 @@
1
1
  from typing import Any, Union
2
2
 
3
- from mloda_core.abstract_plugins.components.index.index import Index
4
- from mloda_core.abstract_plugins.components.link import JoinType
5
- from mloda_core.abstract_plugins.components.merge.base_merge_engine import BaseMergeEngine
3
+ from mloda.user import Index
4
+ from mloda.user import JoinType
5
+ from mloda.provider import BaseMergeEngine
6
6
 
7
7
  try:
8
8
  import polars as pl
@@ -1,6 +1,6 @@
1
1
  from typing import Any, Optional
2
2
 
3
- from mloda_core.abstract_plugins.components.framework_transformer.base_transformer import BaseTransformer
3
+ from mloda.provider import BaseTransformer
4
4
 
5
5
  try:
6
6
  import polars as pl
@@ -2,8 +2,8 @@ from typing import Any
2
2
  import pyarrow as pa
3
3
  import pyarrow.compute as pc
4
4
 
5
- from mloda_core.filter.filter_engine import BaseFilterEngine
6
- from mloda_core.filter.single_filter import SingleFilter
5
+ from mloda.provider import BaseFilterEngine
6
+ from mloda.user import SingleFilter
7
7
 
8
8
 
9
9
  class PyArrowFilterEngine(BaseFilterEngine):
@@ -3,9 +3,9 @@ from typing import Any
3
3
  import pyarrow as pa
4
4
  import pyarrow.compute as pc
5
5
 
6
- from mloda_core.abstract_plugins.components.index.index import Index
7
- from mloda_core.abstract_plugins.components.link import JoinType
8
- from mloda_core.abstract_plugins.components.merge.base_merge_engine import BaseMergeEngine
6
+ from mloda.user import Index
7
+ from mloda.user import JoinType
8
+ from mloda.provider import BaseMergeEngine
9
9
 
10
10
 
11
11
  class PyArrowMergeEngine(BaseMergeEngine):
@@ -1,13 +1,13 @@
1
1
  from typing import Any, Optional, Set, Type
2
- from mloda_core.abstract_plugins.components.data_access_collection import DataAccessCollection
3
- from mloda_core.abstract_plugins.components.merge.base_merge_engine import BaseMergeEngine
4
- from mloda_core.filter.filter_engine import BaseFilterEngine
2
+ from mloda.user import DataAccessCollection
3
+ from mloda.provider import BaseMergeEngine
4
+ from mloda.provider import BaseFilterEngine
5
5
  from mloda_plugins.compute_framework.base_implementations.pyarrow.pyarrow_merge_engine import PyArrowMergeEngine
6
6
  from mloda_plugins.compute_framework.base_implementations.pyarrow.pyarrow_filter_engine import PyArrowFilterEngine
7
7
  import pyarrow as pa
8
8
 
9
- from mloda_core.abstract_plugins.components.feature_name import FeatureName
10
- from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
9
+ from mloda.user import FeatureName
10
+ from mloda import ComputeFramework
11
11
 
12
12
 
13
13
  try:
@@ -16,7 +16,7 @@ except ImportError:
16
16
  pd = None
17
17
 
18
18
 
19
- class PyArrowTable(ComputeFrameWork):
19
+ class PyArrowTable(ComputeFramework):
20
20
  @staticmethod
21
21
  def is_available() -> bool:
22
22
  """Check if PyArrow is installed and available."""
@@ -27,14 +27,16 @@ class PyArrowTable(ComputeFrameWork):
27
27
  except ImportError:
28
28
  return False
29
29
 
30
- @staticmethod
31
- def expected_data_framework() -> Any:
30
+ @classmethod
31
+ def expected_data_framework(cls) -> Any:
32
32
  return pa.Table
33
33
 
34
- def merge_engine(self) -> Type[BaseMergeEngine]:
34
+ @classmethod
35
+ def merge_engine(cls) -> Type[BaseMergeEngine]:
35
36
  return PyArrowMergeEngine
36
37
 
37
- def filter_engine(self) -> Type[BaseFilterEngine]:
38
+ @classmethod
39
+ def filter_engine(cls) -> Type[BaseFilterEngine]:
38
40
  return PyArrowFilterEngine
39
41
 
40
42
  def select_data_by_column_names(self, data: Any, selected_feature_names: Set[FeatureName]) -> Any:
@@ -1,7 +1,7 @@
1
1
  import re
2
2
  from typing import Any
3
- from mloda_core.filter.filter_engine import BaseFilterEngine
4
- from mloda_core.filter.single_filter import SingleFilter
3
+ from mloda.provider import BaseFilterEngine
4
+ from mloda.user import SingleFilter
5
5
 
6
6
 
7
7
  class PythonDictFilterEngine(BaseFilterEngine):
@@ -1,17 +1,17 @@
1
1
  from typing import Any, Set, Type, List, Dict
2
- from mloda_core.abstract_plugins.components.merge.base_merge_engine import BaseMergeEngine
2
+ from mloda.provider import BaseMergeEngine
3
3
  from mloda_plugins.compute_framework.base_implementations.python_dict.python_dict_merge_engine import (
4
4
  PythonDictMergeEngine,
5
5
  )
6
- from mloda_core.abstract_plugins.components.feature_name import FeatureName
7
- from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
8
- from mloda_core.filter.filter_engine import BaseFilterEngine
6
+ from mloda.user import FeatureName
7
+ from mloda import ComputeFramework
8
+ from mloda.provider import BaseFilterEngine
9
9
  from mloda_plugins.compute_framework.base_implementations.python_dict.python_dict_filter_engine import (
10
10
  PythonDictFilterEngine,
11
11
  )
12
12
 
13
13
 
14
- class PythonDictFramework(ComputeFrameWork):
14
+ class PythonDictFramework(ComputeFramework):
15
15
  """
16
16
  PythonDict Compute Framework
17
17
 
@@ -29,11 +29,12 @@ class PythonDictFramework(ComputeFrameWork):
29
29
  ]
30
30
  """
31
31
 
32
- @staticmethod
33
- def expected_data_framework() -> Any:
32
+ @classmethod
33
+ def expected_data_framework(cls) -> Any:
34
34
  return list
35
35
 
36
- def merge_engine(self) -> Type[BaseMergeEngine]:
36
+ @classmethod
37
+ def merge_engine(cls) -> Type[BaseMergeEngine]:
37
38
  return PythonDictMergeEngine
38
39
 
39
40
  def select_data_by_column_names(
@@ -118,7 +119,8 @@ class PythonDictFramework(ComputeFrameWork):
118
119
 
119
120
  raise ValueError(f"Data type {type(data)} is not supported by {self.__class__.__name__}")
120
121
 
121
- def filter_engine(self) -> Type[BaseFilterEngine]:
122
+ @classmethod
123
+ def filter_engine(cls) -> Type[BaseFilterEngine]:
122
124
  """
123
125
  Returns the filter engine for PythonDict framework.
124
126