mloda-0.3.0-py3-none-any.whl → mloda-0.3.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. {mloda-0.3.0.dist-info → mloda-0.3.2.dist-info}/METADATA +10 -10
  2. {mloda-0.3.0.dist-info → mloda-0.3.2.dist-info}/RECORD +92 -91
  3. mloda_core/abstract_plugins/components/base_artifact.py +3 -1
  4. mloda_core/abstract_plugins/components/feature.py +4 -4
  5. mloda_core/abstract_plugins/components/feature_chainer/feature_chain_parser.py +44 -17
  6. mloda_core/abstract_plugins/components/feature_collection.py +2 -2
  7. mloda_core/abstract_plugins/components/feature_group_version.py +4 -4
  8. mloda_core/abstract_plugins/components/feature_name.py +0 -3
  9. mloda_core/abstract_plugins/components/input_data/base_input_data.py +3 -3
  10. mloda_core/abstract_plugins/components/link.py +113 -29
  11. mloda_core/abstract_plugins/components/options.py +10 -10
  12. mloda_core/api/prepare/setup_compute_framework.py +2 -2
  13. mloda_core/api/request.py +44 -13
  14. mloda_core/core/step/feature_group_step.py +2 -1
  15. mloda_core/filter/filter_engine.py +3 -12
  16. mloda_core/filter/filter_parameter.py +55 -0
  17. mloda_core/filter/single_filter.py +4 -4
  18. mloda_core/prepare/execution_plan.py +12 -6
  19. mloda_core/prepare/graph/graph.py +3 -3
  20. mloda_core/prepare/identify_feature_group.py +10 -3
  21. mloda_core/prepare/resolve_links.py +86 -18
  22. mloda_core/runtime/flight/flight_server.py +1 -1
  23. mloda_core/runtime/run.py +7 -5
  24. mloda_core/runtime/worker/multiprocessing_worker.py +11 -9
  25. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_filter_engine.py +7 -33
  26. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_pyarrow_transformer.py +1 -1
  27. mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_filter_engine.py +22 -12
  28. mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_framework.py +2 -2
  29. mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_pyarrow_transformer.py +2 -2
  30. mloda_plugins/compute_framework/base_implementations/pandas/dataframe.py +2 -2
  31. mloda_plugins/compute_framework/base_implementations/pandas/pandaspyarrowtransformer.py +1 -1
  32. mloda_plugins/compute_framework/base_implementations/polars/dataframe.py +3 -3
  33. mloda_plugins/compute_framework/base_implementations/polars/lazy_dataframe.py +5 -5
  34. mloda_plugins/compute_framework/base_implementations/polars/polars_filter_engine.py +8 -34
  35. mloda_plugins/compute_framework/base_implementations/polars/polars_lazy_merge_engine.py +1 -1
  36. mloda_plugins/compute_framework/base_implementations/polars/polars_lazy_pyarrow_transformer.py +3 -3
  37. mloda_plugins/compute_framework/base_implementations/polars/polars_merge_engine.py +1 -1
  38. mloda_plugins/compute_framework/base_implementations/polars/polars_pyarrow_transformer.py +2 -2
  39. mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_filter_engine.py +7 -33
  40. mloda_plugins/compute_framework/base_implementations/pyarrow/table.py +1 -1
  41. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_filter_engine.py +13 -32
  42. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_framework.py +1 -1
  43. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_pyarrow_transformer.py +1 -1
  44. mloda_plugins/compute_framework/base_implementations/spark/spark_filter_engine.py +13 -32
  45. mloda_plugins/compute_framework/base_implementations/spark/spark_framework.py +4 -4
  46. mloda_plugins/compute_framework/base_implementations/spark/spark_pyarrow_transformer.py +1 -1
  47. mloda_plugins/config/feature/loader.py +12 -18
  48. mloda_plugins/feature_group/experimental/aggregated_feature_group/base.py +20 -17
  49. mloda_plugins/feature_group/experimental/aggregated_feature_group/pandas.py +8 -8
  50. mloda_plugins/feature_group/experimental/aggregated_feature_group/polars_lazy.py +8 -8
  51. mloda_plugins/feature_group/experimental/aggregated_feature_group/pyarrow.py +7 -7
  52. mloda_plugins/feature_group/experimental/clustering/base.py +26 -26
  53. mloda_plugins/feature_group/experimental/clustering/pandas.py +31 -29
  54. mloda_plugins/feature_group/experimental/data_quality/missing_value/base.py +23 -22
  55. mloda_plugins/feature_group/experimental/data_quality/missing_value/pandas.py +16 -16
  56. mloda_plugins/feature_group/experimental/data_quality/missing_value/pyarrow.py +9 -11
  57. mloda_plugins/feature_group/experimental/data_quality/missing_value/python_dict.py +8 -8
  58. mloda_plugins/feature_group/experimental/default_options_key.py +1 -1
  59. mloda_plugins/feature_group/experimental/dimensionality_reduction/base.py +17 -15
  60. mloda_plugins/feature_group/experimental/dimensionality_reduction/pandas.py +30 -18
  61. mloda_plugins/feature_group/experimental/dynamic_feature_group_factory/dynamic_feature_group_factory.py +35 -35
  62. mloda_plugins/feature_group/experimental/forecasting/base.py +39 -29
  63. mloda_plugins/feature_group/experimental/forecasting/pandas.py +18 -18
  64. mloda_plugins/feature_group/experimental/geo_distance/base.py +18 -20
  65. mloda_plugins/feature_group/experimental/geo_distance/pandas.py +2 -2
  66. mloda_plugins/feature_group/experimental/llm/cli_features/refactor_git_cached.py +6 -6
  67. mloda_plugins/feature_group/experimental/llm/installed_packages_feature_group.py +2 -2
  68. mloda_plugins/feature_group/experimental/llm/list_directory_feature_group.py +2 -2
  69. mloda_plugins/feature_group/experimental/llm/llm_api/llm_base_request.py +2 -2
  70. mloda_plugins/feature_group/experimental/llm/llm_api/request_loop.py +3 -2
  71. mloda_plugins/feature_group/experimental/llm/llm_file_selector.py +1 -1
  72. mloda_plugins/feature_group/experimental/node_centrality/base.py +8 -12
  73. mloda_plugins/feature_group/experimental/node_centrality/pandas.py +2 -2
  74. mloda_plugins/feature_group/experimental/sklearn/encoding/base.py +11 -12
  75. mloda_plugins/feature_group/experimental/sklearn/encoding/pandas.py +2 -2
  76. mloda_plugins/feature_group/experimental/sklearn/pipeline/base.py +9 -14
  77. mloda_plugins/feature_group/experimental/sklearn/pipeline/pandas.py +2 -2
  78. mloda_plugins/feature_group/experimental/sklearn/scaling/base.py +8 -9
  79. mloda_plugins/feature_group/experimental/sklearn/scaling/pandas.py +2 -2
  80. mloda_plugins/feature_group/experimental/source_input_feature.py +10 -10
  81. mloda_plugins/feature_group/experimental/text_cleaning/base.py +8 -11
  82. mloda_plugins/feature_group/experimental/text_cleaning/pandas.py +2 -2
  83. mloda_plugins/feature_group/experimental/time_window/base.py +27 -25
  84. mloda_plugins/feature_group/experimental/time_window/pandas.py +8 -8
  85. mloda_plugins/feature_group/experimental/time_window/pyarrow.py +6 -6
  86. mloda_plugins/feature_group/input_data/read_context_files.py +1 -1
  87. mloda_plugins/function_extender/base_implementations/otel/otel_extender.py +1 -1
  88. {mloda-0.3.0.dist-info → mloda-0.3.2.dist-info}/WHEEL +0 -0
  89. {mloda-0.3.0.dist-info → mloda-0.3.2.dist-info}/entry_points.txt +0 -0
  90. {mloda-0.3.0.dist-info → mloda-0.3.2.dist-info}/licenses/LICENSE.TXT +0 -0
  91. {mloda-0.3.0.dist-info → mloda-0.3.2.dist-info}/licenses/NOTICE.md +0 -0
  92. {mloda-0.3.0.dist-info → mloda-0.3.2.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  from collections import defaultdict
2
2
  from copy import copy
3
- from typing import Dict, List, Set
3
+ from typing import DefaultDict, Dict, List, Set
4
4
  from uuid import UUID
5
5
 
6
6
  from mloda_core.prepare.graph.properties import EdgeProperties, NodeProperties
@@ -8,8 +8,8 @@ from mloda_core.prepare.graph.properties import EdgeProperties, NodeProperties
8
8
 
9
9
  class Graph:
10
10
  def __init__(self) -> None:
11
- self.nodes: Dict[UUID, NodeProperties] = defaultdict(lambda: NodeProperties(None, None)) # type: ignore
12
- self.edges: Dict[tuple[UUID, UUID], EdgeProperties] = defaultdict(lambda: EdgeProperties(None, None)) # type: ignore
11
+ self.nodes: DefaultDict[UUID, NodeProperties] = defaultdict(lambda: NodeProperties(None, None)) # type: ignore[arg-type]
12
+ self.edges: DefaultDict[tuple[UUID, UUID], EdgeProperties] = defaultdict(lambda: EdgeProperties(None, None)) # type: ignore[arg-type]
13
13
 
14
14
  self.adjacency_list: Dict[UUID, list[UUID]] = defaultdict(list)
15
15
 
@@ -1,4 +1,5 @@
1
1
  from typing import Optional, Set, Tuple, Type
2
+
2
3
  from mloda_core.prepare.accessible_plugins import FeatureGroupEnvironmentMapping
3
4
  from mloda_core.abstract_plugins.components.data_access_collection import DataAccessCollection
4
5
  from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
@@ -145,14 +146,20 @@ class IdentifyFeatureGroupClass:
145
146
  Check if the code is running in a notebook environment.
146
147
  """
147
148
  try:
148
- from IPython import get_ipython # type: ignore
149
+ from IPython import get_ipython # type: ignore[attr-defined]
149
150
 
150
- shell = get_ipython().__class__.__name__ # type: ignore
151
+ ipython_instance = get_ipython() # type: ignore[no-untyped-call]
152
+ if ipython_instance is None:
153
+ return ""
154
+ shell: str = ipython_instance.__class__.__name__
151
155
  if shell == "ZMQInteractiveShell":
152
- return """If you are running this in a notebook, please restart the kernel to clear any cached plugins.
156
+ return """If you are running this in a notebook, please restart the kernel to clear any cached plugins.
153
157
  If you experience this multiple times, please open an issue or contact the maintainers for prioritization.
154
158
  https://github.com/mloda-ai/mloda/issues
155
159
  """
160
+ except ImportError:
161
+ # IPython not installed
162
+ pass
156
163
  except Exception:
157
164
  # An exception here means we are not in a notebook environment.
158
165
  pass # nosec B110
@@ -270,24 +270,92 @@ class ResolveLinks:
270
270
  if not self.links:
271
271
  return
272
272
 
273
- for link in self.links:
274
- for child, parents in self.graph.parent_to_children_mapping.items():
275
- for parent_in in parents:
276
- for parent_out in parents:
277
- if parent_in == parent_out:
278
- continue
279
-
280
- r_left = self.graph.get_nodes()[parent_in]
281
- r_right = self.graph.get_nodes()[parent_out]
282
-
283
- if link.matches(
284
- other_left_feature_group=r_left.feature_group_class,
285
- other_right_feature_group=r_right.feature_group_class,
286
- ):
287
- key = self.create_link_trekker_key(
288
- link, r_left.feature.compute_frameworks, r_right.feature.compute_frameworks
289
- )
290
- self.set_link_trekker(key, child)
273
+ for child, parents in self.graph.parent_to_children_mapping.items():
274
+ for parent_in in parents:
275
+ for parent_out in parents:
276
+ if parent_in == parent_out:
277
+ continue
278
+
279
+ r_left = self.graph.get_nodes()[parent_in]
280
+ r_right = self.graph.get_nodes()[parent_out]
281
+ left_fg = r_left.feature_group_class
282
+ right_fg = r_right.feature_group_class
283
+
284
+ # Two-pass matching: exact match first, then polymorphic
285
+ matched_links = self._find_matching_links(left_fg, right_fg)
286
+
287
+ for matched_link in matched_links:
288
+ key = self.create_link_trekker_key(
289
+ matched_link, r_left.feature.compute_frameworks, r_right.feature.compute_frameworks
290
+ )
291
+ self.set_link_trekker(key, child)
292
+
293
+ def _find_matching_links(self, left_fg: type, right_fg: type) -> List[Link]:
294
+ """Find all matching links using two-pass matching: exact first, then polymorphic.
295
+
296
+ Returns all exact matches if any exist, otherwise returns the most specific
297
+ polymorphic matches (closest in inheritance hierarchy).
298
+ """
299
+ if self.links is None:
300
+ return []
301
+
302
+ # Pass 1: Collect all exact matches
303
+ exact_matches = [link for link in self.links if link.matches_exact(left_fg, right_fg)]
304
+ if exact_matches:
305
+ return exact_matches
306
+
307
+ # Pass 2: If no exact matches, find most specific polymorphic matches
308
+ polymorphic_matches = [link for link in self.links if link.matches_polymorphic(left_fg, right_fg)]
309
+ if not polymorphic_matches:
310
+ return []
311
+
312
+ # Find the most specific match (smallest inheritance distance)
313
+ return self._select_most_specific_links(polymorphic_matches, left_fg, right_fg)
314
+
315
+ def _inheritance_distance(self, child: type, parent: type) -> int:
316
+ """Calculate the inheritance distance from child to parent in the MRO.
317
+
318
+ Returns the number of steps in the Method Resolution Order from child to parent.
319
+ Returns a large number if parent is not in child's MRO.
320
+ """
321
+ try:
322
+ mro = child.__mro__
323
+ return mro.index(parent)
324
+ except (ValueError, AttributeError):
325
+ return 9999 # Not in hierarchy
326
+
327
+ def _select_most_specific_links(self, links: List[Link], left_fg: type, right_fg: type) -> List[Link]:
328
+ """Select links that are most specific (closest in inheritance hierarchy).
329
+
330
+ For each link, calculates the inheritance distance on both sides.
331
+ Only considers links where both sides have the same inheritance distance
332
+ (to avoid sibling mismatches). Returns links with the minimum distance.
333
+ """
334
+ if not links:
335
+ return []
336
+
337
+ # Calculate distance for each link, filtering out unbalanced matches
338
+ link_distances: List[Tuple[Link, int]] = []
339
+ for link in links:
340
+ left_dist = self._inheritance_distance(left_fg, link.left_feature_group)
341
+ right_dist = self._inheritance_distance(right_fg, link.right_feature_group)
342
+
343
+ # Only consider links where both sides have the same inheritance level
344
+ # This prevents sibling class mismatches for self-join patterns
345
+ link_is_self_join = link.left_feature_group == link.right_feature_group
346
+ # For self-joins: require same concrete class to prevent sibling mismatches
347
+ # For different-class joins: balanced distance is sufficient
348
+ if left_dist == right_dist and (not link_is_self_join or left_fg == right_fg):
349
+ link_distances.append((link, left_dist))
350
+
351
+ if not link_distances:
352
+ return []
353
+
354
+ # Find minimum distance
355
+ min_dist = min(dist for _, dist in link_distances)
356
+
357
+ # Return all links with minimum distance
358
+ return [link for link, dist in link_distances if dist == min_dist]
291
359
 
292
360
  def set_link_trekker(self, link_trekker_key: LinkFrameworkTrekker, uuid: UUID) -> None:
293
361
  self.link_trekker.update(link_trekker_key, uuid)
@@ -18,7 +18,7 @@ def create_location(host: str = "0.0.0.0") -> str:
18
18
  return f"grpc://{host}:{port}"
19
19
 
20
20
 
21
- class FlightServer(flight.FlightServerBase): # type: ignore
21
+ class FlightServer(flight.FlightServerBase): # type: ignore[misc]
22
22
  def __init__(self, location: Any = create_location()) -> None:
23
23
  self.tables: Dict[str, Any] = {} # Dictionary to store tables
24
24
  self.location = location
mloda_core/runtime/run.py CHANGED
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  from collections import defaultdict
2
4
  import multiprocessing
3
5
  import queue
@@ -59,10 +61,10 @@ class Runner:
59
61
  # multiprocessing
60
62
  self.location: Optional[str] = None
61
63
  self.tasks: List[Union[threading.Thread, multiprocessing.Process]] = []
62
- self.process_register: Dict[ # type: ignore
63
- UUID, Tuple[multiprocessing.Process, multiprocessing.Queue, multiprocessing.Queue]
64
+ self.process_register: Dict[
65
+ UUID, Tuple[multiprocessing.Process, multiprocessing.Queue[Any], multiprocessing.Queue[Any]]
64
66
  ] = defaultdict()
65
- self.result_queues_collection: Set[multiprocessing.Queue] = set() # type: ignore
67
+ self.result_queues_collection: Set[multiprocessing.Queue[Any]] = set()
66
68
  self.result_uuids_collection: Set[UUID] = set()
67
69
 
68
70
  # Initialize framework transformer
@@ -524,7 +526,7 @@ class Runner:
524
526
  """
525
527
  MyManager.register("CfwManager", CfwManager)
526
528
  self.manager = MyManager().__enter__()
527
- self.cfw_register = self.manager.CfwManager(parallelization_modes, function_extender) # type: ignore
529
+ self.cfw_register = self.manager.CfwManager(parallelization_modes, function_extender) # type: ignore[attr-defined]
528
530
 
529
531
  if self.flight_server:
530
532
  if self.flight_server.flight_server_process is None:
@@ -587,7 +589,7 @@ class Runner:
587
589
 
588
590
  def _get_execution_function(
589
591
  self, mode_by_cfw_register: Set[ParallelizationModes], mode_by_step: Set[ParallelizationModes]
590
- ) -> Callable: # type: ignore
592
+ ) -> Callable[[Any], None]:
591
593
  """
592
594
  Identifies the execution mode and returns the corresponding execute step function.
593
595
 
@@ -1,8 +1,10 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
4
  import multiprocessing
3
5
  import time
4
6
  import traceback
5
- from typing import Any, Union
7
+ from typing import Any, Set, Union
6
8
  from uuid import UUID
7
9
  from queue import Empty
8
10
 
@@ -16,16 +18,16 @@ from mloda_core.core.step.transform_frame_work_step import TransformFrameworkSte
16
18
  logger = logging.getLogger(__name__)
17
19
 
18
20
 
19
- def _handle_stop_command(command_queue: multiprocessing.Queue) -> None: # type: ignore
21
+ def _handle_stop_command(command_queue: multiprocessing.Queue[Any]) -> None:
20
22
  """Puts a 'STOP' command in the command queue."""
21
23
  if command_queue:
22
24
  command_queue.put("STOP", block=False)
23
25
 
24
26
 
25
27
  def _handle_data_dropping(
26
- command_queue: multiprocessing.Queue, # type: ignore
28
+ command_queue: multiprocessing.Queue[Any],
27
29
  cfw: ComputeFrameWork,
28
- command: set, # type: ignore
30
+ command: Set[Any],
29
31
  location: str,
30
32
  ) -> bool:
31
33
  """Handles dropping already calculated data based on the provided command."""
@@ -46,7 +48,7 @@ def _execute_command(
46
48
  """Executes a given command based on its type."""
47
49
  if isinstance(command, JoinStep):
48
50
  # Left framework here, because it is already transformed beforehand
49
- from_cfw = cfw_register.get_cfw_uuid(command.left_framework.get_class_name(), command.link.uuid) # type: ignore
51
+ from_cfw = cfw_register.get_cfw_uuid(command.left_framework.get_class_name(), command.link.uuid) # type: ignore[assignment]
50
52
 
51
53
  if from_cfw is None:
52
54
  from_cfw = cfw_register.get_cfw_uuid(
@@ -74,7 +76,7 @@ def _handle_command_result(
74
76
  cfw: ComputeFrameWork,
75
77
  location: str,
76
78
  data: Any,
77
- result_queue: multiprocessing.Queue, # type: ignore
79
+ result_queue: multiprocessing.Queue[Any],
78
80
  ) -> None:
79
81
  """Handles the result of a command execution, including uploading data if necessary."""
80
82
  if not isinstance(data, str) and isinstance(command, FeatureGroupStep):
@@ -89,8 +91,8 @@ def _handle_command_result(
89
91
 
90
92
 
91
93
  def worker(
92
- command_queue: multiprocessing.Queue, # type: ignore
93
- result_queue: multiprocessing.Queue, # type: ignore
94
+ command_queue: multiprocessing.Queue[Any],
95
+ result_queue: multiprocessing.Queue[Any],
94
96
  cfw_register: CfwManager,
95
97
  cfw: ComputeFrameWork,
96
98
  from_cfw: UUID,
@@ -135,7 +137,7 @@ def worker(
135
137
  time.sleep(0.0001)
136
138
 
137
139
 
138
- def error_out(cfw_register: CfwManager, command_queue: multiprocessing.Queue) -> None: # type: ignore
140
+ def error_out(cfw_register: CfwManager, command_queue: multiprocessing.Queue[Any]) -> None:
139
141
  msg = """This is a critical error, the location should not be None."""
140
142
  logging.error(msg)
141
143
  exc_info = traceback.format_exc()
@@ -31,11 +31,7 @@ class DuckDBFilterEngine(BaseFilterEngine):
31
31
  column_name = filter_feature.name.name
32
32
 
33
33
  # Extract the value from the parameter
34
- value = None
35
- for param in filter_feature.parameter:
36
- if param[0] == "value":
37
- value = param[1]
38
- break
34
+ value = filter_feature.parameter.value
39
35
 
40
36
  if value is None:
41
37
  raise ValueError(f"Filter parameter 'value' not found in {filter_feature.parameter}")
@@ -49,14 +45,8 @@ class DuckDBFilterEngine(BaseFilterEngine):
49
45
  column_name = filter_feature.name.name
50
46
 
51
47
  # Check if this is a complex parameter with max/max_exclusive or a simple one with value
52
- has_max = False
53
- has_value = False
54
-
55
- for param in filter_feature.parameter:
56
- if param[0] == "max":
57
- has_max = True
58
- elif param[0] == "value":
59
- has_value = True
48
+ has_max = filter_feature.parameter.max_value is not None
49
+ has_value = filter_feature.parameter.value is not None
60
50
 
61
51
  if has_max:
62
52
  # Complex parameter - use get_min_max_operator
@@ -78,11 +68,7 @@ class DuckDBFilterEngine(BaseFilterEngine):
78
68
  condition = f'"{column_name}" <= {max_parameter}'
79
69
  elif has_value:
80
70
  # Simple parameter - extract the value
81
- value = None
82
- for param in filter_feature.parameter:
83
- if param[0] == "value":
84
- value = param[1]
85
- break
71
+ value = filter_feature.parameter.value
86
72
 
87
73
  if value is None:
88
74
  raise ValueError(f"Filter parameter 'value' not found in {filter_feature.parameter}")
@@ -98,11 +84,7 @@ class DuckDBFilterEngine(BaseFilterEngine):
98
84
  column_name = filter_feature.name.name
99
85
 
100
86
  # Extract the value from the parameter
101
- value = None
102
- for param in filter_feature.parameter:
103
- if param[0] == "value":
104
- value = param[1]
105
- break
87
+ value = filter_feature.parameter.value
106
88
 
107
89
  if value is None:
108
90
  raise ValueError(f"Filter parameter 'value' not found in {filter_feature.parameter}")
@@ -120,11 +102,7 @@ class DuckDBFilterEngine(BaseFilterEngine):
120
102
  column_name = filter_feature.name.name
121
103
 
122
104
  # Extract the value from the parameter
123
- value = None
124
- for param in filter_feature.parameter:
125
- if param[0] == "value":
126
- value = param[1]
127
- break
105
+ value = filter_feature.parameter.value
128
106
 
129
107
  if value is None:
130
108
  raise ValueError(f"Filter parameter 'value' not found in {filter_feature.parameter}")
@@ -139,11 +117,7 @@ class DuckDBFilterEngine(BaseFilterEngine):
139
117
  column_name = filter_feature.name.name
140
118
 
141
119
  # Extract the values from the parameter
142
- values = None
143
- for param in filter_feature.parameter:
144
- if param[0] == "values":
145
- values = param[1]
146
- break
120
+ values = filter_feature.parameter.values
147
121
 
148
122
  if values is None:
149
123
  raise ValueError(f"Filter parameter 'values' not found in {filter_feature.parameter}")
@@ -13,7 +13,7 @@ except ImportError:
13
13
  pa = None
14
14
 
15
15
 
16
- class DuckDBPyarrowTransformer(BaseTransformer):
16
+ class DuckDBPyArrowTransformer(BaseTransformer):
17
17
  """
18
18
  Transformer for converting between DuckDB relations and PyArrow Table.
19
19
 
@@ -6,13 +6,13 @@ try:
6
6
  from pyiceberg.table import Table as IcebergTable
7
7
  from pyiceberg.expressions import GreaterThan, LessThan, GreaterThanOrEqual, LessThanOrEqual, EqualTo, And
8
8
  except ImportError:
9
- IcebergTable: Optional[Type] = None # type: ignore
10
- GreaterThan: Optional[Type] = None # type: ignore
11
- LessThan: Optional[Type] = None # type: ignore
12
- GreaterThanOrEqual: Optional[Type] = None # type: ignore
13
- LessThanOrEqual: Optional[Type] = None # type: ignore
14
- EqualTo: Optional[Type] = None # type: ignore
15
- And: Optional[Type] = None # type: ignore
9
+ IcebergTable: Optional[Type[Any]] = None # type: ignore[no-redef]
10
+ GreaterThan: Optional[Type[Any]] = None # type: ignore[no-redef]
11
+ LessThan: Optional[Type[Any]] = None # type: ignore[no-redef]
12
+ GreaterThanOrEqual: Optional[Type[Any]] = None # type: ignore[no-redef]
13
+ LessThanOrEqual: Optional[Type[Any]] = None # type: ignore[no-redef]
14
+ EqualTo: Optional[Type[Any]] = None # type: ignore[no-redef]
15
+ And: Optional[Type[Any]] = None # type: ignore[no-redef]
16
16
 
17
17
 
18
18
  class IcebergFilterEngine(BaseFilterEngine):
@@ -111,7 +111,7 @@ class IcebergFilterEngine(BaseFilterEngine):
111
111
  max_expr = (
112
112
  LessThan(column_name, max_param) if is_max_exclusive else LessThanOrEqual(column_name, max_param)
113
113
  )
114
- expressions.append(max_expr) # type: ignore
114
+ expressions.append(max_expr) # type: ignore[arg-type]
115
115
 
116
116
  if len(expressions) == 1:
117
117
  return expressions[0]
@@ -123,15 +123,25 @@ class IcebergFilterEngine(BaseFilterEngine):
123
123
  @classmethod
124
124
  def _extract_parameter_value(cls, filter_feature: SingleFilter, param_name: str) -> Any:
125
125
  """Extract a parameter value from filter feature."""
126
- for param in filter_feature.parameter:
127
- if param[0] == param_name:
128
- return param[1]
126
+ if param_name == "value":
127
+ return filter_feature.parameter.value
128
+ elif param_name == "values":
129
+ return filter_feature.parameter.values
130
+ elif param_name == "min":
131
+ return filter_feature.parameter.min_value
132
+ elif param_name == "max":
133
+ return filter_feature.parameter.max_value
134
+ elif param_name == "max_exclusive":
135
+ return filter_feature.parameter.max_exclusive
129
136
  return None
130
137
 
131
138
  @classmethod
132
139
  def _has_parameter(cls, filter_feature: SingleFilter, param_name: str) -> bool:
133
140
  """Check if filter feature has a specific parameter."""
134
- return any(param[0] == param_name for param in filter_feature.parameter)
141
+ value = cls._extract_parameter_value(filter_feature, param_name)
142
+ if param_name == "max_exclusive":
143
+ return True
144
+ return value is not None
135
145
 
136
146
  # Standard filter methods - not used for Iceberg but required by interface
137
147
  @classmethod
@@ -10,8 +10,8 @@ try:
10
10
  from pyiceberg.table import Table as IcebergTable
11
11
  import pyarrow as pa
12
12
  except ImportError:
13
- Catalog = None # type: ignore
14
- IcebergTable = None # type: ignore
13
+ Catalog = None # type: ignore[assignment,misc]
14
+ IcebergTable = None # type: ignore[assignment,misc]
15
15
  pa = None
16
16
 
17
17
 
@@ -5,11 +5,11 @@ try:
5
5
  from pyiceberg.table import Table as IcebergTable
6
6
  import pyarrow as pa
7
7
  except ImportError:
8
- IcebergTable = None # type: ignore
8
+ IcebergTable = None # type: ignore[assignment,misc]
9
9
  pa = None
10
10
 
11
11
 
12
- class IcebergPyarrowTransformer(BaseTransformer):
12
+ class IcebergPyArrowTransformer(BaseTransformer):
13
13
  """
14
14
  Transformer for converting between Iceberg tables and PyArrow tables.
15
15
 
@@ -12,7 +12,7 @@ except ImportError:
12
12
  pd = None
13
13
 
14
14
 
15
- class PandasDataframe(ComputeFrameWork):
15
+ class PandasDataFrame(ComputeFrameWork):
16
16
  @staticmethod
17
17
  def is_available() -> bool:
18
18
  """Check if Pandas is installed and available."""
@@ -25,7 +25,7 @@ class PandasDataframe(ComputeFrameWork):
25
25
 
26
26
  @staticmethod
27
27
  def expected_data_framework() -> Any:
28
- return PandasDataframe.pd_dataframe()
28
+ return PandasDataFrame.pd_dataframe()
29
29
 
30
30
  def merge_engine(self) -> Type[BaseMergeEngine]:
31
31
  return PandasMergeEngine
@@ -13,7 +13,7 @@ except ImportError:
13
13
  pa = None
14
14
 
15
15
 
16
- class PandasPyarrowTransformer(BaseTransformer):
16
+ class PandasPyArrowTransformer(BaseTransformer):
17
17
  """
18
18
  Transformer for converting between Pandas DataFrame and PyArrow Table.
19
19
 
@@ -9,10 +9,10 @@ from mloda_plugins.compute_framework.base_implementations.polars.polars_filter_e
9
9
  try:
10
10
  import polars as pl
11
11
  except ImportError:
12
- pl = None # type: ignore
12
+ pl = None # type: ignore[assignment]
13
13
 
14
14
 
15
- class PolarsDataframe(ComputeFrameWork):
15
+ class PolarsDataFrame(ComputeFrameWork):
16
16
  @staticmethod
17
17
  def is_available() -> bool:
18
18
  """Check if Polars is installed and available."""
@@ -25,7 +25,7 @@ class PolarsDataframe(ComputeFrameWork):
25
25
 
26
26
  @staticmethod
27
27
  def expected_data_framework() -> Any:
28
- return PolarsDataframe.pl_dataframe()
28
+ return PolarsDataFrame.pl_dataframe()
29
29
 
30
30
  def merge_engine(self) -> Type[BaseMergeEngine]:
31
31
  return PolarsMergeEngine
@@ -1,18 +1,18 @@
1
1
  from typing import Any, Set, Type
2
2
  from mloda_core.abstract_plugins.components.feature_name import FeatureName
3
- from mloda_plugins.compute_framework.base_implementations.polars.dataframe import PolarsDataframe
3
+ from mloda_plugins.compute_framework.base_implementations.polars.dataframe import PolarsDataFrame
4
4
  from mloda_core.abstract_plugins.components.merge.base_merge_engine import BaseMergeEngine
5
5
  from mloda_plugins.compute_framework.base_implementations.polars.polars_lazy_merge_engine import PolarsLazyMergeEngine
6
6
 
7
7
  try:
8
8
  import polars as pl
9
9
  except ImportError:
10
- pl = None # type: ignore
10
+ pl = None # type: ignore[assignment]
11
11
 
12
12
 
13
- class PolarsLazyDataframe(PolarsDataframe):
13
+ class PolarsLazyDataFrame(PolarsDataFrame):
14
14
  """
15
- Lazy evaluation version of PolarsDataframe using pl.LazyFrame.
15
+ Lazy evaluation version of PolarsDataFrame using pl.LazyFrame.
16
16
 
17
17
  This compute framework defers execution of operations until results are explicitly
18
18
  requested, enabling query optimization and reduced memory usage for large datasets.
@@ -20,7 +20,7 @@ class PolarsLazyDataframe(PolarsDataframe):
20
20
 
21
21
  @staticmethod
22
22
  def expected_data_framework() -> Any:
23
- return PolarsLazyDataframe.pl_lazy_frame()
23
+ return PolarsLazyDataFrame.pl_lazy_frame()
24
24
 
25
25
  def merge_engine(self) -> Type[BaseMergeEngine]:
26
26
  return PolarsLazyMergeEngine
@@ -5,7 +5,7 @@ from mloda_core.filter.single_filter import SingleFilter
5
5
  try:
6
6
  import polars as pl
7
7
  except ImportError:
8
- pl = None # type: ignore
8
+ pl = None # type: ignore[assignment]
9
9
 
10
10
 
11
11
  class PolarsFilterEngine(BaseFilterEngine):
@@ -37,11 +37,7 @@ class PolarsFilterEngine(BaseFilterEngine):
37
37
  column_name = filter_feature.name.name
38
38
 
39
39
  # Extract the value from the parameter
40
- value = None
41
- for param in filter_feature.parameter:
42
- if param[0] == "value":
43
- value = param[1]
44
- break
40
+ value = filter_feature.parameter.value
45
41
 
46
42
  if value is None:
47
43
  raise ValueError(f"Filter parameter 'value' not found in {filter_feature.parameter}")
@@ -53,14 +49,8 @@ class PolarsFilterEngine(BaseFilterEngine):
53
49
  column_name = filter_feature.name.name
54
50
 
55
51
  # Check if this is a complex parameter with max/max_exclusive or a simple one with value
56
- has_max = False
57
- has_value = False
58
-
59
- for param in filter_feature.parameter:
60
- if param[0] == "max":
61
- has_max = True
62
- elif param[0] == "value":
63
- has_value = True
52
+ has_max = filter_feature.parameter.max_value is not None
53
+ has_value = filter_feature.parameter.value is not None
64
54
 
65
55
  if has_max:
66
56
  # Complex parameter - use get_min_max_operator
@@ -82,11 +72,7 @@ class PolarsFilterEngine(BaseFilterEngine):
82
72
  return data.filter(pl.col(column_name) <= max_parameter)
83
73
  elif has_value:
84
74
  # Simple parameter - extract the value
85
- value = None
86
- for param in filter_feature.parameter:
87
- if param[0] == "value":
88
- value = param[1]
89
- break
75
+ value = filter_feature.parameter.value
90
76
 
91
77
  if value is None:
92
78
  raise ValueError(f"Filter parameter 'value' not found in {filter_feature.parameter}")
@@ -100,11 +86,7 @@ class PolarsFilterEngine(BaseFilterEngine):
100
86
  column_name = filter_feature.name.name
101
87
 
102
88
  # Extract the value from the parameter
103
- value = None
104
- for param in filter_feature.parameter:
105
- if param[0] == "value":
106
- value = param[1]
107
- break
89
+ value = filter_feature.parameter.value
108
90
 
109
91
  if value is None:
110
92
  raise ValueError(f"Filter parameter 'value' not found in {filter_feature.parameter}")
@@ -116,11 +98,7 @@ class PolarsFilterEngine(BaseFilterEngine):
116
98
  column_name = filter_feature.name.name
117
99
 
118
100
  # Extract the value from the parameter
119
- value = None
120
- for param in filter_feature.parameter:
121
- if param[0] == "value":
122
- value = param[1]
123
- break
101
+ value = filter_feature.parameter.value
124
102
 
125
103
  if value is None:
126
104
  raise ValueError(f"Filter parameter 'value' not found in {filter_feature.parameter}")
@@ -132,11 +110,7 @@ class PolarsFilterEngine(BaseFilterEngine):
132
110
  column_name = filter_feature.name.name
133
111
 
134
112
  # Extract the values from the parameter
135
- values = None
136
- for param in filter_feature.parameter:
137
- if param[0] == "values":
138
- values = param[1]
139
- break
113
+ values = filter_feature.parameter.values
140
114
 
141
115
  if values is None:
142
116
  raise ValueError(f"Filter parameter 'values' not found in {filter_feature.parameter}")
@@ -5,7 +5,7 @@ from mloda_plugins.compute_framework.base_implementations.polars.polars_merge_en
5
5
  try:
6
6
  import polars as pl
7
7
  except ImportError:
8
- pl = None # type: ignore
8
+ pl = None # type: ignore[assignment]
9
9
 
10
10
 
11
11
  class PolarsLazyMergeEngine(PolarsMergeEngine):